Ajout brouillon
This commit is contained in:
parent
dc5e3d0df1
commit
3670299a0b
|
@ -524,6 +524,65 @@
|
||||||
"export_in_temporary(target_agg, 'Target_kpi_concatenate')"
|
"export_in_temporary(target_agg, 'Target_kpi_concatenate')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "cb6f06e6-78de-4b8d-a103-8366eff0493a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"v"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c5e864b1-adad-4267-b956-3f7ef371d677",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"def display_covering_time(df, company, datecover):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" This function draws the time coverage of each company\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" min_date = df['purchase_date'].min().strftime(\"%Y-%m-%d\")\n",
|
||||||
|
" max_date = df['purchase_date'].max().strftime(\"%Y-%m-%d\")\n",
|
||||||
|
" datecover[company] = [datetime.strptime(min_date, \"%Y-%m-%d\") + timedelta(days=x) for x in range((datetime.strptime(max_date, \"%Y-%m-%d\") - datetime.strptime(min_date, \"%Y-%m-%d\")).days)]\n",
|
||||||
|
" print(f'Couverture Company {company} : {min_date} - {max_date}')\n",
|
||||||
|
" return datecover\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def compute_time_intersection(datecover):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" This function returns the time coverage for all companies\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" timestamps_sets = [set(timestamps) for timestamps in datecover.values()]\n",
|
||||||
|
" intersection = set.intersection(*timestamps_sets)\n",
|
||||||
|
" intersection_list = list(intersection)\n",
|
||||||
|
" formated_dates = [dt.strftime(\"%Y-%m-%d\") for dt in intersection_list]\n",
|
||||||
|
" return sorted(formated_dates)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def df_coverage_modelization(sport, coverage_features = 0.7):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" This function returns start_date, end_of_features and final dates\n",
|
||||||
|
" that help to construct train and test datasets\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" datecover = {}\n",
|
||||||
|
" for company in sport:\n",
|
||||||
|
" df_products_purchased_reduced = display_input_databases(company, file_name = \"products_purchased_reduced\",\n",
|
||||||
|
" datetime_col = ['purchase_date'])\n",
|
||||||
|
" datecover = display_covering_time(df_products_purchased_reduced, company, datecover)\n",
|
||||||
|
" #print(datecover.keys())\n",
|
||||||
|
" dt_coverage = compute_time_intersection(datecover)\n",
|
||||||
|
" start_date = dt_coverage[0]\n",
|
||||||
|
" end_of_features = dt_coverage[int(0.7 * len(dt_coverage))]\n",
|
||||||
|
" final_date = dt_coverage[-1]\n",
|
||||||
|
" return start_date, end_of_features, final_date\n",
|
||||||
|
" "
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "2435097a-95a5-43e1-84d0-7f6b701441ba",
|
"id": "2435097a-95a5-43e1-84d0-7f6b701441ba",
|
||||||
|
|
Loading…
Reference in New Issue
Block a user