Ajout brouillon
This commit is contained in:
parent
dc5e3d0df1
commit
3670299a0b
|
@ -524,6 +524,65 @@
|
|||
"export_in_temporary(target_agg, 'Target_kpi_concatenate')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cb6f06e6-78de-4b8d-a103-8366eff0493a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"v"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c5e864b1-adad-4267-b956-3f7ef371d677",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
def display_covering_time(df, company, datecover):
    """
    Record and print the span of purchase dates covered by one company.

    Args:
        df: table with a 'purchase_date' column whose min()/max() support
            strftime (presumably a pandas datetime column -- confirm with
            the caller).
        company: key under which the coverage is stored in `datecover`.
        datecover: dict mapping company -> list of daily datetimes; it is
            updated in place.

    Returns:
        The same `datecover` dict, where `datecover[company]` holds one
        midnight datetime per calendar day from the first to the last
        purchase date, both included.
    """
    day_format = "%Y-%m-%d"
    min_date = df['purchase_date'].min().strftime(day_format)
    max_date = df['purchase_date'].max().strftime(day_format)

    # Round-trip through the day format to drop the time-of-day component.
    first_day = datetime.strptime(min_date, day_format)
    last_day = datetime.strptime(max_date, day_format)

    # +1 so the last purchase day belongs to the coverage: the printed span
    # is inclusive, and a single-day dataset must not yield an empty list.
    n_days = (last_day - first_day).days + 1
    datecover[company] = [first_day + timedelta(days=offset) for offset in range(n_days)]

    print(f'Couverture Company {company} : {min_date} - {max_date}')
    return datecover
|
||||
"\n",
|
||||
"\n",
|
def compute_time_intersection(datecover):
    """
    Return the days that are covered by every company.

    Args:
        datecover: dict mapping company -> iterable of datetime objects.

    Returns:
        Sorted list of "YYYY-MM-DD" strings present in every company's
        coverage; an empty list when `datecover` is empty or the companies
        share no day.
    """
    # set.intersection() needs at least one set; no company means no shared day.
    if not datecover:
        return []

    shared_days = set.intersection(*(set(days) for days in datecover.values()))
    # ISO-formatted dates sort chronologically when sorted as strings.
    return sorted(day.strftime("%Y-%m-%d") for day in shared_days)
|
||||
"\n",
|
||||
"\n",
|
def df_coverage_modelization(sport, coverage_features=0.7):
    """
    Return the start_date, end_of_features and final_date used to build
    the train and test datasets.

    Args:
        sport: iterable of company identifiers; each one is passed to
            display_input_databases to load its "products_purchased_reduced"
            table (loader semantics are defined elsewhere -- confirm there).
        coverage_features: fraction of the shared coverage, counted in
            days from the start, after which end_of_features is taken.

    Returns:
        Tuple (start_date, end_of_features, final_date) of "YYYY-MM-DD"
        strings taken from the days shared by all companies.

    Raises:
        IndexError: if the companies share no covered day.
    """
    datecover = {}
    for company in sport:
        df_products_purchased_reduced = display_input_databases(
            company,
            file_name="products_purchased_reduced",
            datetime_col=['purchase_date'],
        )
        datecover = display_covering_time(df_products_purchased_reduced, company, datecover)

    dt_coverage = compute_time_intersection(datecover)
    start_date = dt_coverage[0]

    # Honour the caller's fraction; clamp so that a fraction of 1.0 selects
    # the last shared day instead of indexing past the end of the list.
    split_index = min(int(coverage_features * len(dt_coverage)), len(dt_coverage) - 1)
    end_of_features = dt_coverage[split_index]

    final_date = dt_coverage[-1]
    return start_date, end_of_features, final_date
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2435097a-95a5-43e1-84d0-7f6b701441ba",
|
||||
|
|
Loading…
Reference in New Issue
Block a user