Finalisation de la fonction pour créer un dataset
This commit is contained in:
parent
5f621c2352
commit
197a847085
|
@ -1529,7 +1529,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 25,
|
||||
"id": "98f78cd5-b694-4cc6-b033-20170aa13e8d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -1559,7 +1559,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 26,
|
||||
"id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -1593,7 +1593,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 27,
|
||||
"id": "24537647-bc29-4777-9848-ac4120a4aa60",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -1603,7 +1603,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 28,
|
||||
"id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -1639,7 +1639,7 @@
|
|||
" <th>0</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
|
@ -1660,14 +1660,14 @@
|
|||
" <th>3</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
|
@ -1676,14 +1676,14 @@
|
|||
],
|
||||
"text/plain": [
|
||||
" customer_id nb_campaigns nb_campaigns_opened time_to_open\n",
|
||||
"0 2 4 0.0 NaT\n",
|
||||
"0 2 4 NaN NaT\n",
|
||||
"1 3 222 124.0 1 days 00:28:30.169354838\n",
|
||||
"2 4 7 7.0 1 days 04:31:01.428571428\n",
|
||||
"3 5 4 0.0 NaT\n",
|
||||
"4 6 20 0.0 NaT"
|
||||
"3 5 4 NaN NaT\n",
|
||||
"4 6 20 NaN NaT"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1702,7 +1702,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 29,
|
||||
"id": "043303fe-e90f-4689-a2a9-5d690555a045",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -1765,7 +1765,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"execution_count": 30,
|
||||
"id": "5882234a-1ed5-4269-87a6-0d75613476e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -1775,7 +1775,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"execution_count": 31,
|
||||
"id": "5f2046cf-ffde-4521-91e7-b727b8bc17f5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -1811,6 +1811,8 @@
|
|||
" <th>purchase_date_max</th>\n",
|
||||
" <th>time_between_purchase</th>\n",
|
||||
" <th>nb_tickets_internet</th>\n",
|
||||
" <th>name_event_types</th>\n",
|
||||
" <th>avg_amount</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
|
@ -1827,6 +1829,8 @@
|
|||
" <td>4.179306</td>\n",
|
||||
" <td>3258.011562</td>\n",
|
||||
" <td>51.0</td>\n",
|
||||
" <td>offre muséale individuel</td>\n",
|
||||
" <td>6.150659</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
|
@ -1841,6 +1845,8 @@
|
|||
" <td>5.221840</td>\n",
|
||||
" <td>3692.976389</td>\n",
|
||||
" <td>2988.0</td>\n",
|
||||
" <td>spectacle vivant</td>\n",
|
||||
" <td>7.762474</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
|
@ -1855,6 +1861,8 @@
|
|||
" <td>0.146331</td>\n",
|
||||
" <td>3803.223461</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>offre muséale groupe</td>\n",
|
||||
" <td>4.452618</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
|
@ -1869,6 +1877,8 @@
|
|||
" <td>1408.715532</td>\n",
|
||||
" <td>1093.999977</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>formule adhésion</td>\n",
|
||||
" <td>6.439463</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
|
@ -1883,6 +1893,8 @@
|
|||
" <td>1340.308160</td>\n",
|
||||
" <td>700.966389</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>offre muséale individuel</td>\n",
|
||||
" <td>6.150659</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
|
@ -1903,15 +1915,22 @@
|
|||
"3 5 1 2502.715509 1408.715532 \n",
|
||||
"4 1 0 2041.274549 1340.308160 \n",
|
||||
"\n",
|
||||
" time_between_purchase nb_tickets_internet \n",
|
||||
"0 3258.011562 51.0 \n",
|
||||
"1 3692.976389 2988.0 \n",
|
||||
"2 3803.223461 9.0 \n",
|
||||
"3 1093.999977 5.0 \n",
|
||||
"4 700.966389 0.0 "
|
||||
" time_between_purchase nb_tickets_internet name_event_types \\\n",
|
||||
"0 3258.011562 51.0 offre muséale individuel \n",
|
||||
"1 3692.976389 2988.0 spectacle vivant \n",
|
||||
"2 3803.223461 9.0 offre muséale groupe \n",
|
||||
"3 1093.999977 5.0 formule adhésion \n",
|
||||
"4 700.966389 0.0 offre muséale individuel \n",
|
||||
"\n",
|
||||
" avg_amount \n",
|
||||
"0 6.150659 \n",
|
||||
"1 7.762474 \n",
|
||||
"2 4.452618 \n",
|
||||
"3 6.439463 \n",
|
||||
"4 6.150659 "
|
||||
]
|
||||
},
|
||||
"execution_count": 36,
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1922,7 +1941,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"execution_count": 32,
|
||||
"id": "a4a2311d-8a72-4030-afd5-218004d5d2a5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -1946,7 +1965,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 33,
|
||||
"id": "83230baa-9a8a-4614-b629-e99c2505c696",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -2159,7 +2178,7 @@
|
|||
"[5 rows x 37 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 43,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -2173,7 +2192,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 34,
|
||||
"id": "433921de-03ad-4024-9462-ecd267db1756",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -2339,7 +2358,7 @@
|
|||
" <td>formule adhésion</td>\n",
|
||||
" <td>6.439463</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
|
@ -2381,12 +2400,12 @@
|
|||
"1 NaN NaT \n",
|
||||
"2 NaN NaT \n",
|
||||
"3 NaN NaT \n",
|
||||
"4 0.0 NaT \n",
|
||||
"4 NaN NaT \n",
|
||||
"\n",
|
||||
"[5 rows x 40 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 44,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -2400,7 +2419,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": 35,
|
||||
"id": "25e54131-6835-4e94-86d3-1a78520ed7bc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -2426,7 +2445,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 36,
|
||||
"id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -2468,7 +2487,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"execution_count": 38,
|
||||
"id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -2482,7 +2501,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"execution_count": 39,
|
||||
"id": "d53825e4-6453-45bc-94f2-7b2504ec4afb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -2688,7 +2707,7 @@
|
|||
"[5 rows x 28 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 64,
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -2699,7 +2718,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 40,
|
||||
"id": "1e42a790-b215-4107-a969-85005da06ebd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -2713,28 +2732,394 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"execution_count": 41,
|
||||
"id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>customer_id</th>\n",
|
||||
" <th>event_type_id</th>\n",
|
||||
" <th>nb_tickets</th>\n",
|
||||
" <th>nb_purchases</th>\n",
|
||||
" <th>total_amount</th>\n",
|
||||
" <th>nb_suppliers</th>\n",
|
||||
" <th>vente_internet_max</th>\n",
|
||||
" <th>purchase_date_min</th>\n",
|
||||
" <th>purchase_date_max</th>\n",
|
||||
" <th>time_between_purchase</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>average_ticket_basket</th>\n",
|
||||
" <th>total_price</th>\n",
|
||||
" <th>purchase_count</th>\n",
|
||||
" <th>first_buying_date</th>\n",
|
||||
" <th>country</th>\n",
|
||||
" <th>age</th>\n",
|
||||
" <th>tenant_id</th>\n",
|
||||
" <th>nb_campaigns</th>\n",
|
||||
" <th>nb_campaigns_opened</th>\n",
|
||||
" <th>time_to_open</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>384226.0</td>\n",
|
||||
" <td>194790.0</td>\n",
|
||||
" <td>2686540.5</td>\n",
|
||||
" <td>7.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>3262.190868</td>\n",
|
||||
" <td>4.179306</td>\n",
|
||||
" <td>3258.011562</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.956087</td>\n",
|
||||
" <td>8821221.5</td>\n",
|
||||
" <td>641472.0</td>\n",
|
||||
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>453242.0</td>\n",
|
||||
" <td>228945.0</td>\n",
|
||||
" <td>3248965.5</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>3698.198229</td>\n",
|
||||
" <td>5.221840</td>\n",
|
||||
" <td>3692.976389</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.956087</td>\n",
|
||||
" <td>8821221.5</td>\n",
|
||||
" <td>641472.0</td>\n",
|
||||
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>201750.0</td>\n",
|
||||
" <td>107110.0</td>\n",
|
||||
" <td>1459190.0</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>3803.369792</td>\n",
|
||||
" <td>0.146331</td>\n",
|
||||
" <td>3803.223461</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.956087</td>\n",
|
||||
" <td>8821221.5</td>\n",
|
||||
" <td>641472.0</td>\n",
|
||||
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>217356.0</td>\n",
|
||||
" <td>111786.0</td>\n",
|
||||
" <td>1435871.5</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2502.715509</td>\n",
|
||||
" <td>1408.715532</td>\n",
|
||||
" <td>1093.999977</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.956087</td>\n",
|
||||
" <td>8821221.5</td>\n",
|
||||
" <td>641472.0</td>\n",
|
||||
" <td>2013-06-10 10:37:58+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>143.0</td>\n",
|
||||
" <td>143.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>2041.274549</td>\n",
|
||||
" <td>1340.308160</td>\n",
|
||||
" <td>700.966389</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>307.0</td>\n",
|
||||
" <td>2018-04-07 12:55:07+00:00</td>\n",
|
||||
" <td>fr</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1311.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156291</th>\n",
|
||||
" <td>1256133</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>33.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.110521</td>\n",
|
||||
" <td>0.110521</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156292</th>\n",
|
||||
" <td>1256134</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>44.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.092095</td>\n",
|
||||
" <td>0.092095</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156293</th>\n",
|
||||
" <td>1256135</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>11.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.087894</td>\n",
|
||||
" <td>0.087894</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156294</th>\n",
|
||||
" <td>1256136</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>22.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.040394</td>\n",
|
||||
" <td>0.040394</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>156295</th>\n",
|
||||
" <td>1256137</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>22.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>156296 rows × 40 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"Index(['customer_id', 'event_type_id', 'nb_tickets', 'nb_purchases',\n",
|
||||
" 'total_amount', 'nb_suppliers', 'vente_internet_max',\n",
|
||||
" 'purchase_date_min', 'purchase_date_max', 'time_between_purchase',\n",
|
||||
" 'nb_tickets_internet', 'name_event_types', 'avg_amount', 'birthdate',\n",
|
||||
" 'street_id', 'is_partner', 'gender', 'is_email_true', 'opt_in',\n",
|
||||
" 'structure_id', 'profession', 'language', 'mcp_contact_id',\n",
|
||||
" 'last_buying_date', 'max_price', 'ticket_sum', 'average_price',\n",
|
||||
" 'fidelity', 'average_purchase_delay', 'average_price_basket',\n",
|
||||
" 'average_ticket_basket', 'total_price', 'purchase_count',\n",
|
||||
" 'first_buying_date', 'country', 'age', 'tenant_id', 'nb_campaigns',\n",
|
||||
" 'nb_campaigns_opened', 'time_to_open'],\n",
|
||||
" dtype='object')"
|
||||
" customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
|
||||
"0 1 2.0 384226.0 194790.0 2686540.5 \n",
|
||||
"1 1 4.0 453242.0 228945.0 3248965.5 \n",
|
||||
"2 1 5.0 201750.0 107110.0 1459190.0 \n",
|
||||
"3 1 6.0 217356.0 111786.0 1435871.5 \n",
|
||||
"4 2 2.0 143.0 143.0 0.0 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"156291 1256133 5.0 3.0 1.0 33.0 \n",
|
||||
"156292 1256134 5.0 4.0 1.0 44.0 \n",
|
||||
"156293 1256135 5.0 1.0 1.0 11.0 \n",
|
||||
"156294 1256136 5.0 2.0 1.0 22.0 \n",
|
||||
"156295 1256137 5.0 2.0 1.0 22.0 \n",
|
||||
"\n",
|
||||
" nb_suppliers vente_internet_max purchase_date_min \\\n",
|
||||
"0 7.0 1.0 3262.190868 \n",
|
||||
"1 6.0 1.0 3698.198229 \n",
|
||||
"2 6.0 1.0 3803.369792 \n",
|
||||
"3 5.0 1.0 2502.715509 \n",
|
||||
"4 1.0 0.0 2041.274549 \n",
|
||||
"... ... ... ... \n",
|
||||
"156291 1.0 1.0 0.110521 \n",
|
||||
"156292 1.0 1.0 0.092095 \n",
|
||||
"156293 1.0 1.0 0.087894 \n",
|
||||
"156294 1.0 1.0 0.040394 \n",
|
||||
"156295 1.0 1.0 0.000000 \n",
|
||||
"\n",
|
||||
" purchase_date_max time_between_purchase ... average_ticket_basket \\\n",
|
||||
"0 4.179306 3258.011562 ... 1.956087 \n",
|
||||
"1 5.221840 3692.976389 ... 1.956087 \n",
|
||||
"2 0.146331 3803.223461 ... 1.956087 \n",
|
||||
"3 1408.715532 1093.999977 ... 1.956087 \n",
|
||||
"4 1340.308160 700.966389 ... 1.000000 \n",
|
||||
"... ... ... ... ... \n",
|
||||
"156291 0.110521 0.000000 ... NaN \n",
|
||||
"156292 0.092095 0.000000 ... NaN \n",
|
||||
"156293 0.087894 0.000000 ... NaN \n",
|
||||
"156294 0.040394 0.000000 ... NaN \n",
|
||||
"156295 0.000000 0.000000 ... NaN \n",
|
||||
"\n",
|
||||
" total_price purchase_count first_buying_date country age \\\n",
|
||||
"0 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
|
||||
"1 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
|
||||
"2 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
|
||||
"3 8821221.5 641472.0 2013-06-10 10:37:58+00:00 fr NaN \n",
|
||||
"4 0.0 307.0 2018-04-07 12:55:07+00:00 fr NaN \n",
|
||||
"... ... ... ... ... .. \n",
|
||||
"156291 NaN NaN NaT NaN NaN \n",
|
||||
"156292 NaN NaN NaT NaN NaN \n",
|
||||
"156293 NaN NaN NaT NaN NaN \n",
|
||||
"156294 NaN NaN NaT NaN NaN \n",
|
||||
"156295 NaN NaN NaT NaN NaN \n",
|
||||
"\n",
|
||||
" tenant_id nb_campaigns nb_campaigns_opened time_to_open \n",
|
||||
"0 1311.0 0.0 0.0 NaT \n",
|
||||
"1 1311.0 0.0 0.0 NaT \n",
|
||||
"2 1311.0 0.0 0.0 NaT \n",
|
||||
"3 1311.0 0.0 0.0 NaT \n",
|
||||
"4 1311.0 4.0 0.0 NaT \n",
|
||||
"... ... ... ... ... \n",
|
||||
"156291 NaN NaN NaN NaT \n",
|
||||
"156292 NaN NaN NaN NaT \n",
|
||||
"156293 NaN NaN NaN NaT \n",
|
||||
"156294 NaN NaN NaN NaT \n",
|
||||
"156295 NaN NaN NaN NaT \n",
|
||||
"\n",
|
||||
"[156296 rows x 40 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 66,
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -2745,7 +3130,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 42,
|
||||
"id": "ebf6d843-dcc0-4e83-b063-94806c0bac17",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -2778,7 +3163,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -34,6 +34,8 @@ for i in range(len(liste_database)) :
|
|||
nom_dataframe = df_prefix + re.search(r'\/(\d+)\/(\d+)([a-zA-Z_]+)\.csv$', current_path).group(3)
|
||||
globals()[nom_dataframe] = df
|
||||
|
||||
## 1 - Cleaning of the datasets
|
||||
|
||||
# Cleaning customerplus
|
||||
df1_customerplus_clean = preprocessing_customerplus(df1_customersplus)
|
||||
|
||||
|
@ -61,29 +63,91 @@ df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_
|
|||
# Selection des variables d'intérêts
|
||||
df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'purchase_id' ,'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]
|
||||
|
||||
# Fusion de l'ensemble et creation des KPI
|
||||
df1_campaigns_kpi = campaigns_kpi_function(campaigns_information = df1_campaigns_information)
|
||||
## 2 - Construction of KPIs on a given period
|
||||
|
||||
df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_products_purchased_reduced)
|
||||
def explanatory_variables(min_date = "2021-09-01", max_date = "2023-09-01", df_campaigns_information = df1_campaigns_information, df_products_purchased_reduced = df1_products_purchased_reduced, df_customerplus_clean = df1_customerplus_clean):
    """Build the explanatory variables (KPIs) observed between two dates.

    Campaigns are kept when ``sent_at`` falls in ``[min_date, max_date]`` and
    purchases when ``purchase_date`` falls in the same closed interval. The
    campaign and ticket KPIs are then computed on the filtered data and merged
    with the cleaned customer table.

    Parameters
    ----------
    min_date, max_date : str
        Period bounds; parsed as ISO 8601 and converted to UTC timestamps.
    df_campaigns_information : DataFrame
        Must expose ``sent_at`` and ``opened_at``; passed to
        ``campaigns_kpi_function`` after filtering.
    df_products_purchased_reduced : DataFrame
        Must expose ``purchase_date``; passed to ``tickets_kpi_function``
        after filtering.
    df_customerplus_clean : DataFrame
        Customer table, joined on ``customer_id``.

    Returns
    -------
    DataFrame
        Outer merge of the ticket KPIs with the customer/campaign KPIs on
        ``customer_id``; missing campaign and purchase counters are set to 0.
    """
    # Parse the bounds once as UTC timestamps so that every filter below
    # compares like with like.
    max_date = pd.to_datetime(max_date, utc = True, format = 'ISO8601')
    min_date = pd.to_datetime(min_date, utc = True, format = 'ISO8601')

    # Keep the campaigns sent during the period. The explicit copy makes the
    # NaT assignment below (and any column added by the KPI helper) act on a
    # private frame instead of a slice of the caller's data.
    sent_in_period = (df_campaigns_information['sent_at'] <= max_date) & (df_campaigns_information['sent_at'] >= min_date)
    df_campaigns_information = df_campaigns_information[sent_in_period].copy()

    # An opening that happens on or after max_date is not observable yet:
    # blank it out. A single .loc assignment replaces the chained indexing,
    # which is not guaranteed to write into the frame.
    opened_too_late = df_campaigns_information['opened_at'] >= max_date
    df_campaigns_information.loc[opened_too_late, 'opened_at'] = pd.NaT

    # Keep the purchases made during the period (copied for the same reason).
    purchased_in_period = (df_products_purchased_reduced['purchase_date'] <= max_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)
    df_products_purchased_reduced = df_products_purchased_reduced[purchased_in_period].copy()

    print("Data filtering : SUCCESS")

    # KPI construction on the filtered data
    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)
    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)

    print("KPIs construction : SUCCESS")

    # Attach the campaign KPIs to every customer; a customer absent from the
    # campaign KPIs gets 0 for both campaign counters.
    df_customer = pd.merge(df_customerplus_clean, df_campaigns_kpi, on = 'customer_id', how = 'left')
    campaign_counters = ['nb_campaigns', 'nb_campaigns_opened']
    df_customer[campaign_counters] = df_customer[campaign_counters].fillna(0)

    # Attach the purchase-behaviour KPIs; the outer join keeps customers
    # without purchases, whose purchase counters are set to 0.
    df_customer_product = pd.merge(df_tickets_kpi, df_customer, on = 'customer_id', how = 'outer')
    purchase_counters = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']
    df_customer_product[purchase_counters] = df_customer_product[purchase_counters].fillna(0)

    return df_customer_product
|
||||
|
||||
# Function that builds the explained (target) variable
|
||||
def explained_variable(min_date = "2023-08-01", max_date = "2023-11-01", df_products_purchased_reduced = df1_products_purchased_reduced):
    """Build the explained (target) variable: who purchased what in a period.

    Parameters
    ----------
    min_date, max_date : str
        Period bounds; parsed as ISO 8601 and converted to UTC timestamps.
        The period is ``(min_date, max_date]``: ``min_date`` itself is
        excluded.
    df_products_purchased_reduced : DataFrame
        Must expose ``purchase_date``, ``customer_id`` and ``event_type_id``.

    Returns
    -------
    DataFrame
        One row per distinct (``customer_id``, ``event_type_id``) pair with a
        purchase in the period, plus ``y_has_purchased`` always equal to 1.
        Pairs without a purchase are absent from the result.
    """
    # Parse the bounds the same way explanatory_variables does, instead of
    # comparing the date column with raw strings.
    max_date = pd.to_datetime(max_date, utc = True, format = 'ISO8601')
    min_date = pd.to_datetime(min_date, utc = True, format = 'ISO8601')

    # Purchases inside the period
    in_period = (df_products_purchased_reduced['purchase_date'] <= max_date) & (df_products_purchased_reduced['purchase_date'] > min_date)

    # drop_duplicates returns a new frame, so adding the purchase indicator
    # afterwards never writes into a slice of the caller's data.
    y = df_products_purchased_reduced.loc[in_period, ['customer_id', 'event_type_id']].drop_duplicates()
    y['y_has_purchased'] = 1

    return y
|
||||
|
||||
## Exportation
|
||||
|
||||
# Exportation vers 'projet-bdc2324-team1'
|
||||
BUCKET_OUT = "projet-bdc2324-team1"
|
||||
FILE_KEY_OUT_S3 = "1_Output/Company 1 - Segmentation base.csv"
|
||||
# Dossier d'exportation
|
||||
BUCKET_OUT = "projet-bdc2324-team1/1_Output/Logistique Regression databases - First approach"
|
||||
|
||||
X_test = explanatory_variables(min_date = "2021-08-01", max_date = "2023-08-01", df_campaigns_information = df1_campaigns_information, df_products_purchased_reduced = df1_products_purchased_reduced, df_customerplus_clean = df1_customerplus_clean)
|
||||
|
||||
y_test = explained_variable(min_date = "2023-08-01", max_date = "2023-11-01", df_products_purchased_reduced = df1_products_purchased_reduced)
|
||||
|
||||
dataset_test = pd.merge(X_test, y_test, on = ['customer_id', 'event_type_id'], how = 'left')
|
||||
|
||||
FILE_KEY_OUT_S3 = "dataset_test.csv"
|
||||
FILE_PATH_OUT_S3 = BUCKET_OUT + "/" + FILE_KEY_OUT_S3
|
||||
|
||||
# Write the test dataset, and only it, as CSV to FILE_PATH_OUT_S3.
with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
    dataset_test.to_csv(file_out, index = False)
|
||||
|
||||
print("Exportation dataset test : SUCCESS")
|
||||
|
||||
X_train = explanatory_variables(min_date = "2021-05-01", max_date = "2023-05-01", df_campaigns_information = df1_campaigns_information, df_products_purchased_reduced = df1_products_purchased_reduced, df_customerplus_clean = df1_customerplus_clean)
|
||||
|
||||
y_train = explained_variable(min_date = "2023-05-01", max_date = "2023-08-01", df_products_purchased_reduced = df1_products_purchased_reduced)
|
||||
|
||||
dataset_train = pd.merge(X_train, y_train, on = ['customer_id', 'event_type_id'], how = 'left')
|
||||
|
||||
FILE_KEY_OUT_S3 = "dataset_train.csv"
|
||||
FILE_PATH_OUT_S3 = BUCKET_OUT + "/" + FILE_KEY_OUT_S3
|
||||
|
||||
# Write the training dataset as CSV to FILE_PATH_OUT_S3; this block must
# export dataset_train, not dataset_test.
with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
    dataset_train.to_csv(file_out, index = False)
|
||||
|
||||
print("Exportation dataset train : SUCCESS")
|
||||
|
||||
|
||||
|
||||
# # Exportation vers 'projet-bdc2324-team1'
|
||||
|
||||
|
||||
print("Exportation base de la base X d'entraînement : SUCCESS")
|
||||
|
|
|
@ -5,7 +5,7 @@ def campaigns_kpi_function(campaigns_information = None):
|
|||
nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()
|
||||
nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)
|
||||
# Temps d'ouverture en min moyen
|
||||
campaigns_information['time_to_open'] = campaigns_information['opened_at'] - campaigns_information['delivered_at']
|
||||
campaigns_information['time_to_open'] = pd.to_datetime(campaigns_information['opened_at'], utc = True, format = 'ISO8601') - pd.to_datetime(campaigns_information['delivered_at'], utc = True, format = 'ISO8601')
|
||||
time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()
|
||||
|
||||
# Nombre de mail ouvert
|
||||
|
|
Loading…
Reference in New Issue
Block a user