diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb
index b61b004..8d925b0 100644
--- a/0_Cleaning_and_merge.ipynb
+++ b/0_Cleaning_and_merge.ipynb
@@ -10,7 +10,7 @@
},
{
"cell_type": "code",
- "execution_count": 80,
+ "execution_count": 1,
"id": "15103481-8d74-404c-aa09-7601fe7730da",
"metadata": {},
"outputs": [],
@@ -33,7 +33,7 @@
},
{
"cell_type": "code",
- "execution_count": 81,
+ "execution_count": 2,
"id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
"metadata": {},
"outputs": [],
@@ -45,7 +45,7 @@
},
{
"cell_type": "code",
- "execution_count": 82,
+ "execution_count": 3,
"id": "a9b84234-d5df-4c43-a9cd-80cfe2f1e34d",
"metadata": {},
"outputs": [],
@@ -72,7 +72,7 @@
},
{
"cell_type": "code",
- "execution_count": 83,
+ "execution_count": 4,
"id": "699664b9-eee4-4f8d-a207-e524526560c5",
"metadata": {},
"outputs": [],
@@ -83,7 +83,7 @@
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": 5,
"id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
"metadata": {},
"outputs": [],
@@ -114,7 +114,7 @@
},
{
"cell_type": "code",
- "execution_count": 85,
+ "execution_count": 6,
"id": "d237be96-8c86-4a91-b7a1-487e87a16c3d",
"metadata": {},
"outputs": [],
@@ -155,7 +155,7 @@
},
{
"cell_type": "code",
- "execution_count": 86,
+ "execution_count": 7,
"id": "7e7b90ce-da54-4f00-bc34-64c543b0858f",
"metadata": {},
"outputs": [],
@@ -177,7 +177,7 @@
},
{
"cell_type": "code",
- "execution_count": 87,
+ "execution_count": 8,
"id": "03329e32-00a5-42c8-9470-75f7b6216ccd",
"metadata": {},
"outputs": [],
@@ -195,7 +195,7 @@
},
{
"cell_type": "code",
- "execution_count": 88,
+ "execution_count": 9,
"id": "b95464b1-26bc-4aac-84b4-45da83b92251",
"metadata": {},
"outputs": [],
@@ -232,14 +232,14 @@
" \n",
" # Fusion avec achats\n",
" ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
- " ticket_information.drop(['purchase_id', 'id'], axis = 1, inplace=True)\n",
+ " ticket_information.drop(['id'], axis = 1, inplace=True)\n",
"\n",
" return ticket_information"
]
},
{
"cell_type": "code",
- "execution_count": 89,
+ "execution_count": 10,
"id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396",
"metadata": {},
"outputs": [],
@@ -249,7 +249,7 @@
},
{
"cell_type": "code",
- "execution_count": 90,
+ "execution_count": 11,
"id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9",
"metadata": {},
"outputs": [
@@ -275,6 +275,7 @@
"
\n",
" | \n",
" ticket_id | \n",
+ " purchase_id | \n",
" product_id | \n",
" is_from_subscription | \n",
" supplier_name | \n",
@@ -288,6 +289,7 @@
"
\n",
" 0 | \n",
" 13070859 | \n",
+ " 5107462 | \n",
" 225251 | \n",
" False | \n",
" vente en ligne | \n",
@@ -299,6 +301,7 @@
"
\n",
" 1 | \n",
" 13070860 | \n",
+ " 5107462 | \n",
" 224914 | \n",
" False | \n",
" vente en ligne | \n",
@@ -310,6 +313,7 @@
"
\n",
" 2 | \n",
" 13070861 | \n",
+ " 5107462 | \n",
" 224914 | \n",
" False | \n",
" vente en ligne | \n",
@@ -321,6 +325,7 @@
"
\n",
" 3 | \n",
" 13070862 | \n",
+ " 5107462 | \n",
" 224914 | \n",
" False | \n",
" vente en ligne | \n",
@@ -332,6 +337,7 @@
"
\n",
" 4 | \n",
" 13070863 | \n",
+ " 5107462 | \n",
" 224914 | \n",
" False | \n",
" vente en ligne | \n",
@@ -345,12 +351,12 @@
""
],
"text/plain": [
- " ticket_id product_id is_from_subscription supplier_name \\\n",
- "0 13070859 225251 False vente en ligne \n",
- "1 13070860 224914 False vente en ligne \n",
- "2 13070861 224914 False vente en ligne \n",
- "3 13070862 224914 False vente en ligne \n",
- "4 13070863 224914 False vente en ligne \n",
+ " ticket_id purchase_id product_id is_from_subscription supplier_name \\\n",
+ "0 13070859 5107462 225251 False vente en ligne \n",
+ "1 13070860 5107462 224914 False vente en ligne \n",
+ "2 13070861 5107462 224914 False vente en ligne \n",
+ "3 13070862 5107462 224914 False vente en ligne \n",
+ "4 13070863 5107462 224914 False vente en ligne \n",
"\n",
" type_of_ticket_name children purchase_date customer_id \n",
"0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 \n",
@@ -360,7 +366,7 @@
"4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 "
]
},
- "execution_count": 90,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -379,7 +385,7 @@
},
{
"cell_type": "code",
- "execution_count": 91,
+ "execution_count": 12,
"id": "baed146a-9d3a-4397-a812-3d50c9a2f038",
"metadata": {},
"outputs": [],
@@ -408,7 +414,7 @@
},
{
"cell_type": "code",
- "execution_count": 92,
+ "execution_count": 13,
"id": "5fbfd88b-b94c-489c-9201-670e96e453e7",
"metadata": {},
"outputs": [],
@@ -426,7 +432,7 @@
},
{
"cell_type": "code",
- "execution_count": 93,
+ "execution_count": 14,
"id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
"metadata": {},
"outputs": [],
@@ -451,7 +457,7 @@
},
{
"cell_type": "code",
- "execution_count": 94,
+ "execution_count": 15,
"id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
"metadata": {},
"outputs": [],
@@ -461,7 +467,7 @@
},
{
"cell_type": "code",
- "execution_count": 95,
+ "execution_count": 16,
"id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
"metadata": {
"scrolled": true
@@ -581,7 +587,7 @@
"4 404 2021-03-27 23:00:00+00:00 "
]
},
- "execution_count": 95,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -608,7 +614,7 @@
},
{
"cell_type": "code",
- "execution_count": 96,
+ "execution_count": 17,
"id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d",
"metadata": {},
"outputs": [],
@@ -619,7 +625,7 @@
},
{
"cell_type": "code",
- "execution_count": 97,
+ "execution_count": 18,
"id": "607eb4b4-eed9-4b50-b823-f75c116dd37c",
"metadata": {},
"outputs": [],
@@ -690,7 +696,7 @@
},
{
"cell_type": "code",
- "execution_count": 98,
+ "execution_count": 19,
"id": "350b09b9-451f-4d47-81fe-f34b892db027",
"metadata": {},
"outputs": [],
@@ -778,7 +784,7 @@
},
{
"cell_type": "code",
- "execution_count": 99,
+ "execution_count": 20,
"id": "0fccc8ef-e575-4857-a401-94a7274394df",
"metadata": {},
"outputs": [
@@ -931,7 +937,7 @@
"4 indiv entrées tp "
]
},
- "execution_count": 99,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -943,7 +949,7 @@
},
{
"cell_type": "code",
- "execution_count": 100,
+ "execution_count": 21,
"id": "779d8aaf-6668-4f66-8852-847304407ea3",
"metadata": {},
"outputs": [
@@ -1113,7 +1119,7 @@
"4 spectacle vivant mucem "
]
},
- "execution_count": 100,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -1125,7 +1131,7 @@
},
{
"cell_type": "code",
- "execution_count": 101,
+ "execution_count": 22,
"id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0",
"metadata": {},
"outputs": [
@@ -1224,7 +1230,7 @@
"4 37 383 269 1"
]
},
- "execution_count": 101,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -1244,7 +1250,7 @@
},
{
"cell_type": "code",
- "execution_count": 102,
+ "execution_count": 23,
"id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba",
"metadata": {},
"outputs": [],
@@ -1272,7 +1278,7 @@
},
{
"cell_type": "code",
- "execution_count": 103,
+ "execution_count": 24,
"id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951",
"metadata": {},
"outputs": [
@@ -1511,7 +1517,7 @@
"[5 rows x 21 columns]"
]
},
- "execution_count": 103,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1523,7 +1529,7 @@
},
{
"cell_type": "code",
- "execution_count": 104,
+ "execution_count": 26,
"id": "98f78cd5-b694-4cc6-b033-20170aa13e8d",
"metadata": {},
"outputs": [],
@@ -1532,7 +1538,7 @@
"df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n",
"\n",
"# Selection des variables d'intérêts\n",
- "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]"
+ "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'purchase_id' ,'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]"
]
},
{
@@ -1553,7 +1559,7 @@
},
{
"cell_type": "code",
- "execution_count": 105,
+ "execution_count": 27,
"id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
"metadata": {},
"outputs": [],
@@ -1577,7 +1583,7 @@
" campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n",
"\n",
" # Remplir les NaN : nb_campaigns_opened\n",
- " campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n",
+ " campaigns_reduced['nb_campaigns_opened'] = campaigns_reduced['nb_campaigns_opened'].fillna(0)  # fillna returns a new Series; without the assignment the NaN values are left in place\n",
"\n",
" # Remplir les NaT : time_to_open (??)\n",
"\n",
@@ -1587,7 +1593,7 @@
},
{
"cell_type": "code",
- "execution_count": 106,
+ "execution_count": 28,
"id": "24537647-bc29-4777-9848-ac4120a4aa60",
"metadata": {},
"outputs": [],
@@ -1597,7 +1603,7 @@
},
{
"cell_type": "code",
- "execution_count": 107,
+ "execution_count": 29,
"id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
"metadata": {},
"outputs": [
@@ -1677,7 +1683,7 @@
"4 6 20 0.0 NaT"
]
},
- "execution_count": 107,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -1696,7 +1702,7 @@
},
{
"cell_type": "code",
- "execution_count": 108,
+ "execution_count": 30,
"id": "b913a69e-3146-4919-b5f6-a6108532bffa",
"metadata": {},
"outputs": [
@@ -1707,7 +1713,7 @@
" 'offre muséale groupe'], dtype=object)"
]
},
- "execution_count": 108,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -1718,7 +1724,7 @@
},
{
"cell_type": "code",
- "execution_count": 109,
+ "execution_count": 31,
"id": "2bda0b97-b28b-4070-a57d-aeab0e2f7dfe",
"metadata": {},
"outputs": [],
@@ -1729,7 +1735,7 @@
},
{
"cell_type": "code",
- "execution_count": 110,
+ "execution_count": 34,
"id": "043303fe-e90f-4689-a2a9-5d690555a045",
"metadata": {},
"outputs": [],
@@ -1746,9 +1752,10 @@
" prop_vente_internet = tickets_information_copy[tickets_information_copy['vente_internet'] == 1].groupby(['customer_id', 'event_type_id'])['ticket_id'].count().reset_index()\n",
" prop_vente_internet.rename(columns = {'ticket_id' : 'nb_tickets_internet'}, inplace = True)\n",
" \n",
- " tickets_kpi = (tickets_information_copy[['event_type_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'amount', 'vente_internet']]\n",
+ " tickets_kpi = (tickets_information_copy[['event_type_id', 'customer_id', 'purchase_id' ,'ticket_id','supplier_name', 'purchase_date', 'amount', 'vente_internet']]\n",
" .groupby(['customer_id', 'event_type_id']) \n",
" .agg({'ticket_id': 'count', \n",
+ " 'purchase_id' : 'nunique',\n",
" 'amount' : 'sum',\n",
" 'supplier_name': 'nunique',\n",
" 'vente_internet' : 'max',\n",
@@ -1759,23 +1766,33 @@
" tickets_kpi.columns = tickets_kpi.columns.map('_'.join)\n",
" \n",
" tickets_kpi.rename(columns = {'ticket_id_count' : 'nb_tickets', \n",
+ " 'purchase_id_nunique' : 'nb_purchases',\n",
" 'amount_sum' : 'total_amount',\n",
" 'supplier_name_nunique' : 'nb_suppliers', \n",
" 'customer_id_' : 'customer_id',\n",
" 'event_type_id_' : 'event_type_id'}, inplace = True)\n",
" \n",
" tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n",
+ " tickets_kpi['time_between_purchase'] = tickets_kpi['time_between_purchase'] / np.timedelta64(1, 'D') # En nombre de jours\n",
"\n",
+ " # Convertir date et en chiffre\n",
+ " max_date = tickets_kpi['purchase_date_max'].max()\n",
+ " tickets_kpi['purchase_date_max'] = (max_date - tickets_kpi['purchase_date_max']) / np.timedelta64(1, 'D')\n",
+ " tickets_kpi['purchase_date_min'] = (max_date - tickets_kpi['purchase_date_min']) / np.timedelta64(1, 'D')\n",
+ "\n",
+ " \n",
" tickets_kpi = tickets_kpi.merge(prop_vente_internet, on = ['customer_id', 'event_type_id'], how = 'left')\n",
" tickets_kpi['nb_tickets_internet'] = tickets_kpi['nb_tickets_internet'].fillna(0)\n",
- " \n",
+ "\n",
+ " \n",
+ " \n",
" return tickets_kpi\n",
" "
]
},
{
"cell_type": "code",
- "execution_count": 111,
+ "execution_count": 35,
"id": "5882234a-1ed5-4269-87a6-0d75613476e3",
"metadata": {},
"outputs": [],
@@ -1783,34 +1800,10 @@
"df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_products_purchased_reduced)"
]
},
- {
- "cell_type": "markdown",
- "id": "597b241e-a83d-4b7c-8ad7-eec50295dff2",
- "metadata": {},
- "source": [
- "#### Exportation"
- ]
- },
{
"cell_type": "code",
- "execution_count": 112,
- "id": "a4a2311d-8a72-4030-afd5-218004d5d2a5",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Exportation vers 'projet-bdc2324-team1'\n",
- "BUCKET_OUT = \"projet-bdc2324-team1\"\n",
- "FILE_KEY_OUT_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n",
- "FILE_PATH_OUT_S3 = BUCKET_OUT + \"/\" + FILE_KEY_OUT_S3\n",
- "\n",
- "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
- " df1_tickets_kpi.to_csv(file_out, index = False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 113,
- "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad",
+ "execution_count": 36,
+ "id": "5f2046cf-ffde-4521-91e7-b727b8bc17f5",
"metadata": {},
"outputs": [
{
@@ -1837,6 +1830,7 @@
" customer_id | \n",
" event_type_id | \n",
" nb_tickets | \n",
+ " nb_purchases | \n",
" total_amount | \n",
" nb_suppliers | \n",
" vente_internet_max | \n",
@@ -1848,104 +1842,125 @@
" \n",
"
\n",
" \n",
- " 1 | \n",
- " 1 | \n",
- " 4 | \n",
- " 453242 | \n",
- " 3248965.5 | \n",
- " 6 | \n",
- " 1 | \n",
- " 2013-09-23 14:45:01+00:00 | \n",
- " 2023-11-03 14:11:01+00:00 | \n",
- " 3692 days 23:26:00 | \n",
- " 2988.0 | \n",
- "
\n",
- " \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 384226 | \n",
+ " 194790 | \n",
" 2686540.5 | \n",
" 7 | \n",
" 1 | \n",
- " 2014-12-03 14:55:37+00:00 | \n",
- " 2023-11-04 15:12:16+00:00 | \n",
- " 3258 days 00:16:39 | \n",
+ " 3262.190868 | \n",
+ " 4.179306 | \n",
+ " 3258.011562 | \n",
" 51.0 | \n",
"
\n",
" \n",
- " 3 | \n",
+ " 1 | \n",
" 1 | \n",
+ " 4 | \n",
+ " 453242 | \n",
+ " 228945 | \n",
+ " 3248965.5 | \n",
" 6 | \n",
- " 217356 | \n",
- " 1435871.5 | \n",
- " 5 | \n",
" 1 | \n",
- " 2017-01-01 02:20:08+00:00 | \n",
- " 2019-12-31 02:20:06+00:00 | \n",
- " 1093 days 23:59:58 | \n",
- " 5.0 | \n",
+ " 3698.198229 | \n",
+ " 5.221840 | \n",
+ " 3692.976389 | \n",
+ " 2988.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 5 | \n",
" 201750 | \n",
+ " 107110 | \n",
" 1459190.0 | \n",
" 6 | \n",
" 1 | \n",
- " 2013-06-10 10:37:58+00:00 | \n",
- " 2023-11-08 15:59:45+00:00 | \n",
- " 3803 days 05:21:47 | \n",
+ " 3803.369792 | \n",
+ " 0.146331 | \n",
+ " 3803.223461 | \n",
" 9.0 | \n",
"
\n",
" \n",
- " 5032 | \n",
- " 6733 | \n",
- " 6 | \n",
- " 14208 | \n",
- " 0.0 | \n",
- " 3 | \n",
+ " 3 | \n",
" 1 | \n",
- " 2017-01-11 15:00:54+00:00 | \n",
- " 2019-11-27 09:47:06+00:00 | \n",
- " 1049 days 18:46:12 | \n",
- " 13497.0 | \n",
+ " 6 | \n",
+ " 217356 | \n",
+ " 111786 | \n",
+ " 1435871.5 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 2502.715509 | \n",
+ " 1408.715532 | \n",
+ " 1093.999977 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 143 | \n",
+ " 143 | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 2041.274549 | \n",
+ " 1340.308160 | \n",
+ " 700.966389 | \n",
+ " 0.0 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n",
- "1 1 4 453242 3248965.5 6 \n",
- "0 1 2 384226 2686540.5 7 \n",
- "3 1 6 217356 1435871.5 5 \n",
- "2 1 5 201750 1459190.0 6 \n",
- "5032 6733 6 14208 0.0 3 \n",
+ " customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
+ "0 1 2 384226 194790 2686540.5 \n",
+ "1 1 4 453242 228945 3248965.5 \n",
+ "2 1 5 201750 107110 1459190.0 \n",
+ "3 1 6 217356 111786 1435871.5 \n",
+ "4 2 2 143 143 0.0 \n",
"\n",
- " vente_internet_max purchase_date_min purchase_date_max \\\n",
- "1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n",
- "0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n",
- "3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n",
- "2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n",
- "5032 1 2017-01-11 15:00:54+00:00 2019-11-27 09:47:06+00:00 \n",
+ " nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "0 7 1 3262.190868 4.179306 \n",
+ "1 6 1 3698.198229 5.221840 \n",
+ "2 6 1 3803.369792 0.146331 \n",
+ "3 5 1 2502.715509 1408.715532 \n",
+ "4 1 0 2041.274549 1340.308160 \n",
"\n",
- " time_between_purchase nb_tickets_internet \n",
- "1 3692 days 23:26:00 2988.0 \n",
- "0 3258 days 00:16:39 51.0 \n",
- "3 1093 days 23:59:58 5.0 \n",
- "2 3803 days 05:21:47 9.0 \n",
- "5032 1049 days 18:46:12 13497.0 "
+ " time_between_purchase nb_tickets_internet \n",
+ "0 3258.011562 51.0 \n",
+ "1 3692.976389 2988.0 \n",
+ "2 3803.223461 9.0 \n",
+ "3 1093.999977 5.0 \n",
+ "4 700.966389 0.0 "
]
},
- "execution_count": 113,
+ "execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df1_tickets_kpi.sort_values(by='nb_tickets', ascending=False).head(5)"
+ "df1_tickets_kpi.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "a4a2311d-8a72-4030-afd5-218004d5d2a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write df1_tickets_kpi as a CSV file into the 'projet-bdc2324-team1' bucket\n",
+ "BUCKET_OUT = \"projet-bdc2324-team1\"\n",
+ "FILE_KEY_OUT_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n",
+ "FILE_PATH_OUT_S3 = \"/\".join([BUCKET_OUT, FILE_KEY_OUT_S3])  # <bucket>/<key>\n",
+ "\n",
+ "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
+ " df1_tickets_kpi.to_csv(file_out, index=False)"
]
},
{
@@ -1958,7 +1973,7 @@
},
{
"cell_type": "code",
- "execution_count": 114,
+ "execution_count": 39,
"id": "273857e0-7112-4294-8ba6-3c39c5cbc13a",
"metadata": {},
"outputs": [
@@ -1986,6 +2001,7 @@
" customer_id | \n",
" event_type_id | \n",
" nb_tickets | \n",
+ " nb_purchases | \n",
" total_amount | \n",
" nb_suppliers | \n",
" vente_internet_max | \n",
@@ -2001,12 +2017,13 @@
" 1 | \n",
" 2 | \n",
" 384226 | \n",
+ " 194790 | \n",
" 2686540.5 | \n",
" 7 | \n",
" 1 | \n",
- " 2014-12-03 14:55:37+00:00 | \n",
- " 2023-11-04 15:12:16+00:00 | \n",
- " 3258 days 00:16:39 | \n",
+ " 3262.190868 | \n",
+ " 4.179306 | \n",
+ " 3258.011562 | \n",
" 51.0 | \n",
" \n",
" \n",
@@ -2014,12 +2031,13 @@
" 1 | \n",
" 4 | \n",
" 453242 | \n",
+ " 228945 | \n",
" 3248965.5 | \n",
" 6 | \n",
" 1 | \n",
- " 2013-09-23 14:45:01+00:00 | \n",
- " 2023-11-03 14:11:01+00:00 | \n",
- " 3692 days 23:26:00 | \n",
+ " 3698.198229 | \n",
+ " 5.221840 | \n",
+ " 3692.976389 | \n",
" 2988.0 | \n",
"
\n",
" \n",
@@ -2027,12 +2045,13 @@
" 1 | \n",
" 5 | \n",
" 201750 | \n",
+ " 107110 | \n",
" 1459190.0 | \n",
" 6 | \n",
" 1 | \n",
- " 2013-06-10 10:37:58+00:00 | \n",
- " 2023-11-08 15:59:45+00:00 | \n",
- " 3803 days 05:21:47 | \n",
+ " 3803.369792 | \n",
+ " 0.146331 | \n",
+ " 3803.223461 | \n",
" 9.0 | \n",
"
\n",
" \n",
@@ -2040,12 +2059,13 @@
" 1 | \n",
" 6 | \n",
" 217356 | \n",
+ " 111786 | \n",
" 1435871.5 | \n",
" 5 | \n",
" 1 | \n",
- " 2017-01-01 02:20:08+00:00 | \n",
- " 2019-12-31 02:20:06+00:00 | \n",
- " 1093 days 23:59:58 | \n",
+ " 2502.715509 | \n",
+ " 1408.715532 | \n",
+ " 1093.999977 | \n",
" 5.0 | \n",
"
\n",
" \n",
@@ -2053,12 +2073,13 @@
" 2 | \n",
" 2 | \n",
" 143 | \n",
+ " 143 | \n",
" 0.0 | \n",
" 1 | \n",
" 0 | \n",
- " 2018-04-07 12:55:07+00:00 | \n",
- " 2020-03-08 12:06:43+00:00 | \n",
- " 700 days 23:11:36 | \n",
+ " 2041.274549 | \n",
+ " 1340.308160 | \n",
+ " 700.966389 | \n",
" 0.0 | \n",
"
\n",
" \n",
@@ -2066,29 +2087,29 @@
""
],
"text/plain": [
- " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n",
- "0 1 2 384226 2686540.5 7 \n",
- "1 1 4 453242 3248965.5 6 \n",
- "2 1 5 201750 1459190.0 6 \n",
- "3 1 6 217356 1435871.5 5 \n",
- "4 2 2 143 0.0 1 \n",
+ " customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
+ "0 1 2 384226 194790 2686540.5 \n",
+ "1 1 4 453242 228945 3248965.5 \n",
+ "2 1 5 201750 107110 1459190.0 \n",
+ "3 1 6 217356 111786 1435871.5 \n",
+ "4 2 2 143 143 0.0 \n",
"\n",
- " vente_internet_max purchase_date_min purchase_date_max \\\n",
- "0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n",
- "1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n",
- "2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n",
- "3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n",
- "4 0 2018-04-07 12:55:07+00:00 2020-03-08 12:06:43+00:00 \n",
+ " nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "0 7 1 3262.190868 4.179306 \n",
+ "1 6 1 3698.198229 5.221840 \n",
+ "2 6 1 3803.369792 0.146331 \n",
+ "3 5 1 2502.715509 1408.715532 \n",
+ "4 1 0 2041.274549 1340.308160 \n",
"\n",
- " time_between_purchase nb_tickets_internet \n",
- "0 3258 days 00:16:39 51.0 \n",
- "1 3692 days 23:26:00 2988.0 \n",
- "2 3803 days 05:21:47 9.0 \n",
- "3 1093 days 23:59:58 5.0 \n",
- "4 700 days 23:11:36 0.0 "
+ " time_between_purchase nb_tickets_internet \n",
+ "0 3258.011562 51.0 \n",
+ "1 3692.976389 2988.0 \n",
+ "2 3803.223461 9.0 \n",
+ "3 1093.999977 5.0 \n",
+ "4 700.966389 0.0 "
]
},
- "execution_count": 114,
+ "execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
@@ -2099,7 +2120,7 @@
},
{
"cell_type": "code",
- "execution_count": 115,
+ "execution_count": 40,
"id": "449731f3-340f-4648-8210-4622c7dbc174",
"metadata": {},
"outputs": [
@@ -2166,7 +2187,7 @@
"3 6 formule adhésion 6.439463"
]
},
- "execution_count": 115,
+ "execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
@@ -2181,7 +2202,7 @@
},
{
"cell_type": "code",
- "execution_count": 116,
+ "execution_count": 41,
"id": "b54bd9e8-3cad-453b-8e58-bf6d047912eb",
"metadata": {},
"outputs": [
@@ -2209,6 +2230,7 @@
" customer_id | \n",
" event_type_id | \n",
" nb_tickets | \n",
+ " nb_purchases | \n",
" total_amount | \n",
" nb_suppliers | \n",
" vente_internet_max | \n",
@@ -2226,12 +2248,13 @@
" 1 | \n",
" 2 | \n",
" 384226 | \n",
+ " 194790 | \n",
" 2686540.5 | \n",
" 7 | \n",
" 1 | \n",
- " 2014-12-03 14:55:37+00:00 | \n",
- " 2023-11-04 15:12:16+00:00 | \n",
- " 3258 days 00:16:39 | \n",
+ " 3262.190868 | \n",
+ " 4.179306 | \n",
+ " 3258.011562 | \n",
" 51.0 | \n",
" offre muséale individuel | \n",
" 6.150659 | \n",
@@ -2241,12 +2264,13 @@
" 1 | \n",
" 4 | \n",
" 453242 | \n",
+ " 228945 | \n",
" 3248965.5 | \n",
" 6 | \n",
" 1 | \n",
- " 2013-09-23 14:45:01+00:00 | \n",
- " 2023-11-03 14:11:01+00:00 | \n",
- " 3692 days 23:26:00 | \n",
+ " 3698.198229 | \n",
+ " 5.221840 | \n",
+ " 3692.976389 | \n",
" 2988.0 | \n",
" spectacle vivant | \n",
" 7.762474 | \n",
@@ -2256,12 +2280,13 @@
" 1 | \n",
" 5 | \n",
" 201750 | \n",
+ " 107110 | \n",
" 1459190.0 | \n",
" 6 | \n",
" 1 | \n",
- " 2013-06-10 10:37:58+00:00 | \n",
- " 2023-11-08 15:59:45+00:00 | \n",
- " 3803 days 05:21:47 | \n",
+ " 3803.369792 | \n",
+ " 0.146331 | \n",
+ " 3803.223461 | \n",
" 9.0 | \n",
" offre muséale groupe | \n",
" 4.452618 | \n",
@@ -2271,12 +2296,13 @@
" 1 | \n",
" 6 | \n",
" 217356 | \n",
+ " 111786 | \n",
" 1435871.5 | \n",
" 5 | \n",
" 1 | \n",
- " 2017-01-01 02:20:08+00:00 | \n",
- " 2019-12-31 02:20:06+00:00 | \n",
- " 1093 days 23:59:58 | \n",
+ " 2502.715509 | \n",
+ " 1408.715532 | \n",
+ " 1093.999977 | \n",
" 5.0 | \n",
" formule adhésion | \n",
" 6.439463 | \n",
@@ -2286,12 +2312,13 @@
" 2 | \n",
" 2 | \n",
" 143 | \n",
+ " 143 | \n",
" 0.0 | \n",
" 1 | \n",
" 0 | \n",
- " 2018-04-07 12:55:07+00:00 | \n",
- " 2020-03-08 12:06:43+00:00 | \n",
- " 700 days 23:11:36 | \n",
+ " 2041.274549 | \n",
+ " 1340.308160 | \n",
+ " 700.966389 | \n",
" 0.0 | \n",
" offre muséale individuel | \n",
" 6.150659 | \n",
@@ -2301,26 +2328,26 @@
""
],
"text/plain": [
- " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n",
- "0 1 2 384226 2686540.5 7 \n",
- "1 1 4 453242 3248965.5 6 \n",
- "2 1 5 201750 1459190.0 6 \n",
- "3 1 6 217356 1435871.5 5 \n",
- "4 2 2 143 0.0 1 \n",
+ " customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
+ "0 1 2 384226 194790 2686540.5 \n",
+ "1 1 4 453242 228945 3248965.5 \n",
+ "2 1 5 201750 107110 1459190.0 \n",
+ "3 1 6 217356 111786 1435871.5 \n",
+ "4 2 2 143 143 0.0 \n",
"\n",
- " vente_internet_max purchase_date_min purchase_date_max \\\n",
- "0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n",
- "1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n",
- "2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n",
- "3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n",
- "4 0 2018-04-07 12:55:07+00:00 2020-03-08 12:06:43+00:00 \n",
+ " nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "0 7 1 3262.190868 4.179306 \n",
+ "1 6 1 3698.198229 5.221840 \n",
+ "2 6 1 3803.369792 0.146331 \n",
+ "3 5 1 2502.715509 1408.715532 \n",
+ "4 1 0 2041.274549 1340.308160 \n",
"\n",
- " time_between_purchase nb_tickets_internet name_event_types \\\n",
- "0 3258 days 00:16:39 51.0 offre muséale individuel \n",
- "1 3692 days 23:26:00 2988.0 spectacle vivant \n",
- "2 3803 days 05:21:47 9.0 offre muséale groupe \n",
- "3 1093 days 23:59:58 5.0 formule adhésion \n",
- "4 700 days 23:11:36 0.0 offre muséale individuel \n",
+ " time_between_purchase nb_tickets_internet name_event_types \\\n",
+ "0 3258.011562 51.0 offre muséale individuel \n",
+ "1 3692.976389 2988.0 spectacle vivant \n",
+ "2 3803.223461 9.0 offre muséale groupe \n",
+ "3 1093.999977 5.0 formule adhésion \n",
+ "4 700.966389 0.0 offre muséale individuel \n",
"\n",
" avg_amount \n",
"0 6.150659 \n",
@@ -2330,7 +2357,7 @@
"4 6.150659 "
]
},
- "execution_count": 116,
+ "execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
@@ -2342,17 +2369,405 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 42,
"id": "2d6afe74-2517-478b-a99c-da9c7bd2edd4",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " birthdate | \n",
+ " street_id | \n",
+ " is_partner | \n",
+ " gender | \n",
+ " is_email_true | \n",
+ " opt_in | \n",
+ " structure_id | \n",
+ " profession | \n",
+ " language | \n",
+ " ... | \n",
+ " fidelity | \n",
+ " average_purchase_delay | \n",
+ " average_price_basket | \n",
+ " average_ticket_basket | \n",
+ " total_price | \n",
+ " purchase_count | \n",
+ " first_buying_date | \n",
+ " country | \n",
+ " age | \n",
+ " tenant_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 12751 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 12825 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11261 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 1 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 13071 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 653061 | \n",
+ " NaN | \n",
+ " 10 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 151861 | \n",
+ " 295252 | \n",
+ " NaN | \n",
+ " 10 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ " 151862 | \n",
+ " 295271 | \n",
+ " NaN | \n",
+ " 10 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ " 151863 | \n",
+ " 295275 | \n",
+ " NaN | \n",
+ " 10 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ " 151864 | \n",
+ " 295366 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 3.0 | \n",
+ " 33.0 | \n",
+ " 3.0 | \n",
+ " 33.0 | \n",
+ " 1 | \n",
+ " 2021-05-26 17:20:37+00:00 | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ " 151865 | \n",
+ " 295368 | \n",
+ " NaN | \n",
+ " 2 | \n",
+ " False | \n",
+ " 2 | \n",
+ " True | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 6.0 | \n",
+ " 22.0 | \n",
+ " 2.0 | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 2021-05-26 17:35:38+00:00 | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 1311 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
151866 rows × 25 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id birthdate street_id is_partner gender is_email_true \\\n",
+ "0 12751 NaN 2 False 1 True \n",
+ "1 12825 NaN 2 False 2 True \n",
+ "2 11261 NaN 2 False 1 True \n",
+ "3 13071 NaN 2 False 2 True \n",
+ "4 653061 NaN 10 False 2 True \n",
+ "... ... ... ... ... ... ... \n",
+ "151861 295252 NaN 10 False 2 True \n",
+ "151862 295271 NaN 10 False 2 True \n",
+ "151863 295275 NaN 10 False 2 True \n",
+ "151864 295366 NaN 2 False 2 True \n",
+ "151865 295368 NaN 2 False 2 True \n",
+ "\n",
+ " opt_in structure_id profession language ... fidelity \\\n",
+ "0 True NaN NaN NaN ... 0 \n",
+ "1 True NaN NaN NaN ... 0 \n",
+ "2 True NaN NaN NaN ... 0 \n",
+ "3 True NaN NaN NaN ... 0 \n",
+ "4 False NaN NaN NaN ... 0 \n",
+ "... ... ... ... ... ... ... \n",
+ "151861 False NaN NaN NaN ... 0 \n",
+ "151862 False NaN NaN NaN ... 0 \n",
+ "151863 False NaN NaN NaN ... 0 \n",
+ "151864 False NaN NaN NaN ... 1 \n",
+ "151865 False NaN NaN NaN ... 1 \n",
+ "\n",
+ " average_purchase_delay average_price_basket average_ticket_basket \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "151861 NaN NaN NaN \n",
+ "151862 NaN NaN NaN \n",
+ "151863 NaN NaN NaN \n",
+ "151864 3.0 33.0 3.0 \n",
+ "151865 6.0 22.0 2.0 \n",
+ "\n",
+ " total_price purchase_count first_buying_date country age \\\n",
+ "0 NaN 0 NaT fr NaN \n",
+ "1 NaN 0 NaT fr NaN \n",
+ "2 NaN 0 NaT fr NaN \n",
+ "3 NaN 0 NaT fr NaN \n",
+ "4 NaN 0 NaT NaN NaN \n",
+ "... ... ... ... ... ... \n",
+ "151861 NaN 0 NaT NaN NaN \n",
+ "151862 NaN 0 NaT NaN NaN \n",
+ "151863 NaN 0 NaT NaN NaN \n",
+ "151864 33.0 1 2021-05-26 17:20:37+00:00 fr NaN \n",
+ "151865 22.0 1 2021-05-26 17:35:38+00:00 fr NaN \n",
+ "\n",
+ " tenant_id \n",
+ "0 1311 \n",
+ "1 1311 \n",
+ "2 1311 \n",
+ "3 1311 \n",
+ "4 1311 \n",
+ "... ... \n",
+ "151861 1311 \n",
+ "151862 1311 \n",
+ "151863 1311 \n",
+ "151864 1311 \n",
+ "151865 1311 \n",
+ "\n",
+ "[151866 rows x 25 columns]"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df1_customerplus_clean"
]
},
{
"cell_type": "code",
- "execution_count": 120,
+ "execution_count": 43,
"id": "83230baa-9a8a-4614-b629-e99c2505c696",
"metadata": {},
"outputs": [
@@ -2388,7 +2803,7 @@
" profession | \n",
" language | \n",
" ... | \n",
- " nb_tickets | \n",
+ " nb_purchases | \n",
" total_amount | \n",
" nb_suppliers | \n",
" vente_internet_max | \n",
@@ -2414,13 +2829,13 @@
" NaN | \n",
" NaN | \n",
" ... | \n",
- " 384226.0 | \n",
+ " 194790.0 | \n",
" 2686540.5 | \n",
" 7.0 | \n",
" 1.0 | \n",
- " 2014-12-03 14:55:37+00:00 | \n",
- " 2023-11-04 15:12:16+00:00 | \n",
- " 3258 days 00:16:39 | \n",
+ " 3262.190868 | \n",
+ " 4.179306 | \n",
+ " 3258.011562 | \n",
" 51.0 | \n",
" offre muséale individuel | \n",
" 6.150659 | \n",
@@ -2438,13 +2853,13 @@
" NaN | \n",
" NaN | \n",
" ... | \n",
- " 217356.0 | \n",
+ " 111786.0 | \n",
" 1435871.5 | \n",
" 5.0 | \n",
" 1.0 | \n",
- " 2017-01-01 02:20:08+00:00 | \n",
- " 2019-12-31 02:20:06+00:00 | \n",
- " 1093 days 23:59:58 | \n",
+ " 2502.715509 | \n",
+ " 1408.715532 | \n",
+ " 1093.999977 | \n",
" 5.0 | \n",
" formule adhésion | \n",
" 6.439463 | \n",
@@ -2462,13 +2877,13 @@
" NaN | \n",
" NaN | \n",
" ... | \n",
- " 453242.0 | \n",
+ " 228945.0 | \n",
" 3248965.5 | \n",
" 6.0 | \n",
" 1.0 | \n",
- " 2013-09-23 14:45:01+00:00 | \n",
- " 2023-11-03 14:11:01+00:00 | \n",
- " 3692 days 23:26:00 | \n",
+ " 3698.198229 | \n",
+ " 5.221840 | \n",
+ " 3692.976389 | \n",
" 2988.0 | \n",
" spectacle vivant | \n",
" 7.762474 | \n",
@@ -2486,13 +2901,13 @@
" NaN | \n",
" NaN | \n",
" ... | \n",
- " 201750.0 | \n",
+ " 107110.0 | \n",
" 1459190.0 | \n",
" 6.0 | \n",
" 1.0 | \n",
- " 2013-06-10 10:37:58+00:00 | \n",
- " 2023-11-08 15:59:45+00:00 | \n",
- " 3803 days 05:21:47 | \n",
+ " 3803.369792 | \n",
+ " 0.146331 | \n",
+ " 3803.223461 | \n",
" 9.0 | \n",
" offre muséale groupe | \n",
" 4.452618 | \n",
@@ -2514,16 +2929,16 @@
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
- " 2019-03-09 13:14:21+00:00 | \n",
- " 2019-11-13 11:29:55+00:00 | \n",
- " 248 days 22:15:34 | \n",
+ " 1705.261192 | \n",
+ " 1456.333715 | \n",
+ " 248.927477 | \n",
" 0.0 | \n",
" formule adhésion | \n",
" 6.439463 | \n",
" \n",
" \n",
"\n",
- "5 rows × 36 columns
\n",
+ "5 rows × 37 columns
\n",
""
],
"text/plain": [
@@ -2534,45 +2949,38 @@
"59899 1 NaN 2 False 2 True \n",
"134695 2 NaN 2 False 1 True \n",
"\n",
- " opt_in structure_id profession language ... nb_tickets \\\n",
- "59897 False NaN NaN NaN ... 384226.0 \n",
- "59900 False NaN NaN NaN ... 217356.0 \n",
- "59898 False NaN NaN NaN ... 453242.0 \n",
- "59899 False NaN NaN NaN ... 201750.0 \n",
- "134695 True NaN NaN NaN ... 164.0 \n",
+ " opt_in structure_id profession language ... nb_purchases \\\n",
+ "59897 False NaN NaN NaN ... 194790.0 \n",
+ "59900 False NaN NaN NaN ... 111786.0 \n",
+ "59898 False NaN NaN NaN ... 228945.0 \n",
+ "59899 False NaN NaN NaN ... 107110.0 \n",
+ "134695 True NaN NaN NaN ... 164.0 \n",
"\n",
- " total_amount nb_suppliers vente_internet_max \\\n",
- "59897 2686540.5 7.0 1.0 \n",
- "59900 1435871.5 5.0 1.0 \n",
- "59898 3248965.5 6.0 1.0 \n",
- "59899 1459190.0 6.0 1.0 \n",
- "134695 0.0 1.0 0.0 \n",
+ " total_amount nb_suppliers vente_internet_max purchase_date_min \\\n",
+ "59897 2686540.5 7.0 1.0 3262.190868 \n",
+ "59900 1435871.5 5.0 1.0 2502.715509 \n",
+ "59898 3248965.5 6.0 1.0 3698.198229 \n",
+ "59899 1459190.0 6.0 1.0 3803.369792 \n",
+ "134695 0.0 1.0 0.0 1705.261192 \n",
"\n",
- " purchase_date_min purchase_date_max \\\n",
- "59897 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n",
- "59900 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n",
- "59898 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n",
- "59899 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n",
- "134695 2019-03-09 13:14:21+00:00 2019-11-13 11:29:55+00:00 \n",
+ " purchase_date_max time_between_purchase nb_tickets_internet \\\n",
+ "59897 4.179306 3258.011562 51.0 \n",
+ "59900 1408.715532 1093.999977 5.0 \n",
+ "59898 5.221840 3692.976389 2988.0 \n",
+ "59899 0.146331 3803.223461 9.0 \n",
+ "134695 1456.333715 248.927477 0.0 \n",
"\n",
- " time_between_purchase nb_tickets_internet name_event_types \\\n",
- "59897 3258 days 00:16:39 51.0 offre muséale individuel \n",
- "59900 1093 days 23:59:58 5.0 formule adhésion \n",
- "59898 3692 days 23:26:00 2988.0 spectacle vivant \n",
- "59899 3803 days 05:21:47 9.0 offre muséale groupe \n",
- "134695 248 days 22:15:34 0.0 formule adhésion \n",
+ " name_event_types avg_amount \n",
+ "59897 offre muséale individuel 6.150659 \n",
+ "59900 formule adhésion 6.439463 \n",
+ "59898 spectacle vivant 7.762474 \n",
+ "59899 offre muséale groupe 4.452618 \n",
+ "134695 formule adhésion 6.439463 \n",
"\n",
- " avg_amount \n",
- "59897 6.150659 \n",
- "59900 6.439463 \n",
- "59898 7.762474 \n",
- "59899 4.452618 \n",
- "134695 6.439463 \n",
- "\n",
- "[5 rows x 36 columns]"
+ "[5 rows x 37 columns]"
]
},
- "execution_count": 120,
+ "execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
@@ -2586,7 +2994,7 @@
},
{
"cell_type": "code",
- "execution_count": 123,
+ "execution_count": 44,
"id": "433921de-03ad-4024-9462-ecd267db1756",
"metadata": {},
"outputs": [
@@ -2649,9 +3057,9 @@
" NaN | \n",
" ... | \n",
" 1.0 | \n",
- " 2014-12-03 14:55:37+00:00 | \n",
- " 2023-11-04 15:12:16+00:00 | \n",
- " 3258 days 00:16:39 | \n",
+ " 3262.190868 | \n",
+ " 4.179306 | \n",
+ " 3258.011562 | \n",
" 51.0 | \n",
" offre muséale individuel | \n",
" 6.150659 | \n",
@@ -2673,9 +3081,9 @@
" NaN | \n",
" ... | \n",
" 1.0 | \n",
- " 2017-01-01 02:20:08+00:00 | \n",
- " 2019-12-31 02:20:06+00:00 | \n",
- " 1093 days 23:59:58 | \n",
+ " 2502.715509 | \n",
+ " 1408.715532 | \n",
+ " 1093.999977 | \n",
" 5.0 | \n",
" formule adhésion | \n",
" 6.439463 | \n",
@@ -2697,9 +3105,9 @@
" NaN | \n",
" ... | \n",
" 1.0 | \n",
- " 2013-09-23 14:45:01+00:00 | \n",
- " 2023-11-03 14:11:01+00:00 | \n",
- " 3692 days 23:26:00 | \n",
+ " 3698.198229 | \n",
+ " 5.221840 | \n",
+ " 3692.976389 | \n",
" 2988.0 | \n",
" spectacle vivant | \n",
" 7.762474 | \n",
@@ -2721,9 +3129,9 @@
" NaN | \n",
" ... | \n",
" 1.0 | \n",
- " 2013-06-10 10:37:58+00:00 | \n",
- " 2023-11-08 15:59:45+00:00 | \n",
- " 3803 days 05:21:47 | \n",
+ " 3803.369792 | \n",
+ " 0.146331 | \n",
+ " 3803.223461 | \n",
" 9.0 | \n",
" offre muséale groupe | \n",
" 4.452618 | \n",
@@ -2745,9 +3153,9 @@
" NaN | \n",
" ... | \n",
" 0.0 | \n",
- " 2019-03-09 13:14:21+00:00 | \n",
- " 2019-11-13 11:29:55+00:00 | \n",
- " 248 days 22:15:34 | \n",
+ " 1705.261192 | \n",
+ " 1456.333715 | \n",
+ " 248.927477 | \n",
" 0.0 | \n",
" formule adhésion | \n",
" 6.439463 | \n",
@@ -2757,7 +3165,7 @@
" \n",
" \n",
"\n",
- "5 rows × 39 columns
\n",
+ "5 rows × 40 columns
\n",
""
],
"text/plain": [
@@ -2775,12 +3183,12 @@
"3 False NaN NaN NaN ... 1.0 \n",
"4 True NaN NaN NaN ... 0.0 \n",
"\n",
- " purchase_date_min purchase_date_max time_between_purchase \\\n",
- "0 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 3258 days 00:16:39 \n",
- "1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 1093 days 23:59:58 \n",
- "2 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 3692 days 23:26:00 \n",
- "3 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 3803 days 05:21:47 \n",
- "4 2019-03-09 13:14:21+00:00 2019-11-13 11:29:55+00:00 248 days 22:15:34 \n",
+ " purchase_date_min purchase_date_max time_between_purchase \\\n",
+ "0 3262.190868 4.179306 3258.011562 \n",
+ "1 2502.715509 1408.715532 1093.999977 \n",
+ "2 3698.198229 5.221840 3692.976389 \n",
+ "3 3803.369792 0.146331 3803.223461 \n",
+ "4 1705.261192 1456.333715 248.927477 \n",
"\n",
" nb_tickets_internet name_event_types avg_amount nb_campaigns \\\n",
"0 51.0 offre muséale individuel 6.150659 NaN \n",
@@ -2796,10 +3204,10 @@
"3 NaN NaT \n",
"4 0.0 NaT \n",
"\n",
- "[5 rows x 39 columns]"
+ "[5 rows x 40 columns]"
]
},
- "execution_count": 123,
+ "execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
@@ -2813,7 +3221,7 @@
},
{
"cell_type": "code",
- "execution_count": 124,
+ "execution_count": 45,
"id": "25e54131-6835-4e94-86d3-1a78520ed7bc",
"metadata": {},
"outputs": [],
@@ -2839,9 +3247,65 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 46,
"id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2",
"metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fusion avec KPI campaigns liés au customer\n",
+ "#df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n",
+ "#df1_customer.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
+ "print(\"shape : \", df1_customer_product.shape)\n",
+ "df1_customer_product.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7",
+ "metadata": {},
+ "source": [
+ "# Fusion des bases locales"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Left-join the campaign KPIs onto the cleaned customer table so that every customer row is kept\n",
+ "df1_customer = df1_customerplus_clean.merge(df1_campaigns_kpi, how = 'left', on = 'customer_id')\n",
+ "\n",
+ "campaign_count_cols = ['nb_campaigns', 'nb_campaigns_opened']  # NaN after the left join for customers without campaign KPIs: set to 0\n",
+ "df1_customer[campaign_count_cols] = df1_customer[campaign_count_cols].fillna(0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "d53825e4-6453-45bc-94f2-7b2504ec4afb",
+ "metadata": {},
"outputs": [
{
"data": {
@@ -2908,8 +3372,8 @@
" fr | \n",
" NaN | \n",
" 1311 | \n",
- " NaN | \n",
- " NaN | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
" NaT | \n",
" \n",
" \n",
@@ -2932,8 +3396,8 @@
" fr | \n",
" NaN | \n",
" 1311 | \n",
- " NaN | \n",
- " NaN | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
" NaT | \n",
"
\n",
" \n",
@@ -2956,8 +3420,8 @@
" fr | \n",
" NaN | \n",
" 1311 | \n",
- " NaN | \n",
- " NaN | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
" NaT | \n",
"
\n",
" \n",
@@ -2980,8 +3444,8 @@
" fr | \n",
" NaN | \n",
" 1311 | \n",
- " NaN | \n",
- " NaN | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
" NaT | \n",
"
\n",
" \n",
@@ -3036,300 +3500,86 @@
"4 NaN 0 NaT NaN NaN 1311 \n",
"\n",
" nb_campaigns nb_campaigns_opened time_to_open \n",
- "0 NaN NaN NaT \n",
- "1 NaN NaN NaT \n",
- "2 NaN NaN NaT \n",
- "3 NaN NaN NaT \n",
+ "0 0.0 0.0 NaT \n",
+ "1 0.0 0.0 NaT \n",
+ "2 0.0 0.0 NaT \n",
+ "3 0.0 0.0 NaT \n",
"4 80.0 2.0 0 days 19:53:02.500000 \n",
"\n",
"[5 rows x 28 columns]"
]
},
- "execution_count": 36,
+ "execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "# Fusion avec KPI campaigns liés au customer\n",
- "#df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n",
- "#df1_customer.head()"
+ "df1_customer.head()"
]
},
{
"cell_type": "code",
- "execution_count": 37,
- "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "shape : (156289, 31)\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " customer_id | \n",
- " birthdate | \n",
- " street_id | \n",
- " is_partner | \n",
- " gender | \n",
- " is_email_true | \n",
- " opt_in | \n",
- " structure_id | \n",
- " profession | \n",
- " language | \n",
- " ... | \n",
- " first_buying_date | \n",
- " country | \n",
- " age | \n",
- " tenant_id | \n",
- " nb_campaigns | \n",
- " nb_campaigns_opened | \n",
- " time_to_open | \n",
- " event_type_id | \n",
- " nb_tickets | \n",
- " avg_amount | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 12751 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 1 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 12825 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 11261 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 1 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 13071 | \n",
- " NaN | \n",
- " 2 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " True | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " NaT | \n",
- " fr | \n",
- " NaN | \n",
- " 1311 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 653061 | \n",
- " NaN | \n",
- " 10 | \n",
- " False | \n",
- " 2 | \n",
- " True | \n",
- " False | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " ... | \n",
- " NaT | \n",
- " NaN | \n",
- " NaN | \n",
- " 1311 | \n",
- " 80.0 | \n",
- " 2.0 | \n",
- " 0 days 19:53:02.500000 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 31 columns
\n",
- "
"
- ],
- "text/plain": [
- " customer_id birthdate street_id is_partner gender is_email_true \\\n",
- "0 12751 NaN 2 False 1 True \n",
- "1 12825 NaN 2 False 2 True \n",
- "2 11261 NaN 2 False 1 True \n",
- "3 13071 NaN 2 False 2 True \n",
- "4 653061 NaN 10 False 2 True \n",
- "\n",
- " opt_in structure_id profession language ... first_buying_date country \\\n",
- "0 True NaN NaN NaN ... NaT fr \n",
- "1 True NaN NaN NaN ... NaT fr \n",
- "2 True NaN NaN NaN ... NaT fr \n",
- "3 True NaN NaN NaN ... NaT fr \n",
- "4 False NaN NaN NaN ... NaT NaN \n",
- "\n",
- " age tenant_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
- "0 NaN 1311 NaN NaN NaT \n",
- "1 NaN 1311 NaN NaN NaT \n",
- "2 NaN 1311 NaN NaN NaT \n",
- "3 NaN 1311 NaN NaN NaT \n",
- "4 NaN 1311 80.0 2.0 0 days 19:53:02.500000 \n",
- "\n",
- " event_type_id nb_tickets avg_amount \n",
- "0 NaN NaN NaN \n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "\n",
- "[5 rows x 31 columns]"
- ]
- },
- "execution_count": 37,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n",
- "print(\"shape : \", df1_customer_product.shape)\n",
- "df1_customer_product.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd",
- "metadata": {},
- "outputs": [],
- "source": [
- "df1_customer_product.to_csv(\"customer_product.csv\", index = False)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7",
- "metadata": {},
- "source": [
- "# Fusion des bases locales"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Fusion avec KPI liés au customer\n",
- "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 40,
+ "execution_count": 67,
"id": "1e42a790-b215-4107-a969-85005da06ebd",
"metadata": {},
"outputs": [],
"source": [
"# Fusion avec KPI liés au comportement d'achat\n",
- "df1_customer_product = pd.merge(df1_tickets_kpi, df1_customer, on = 'customer_id', how = 'outer')"
+ "df1_customer_product = df1_tickets_kpi.merge(df1_customer, how = 'outer', on = 'customer_id')\n",
+ "\n",
+ "ticket_kpi_cols = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']  # NaN for rows without a match in df1_tickets_kpi after the outer join: set to 0\n",
+ "df1_customer_product[ticket_kpi_cols] = df1_customer_product[ticket_kpi_cols].fillna(0)"
]
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 66,
"id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f",
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['customer_id', 'event_type_id', 'nb_tickets', 'nb_purchases',\n",
+ " 'total_amount', 'nb_suppliers', 'vente_internet_max',\n",
+ " 'purchase_date_min', 'purchase_date_max', 'time_between_purchase',\n",
+ " 'nb_tickets_internet', 'name_event_types', 'avg_amount', 'birthdate',\n",
+ " 'street_id', 'is_partner', 'gender', 'is_email_true', 'opt_in',\n",
+ " 'structure_id', 'profession', 'language', 'mcp_contact_id',\n",
+ " 'last_buying_date', 'max_price', 'ticket_sum', 'average_price',\n",
+ " 'fidelity', 'average_purchase_delay', 'average_price_basket',\n",
+ " 'average_ticket_basket', 'total_price', 'purchase_count',\n",
+ " 'first_buying_date', 'country', 'age', 'tenant_id', 'nb_campaigns',\n",
+ " 'nb_campaigns_opened', 'time_to_open'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_customer_product.columns  # list the merged column names (what this cell's saved output shows) instead of dumping the whole frame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "id": "ebf6d843-dcc0-4e83-b063-94806c0bac17",
+ "metadata": {},
"outputs": [],
"source": [
- "# df1_customer_product"
+ "## Export\n",
+ "\n",
+ "# Write the merged table as CSV (index column omitted) under the 'projet-bdc2324-team1' bucket\n",
+ "BUCKET_OUT = \"projet-bdc2324-team1\"\n",
+ "FILE_KEY_OUT_S3 = \"1_Output/Company 1 - Segmentation base.csv\"\n",
+ "FILE_PATH_OUT_S3 = f\"{BUCKET_OUT}/{FILE_KEY_OUT_S3}\"\n",
+ "\n",
+ "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
+ "    df1_customer_product.to_csv(file_out, index = False)"
]
}
],