diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index b61b004..8d925b0 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 1, "id": "15103481-8d74-404c-aa09-7601fe7730da", "metadata": {}, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 2, "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4", "metadata": {}, "outputs": [], @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 3, "id": "a9b84234-d5df-4c43-a9cd-80cfe2f1e34d", "metadata": {}, "outputs": [], @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 4, "id": "699664b9-eee4-4f8d-a207-e524526560c5", "metadata": {}, "outputs": [], @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 5, "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed", "metadata": {}, "outputs": [], @@ -114,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 6, "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d", "metadata": {}, "outputs": [], @@ -155,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 7, "id": "7e7b90ce-da54-4f00-bc34-64c543b0858f", "metadata": {}, "outputs": [], @@ -177,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 8, "id": "03329e32-00a5-42c8-9470-75f7b6216ccd", "metadata": {}, "outputs": [], @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 9, "id": "b95464b1-26bc-4aac-84b4-45da83b92251", "metadata": {}, "outputs": [], @@ -232,14 +232,14 @@ " \n", " # Fusion avec achats\n", " ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n", - " ticket_information.drop(['purchase_id', 'id'], axis = 1, inplace=True)\n", + " ticket_information.drop(['id'], axis = 1, inplace=True)\n", "\n", " return ticket_information" ] }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 10, "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396", "metadata": {}, "outputs": [], @@ -249,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 11, "id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9", "metadata": {}, "outputs": [ @@ -275,6 +275,7 @@ " \n", " \n", " ticket_id\n", + " purchase_id\n", " product_id\n", " is_from_subscription\n", " supplier_name\n", @@ -288,6 +289,7 @@ " \n", " 0\n", " 13070859\n", + " 5107462\n", " 225251\n", " False\n", " vente en ligne\n", @@ -299,6 +301,7 @@ " \n", " 1\n", " 13070860\n", + " 5107462\n", " 224914\n", " False\n", " vente en ligne\n", @@ -310,6 +313,7 @@ " \n", " 2\n", " 13070861\n", + " 5107462\n", " 224914\n", " False\n", " vente en ligne\n", @@ -321,6 +325,7 @@ " \n", " 3\n", " 13070862\n", + " 5107462\n", " 224914\n", " False\n", " vente en ligne\n", @@ -332,6 +337,7 @@ " \n", " 4\n", " 13070863\n", + " 5107462\n", " 224914\n", " False\n", " vente en ligne\n", @@ -345,12 +351,12 @@ "" ], "text/plain": [ - " ticket_id product_id is_from_subscription supplier_name \\\n", - "0 13070859 225251 False vente en ligne \n", - "1 13070860 224914 False vente en ligne \n", - "2 13070861 224914 False vente en ligne \n", - "3 13070862 224914 False vente en ligne \n", - "4 13070863 224914 False vente en ligne \n", + " ticket_id purchase_id product_id is_from_subscription supplier_name \\\n", + "0 13070859 5107462 225251 False vente en ligne \n", + "1 13070860 5107462 224914 False vente en ligne \n", + "2 13070861 5107462 224914 False vente en ligne \n", + "3 13070862 5107462 224914 False vente en ligne \n", + "4 13070863 5107462 224914 False vente en ligne \n", "\n", " type_of_ticket_name children purchase_date customer_id \n", "0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 \n", @@ -360,7 +366,7 @@ "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 " ] }, - "execution_count": 90, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -379,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 12, "id": "baed146a-9d3a-4397-a812-3d50c9a2f038", "metadata": {}, "outputs": [], @@ -408,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 13, "id": "5fbfd88b-b94c-489c-9201-670e96e453e7", "metadata": {}, "outputs": [], @@ -426,7 +432,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 14, "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85", "metadata": {}, "outputs": [], @@ -451,7 +457,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 15, "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f", "metadata": {}, "outputs": [], @@ -461,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 16, "id": "c24457e7-3cad-451a-a65b-7373b656bd6e", "metadata": { "scrolled": true @@ -581,7 +587,7 @@ "4 404 2021-03-27 23:00:00+00:00 " ] }, - "execution_count": 95, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -608,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 17, "id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d", "metadata": {}, "outputs": [], @@ -619,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 18, "id": "607eb4b4-eed9-4b50-b823-f75c116dd37c", "metadata": {}, "outputs": [], @@ -690,7 +696,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 19, "id": "350b09b9-451f-4d47-81fe-f34b892db027", "metadata": {}, "outputs": [], @@ -778,7 +784,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 20, "id": "0fccc8ef-e575-4857-a401-94a7274394df", "metadata": {}, "outputs": [ @@ -931,7 +937,7 @@ "4 indiv entrées tp " ] }, - "execution_count": 99, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -943,7 +949,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 21, "id": "779d8aaf-6668-4f66-8852-847304407ea3", "metadata": {}, "outputs": [ @@ -1113,7 +1119,7 @@ "4 spectacle vivant mucem " ] }, - "execution_count": 100, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1125,7 +1131,7 @@ }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 22, "id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0", "metadata": {}, "outputs": [ @@ -1224,7 +1230,7 @@ "4 37 383 269 1" ] }, - "execution_count": 101, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1244,7 +1250,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 23, "id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba", "metadata": {}, "outputs": [], @@ -1272,7 +1278,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 24, "id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951", "metadata": {}, "outputs": [ @@ -1511,7 +1517,7 @@ "[5 rows x 21 columns]" ] }, - "execution_count": 103, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1523,7 +1529,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 26, "id": "98f78cd5-b694-4cc6-b033-20170aa13e8d", "metadata": {}, "outputs": [], @@ -1532,7 +1538,7 @@ "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", "\n", "# Selection des variables d'intérêts\n", - "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" + "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'purchase_id' ,'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" ] }, { @@ -1553,7 +1559,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 27, "id": "e2c88552-b863-47a2-be23-8d2898fb28bc", "metadata": {}, "outputs": [], @@ -1577,7 +1583,7 @@ " campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n", "\n", " # Remplir les NaN : nb_campaigns_opened\n", - " campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n", + " campaigns_reduced['nb_campaigns_opened'].fillna(0)\n", "\n", " # Remplir les NaT : time_to_open (??)\n", "\n", @@ -1587,7 +1593,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 28, "id": "24537647-bc29-4777-9848-ac4120a4aa60", "metadata": {}, "outputs": [], @@ -1597,7 +1603,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 29, "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3", "metadata": {}, "outputs": [ @@ -1677,7 +1683,7 @@ "4 6 20 0.0 NaT" ] }, - "execution_count": 107, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1696,7 +1702,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 30, "id": "b913a69e-3146-4919-b5f6-a6108532bffa", "metadata": {}, "outputs": [ @@ -1707,7 +1713,7 @@ " 'offre muséale groupe'], dtype=object)" ] }, - "execution_count": 108, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1718,7 +1724,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 31, "id": "2bda0b97-b28b-4070-a57d-aeab0e2f7dfe", "metadata": {}, "outputs": [], @@ -1729,7 +1735,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 34, "id": "043303fe-e90f-4689-a2a9-5d690555a045", "metadata": {}, "outputs": [], @@ -1746,9 +1752,10 @@ " prop_vente_internet = tickets_information_copy[tickets_information_copy['vente_internet'] == 1].groupby(['customer_id', 'event_type_id'])['ticket_id'].count().reset_index()\n", " prop_vente_internet.rename(columns = {'ticket_id' : 'nb_tickets_internet'}, inplace = True)\n", " \n", - " tickets_kpi = (tickets_information_copy[['event_type_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'amount', 'vente_internet']]\n", + " tickets_kpi = (tickets_information_copy[['event_type_id', 'customer_id', 'purchase_id' ,'ticket_id','supplier_name', 'purchase_date', 'amount', 'vente_internet']]\n", " .groupby(['customer_id', 'event_type_id']) \n", " .agg({'ticket_id': 'count', \n", + " 'purchase_id' : 'nunique',\n", " 'amount' : 'sum',\n", " 'supplier_name': 'nunique',\n", " 'vente_internet' : 'max',\n", @@ -1759,23 +1766,33 @@ " tickets_kpi.columns = tickets_kpi.columns.map('_'.join)\n", " \n", " tickets_kpi.rename(columns = {'ticket_id_count' : 'nb_tickets', \n", + " 'purchase_id_nunique' : 'nb_purchases',\n", " 'amount_sum' : 'total_amount',\n", " 'supplier_name_nunique' : 'nb_suppliers', \n", " 'customer_id_' : 'customer_id',\n", " 'event_type_id_' : 'event_type_id'}, inplace = True)\n", " \n", " tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n", + " tickets_kpi['time_between_purchase'] = tickets_kpi['time_between_purchase'] / np.timedelta64(1, 'D') # En nombre de jours\n", "\n", + " # Convertir date et en chiffre\n", + " max_date = tickets_kpi['purchase_date_max'].max()\n", + " tickets_kpi['purchase_date_max'] = (max_date - tickets_kpi['purchase_date_max']) / np.timedelta64(1, 'D')\n", + " tickets_kpi['purchase_date_min'] = (max_date - tickets_kpi['purchase_date_min']) / np.timedelta64(1, 'D')\n", + "\n", + " \n", " tickets_kpi = tickets_kpi.merge(prop_vente_internet, on = ['customer_id', 'event_type_id'], how = 'left')\n", " tickets_kpi['nb_tickets_internet'] = tickets_kpi['nb_tickets_internet'].fillna(0)\n", - " \n", + "\n", + " \n", + " \n", " return tickets_kpi\n", " " ] }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 35, "id": "5882234a-1ed5-4269-87a6-0d75613476e3", "metadata": {}, "outputs": [], @@ -1783,34 +1800,10 @@ "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_products_purchased_reduced)" ] }, - { - "cell_type": "markdown", - "id": "597b241e-a83d-4b7c-8ad7-eec50295dff2", - "metadata": {}, - "source": [ - "#### Exportation" - ] - }, { "cell_type": "code", - "execution_count": 112, - "id": "a4a2311d-8a72-4030-afd5-218004d5d2a5", - "metadata": {}, - "outputs": [], - "source": [ - "# Exportation vers 'projet-bdc2324-team1'\n", - "BUCKET_OUT = \"projet-bdc2324-team1\"\n", - "FILE_KEY_OUT_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n", - "FILE_PATH_OUT_S3 = BUCKET_OUT + \"/\" + FILE_KEY_OUT_S3\n", - "\n", - "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n", - " df1_tickets_kpi.to_csv(file_out, index = False)" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad", + "execution_count": 36, + "id": "5f2046cf-ffde-4521-91e7-b727b8bc17f5", "metadata": {}, "outputs": [ { @@ -1837,6 +1830,7 @@ " customer_id\n", " event_type_id\n", " nb_tickets\n", + " nb_purchases\n", " total_amount\n", " nb_suppliers\n", " vente_internet_max\n", @@ -1848,104 +1842,125 @@ " \n", " \n", " \n", - " 1\n", - " 1\n", - " 4\n", - " 453242\n", - " 3248965.5\n", - " 6\n", - " 1\n", - " 2013-09-23 14:45:01+00:00\n", - " 2023-11-03 14:11:01+00:00\n", - " 3692 days 23:26:00\n", - " 2988.0\n", - " \n", - " \n", " 0\n", " 1\n", " 2\n", " 384226\n", + " 194790\n", " 2686540.5\n", " 7\n", " 1\n", - " 2014-12-03 14:55:37+00:00\n", - " 2023-11-04 15:12:16+00:00\n", - " 3258 days 00:16:39\n", + " 3262.190868\n", + " 4.179306\n", + " 3258.011562\n", " 51.0\n", " \n", " \n", - " 3\n", + " 1\n", " 1\n", + " 4\n", + " 453242\n", + " 228945\n", + " 3248965.5\n", " 6\n", - " 217356\n", - " 1435871.5\n", - " 5\n", " 1\n", - " 2017-01-01 02:20:08+00:00\n", - " 2019-12-31 02:20:06+00:00\n", - " 1093 days 23:59:58\n", - " 5.0\n", + " 3698.198229\n", + " 5.221840\n", + " 3692.976389\n", + " 2988.0\n", " \n", " \n", " 2\n", " 1\n", " 5\n", " 201750\n", + " 107110\n", " 1459190.0\n", " 6\n", " 1\n", - " 2013-06-10 10:37:58+00:00\n", - " 2023-11-08 15:59:45+00:00\n", - " 3803 days 05:21:47\n", + " 3803.369792\n", + " 0.146331\n", + " 3803.223461\n", " 9.0\n", " \n", " \n", - " 5032\n", - " 6733\n", - " 6\n", - " 14208\n", - " 0.0\n", - " 3\n", + " 3\n", " 1\n", - " 2017-01-11 15:00:54+00:00\n", - " 2019-11-27 09:47:06+00:00\n", - " 1049 days 18:46:12\n", - " 13497.0\n", + " 6\n", + " 217356\n", + " 111786\n", + " 1435871.5\n", + " 5\n", + " 1\n", + " 2502.715509\n", + " 1408.715532\n", + " 1093.999977\n", + " 5.0\n", + " \n", + " \n", + " 4\n", + " 2\n", + " 2\n", + " 143\n", + " 143\n", + " 0.0\n", + " 1\n", + " 0\n", + " 2041.274549\n", + " 1340.308160\n", + " 700.966389\n", + " 0.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n", - "1 1 4 453242 3248965.5 6 \n", - "0 1 2 384226 2686540.5 7 \n", - "3 1 6 217356 1435871.5 5 \n", - "2 1 5 201750 1459190.0 6 \n", - "5032 6733 6 14208 0.0 3 \n", + " customer_id event_type_id nb_tickets nb_purchases total_amount \\\n", + "0 1 2 384226 194790 2686540.5 \n", + "1 1 4 453242 228945 3248965.5 \n", + "2 1 5 201750 107110 1459190.0 \n", + "3 1 6 217356 111786 1435871.5 \n", + "4 2 2 143 143 0.0 \n", "\n", - " vente_internet_max purchase_date_min purchase_date_max \\\n", - "1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n", - "0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n", - "3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n", - "2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n", - "5032 1 2017-01-11 15:00:54+00:00 2019-11-27 09:47:06+00:00 \n", + " nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 7 1 3262.190868 4.179306 \n", + "1 6 1 3698.198229 5.221840 \n", + "2 6 1 3803.369792 0.146331 \n", + "3 5 1 2502.715509 1408.715532 \n", + "4 1 0 2041.274549 1340.308160 \n", "\n", - " time_between_purchase nb_tickets_internet \n", - "1 3692 days 23:26:00 2988.0 \n", - "0 3258 days 00:16:39 51.0 \n", - "3 1093 days 23:59:58 5.0 \n", - "2 3803 days 05:21:47 9.0 \n", - "5032 1049 days 18:46:12 13497.0 " + " time_between_purchase nb_tickets_internet \n", + "0 3258.011562 51.0 \n", + "1 3692.976389 2988.0 \n", + "2 3803.223461 9.0 \n", + "3 1093.999977 5.0 \n", + "4 700.966389 0.0 " ] }, - "execution_count": 113, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df1_tickets_kpi.sort_values(by='nb_tickets', ascending=False).head(5)" + "df1_tickets_kpi.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "a4a2311d-8a72-4030-afd5-218004d5d2a5", + "metadata": {}, + "outputs": [], + "source": [ + "# Exportation vers 'projet-bdc2324-team1'\n", + "BUCKET_OUT = \"projet-bdc2324-team1\"\n", + "FILE_KEY_OUT_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n", + "FILE_PATH_OUT_S3 = BUCKET_OUT + \"/\" + FILE_KEY_OUT_S3\n", + "\n", + "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n", + " df1_tickets_kpi.to_csv(file_out, index = False)" ] }, { @@ -1958,7 +1973,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 39, "id": "273857e0-7112-4294-8ba6-3c39c5cbc13a", "metadata": {}, "outputs": [ @@ -1986,6 +2001,7 @@ " customer_id\n", " event_type_id\n", " nb_tickets\n", + " nb_purchases\n", " total_amount\n", " nb_suppliers\n", " vente_internet_max\n", @@ -2001,12 +2017,13 @@ " 1\n", " 2\n", " 384226\n", + " 194790\n", " 2686540.5\n", " 7\n", " 1\n", - " 2014-12-03 14:55:37+00:00\n", - " 2023-11-04 15:12:16+00:00\n", - " 3258 days 00:16:39\n", + " 3262.190868\n", + " 4.179306\n", + " 3258.011562\n", " 51.0\n", " \n", " \n", @@ -2014,12 +2031,13 @@ " 1\n", " 4\n", " 453242\n", + " 228945\n", " 3248965.5\n", " 6\n", " 1\n", - " 2013-09-23 14:45:01+00:00\n", - " 2023-11-03 14:11:01+00:00\n", - " 3692 days 23:26:00\n", + " 3698.198229\n", + " 5.221840\n", + " 3692.976389\n", " 2988.0\n", " \n", " \n", @@ -2027,12 +2045,13 @@ " 1\n", " 5\n", " 201750\n", + " 107110\n", " 1459190.0\n", " 6\n", " 1\n", - " 2013-06-10 10:37:58+00:00\n", - " 2023-11-08 15:59:45+00:00\n", - " 3803 days 05:21:47\n", + " 3803.369792\n", + " 0.146331\n", + " 3803.223461\n", " 9.0\n", " \n", " \n", @@ -2040,12 +2059,13 @@ " 1\n", " 6\n", " 217356\n", + " 111786\n", " 1435871.5\n", " 5\n", " 1\n", - " 2017-01-01 02:20:08+00:00\n", - " 2019-12-31 02:20:06+00:00\n", - " 1093 days 23:59:58\n", + " 2502.715509\n", + " 1408.715532\n", + " 1093.999977\n", " 5.0\n", " \n", " \n", @@ -2053,12 +2073,13 @@ " 2\n", " 2\n", " 143\n", + " 143\n", " 0.0\n", " 1\n", " 0\n", - " 2018-04-07 12:55:07+00:00\n", - " 2020-03-08 12:06:43+00:00\n", - " 700 days 23:11:36\n", + " 2041.274549\n", + " 1340.308160\n", + " 700.966389\n", " 0.0\n", " \n", " \n", @@ -2066,29 +2087,29 @@ "" ], "text/plain": [ - " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n", - "0 1 2 384226 2686540.5 7 \n", - "1 1 4 453242 3248965.5 6 \n", - "2 1 5 201750 1459190.0 6 \n", - "3 1 6 217356 1435871.5 5 \n", - "4 2 2 143 0.0 1 \n", + " customer_id event_type_id nb_tickets nb_purchases total_amount \\\n", + "0 1 2 384226 194790 2686540.5 \n", + "1 1 4 453242 228945 3248965.5 \n", + "2 1 5 201750 107110 1459190.0 \n", + "3 1 6 217356 111786 1435871.5 \n", + "4 2 2 143 143 0.0 \n", "\n", - " vente_internet_max purchase_date_min purchase_date_max \\\n", - "0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n", - "1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n", - "2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n", - "3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n", - "4 0 2018-04-07 12:55:07+00:00 2020-03-08 12:06:43+00:00 \n", + " nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 7 1 3262.190868 4.179306 \n", + "1 6 1 3698.198229 5.221840 \n", + "2 6 1 3803.369792 0.146331 \n", + "3 5 1 2502.715509 1408.715532 \n", + "4 1 0 2041.274549 1340.308160 \n", "\n", - " time_between_purchase nb_tickets_internet \n", - "0 3258 days 00:16:39 51.0 \n", - "1 3692 days 23:26:00 2988.0 \n", - "2 3803 days 05:21:47 9.0 \n", - "3 1093 days 23:59:58 5.0 \n", - "4 700 days 23:11:36 0.0 " + " time_between_purchase nb_tickets_internet \n", + "0 3258.011562 51.0 \n", + "1 3692.976389 2988.0 \n", + "2 3803.223461 9.0 \n", + "3 1093.999977 5.0 \n", + "4 700.966389 0.0 " ] }, - "execution_count": 114, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -2099,7 +2120,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 40, "id": "449731f3-340f-4648-8210-4622c7dbc174", "metadata": {}, "outputs": [ @@ -2166,7 +2187,7 @@ "3 6 formule adhésion 6.439463" ] }, - "execution_count": 115, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -2181,7 +2202,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 41, "id": "b54bd9e8-3cad-453b-8e58-bf6d047912eb", "metadata": {}, "outputs": [ @@ -2209,6 +2230,7 @@ " customer_id\n", " event_type_id\n", " nb_tickets\n", + " nb_purchases\n", " total_amount\n", " nb_suppliers\n", " vente_internet_max\n", @@ -2226,12 +2248,13 @@ " 1\n", " 2\n", " 384226\n", + " 194790\n", " 2686540.5\n", " 7\n", " 1\n", - " 2014-12-03 14:55:37+00:00\n", - " 2023-11-04 15:12:16+00:00\n", - " 3258 days 00:16:39\n", + " 3262.190868\n", + " 4.179306\n", + " 3258.011562\n", " 51.0\n", " offre muséale individuel\n", " 6.150659\n", @@ -2241,12 +2264,13 @@ " 1\n", " 4\n", " 453242\n", + " 228945\n", " 3248965.5\n", " 6\n", " 1\n", - " 2013-09-23 14:45:01+00:00\n", - " 2023-11-03 14:11:01+00:00\n", - " 3692 days 23:26:00\n", + " 3698.198229\n", + " 5.221840\n", + " 3692.976389\n", " 2988.0\n", " spectacle vivant\n", " 7.762474\n", @@ -2256,12 +2280,13 @@ " 1\n", " 5\n", " 201750\n", + " 107110\n", " 1459190.0\n", " 6\n", " 1\n", - " 2013-06-10 10:37:58+00:00\n", - " 2023-11-08 15:59:45+00:00\n", - " 3803 days 05:21:47\n", + " 3803.369792\n", + " 0.146331\n", + " 3803.223461\n", " 9.0\n", " offre muséale groupe\n", " 4.452618\n", @@ -2271,12 +2296,13 @@ " 1\n", " 6\n", " 217356\n", + " 111786\n", " 1435871.5\n", " 5\n", " 1\n", - " 2017-01-01 02:20:08+00:00\n", - " 2019-12-31 02:20:06+00:00\n", - " 1093 days 23:59:58\n", + " 2502.715509\n", + " 1408.715532\n", + " 1093.999977\n", " 5.0\n", " formule adhésion\n", " 6.439463\n", @@ -2286,12 +2312,13 @@ " 2\n", " 2\n", " 143\n", + " 143\n", " 0.0\n", " 1\n", " 0\n", - " 2018-04-07 12:55:07+00:00\n", - " 2020-03-08 12:06:43+00:00\n", - " 700 days 23:11:36\n", + " 2041.274549\n", + " 1340.308160\n", + " 700.966389\n", " 0.0\n", " offre muséale individuel\n", " 6.150659\n", @@ -2301,26 +2328,26 @@ "" ], "text/plain": [ - " customer_id event_type_id nb_tickets total_amount nb_suppliers \\\n", - "0 1 2 384226 2686540.5 7 \n", - "1 1 4 453242 3248965.5 6 \n", - "2 1 5 201750 1459190.0 6 \n", - "3 1 6 217356 1435871.5 5 \n", - "4 2 2 143 0.0 1 \n", + " customer_id event_type_id nb_tickets nb_purchases total_amount \\\n", + "0 1 2 384226 194790 2686540.5 \n", + "1 1 4 453242 228945 3248965.5 \n", + "2 1 5 201750 107110 1459190.0 \n", + "3 1 6 217356 111786 1435871.5 \n", + "4 2 2 143 143 0.0 \n", "\n", - " vente_internet_max purchase_date_min purchase_date_max \\\n", - "0 1 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n", - "1 1 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n", - "2 1 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n", - "3 1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n", - "4 0 2018-04-07 12:55:07+00:00 2020-03-08 12:06:43+00:00 \n", + " nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 7 1 3262.190868 4.179306 \n", + "1 6 1 3698.198229 5.221840 \n", + "2 6 1 3803.369792 0.146331 \n", + "3 5 1 2502.715509 1408.715532 \n", + "4 1 0 2041.274549 1340.308160 \n", "\n", - " time_between_purchase nb_tickets_internet name_event_types \\\n", - "0 3258 days 00:16:39 51.0 offre muséale individuel \n", - "1 3692 days 23:26:00 2988.0 spectacle vivant \n", - "2 3803 days 05:21:47 9.0 offre muséale groupe \n", - "3 1093 days 23:59:58 5.0 formule adhésion \n", - "4 700 days 23:11:36 0.0 offre muséale individuel \n", + " time_between_purchase nb_tickets_internet name_event_types \\\n", + "0 3258.011562 51.0 offre muséale individuel \n", + "1 3692.976389 2988.0 spectacle vivant \n", + "2 3803.223461 9.0 offre muséale groupe \n", + "3 1093.999977 5.0 formule adhésion \n", + "4 700.966389 0.0 offre muséale individuel \n", "\n", " avg_amount \n", "0 6.150659 \n", @@ -2330,7 +2357,7 @@ "4 6.150659 " ] }, - "execution_count": 116, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -2342,17 +2369,405 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "id": "2d6afe74-2517-478b-a99c-da9c7bd2edd4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguage...fidelityaverage_purchase_delayaverage_price_basketaverage_ticket_baskettotal_pricepurchase_countfirst_buying_datecountryagetenant_id
012751NaN2False1TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
112825NaN2False2TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
211261NaN2False1TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
313071NaN2False2TrueTrueNaNNaNNaN...0NaNNaNNaNNaN0NaTfrNaN1311
4653061NaN10False2TrueFalseNaNNaNNaN...0NaNNaNNaNNaN0NaTNaNNaN1311
..................................................................
151861295252NaN10False2TrueFalseNaNNaNNaN...0NaNNaNNaNNaN0NaTNaNNaN1311
151862295271NaN10False2TrueFalseNaNNaNNaN...0NaNNaNNaNNaN0NaTNaNNaN1311
151863295275NaN10False2TrueFalseNaNNaNNaN...0NaNNaNNaNNaN0NaTNaNNaN1311
151864295366NaN2False2TrueFalseNaNNaNNaN...13.033.03.033.012021-05-26 17:20:37+00:00frNaN1311
151865295368NaN2False2TrueFalseNaNNaNNaN...16.022.02.022.012021-05-26 17:35:38+00:00frNaN1311
\n", + "

151866 rows × 25 columns

\n", + "
" + ], + "text/plain": [ + " customer_id birthdate street_id is_partner gender is_email_true \\\n", + "0 12751 NaN 2 False 1 True \n", + "1 12825 NaN 2 False 2 True \n", + "2 11261 NaN 2 False 1 True \n", + "3 13071 NaN 2 False 2 True \n", + "4 653061 NaN 10 False 2 True \n", + "... ... ... ... ... ... ... \n", + "151861 295252 NaN 10 False 2 True \n", + "151862 295271 NaN 10 False 2 True \n", + "151863 295275 NaN 10 False 2 True \n", + "151864 295366 NaN 2 False 2 True \n", + "151865 295368 NaN 2 False 2 True \n", + "\n", + " opt_in structure_id profession language ... fidelity \\\n", + "0 True NaN NaN NaN ... 0 \n", + "1 True NaN NaN NaN ... 0 \n", + "2 True NaN NaN NaN ... 0 \n", + "3 True NaN NaN NaN ... 0 \n", + "4 False NaN NaN NaN ... 0 \n", + "... ... ... ... ... ... ... \n", + "151861 False NaN NaN NaN ... 0 \n", + "151862 False NaN NaN NaN ... 0 \n", + "151863 False NaN NaN NaN ... 0 \n", + "151864 False NaN NaN NaN ... 1 \n", + "151865 False NaN NaN NaN ... 1 \n", + "\n", + " average_purchase_delay average_price_basket average_ticket_basket \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "... ... ... ... \n", + "151861 NaN NaN NaN \n", + "151862 NaN NaN NaN \n", + "151863 NaN NaN NaN \n", + "151864 3.0 33.0 3.0 \n", + "151865 6.0 22.0 2.0 \n", + "\n", + " total_price purchase_count first_buying_date country age \\\n", + "0 NaN 0 NaT fr NaN \n", + "1 NaN 0 NaT fr NaN \n", + "2 NaN 0 NaT fr NaN \n", + "3 NaN 0 NaT fr NaN \n", + "4 NaN 0 NaT NaN NaN \n", + "... ... ... ... ... ... \n", + "151861 NaN 0 NaT NaN NaN \n", + "151862 NaN 0 NaT NaN NaN \n", + "151863 NaN 0 NaT NaN NaN \n", + "151864 33.0 1 2021-05-26 17:20:37+00:00 fr NaN \n", + "151865 22.0 1 2021-05-26 17:35:38+00:00 fr NaN \n", + "\n", + " tenant_id \n", + "0 1311 \n", + "1 1311 \n", + "2 1311 \n", + "3 1311 \n", + "4 1311 \n", + "... ... \n", + "151861 1311 \n", + "151862 1311 \n", + "151863 1311 \n", + "151864 1311 \n", + "151865 1311 \n", + "\n", + "[151866 rows x 25 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df1_customerplus_clean" ] }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 43, "id": "83230baa-9a8a-4614-b629-e99c2505c696", "metadata": {}, "outputs": [ @@ -2388,7 +2803,7 @@ " profession\n", " language\n", " ...\n", - " nb_tickets\n", + " nb_purchases\n", " total_amount\n", " nb_suppliers\n", " vente_internet_max\n", @@ -2414,13 +2829,13 @@ " NaN\n", " NaN\n", " ...\n", - " 384226.0\n", + " 194790.0\n", " 2686540.5\n", " 7.0\n", " 1.0\n", - " 2014-12-03 14:55:37+00:00\n", - " 2023-11-04 15:12:16+00:00\n", - " 3258 days 00:16:39\n", + " 3262.190868\n", + " 4.179306\n", + " 3258.011562\n", " 51.0\n", " offre muséale individuel\n", " 6.150659\n", @@ -2438,13 +2853,13 @@ " NaN\n", " NaN\n", " ...\n", - " 217356.0\n", + " 111786.0\n", " 1435871.5\n", " 5.0\n", " 1.0\n", - " 2017-01-01 02:20:08+00:00\n", - " 2019-12-31 02:20:06+00:00\n", - " 1093 days 23:59:58\n", + " 2502.715509\n", + " 1408.715532\n", + " 1093.999977\n", " 5.0\n", " formule adhésion\n", " 6.439463\n", @@ -2462,13 +2877,13 @@ " NaN\n", " NaN\n", " ...\n", - " 453242.0\n", + " 228945.0\n", " 3248965.5\n", " 6.0\n", " 1.0\n", - " 2013-09-23 14:45:01+00:00\n", - " 2023-11-03 14:11:01+00:00\n", - " 3692 days 23:26:00\n", + " 3698.198229\n", + " 5.221840\n", + " 3692.976389\n", " 2988.0\n", " spectacle vivant\n", " 7.762474\n", @@ -2486,13 +2901,13 @@ " NaN\n", " NaN\n", " ...\n", - " 201750.0\n", + " 107110.0\n", " 1459190.0\n", " 6.0\n", " 1.0\n", - " 2013-06-10 10:37:58+00:00\n", - " 2023-11-08 15:59:45+00:00\n", - " 3803 days 05:21:47\n", + " 3803.369792\n", + " 0.146331\n", + " 3803.223461\n", " 9.0\n", " offre muséale groupe\n", " 4.452618\n", @@ -2514,16 +2929,16 @@ " 0.0\n", " 1.0\n", " 0.0\n", - " 2019-03-09 13:14:21+00:00\n", - " 2019-11-13 11:29:55+00:00\n", - " 248 days 22:15:34\n", + " 1705.261192\n", + " 1456.333715\n", + " 248.927477\n", " 0.0\n", " formule adhésion\n", " 6.439463\n", " \n", " \n", "\n", - "

5 rows × 36 columns

\n", + "

5 rows × 37 columns

\n", "" ], "text/plain": [ @@ -2534,45 +2949,38 @@ "59899 1 NaN 2 False 2 True \n", "134695 2 NaN 2 False 1 True \n", "\n", - " opt_in structure_id profession language ... nb_tickets \\\n", - "59897 False NaN NaN NaN ... 384226.0 \n", - "59900 False NaN NaN NaN ... 217356.0 \n", - "59898 False NaN NaN NaN ... 453242.0 \n", - "59899 False NaN NaN NaN ... 201750.0 \n", - "134695 True NaN NaN NaN ... 164.0 \n", + " opt_in structure_id profession language ... nb_purchases \\\n", + "59897 False NaN NaN NaN ... 194790.0 \n", + "59900 False NaN NaN NaN ... 111786.0 \n", + "59898 False NaN NaN NaN ... 228945.0 \n", + "59899 False NaN NaN NaN ... 107110.0 \n", + "134695 True NaN NaN NaN ... 164.0 \n", "\n", - " total_amount nb_suppliers vente_internet_max \\\n", - "59897 2686540.5 7.0 1.0 \n", - "59900 1435871.5 5.0 1.0 \n", - "59898 3248965.5 6.0 1.0 \n", - "59899 1459190.0 6.0 1.0 \n", - "134695 0.0 1.0 0.0 \n", + " total_amount nb_suppliers vente_internet_max purchase_date_min \\\n", + "59897 2686540.5 7.0 1.0 3262.190868 \n", + "59900 1435871.5 5.0 1.0 2502.715509 \n", + "59898 3248965.5 6.0 1.0 3698.198229 \n", + "59899 1459190.0 6.0 1.0 3803.369792 \n", + "134695 0.0 1.0 0.0 1705.261192 \n", "\n", - " purchase_date_min purchase_date_max \\\n", - "59897 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 \n", - "59900 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 \n", - "59898 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 \n", - "59899 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 \n", - "134695 2019-03-09 13:14:21+00:00 2019-11-13 11:29:55+00:00 \n", + " purchase_date_max time_between_purchase nb_tickets_internet \\\n", + "59897 4.179306 3258.011562 51.0 \n", + "59900 1408.715532 1093.999977 5.0 \n", + "59898 5.221840 3692.976389 2988.0 \n", + "59899 0.146331 3803.223461 9.0 \n", + "134695 1456.333715 248.927477 0.0 \n", "\n", - " time_between_purchase nb_tickets_internet name_event_types \\\n", - "59897 3258 days 00:16:39 51.0 offre muséale individuel \n", - "59900 1093 days 23:59:58 5.0 formule adhésion \n", - "59898 3692 days 23:26:00 2988.0 spectacle vivant \n", - "59899 3803 days 05:21:47 9.0 offre muséale groupe \n", - "134695 248 days 22:15:34 0.0 formule adhésion \n", + " name_event_types avg_amount \n", + "59897 offre muséale individuel 6.150659 \n", + "59900 formule adhésion 6.439463 \n", + "59898 spectacle vivant 7.762474 \n", + "59899 offre muséale groupe 4.452618 \n", + "134695 formule adhésion 6.439463 \n", "\n", - " avg_amount \n", - "59897 6.150659 \n", - "59900 6.439463 \n", - "59898 7.762474 \n", - "59899 4.452618 \n", - "134695 6.439463 \n", - "\n", - "[5 rows x 36 columns]" + "[5 rows x 37 columns]" ] }, - "execution_count": 120, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -2586,7 +2994,7 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 44, "id": "433921de-03ad-4024-9462-ecd267db1756", "metadata": {}, "outputs": [ @@ -2649,9 +3057,9 @@ " NaN\n", " ...\n", " 1.0\n", - " 2014-12-03 14:55:37+00:00\n", - " 2023-11-04 15:12:16+00:00\n", - " 3258 days 00:16:39\n", + " 3262.190868\n", + " 4.179306\n", + " 3258.011562\n", " 51.0\n", " offre muséale individuel\n", " 6.150659\n", @@ -2673,9 +3081,9 @@ " NaN\n", " ...\n", " 1.0\n", - " 2017-01-01 02:20:08+00:00\n", - " 2019-12-31 02:20:06+00:00\n", - " 1093 days 23:59:58\n", + " 2502.715509\n", + " 1408.715532\n", + " 1093.999977\n", " 5.0\n", " formule adhésion\n", " 6.439463\n", @@ -2697,9 +3105,9 @@ " NaN\n", " ...\n", " 1.0\n", - " 2013-09-23 14:45:01+00:00\n", - " 2023-11-03 14:11:01+00:00\n", - " 3692 days 23:26:00\n", + " 3698.198229\n", + " 5.221840\n", + " 3692.976389\n", " 2988.0\n", " spectacle vivant\n", " 7.762474\n", @@ -2721,9 +3129,9 @@ " NaN\n", " ...\n", " 1.0\n", - " 2013-06-10 10:37:58+00:00\n", - " 2023-11-08 15:59:45+00:00\n", - " 3803 days 05:21:47\n", + " 3803.369792\n", + " 0.146331\n", + " 3803.223461\n", " 9.0\n", " offre muséale groupe\n", " 4.452618\n", @@ -2745,9 +3153,9 @@ " NaN\n", " ...\n", " 0.0\n", - " 2019-03-09 13:14:21+00:00\n", - " 2019-11-13 11:29:55+00:00\n", - " 248 days 22:15:34\n", + " 1705.261192\n", + " 1456.333715\n", + " 248.927477\n", " 0.0\n", " formule adhésion\n", " 6.439463\n", @@ -2757,7 +3165,7 @@ " \n", " \n", "\n", - "

5 rows × 39 columns

\n", + "

5 rows × 40 columns

\n", "" ], "text/plain": [ @@ -2775,12 +3183,12 @@ "3 False NaN NaN NaN ... 1.0 \n", "4 True NaN NaN NaN ... 0.0 \n", "\n", - " purchase_date_min purchase_date_max time_between_purchase \\\n", - "0 2014-12-03 14:55:37+00:00 2023-11-04 15:12:16+00:00 3258 days 00:16:39 \n", - "1 2017-01-01 02:20:08+00:00 2019-12-31 02:20:06+00:00 1093 days 23:59:58 \n", - "2 2013-09-23 14:45:01+00:00 2023-11-03 14:11:01+00:00 3692 days 23:26:00 \n", - "3 2013-06-10 10:37:58+00:00 2023-11-08 15:59:45+00:00 3803 days 05:21:47 \n", - "4 2019-03-09 13:14:21+00:00 2019-11-13 11:29:55+00:00 248 days 22:15:34 \n", + " purchase_date_min purchase_date_max time_between_purchase \\\n", + "0 3262.190868 4.179306 3258.011562 \n", + "1 2502.715509 1408.715532 1093.999977 \n", + "2 3698.198229 5.221840 3692.976389 \n", + "3 3803.369792 0.146331 3803.223461 \n", + "4 1705.261192 1456.333715 248.927477 \n", "\n", " nb_tickets_internet name_event_types avg_amount nb_campaigns \\\n", "0 51.0 offre muséale individuel 6.150659 NaN \n", @@ -2796,10 +3204,10 @@ "3 NaN NaT \n", "4 0.0 NaT \n", "\n", - "[5 rows x 39 columns]" + "[5 rows x 40 columns]" ] }, - "execution_count": 123, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -2813,7 +3221,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 45, "id": "25e54131-6835-4e94-86d3-1a78520ed7bc", "metadata": {}, "outputs": [], @@ -2839,9 +3247,65 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 46, "id": "8710611c-7eb8-45ca-bdcc-009f4081f9e2", "metadata": {}, + "outputs": [], + "source": [ + "# Fusion avec KPI campaigns liés au customer\n", + "#df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", + "#df1_customer.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59", + "metadata": {}, + "outputs": [], + "source": [ + "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n", + "print(\"shape : \", df1_customer_product.shape)\n", + "df1_customer_product.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd", + "metadata": {}, + "outputs": [], + "source": [ + "df1_customer_product.to_csv(\"customer_product.csv\", index = False)" + ] + }, + { + "cell_type": "markdown", + "id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7", + "metadata": {}, + "source": [ + "# Fusion des bases locales" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b", + "metadata": {}, + "outputs": [], + "source": [ + "# Fusion avec KPI liés au customer\n", + "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", + "\n", + "# Fill NaN values\n", + "df1_customer[['nb_campaigns', 'nb_campaigns_opened']] = df1_customer[['nb_campaigns', 'nb_campaigns_opened']].fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "d53825e4-6453-45bc-94f2-7b2504ec4afb", + "metadata": {}, "outputs": [ { "data": { @@ -2908,8 +3372,8 @@ " fr\n", " NaN\n", " 1311\n", - " NaN\n", - " NaN\n", + " 0.0\n", + " 0.0\n", " NaT\n", " \n", " \n", @@ -2932,8 +3396,8 @@ " fr\n", " NaN\n", " 1311\n", - " NaN\n", - " NaN\n", + " 0.0\n", + " 0.0\n", " NaT\n", " \n", " \n", @@ -2956,8 +3420,8 @@ " fr\n", " NaN\n", " 1311\n", - " NaN\n", - " NaN\n", + " 0.0\n", + " 0.0\n", " NaT\n", " \n", " \n", @@ -2980,8 +3444,8 @@ " fr\n", " NaN\n", " 1311\n", - " NaN\n", - " NaN\n", + " 0.0\n", + " 0.0\n", " NaT\n", " \n", " \n", @@ -3036,300 +3500,86 @@ "4 NaN 0 NaT NaN NaN 1311 \n", "\n", " nb_campaigns nb_campaigns_opened time_to_open \n", - "0 NaN NaN NaT \n", - "1 NaN NaN NaT \n", - "2 NaN NaN NaT \n", - "3 NaN NaN NaT \n", + "0 0.0 0.0 NaT \n", + "1 0.0 0.0 NaT \n", + "2 0.0 0.0 NaT \n", + "3 0.0 0.0 NaT \n", "4 80.0 2.0 0 days 19:53:02.500000 \n", "\n", "[5 rows x 28 columns]" ] }, - "execution_count": 36, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Fusion avec KPI campaigns liés au customer\n", - "#df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", - "#df1_customer.head()" + "df1_customer.head()" ] }, { "cell_type": "code", - "execution_count": 37, - "id": "a89fad43-ee68-4081-9384-3e9f08ec6a59", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shape : (156289, 31)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguage...first_buying_datecountryagetenant_idnb_campaignsnb_campaigns_openedtime_to_openevent_type_idnb_ticketsavg_amount
012751NaN2False1TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
112825NaN2False2TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
211261NaN2False1TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
313071NaN2False2TrueTrueNaNNaNNaN...NaTfrNaN1311NaNNaNNaTNaNNaNNaN
4653061NaN10False2TrueFalseNaNNaNNaN...NaTNaNNaN131180.02.00 days 19:53:02.500000NaNNaNNaN
\n", - "

5 rows × 31 columns

\n", - "
" - ], - "text/plain": [ - " customer_id birthdate street_id is_partner gender is_email_true \\\n", - "0 12751 NaN 2 False 1 True \n", - "1 12825 NaN 2 False 2 True \n", - "2 11261 NaN 2 False 1 True \n", - "3 13071 NaN 2 False 2 True \n", - "4 653061 NaN 10 False 2 True \n", - "\n", - " opt_in structure_id profession language ... first_buying_date country \\\n", - "0 True NaN NaN NaN ... NaT fr \n", - "1 True NaN NaN NaN ... NaT fr \n", - "2 True NaN NaN NaN ... NaT fr \n", - "3 True NaN NaN NaN ... NaT fr \n", - "4 False NaN NaN NaN ... NaT NaN \n", - "\n", - " age tenant_id nb_campaigns nb_campaigns_opened time_to_open \\\n", - "0 NaN 1311 NaN NaN NaT \n", - "1 NaN 1311 NaN NaN NaT \n", - "2 NaN 1311 NaN NaN NaT \n", - "3 NaN 1311 NaN NaN NaT \n", - "4 NaN 1311 80.0 2.0 0 days 19:53:02.500000 \n", - "\n", - " event_type_id nb_tickets avg_amount \n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - "[5 rows x 31 columns]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_customer_product = pd.merge(df1_customer, nb_tickets, on = 'customer_id', how = 'left')\n", - "print(\"shape : \", df1_customer_product.shape)\n", - "df1_customer_product.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "a19fec00-4ece-400c-937c-ce5cd8daccfd", - "metadata": {}, - "outputs": [], - "source": [ - "df1_customer_product.to_csv(\"customer_product.csv\", index = False)" - ] - }, - { - "cell_type": "markdown", - "id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7", - "metadata": {}, - "source": [ - "# Fusion des bases locales" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b", - "metadata": {}, - "outputs": [], - "source": [ - "# Fusion avec KPI liés au customer\n", - "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')" - ] - }, - { - "cell_type": "code", - "execution_count": 40, + "execution_count": 67, "id": "1e42a790-b215-4107-a969-85005da06ebd", "metadata": {}, "outputs": [], "source": [ "# Fusion avec KPI liés au comportement d'achat\n", - "df1_customer_product = pd.merge(df1_tickets_kpi, df1_customer, on = 'customer_id', how = 'outer')" + "df1_customer_product = pd.merge(df1_tickets_kpi, df1_customer, on = 'customer_id', how = 'outer')\n", + "\n", + "# Fill NaN values\n", + "df1_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']] = df1_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']].fillna(0)" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 66, "id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['customer_id', 'event_type_id', 'nb_tickets', 'nb_purchases',\n", + " 'total_amount', 'nb_suppliers', 'vente_internet_max',\n", + " 'purchase_date_min', 'purchase_date_max', 'time_between_purchase',\n", + " 'nb_tickets_internet', 'name_event_types', 'avg_amount', 'birthdate',\n", + " 'street_id', 'is_partner', 'gender', 'is_email_true', 'opt_in',\n", + " 'structure_id', 'profession', 'language', 'mcp_contact_id',\n", + " 'last_buying_date', 'max_price', 'ticket_sum', 'average_price',\n", + " 'fidelity', 'average_purchase_delay', 'average_price_basket',\n", + " 'average_ticket_basket', 'total_price', 'purchase_count',\n", + " 'first_buying_date', 'country', 'age', 'tenant_id', 'nb_campaigns',\n", + " 'nb_campaigns_opened', 'time_to_open'],\n", + " dtype='object')" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_customer_product" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "ebf6d843-dcc0-4e83-b063-94806c0bac17", + "metadata": {}, "outputs": [], "source": [ - "# df1_customer_product" + "## Exportation\n", + "\n", + "# Exportation vers 'projet-bdc2324-team1'\n", + "BUCKET_OUT = \"projet-bdc2324-team1\"\n", + "FILE_KEY_OUT_S3 = \"1_Output/Company 1 - Segmentation base.csv\"\n", + "FILE_PATH_OUT_S3 = BUCKET_OUT + \"/\" + FILE_KEY_OUT_S3\n", + "\n", + "with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n", + " df1_customer_product.to_csv(file_out, index = False)" ] } ],