diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index 5077370..6c2d968 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 208, "id": "15103481-8d74-404c-aa09-7601fe7730da", "metadata": {}, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 209, "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4", "metadata": {}, "outputs": [], @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 210, "id": "a9b84234-d5df-4c43-a9cd-80cfe2f1e34d", "metadata": {}, "outputs": [], @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 211, "id": "699664b9-eee4-4f8d-a207-e524526560c5", "metadata": {}, "outputs": [], @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 212, "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed", "metadata": {}, "outputs": [], @@ -114,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 213, "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d", "metadata": {}, "outputs": [], @@ -155,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 214, "id": "7e7b90ce-da54-4f00-bc34-64c543b0858f", "metadata": {}, "outputs": [], @@ -177,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 215, "id": "03329e32-00a5-42c8-9470-75f7b6216ccd", "metadata": {}, "outputs": [], @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 216, "id": "b95464b1-26bc-4aac-84b4-45da83b92251", "metadata": {}, "outputs": [], @@ -239,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 217, "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396", "metadata": {}, "outputs": [], @@ -249,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 218, "id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9", "metadata": {}, "outputs": [ @@ -366,7 +366,7 @@ "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 " ] }, - "execution_count": 11, + "execution_count": 218, "metadata": {}, "output_type": "execute_result" } @@ -385,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 219, "id": "baed146a-9d3a-4397-a812-3d50c9a2f038", "metadata": {}, "outputs": [], @@ -414,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 220, "id": "5fbfd88b-b94c-489c-9201-670e96e453e7", "metadata": {}, "outputs": [], @@ -432,7 +432,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 221, "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85", "metadata": {}, "outputs": [], @@ -457,7 +457,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 222, "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f", "metadata": {}, "outputs": [], @@ -467,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 223, "id": "c24457e7-3cad-451a-a65b-7373b656bd6e", "metadata": { "scrolled": true @@ -587,7 +587,7 @@ "4 404 2021-03-27 23:00:00+00:00 " ] }, - "execution_count": 16, + "execution_count": 223, "metadata": {}, "output_type": "execute_result" } @@ -614,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 224, "id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d", "metadata": {}, "outputs": [], @@ -625,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 225, "id": "607eb4b4-eed9-4b50-b823-f75c116dd37c", "metadata": {}, "outputs": [], @@ -696,7 +696,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 226, "id": "350b09b9-451f-4d47-81fe-f34b892db027", "metadata": {}, "outputs": [], @@ -784,7 +784,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 227, "id": "0fccc8ef-e575-4857-a401-94a7274394df", "metadata": {}, "outputs": [ @@ -937,7 +937,7 @@ "4 indiv entrées tp " ] }, - "execution_count": 20, + "execution_count": 227, "metadata": {}, "output_type": "execute_result" } @@ -949,7 +949,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 228, "id": "779d8aaf-6668-4f66-8852-847304407ea3", "metadata": {}, "outputs": [ @@ -1119,7 +1119,7 @@ "4 spectacle vivant mucem " ] }, - "execution_count": 21, + "execution_count": 228, "metadata": {}, "output_type": "execute_result" } @@ -1131,7 +1131,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 229, "id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0", "metadata": {}, "outputs": [ @@ -1230,7 +1230,7 @@ "4 37 383 269 1" ] }, - "execution_count": 22, + "execution_count": 229, "metadata": {}, "output_type": "execute_result" } @@ -1250,7 +1250,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 230, "id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba", "metadata": {}, "outputs": [], @@ -1278,7 +1278,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 231, "id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951", "metadata": {}, "outputs": [ @@ -1517,7 +1517,7 @@ "[5 rows x 21 columns]" ] }, - "execution_count": 24, + "execution_count": 231, "metadata": {}, "output_type": "execute_result" } @@ -1529,7 +1529,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 232, "id": "98f78cd5-b694-4cc6-b033-20170aa13e8d", "metadata": {}, "outputs": [], @@ -1538,7 +1538,7 @@ "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", "\n", "# Selection des variables d'intérêts\n", - "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'purchase_id' ,'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" + "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'purchase_id' ,'event_type_id', 'category_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" ] }, { @@ -1559,7 +1559,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 233, "id": "e2c88552-b863-47a2-be23-8d2898fb28bc", "metadata": {}, "outputs": [], @@ -1593,7 +1593,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 234, "id": "24537647-bc29-4777-9848-ac4120a4aa60", "metadata": {}, "outputs": [], @@ -1603,7 +1603,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 235, "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3", "metadata": {}, "outputs": [ @@ -1639,7 +1639,7 @@ "
\n", + " | ticket_id | \n", + "customer_id | \n", + "purchase_id | \n", + "event_type_id | \n", + "category_id | \n", + "supplier_name | \n", + "purchase_date | \n", + "type_of_ticket_name | \n", + "amount | \n", + "children | \n", + "is_full_price | \n", + "name_event_types | \n", + "name_facilities | \n", + "name_categories | \n", + "name_events | \n", + "name_seasons | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "13070859 | \n", + "48187 | \n", + "5107462 | \n", + "4 | \n", + "13 | \n", + "vente en ligne | \n", + "2018-12-28 14:47:50+00:00 | \n", + "Atelier | \n", + "8.0 | \n", + "pricing_formula | \n", + "False | \n", + "spectacle vivant | \n", + "mucem | \n", + "indiv prog enfant | \n", + "l'école des magiciens | \n", + "2018 | \n", + "
1 | \n", + "13070860 | \n", + "48187 | \n", + "5107462 | \n", + "4 | \n", + "13 | \n", + "vente en ligne | \n", + "2018-12-28 14:47:50+00:00 | \n", + "Atelier | \n", + "4.0 | \n", + "pricing_formula | \n", + "False | \n", + "spectacle vivant | \n", + "mucem | \n", + "indiv prog enfant | \n", + "l'école des magiciens | \n", + "2018 | \n", + "
2 | \n", + "13070861 | \n", + "48187 | \n", + "5107462 | \n", + "4 | \n", + "13 | \n", + "vente en ligne | \n", + "2018-12-28 14:47:50+00:00 | \n", + "Atelier | \n", + "4.0 | \n", + "pricing_formula | \n", + "False | \n", + "spectacle vivant | \n", + "mucem | \n", + "indiv prog enfant | \n", + "l'école des magiciens | \n", + "2018 | \n", + "
3 | \n", + "13070862 | \n", + "48187 | \n", + "5107462 | \n", + "4 | \n", + "13 | \n", + "vente en ligne | \n", + "2018-12-28 14:47:50+00:00 | \n", + "Atelier | \n", + "4.0 | \n", + "pricing_formula | \n", + "False | \n", + "spectacle vivant | \n", + "mucem | \n", + "indiv prog enfant | \n", + "l'école des magiciens | \n", + "2018 | \n", + "
4 | \n", + "13070863 | \n", + "48187 | \n", + "5107462 | \n", + "4 | \n", + "13 | \n", + "vente en ligne | \n", + "2018-12-28 14:47:50+00:00 | \n", + "Atelier | \n", + "4.0 | \n", + "pricing_formula | \n", + "False | \n", + "spectacle vivant | \n", + "mucem | \n", + "indiv prog enfant | \n", + "l'école des magiciens | \n", + "2018 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
1826667 | \n", + "20662815 | \n", + "1256135 | \n", + "8007697 | \n", + "5 | \n", + "1 | \n", + "vente en ligne | \n", + "2023-11-08 17:23:54+00:00 | \n", + "Atelier | \n", + "11.0 | \n", + "pricing_formula | \n", + "False | \n", + "offre muséale groupe | \n", + "mucem | \n", + "indiv entrées tp | \n", + "NaN | \n", + "2023 | \n", + "
1826668 | \n", + "20662816 | \n", + "1256136 | \n", + "8007698 | \n", + "5 | \n", + "1 | \n", + "vente en ligne | \n", + "2023-11-08 18:32:18+00:00 | \n", + "Atelier | \n", + "11.0 | \n", + "pricing_formula | \n", + "False | \n", + "offre muséale groupe | \n", + "mucem | \n", + "indiv entrées tp | \n", + "NaN | \n", + "2023 | \n", + "
1826669 | \n", + "20662817 | \n", + "1256136 | \n", + "8007698 | \n", + "5 | \n", + "1 | \n", + "vente en ligne | \n", + "2023-11-08 18:32:18+00:00 | \n", + "Atelier | \n", + "11.0 | \n", + "pricing_formula | \n", + "False | \n", + "offre muséale groupe | \n", + "mucem | \n", + "indiv entrées tp | \n", + "NaN | \n", + "2023 | \n", + "
1826670 | \n", + "20662818 | \n", + "1256137 | \n", + "8007699 | \n", + "5 | \n", + "1 | \n", + "vente en ligne | \n", + "2023-11-08 19:30:28+00:00 | \n", + "Atelier | \n", + "11.0 | \n", + "pricing_formula | \n", + "False | \n", + "offre muséale groupe | \n", + "mucem | \n", + "indiv entrées tp | \n", + "NaN | \n", + "2023 | \n", + "
1826671 | \n", + "20662819 | \n", + "1256137 | \n", + "8007699 | \n", + "5 | \n", + "1 | \n", + "vente en ligne | \n", + "2023-11-08 19:30:28+00:00 | \n", + "Atelier | \n", + "11.0 | \n", + "pricing_formula | \n", + "False | \n", + "offre muséale groupe | \n", + "mucem | \n", + "indiv entrées tp | \n", + "NaN | \n", + "2023 | \n", + "
1826672 rows × 16 columns
\n", + "\n", + " | customer_id | \n", + "event_type_id | \n", + "nb_categories | \n", + "
---|---|---|---|
0 | \n", + "1 | \n", + "2 | \n", + "14 | \n", + "
1 | \n", + "1 | \n", + "4 | \n", + "9 | \n", + "
2 | \n", + "1 | \n", + "5 | \n", + "5 | \n", + "
3 | \n", + "1 | \n", + "6 | \n", + "9 | \n", + "
4 | \n", + "2 | \n", + "2 | \n", + "1 | \n", + "
5 rows × 37 columns
\n", + "5 rows × 38 columns
\n", "" ], "text/plain": [ - " customer_id birthdate street_id is_partner gender is_email_true \\\n", - "59897 1 NaN 2 False 2 True \n", - "59900 1 NaN 2 False 2 True \n", - "59898 1 NaN 2 False 2 True \n", - "59899 1 NaN 2 False 2 True \n", - "134695 2 NaN 2 False 1 True \n", + " customer_id birthdate street_id is_partner gender is_email_true \\\n", + "0 1 NaN 2 False 2 True \n", + "1 1 NaN 2 False 2 True \n", + "2 1 NaN 2 False 2 True \n", + "3 1 NaN 2 False 2 True \n", + "4 2 NaN 2 False 1 True \n", "\n", - " opt_in structure_id profession language ... nb_purchases \\\n", - "59897 False NaN NaN NaN ... 194790.0 \n", - "59900 False NaN NaN NaN ... 111786.0 \n", - "59898 False NaN NaN NaN ... 228945.0 \n", - "59899 False NaN NaN NaN ... 107110.0 \n", - "134695 True NaN NaN NaN ... 164.0 \n", + " opt_in structure_id profession language ... total_amount nb_suppliers \\\n", + "0 False NaN NaN NaN ... 2686540.5 7.0 \n", + "1 False NaN NaN NaN ... 1435871.5 5.0 \n", + "2 False NaN NaN NaN ... 3248965.5 6.0 \n", + "3 False NaN NaN NaN ... 1459190.0 6.0 \n", + "4 True NaN NaN NaN ... 0.0 1.0 \n", "\n", - " total_amount nb_suppliers vente_internet_max purchase_date_min \\\n", - "59897 2686540.5 7.0 1.0 3262.190868 \n", - "59900 1435871.5 5.0 1.0 2502.715509 \n", - "59898 3248965.5 6.0 1.0 3698.198229 \n", - "59899 1459190.0 6.0 1.0 3803.369792 \n", - "134695 0.0 1.0 0.0 1705.261192 \n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 1.0 3262.190868 4.179306 \n", + "1 1.0 2502.715509 1408.715532 \n", + "2 1.0 3698.198229 5.221840 \n", + "3 1.0 3803.369792 0.146331 \n", + "4 0.0 1705.261192 1456.333715 \n", "\n", - " purchase_date_max time_between_purchase nb_tickets_internet \\\n", - "59897 4.179306 3258.011562 51.0 \n", - "59900 1408.715532 1093.999977 5.0 \n", - "59898 5.221840 3692.976389 2988.0 \n", - "59899 0.146331 3803.223461 9.0 \n", - "134695 1456.333715 248.927477 0.0 \n", + " time_between_purchase nb_tickets_internet name_event_types \\\n", + "0 3258.011562 51.0 offre muséale individuel \n", + "1 1093.999977 5.0 formule adhésion \n", + "2 3692.976389 2988.0 spectacle vivant \n", + "3 3803.223461 9.0 offre muséale groupe \n", + "4 248.927477 0.0 formule adhésion \n", "\n", - " name_event_types avg_amount \n", - "59897 offre muséale individuel 6.150659 \n", - "59900 formule adhésion 6.439463 \n", - "59898 spectacle vivant 7.762474 \n", - "59899 offre muséale groupe 4.452618 \n", - "134695 formule adhésion 6.439463 \n", + " avg_amount nb_categories \n", + "0 6.150659 14.0 \n", + "1 6.439463 9.0 \n", + "2 7.762474 9.0 \n", + "3 4.452618 5.0 \n", + "4 6.439463 1.0 \n", "\n", - "[5 rows x 37 columns]" + "[5 rows x 38 columns]" ] }, - "execution_count": 43, + "execution_count": 244, "metadata": {}, "output_type": "execute_result" } @@ -2168,12 +2656,14 @@ "## Add customer information\n", "df1_customer = (df1_customerplus_clean.merge(df1_tickets_kpi, how = \"left\", on='customer_id')\n", " .sort_values(by='customer_id', ascending=True))\n", + "\n", + "df1_customer = df1_customer.merge(nb_cat, how='left', on=['customer_id', 'event_type_id'])\n", "df1_customer.head()" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 245, "id": "433921de-03ad-4024-9462-ecd267db1756", "metadata": {}, "outputs": [ @@ -2209,13 +2699,13 @@ "5 rows × 40 columns
\n", + "5 rows × 41 columns
\n", "" ], "text/plain": [ @@ -2355,38 +2845,38 @@ "3 1 NaN 2 False 2 True \n", "4 2 NaN 2 False 1 True \n", "\n", - " opt_in structure_id profession language ... vente_internet_max \\\n", - "0 False NaN NaN NaN ... 1.0 \n", - "1 False NaN NaN NaN ... 1.0 \n", - "2 False NaN NaN NaN ... 1.0 \n", - "3 False NaN NaN NaN ... 1.0 \n", - "4 True NaN NaN NaN ... 0.0 \n", + " opt_in structure_id profession language ... purchase_date_min \\\n", + "0 False NaN NaN NaN ... 3262.190868 \n", + "1 False NaN NaN NaN ... 2502.715509 \n", + "2 False NaN NaN NaN ... 3698.198229 \n", + "3 False NaN NaN NaN ... 3803.369792 \n", + "4 True NaN NaN NaN ... 1705.261192 \n", "\n", - " purchase_date_min purchase_date_max time_between_purchase \\\n", - "0 3262.190868 4.179306 3258.011562 \n", - "1 2502.715509 1408.715532 1093.999977 \n", - "2 3698.198229 5.221840 3692.976389 \n", - "3 3803.369792 0.146331 3803.223461 \n", - "4 1705.261192 1456.333715 248.927477 \n", + " purchase_date_max time_between_purchase nb_tickets_internet \\\n", + "0 4.179306 3258.011562 51.0 \n", + "1 1408.715532 1093.999977 5.0 \n", + "2 5.221840 3692.976389 2988.0 \n", + "3 0.146331 3803.223461 9.0 \n", + "4 1456.333715 248.927477 0.0 \n", "\n", - " nb_tickets_internet name_event_types avg_amount nb_campaigns \\\n", - "0 51.0 offre muséale individuel 6.150659 NaN \n", - "1 5.0 formule adhésion 6.439463 NaN \n", - "2 2988.0 spectacle vivant 7.762474 NaN \n", - "3 9.0 offre muséale groupe 4.452618 NaN \n", - "4 0.0 formule adhésion 6.439463 4.0 \n", + " name_event_types avg_amount nb_categories nb_campaigns \\\n", + "0 offre muséale individuel 6.150659 14.0 NaN \n", + "1 formule adhésion 6.439463 9.0 NaN \n", + "2 spectacle vivant 7.762474 9.0 NaN \n", + "3 offre muséale groupe 4.452618 5.0 NaN \n", + "4 formule adhésion 6.439463 1.0 4.0 \n", "\n", " nb_campaigns_opened time_to_open \n", "0 NaN NaT \n", "1 NaN NaT \n", "2 NaN NaT \n", "3 NaN NaT \n", - "4 0.0 NaT \n", + "4 NaN NaT \n", "\n", - "[5 rows x 40 columns]" + "[5 rows x 41 columns]" ] }, - "execution_count": 44, + "execution_count": 245, "metadata": {}, "output_type": "execute_result" } @@ -2400,7 +2890,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 246, "id": "25e54131-6835-4e94-86d3-1a78520ed7bc", "metadata": {}, "outputs": [], @@ -2778,7 +3268,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.6" } }, "nbformat": 4,