diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index 5077370..6c2d968 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 208, "id": "15103481-8d74-404c-aa09-7601fe7730da", "metadata": {}, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 209, "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4", "metadata": {}, "outputs": [], @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 210, "id": "a9b84234-d5df-4c43-a9cd-80cfe2f1e34d", "metadata": {}, "outputs": [], @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 211, "id": "699664b9-eee4-4f8d-a207-e524526560c5", "metadata": {}, "outputs": [], @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 212, "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed", "metadata": {}, "outputs": [], @@ -114,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 213, "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d", "metadata": {}, "outputs": [], @@ -155,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 214, "id": "7e7b90ce-da54-4f00-bc34-64c543b0858f", "metadata": {}, "outputs": [], @@ -177,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 215, "id": "03329e32-00a5-42c8-9470-75f7b6216ccd", "metadata": {}, "outputs": [], @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 216, "id": "b95464b1-26bc-4aac-84b4-45da83b92251", "metadata": {}, "outputs": [], @@ -239,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 217, "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396", "metadata": {}, "outputs": [], @@ -249,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 218, "id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9", "metadata": {}, "outputs": [ @@ -366,7 +366,7 @@ "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 " ] }, - "execution_count": 11, + "execution_count": 218, "metadata": {}, "output_type": "execute_result" } @@ -385,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 219, "id": "baed146a-9d3a-4397-a812-3d50c9a2f038", "metadata": {}, "outputs": [], @@ -414,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 220, "id": "5fbfd88b-b94c-489c-9201-670e96e453e7", "metadata": {}, "outputs": [], @@ -432,7 +432,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 221, "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85", "metadata": {}, "outputs": [], @@ -457,7 +457,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 222, "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f", "metadata": {}, "outputs": [], @@ -467,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 223, "id": "c24457e7-3cad-451a-a65b-7373b656bd6e", "metadata": { "scrolled": true @@ -587,7 +587,7 @@ "4 404 2021-03-27 23:00:00+00:00 " ] }, - "execution_count": 16, + "execution_count": 223, "metadata": {}, "output_type": "execute_result" } @@ -614,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 224, "id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d", "metadata": {}, "outputs": [], @@ -625,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 225, "id": "607eb4b4-eed9-4b50-b823-f75c116dd37c", "metadata": {}, "outputs": [], @@ -696,7 +696,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 226, "id": "350b09b9-451f-4d47-81fe-f34b892db027", "metadata": {}, "outputs": [], @@ -784,7 +784,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 227, "id": "0fccc8ef-e575-4857-a401-94a7274394df", "metadata": {}, "outputs": [ @@ -937,7 +937,7 @@ "4 indiv entrées tp " ] }, - "execution_count": 20, + "execution_count": 227, "metadata": {}, "output_type": "execute_result" } @@ -949,7 +949,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 228, "id": "779d8aaf-6668-4f66-8852-847304407ea3", "metadata": {}, "outputs": [ @@ -1119,7 +1119,7 @@ "4 spectacle vivant mucem " ] }, - "execution_count": 21, + "execution_count": 228, "metadata": {}, "output_type": "execute_result" } @@ -1131,7 +1131,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 229, "id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0", "metadata": {}, "outputs": [ @@ -1230,7 +1230,7 @@ "4 37 383 269 1" ] }, - "execution_count": 22, + "execution_count": 229, "metadata": {}, "output_type": "execute_result" } @@ -1250,7 +1250,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 230, "id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba", "metadata": {}, "outputs": [], @@ -1278,7 +1278,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 231, "id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951", "metadata": {}, "outputs": [ @@ -1517,7 +1517,7 @@ "[5 rows x 21 columns]" ] }, - "execution_count": 24, + "execution_count": 231, "metadata": {}, "output_type": "execute_result" } @@ -1529,7 +1529,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 232, "id": "98f78cd5-b694-4cc6-b033-20170aa13e8d", "metadata": {}, "outputs": [], @@ -1538,7 +1538,7 @@ "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", "\n", "# Selection des variables d'intérêts\n", - "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'purchase_id' ,'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" + "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'purchase_id' ,'event_type_id', 'category_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" ] }, { @@ -1559,7 +1559,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 233, "id": "e2c88552-b863-47a2-be23-8d2898fb28bc", "metadata": {}, "outputs": [], @@ -1593,7 +1593,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 234, "id": "24537647-bc29-4777-9848-ac4120a4aa60", "metadata": {}, "outputs": [], @@ -1603,7 +1603,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 235, "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3", "metadata": {}, "outputs": [ @@ -1639,7 +1639,7 @@ " 0\n", " 2\n", " 4\n", - " 0.0\n", + " NaN\n", " NaT\n", " \n", " \n", @@ -1660,14 +1660,14 @@ " 3\n", " 5\n", " 4\n", - " 0.0\n", + " NaN\n", " NaT\n", " \n", " \n", " 4\n", " 6\n", " 20\n", - " 0.0\n", + " NaN\n", " NaT\n", " \n", " \n", @@ -1676,14 +1676,14 @@ ], "text/plain": [ " customer_id nb_campaigns nb_campaigns_opened time_to_open\n", - "0 2 4 0.0 NaT\n", + "0 2 4 NaN NaT\n", "1 3 222 124.0 1 days 00:28:30.169354838\n", "2 4 7 7.0 1 days 04:31:01.428571428\n", - "3 5 4 0.0 NaT\n", - "4 6 20 0.0 NaT" + "3 5 4 NaN NaT\n", + "4 6 20 NaN NaT" ] }, - "execution_count": 29, + "execution_count": 235, "metadata": {}, "output_type": "execute_result" } @@ -1702,7 +1702,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 236, "id": "043303fe-e90f-4689-a2a9-5d690555a045", "metadata": {}, "outputs": [], @@ -1765,7 +1765,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 237, "id": "5882234a-1ed5-4269-87a6-0d75613476e3", "metadata": {}, "outputs": [], @@ -1775,7 +1775,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 238, "id": "5f2046cf-ffde-4521-91e7-b727b8bc17f5", "metadata": {}, "outputs": [ @@ -1811,6 +1811,8 @@ " purchase_date_max\n", " time_between_purchase\n", " nb_tickets_internet\n", + " name_event_types\n", + " avg_amount\n", " \n", " \n", " \n", @@ -1827,6 +1829,8 @@ " 4.179306\n", " 3258.011562\n", " 51.0\n", + " offre muséale individuel\n", + " 6.150659\n", " \n", " \n", " 1\n", @@ -1841,6 +1845,8 @@ " 5.221840\n", " 3692.976389\n", " 2988.0\n", + " spectacle vivant\n", + " 7.762474\n", " \n", " \n", " 2\n", @@ -1855,6 +1861,8 @@ " 0.146331\n", " 3803.223461\n", " 9.0\n", + " offre muséale groupe\n", + " 4.452618\n", " \n", " \n", " 3\n", @@ -1869,6 +1877,8 @@ " 1408.715532\n", " 1093.999977\n", " 5.0\n", + " formule adhésion\n", + " 6.439463\n", " \n", " \n", " 4\n", @@ -1883,6 +1893,8 @@ " 1340.308160\n", " 700.966389\n", " 0.0\n", + " offre muséale individuel\n", + " 6.150659\n", " \n", " \n", "\n", @@ -1903,15 +1915,22 @@ "3 5 1 2502.715509 1408.715532 \n", "4 1 0 2041.274549 1340.308160 \n", "\n", - " time_between_purchase nb_tickets_internet \n", - "0 3258.011562 51.0 \n", - "1 3692.976389 2988.0 \n", - "2 3803.223461 9.0 \n", - "3 1093.999977 5.0 \n", - "4 700.966389 0.0 " + " time_between_purchase nb_tickets_internet name_event_types \\\n", + "0 3258.011562 51.0 offre muséale individuel \n", + "1 3692.976389 2988.0 spectacle vivant \n", + "2 3803.223461 9.0 offre muséale groupe \n", + "3 1093.999977 5.0 formule adhésion \n", + "4 700.966389 0.0 offre muséale individuel \n", + "\n", + " avg_amount \n", + "0 6.150659 \n", + "1 7.762474 \n", + "2 4.452618 \n", + "3 6.439463 \n", + "4 6.150659 " ] }, - "execution_count": 36, + "execution_count": 238, "metadata": {}, "output_type": "execute_result" } @@ -1922,7 +1941,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 239, "id": "a4a2311d-8a72-4030-afd5-218004d5d2a5", "metadata": {}, "outputs": [], @@ -1946,7 +1965,476 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 240, + "id": "484becad-0390-48a8-923b-b03a4facc7ae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ticket_idcustomer_idpurchase_idevent_type_idcategory_idsupplier_namepurchase_datetype_of_ticket_nameamountchildrenis_full_pricename_event_typesname_facilitiesname_categoriesname_eventsname_seasons
013070859481875107462413vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
113070860481875107462413vente en ligne2018-12-28 14:47:50+00:00Atelier4.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
213070861481875107462413vente en ligne2018-12-28 14:47:50+00:00Atelier4.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
313070862481875107462413vente en ligne2018-12-28 14:47:50+00:00Atelier4.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
413070863481875107462413vente en ligne2018-12-28 14:47:50+00:00Atelier4.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
...................................................
1826667206628151256135800769751vente en ligne2023-11-08 17:23:54+00:00Atelier11.0pricing_formulaFalseoffre muséale groupemucemindiv entrées tpNaN2023
1826668206628161256136800769851vente en ligne2023-11-08 18:32:18+00:00Atelier11.0pricing_formulaFalseoffre muséale groupemucemindiv entrées tpNaN2023
1826669206628171256136800769851vente en ligne2023-11-08 18:32:18+00:00Atelier11.0pricing_formulaFalseoffre muséale groupemucemindiv entrées tpNaN2023
1826670206628181256137800769951vente en ligne2023-11-08 19:30:28+00:00Atelier11.0pricing_formulaFalseoffre muséale groupemucemindiv entrées tpNaN2023
1826671206628191256137800769951vente en ligne2023-11-08 19:30:28+00:00Atelier11.0pricing_formulaFalseoffre muséale groupemucemindiv entrées tpNaN2023
\n", + "

1826672 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " ticket_id customer_id purchase_id event_type_id category_id \\\n", + "0 13070859 48187 5107462 4 13 \n", + "1 13070860 48187 5107462 4 13 \n", + "2 13070861 48187 5107462 4 13 \n", + "3 13070862 48187 5107462 4 13 \n", + "4 13070863 48187 5107462 4 13 \n", + "... ... ... ... ... ... \n", + "1826667 20662815 1256135 8007697 5 1 \n", + "1826668 20662816 1256136 8007698 5 1 \n", + "1826669 20662817 1256136 8007698 5 1 \n", + "1826670 20662818 1256137 8007699 5 1 \n", + "1826671 20662819 1256137 8007699 5 1 \n", + "\n", + " supplier_name purchase_date type_of_ticket_name amount \\\n", + "0 vente en ligne 2018-12-28 14:47:50+00:00 Atelier 8.0 \n", + "1 vente en ligne 2018-12-28 14:47:50+00:00 Atelier 4.0 \n", + "2 vente en ligne 2018-12-28 14:47:50+00:00 Atelier 4.0 \n", + "3 vente en ligne 2018-12-28 14:47:50+00:00 Atelier 4.0 \n", + "4 vente en ligne 2018-12-28 14:47:50+00:00 Atelier 4.0 \n", + "... ... ... ... ... \n", + "1826667 vente en ligne 2023-11-08 17:23:54+00:00 Atelier 11.0 \n", + "1826668 vente en ligne 2023-11-08 18:32:18+00:00 Atelier 11.0 \n", + "1826669 vente en ligne 2023-11-08 18:32:18+00:00 Atelier 11.0 \n", + "1826670 vente en ligne 2023-11-08 19:30:28+00:00 Atelier 11.0 \n", + "1826671 vente en ligne 2023-11-08 19:30:28+00:00 Atelier 11.0 \n", + "\n", + " children is_full_price name_event_types name_facilities \\\n", + "0 pricing_formula False spectacle vivant mucem \n", + "1 pricing_formula False spectacle vivant mucem \n", + "2 pricing_formula False spectacle vivant mucem \n", + "3 pricing_formula False spectacle vivant mucem \n", + "4 pricing_formula False spectacle vivant mucem \n", + "... ... ... ... ... \n", + "1826667 pricing_formula False offre muséale groupe mucem \n", + "1826668 pricing_formula False offre muséale groupe mucem \n", + "1826669 pricing_formula False offre muséale groupe mucem \n", + "1826670 pricing_formula False offre muséale groupe mucem \n", + "1826671 pricing_formula False offre muséale groupe mucem \n", + "\n", + " name_categories name_events name_seasons \n", + "0 indiv prog enfant l'école des magiciens 2018 \n", + "1 indiv prog enfant l'école des magiciens 2018 \n", + "2 indiv prog enfant l'école des magiciens 2018 \n", + "3 indiv prog enfant l'école des magiciens 2018 \n", + "4 indiv prog enfant l'école des magiciens 2018 \n", + "... ... ... ... \n", + "1826667 indiv entrées tp NaN 2023 \n", + "1826668 indiv entrées tp NaN 2023 \n", + "1826669 indiv entrées tp NaN 2023 \n", + "1826670 indiv entrées tp NaN 2023 \n", + "1826671 indiv entrées tp NaN 2023 \n", + "\n", + "[1826672 rows x 16 columns]" + ] + }, + "execution_count": 240, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Look at categories\n", + "df1_products_purchased_reduced\n" + ] + }, + { + "cell_type": "code", + "execution_count": 241, + "id": "abb171a5-145f-40d0-91af-c51db4593d04", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 13, 16, 10, 1, 3, 2, 4, 6982, 5, 28, 8,\n", + " 41, 15, 39, 17, 9, 26, 2252, 6, 7, 1108, 2395,\n", + " 30, 2450, 14, 11])" + ] + }, + "execution_count": 241, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_products_purchased_reduced['category_id'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 242, + "id": "750c5753-ea45-4deb-a934-994568185013", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['indiv prog enfant', 'indiv activité enfant', 'indiv entrées fa',\n", + " 'indiv entrées tp', 'indiv prog tp', 'indiv entrées gr',\n", + " 'indiv prog tr', nan, 'indiv entrées tr', 'indiv activité tp',\n", + " 'indiv prog gr', 'indiv activité tr', 'groupe autonome entrées tp',\n", + " 'indiv activité gr', 'groupe forfait scolaire',\n", + " 'groupe forfait entrées gr', 'en nb entrées tp',\n", + " 'groupe autonome gr', 'groupe forfait entrées tr',\n", + " 'groupe autonome entrées gr', 'groupe forfait adulte',\n", + " 'groupe autonome adulte', 'en nb entrées gr',\n", + " 'groupe forfait etudiant', 'en nb entrées tr',\n", + " 'groupe autonome entrées tr'], dtype=object)" + ] + }, + "execution_count": 242, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_products_purchased_reduced['name_categories'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 243, + "id": "7a2c3124-26d0-49e1-9bb4-d7ff58053454", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idevent_type_idnb_categories
01214
1149
2155
3169
4221
\n", + "
" + ], + "text/plain": [ + " customer_id event_type_id nb_categories\n", + "0 1 2 14\n", + "1 1 4 9\n", + "2 1 5 5\n", + "3 1 6 9\n", + "4 2 2 1" + ] + }, + "execution_count": 243, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nb_cat = (df1_products_purchased_reduced.groupby(by=['customer_id', 'event_type_id'])\n", + " .agg({'category_id' : 'nunique'}).reset_index()\n", + " .rename(columns = {'category_id' : 'nb_categories'}))\n", + "\n", + "nb_cat.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 244, "id": "83230baa-9a8a-4614-b629-e99c2505c696", "metadata": {}, "outputs": [ @@ -1982,7 +2470,6 @@ " profession\n", " language\n", " ...\n", - " nb_purchases\n", " total_amount\n", " nb_suppliers\n", " vente_internet_max\n", @@ -1992,11 +2479,12 @@ " nb_tickets_internet\n", " name_event_types\n", " avg_amount\n", + " nb_categories\n", " \n", " \n", " \n", " \n", - " 59897\n", + " 0\n", " 1\n", " NaN\n", " 2\n", @@ -2008,7 +2496,6 @@ " NaN\n", " NaN\n", " ...\n", - " 194790.0\n", " 2686540.5\n", " 7.0\n", " 1.0\n", @@ -2018,9 +2505,10 @@ " 51.0\n", " offre muséale individuel\n", " 6.150659\n", + " 14.0\n", " \n", " \n", - " 59900\n", + " 1\n", " 1\n", " NaN\n", " 2\n", @@ -2032,7 +2520,6 @@ " NaN\n", " NaN\n", " ...\n", - " 111786.0\n", " 1435871.5\n", " 5.0\n", " 1.0\n", @@ -2042,9 +2529,10 @@ " 5.0\n", " formule adhésion\n", " 6.439463\n", + " 9.0\n", " \n", " \n", - " 59898\n", + " 2\n", " 1\n", " NaN\n", " 2\n", @@ -2056,7 +2544,6 @@ " NaN\n", " NaN\n", " ...\n", - " 228945.0\n", " 3248965.5\n", " 6.0\n", " 1.0\n", @@ -2066,9 +2553,10 @@ " 2988.0\n", " spectacle vivant\n", " 7.762474\n", + " 9.0\n", " \n", " \n", - " 59899\n", + " 3\n", " 1\n", " NaN\n", " 2\n", @@ -2080,7 +2568,6 @@ " NaN\n", " NaN\n", " ...\n", - " 107110.0\n", " 1459190.0\n", " 6.0\n", " 1.0\n", @@ -2090,9 +2577,10 @@ " 9.0\n", " offre muséale groupe\n", " 4.452618\n", + " 5.0\n", " \n", " \n", - " 134695\n", + " 4\n", " 2\n", " NaN\n", " 2\n", @@ -2104,7 +2592,6 @@ " NaN\n", " NaN\n", " ...\n", - " 164.0\n", " 0.0\n", " 1.0\n", " 0.0\n", @@ -2114,52 +2601,53 @@ " 0.0\n", " formule adhésion\n", " 6.439463\n", + " 1.0\n", " \n", " \n", "\n", - "

5 rows × 37 columns

\n", + "

5 rows × 38 columns

\n", "" ], "text/plain": [ - " customer_id birthdate street_id is_partner gender is_email_true \\\n", - "59897 1 NaN 2 False 2 True \n", - "59900 1 NaN 2 False 2 True \n", - "59898 1 NaN 2 False 2 True \n", - "59899 1 NaN 2 False 2 True \n", - "134695 2 NaN 2 False 1 True \n", + " customer_id birthdate street_id is_partner gender is_email_true \\\n", + "0 1 NaN 2 False 2 True \n", + "1 1 NaN 2 False 2 True \n", + "2 1 NaN 2 False 2 True \n", + "3 1 NaN 2 False 2 True \n", + "4 2 NaN 2 False 1 True \n", "\n", - " opt_in structure_id profession language ... nb_purchases \\\n", - "59897 False NaN NaN NaN ... 194790.0 \n", - "59900 False NaN NaN NaN ... 111786.0 \n", - "59898 False NaN NaN NaN ... 228945.0 \n", - "59899 False NaN NaN NaN ... 107110.0 \n", - "134695 True NaN NaN NaN ... 164.0 \n", + " opt_in structure_id profession language ... total_amount nb_suppliers \\\n", + "0 False NaN NaN NaN ... 2686540.5 7.0 \n", + "1 False NaN NaN NaN ... 1435871.5 5.0 \n", + "2 False NaN NaN NaN ... 3248965.5 6.0 \n", + "3 False NaN NaN NaN ... 1459190.0 6.0 \n", + "4 True NaN NaN NaN ... 0.0 1.0 \n", "\n", - " total_amount nb_suppliers vente_internet_max purchase_date_min \\\n", - "59897 2686540.5 7.0 1.0 3262.190868 \n", - "59900 1435871.5 5.0 1.0 2502.715509 \n", - "59898 3248965.5 6.0 1.0 3698.198229 \n", - "59899 1459190.0 6.0 1.0 3803.369792 \n", - "134695 0.0 1.0 0.0 1705.261192 \n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 1.0 3262.190868 4.179306 \n", + "1 1.0 2502.715509 1408.715532 \n", + "2 1.0 3698.198229 5.221840 \n", + "3 1.0 3803.369792 0.146331 \n", + "4 0.0 1705.261192 1456.333715 \n", "\n", - " purchase_date_max time_between_purchase nb_tickets_internet \\\n", - "59897 4.179306 3258.011562 51.0 \n", - "59900 1408.715532 1093.999977 5.0 \n", - "59898 5.221840 3692.976389 2988.0 \n", - "59899 0.146331 3803.223461 9.0 \n", - "134695 1456.333715 248.927477 0.0 \n", + " time_between_purchase nb_tickets_internet name_event_types \\\n", + "0 3258.011562 51.0 offre muséale individuel \n", + "1 1093.999977 5.0 formule adhésion \n", + "2 3692.976389 2988.0 spectacle vivant \n", + "3 3803.223461 9.0 offre muséale groupe \n", + "4 248.927477 0.0 formule adhésion \n", "\n", - " name_event_types avg_amount \n", - "59897 offre muséale individuel 6.150659 \n", - "59900 formule adhésion 6.439463 \n", - "59898 spectacle vivant 7.762474 \n", - "59899 offre muséale groupe 4.452618 \n", - "134695 formule adhésion 6.439463 \n", + " avg_amount nb_categories \n", + "0 6.150659 14.0 \n", + "1 6.439463 9.0 \n", + "2 7.762474 9.0 \n", + "3 4.452618 5.0 \n", + "4 6.439463 1.0 \n", "\n", - "[5 rows x 37 columns]" + "[5 rows x 38 columns]" ] }, - "execution_count": 43, + "execution_count": 244, "metadata": {}, "output_type": "execute_result" } @@ -2168,12 +2656,14 @@ "## Add customer information\n", "df1_customer = (df1_customerplus_clean.merge(df1_tickets_kpi, how = \"left\", on='customer_id')\n", " .sort_values(by='customer_id', ascending=True))\n", + "\n", + "df1_customer = df1_customer.merge(nb_cat, how='left', on=['customer_id', 'event_type_id'])\n", "df1_customer.head()" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 245, "id": "433921de-03ad-4024-9462-ecd267db1756", "metadata": {}, "outputs": [ @@ -2209,13 +2699,13 @@ " profession\n", " language\n", " ...\n", - " vente_internet_max\n", " purchase_date_min\n", " purchase_date_max\n", " time_between_purchase\n", " nb_tickets_internet\n", " name_event_types\n", " avg_amount\n", + " nb_categories\n", " nb_campaigns\n", " nb_campaigns_opened\n", " time_to_open\n", @@ -2235,13 +2725,13 @@ " NaN\n", " NaN\n", " ...\n", - " 1.0\n", " 3262.190868\n", " 4.179306\n", " 3258.011562\n", " 51.0\n", " offre muséale individuel\n", " 6.150659\n", + " 14.0\n", " NaN\n", " NaN\n", " NaT\n", @@ -2259,13 +2749,13 @@ " NaN\n", " NaN\n", " ...\n", - " 1.0\n", " 2502.715509\n", " 1408.715532\n", " 1093.999977\n", " 5.0\n", " formule adhésion\n", " 6.439463\n", + " 9.0\n", " NaN\n", " NaN\n", " NaT\n", @@ -2283,13 +2773,13 @@ " NaN\n", " NaN\n", " ...\n", - " 1.0\n", " 3698.198229\n", " 5.221840\n", " 3692.976389\n", " 2988.0\n", " spectacle vivant\n", " 7.762474\n", + " 9.0\n", " NaN\n", " NaN\n", " NaT\n", @@ -2307,13 +2797,13 @@ " NaN\n", " NaN\n", " ...\n", - " 1.0\n", " 3803.369792\n", " 0.146331\n", " 3803.223461\n", " 9.0\n", " offre muséale groupe\n", " 4.452618\n", + " 5.0\n", " NaN\n", " NaN\n", " NaT\n", @@ -2331,20 +2821,20 @@ " NaN\n", " NaN\n", " ...\n", - " 0.0\n", " 1705.261192\n", " 1456.333715\n", " 248.927477\n", " 0.0\n", " formule adhésion\n", " 6.439463\n", + " 1.0\n", " 4.0\n", - " 0.0\n", + " NaN\n", " NaT\n", " \n", " \n", "\n", - "

5 rows × 40 columns

\n", + "

5 rows × 41 columns

\n", "" ], "text/plain": [ @@ -2355,38 +2845,38 @@ "3 1 NaN 2 False 2 True \n", "4 2 NaN 2 False 1 True \n", "\n", - " opt_in structure_id profession language ... vente_internet_max \\\n", - "0 False NaN NaN NaN ... 1.0 \n", - "1 False NaN NaN NaN ... 1.0 \n", - "2 False NaN NaN NaN ... 1.0 \n", - "3 False NaN NaN NaN ... 1.0 \n", - "4 True NaN NaN NaN ... 0.0 \n", + " opt_in structure_id profession language ... purchase_date_min \\\n", + "0 False NaN NaN NaN ... 3262.190868 \n", + "1 False NaN NaN NaN ... 2502.715509 \n", + "2 False NaN NaN NaN ... 3698.198229 \n", + "3 False NaN NaN NaN ... 3803.369792 \n", + "4 True NaN NaN NaN ... 1705.261192 \n", "\n", - " purchase_date_min purchase_date_max time_between_purchase \\\n", - "0 3262.190868 4.179306 3258.011562 \n", - "1 2502.715509 1408.715532 1093.999977 \n", - "2 3698.198229 5.221840 3692.976389 \n", - "3 3803.369792 0.146331 3803.223461 \n", - "4 1705.261192 1456.333715 248.927477 \n", + " purchase_date_max time_between_purchase nb_tickets_internet \\\n", + "0 4.179306 3258.011562 51.0 \n", + "1 1408.715532 1093.999977 5.0 \n", + "2 5.221840 3692.976389 2988.0 \n", + "3 0.146331 3803.223461 9.0 \n", + "4 1456.333715 248.927477 0.0 \n", "\n", - " nb_tickets_internet name_event_types avg_amount nb_campaigns \\\n", - "0 51.0 offre muséale individuel 6.150659 NaN \n", - "1 5.0 formule adhésion 6.439463 NaN \n", - "2 2988.0 spectacle vivant 7.762474 NaN \n", - "3 9.0 offre muséale groupe 4.452618 NaN \n", - "4 0.0 formule adhésion 6.439463 4.0 \n", + " name_event_types avg_amount nb_categories nb_campaigns \\\n", + "0 offre muséale individuel 6.150659 14.0 NaN \n", + "1 formule adhésion 6.439463 9.0 NaN \n", + "2 spectacle vivant 7.762474 9.0 NaN \n", + "3 offre muséale groupe 4.452618 5.0 NaN \n", + "4 formule adhésion 6.439463 1.0 4.0 \n", "\n", " nb_campaigns_opened time_to_open \n", "0 NaN NaT \n", "1 NaN NaT \n", "2 NaN NaT \n", "3 NaN NaT \n", - "4 0.0 NaT \n", + "4 NaN NaT \n", "\n", - "[5 rows x 40 columns]" + "[5 rows x 41 columns]" ] }, - "execution_count": 44, + "execution_count": 245, "metadata": {}, "output_type": "execute_result" } @@ -2400,7 +2890,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 246, "id": "25e54131-6835-4e94-86d3-1a78520ed7bc", "metadata": {}, "outputs": [], @@ -2778,7 +3268,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.6" } }, "nbformat": 4,