diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index b2c2018..99d5ea7 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -79,7 +79,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_15815/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(file_in)\n" ] } @@ -242,17 +242,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/1591303091.py:5: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15815/1591303091.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n", - "/tmp/ipykernel_3658/1591303091.py:9: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15815/1591303091.py:9: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", - "/tmp/ipykernel_3658/1591303091.py:13: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15815/1591303091.py:13: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -386,169 +386,6 @@ "df1_ticket_information.head()" ] }, - { - "cell_type": "markdown", - "id": "37499eae-1a7f-4dce-83b0-ff942ccf7a9d", - "metadata": {}, - "source": [ - "### KPI tickets" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "043303fe-e90f-4689-a2a9-5d690555a045", - "metadata": {}, - "outputs": [], - "source": [ - "def tickets_kpi_function(tickets_information = None):\n", - " tickets_information_copy = tickets_information.copy()\n", - " tickets_information_copy['purchase_date_max'] = tickets_information_copy['purchase_date']\n", - " tickets_kpi = (tickets_information_copy[['product_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'purchase_date_max']]\n", - " .groupby(['product_id', 'customer_id'])\n", - " .agg({'ticket_id': 'count', \n", - " 'supplier_name': 'nunique',\n", - " 'purchase_date_max' : 'max',\n", - " 'purchase_date' : 'min'})\n", - " .reset_index()\n", - " )\n", - " \n", - " tickets_kpi.rename(columns = {'ticket_id' : 'nb_tickets', \n", - " 'supplier_name' : 'nb_suppliers', \n", - " 'purchase_date' : 'purchase_date_min'}, inplace = True)\n", - " \n", - " tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n", - " \n", - " return tickets_kpi\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "5882234a-1ed5-4269-87a6-0d75613476e3", - "metadata": {}, - "outputs": [], - "source": [ - "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_ticket_information)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
product_idcustomer_idnb_ticketsnb_supplierspurchase_date_maxpurchase_date_mintime_between_purchase
01073102805422019-06-05 14:37:13+00:002019-06-05 14:18:38+00:000 days 00:18:35
111008954355112017-02-17 13:32:51+00:002017-02-17 13:32:51+00:000 days 00:00:00
211008954356112017-03-02 14:36:16+00:002017-03-02 14:36:16+00:000 days 00:00:00
311008954357112017-03-06 15:16:41+00:002017-03-06 15:16:41+00:000 days 00:00:00
411008954358112017-03-13 16:07:27+00:002017-03-13 16:07:27+00:000 days 00:00:00
\n", - "
" - ], - "text/plain": [ - " product_id customer_id nb_tickets nb_suppliers \\\n", - "0 107310 2805 4 2 \n", - "1 110089 54355 1 1 \n", - "2 110089 54356 1 1 \n", - "3 110089 54357 1 1 \n", - "4 110089 54358 1 1 \n", - "\n", - " purchase_date_max purchase_date_min time_between_purchase \n", - "0 2019-06-05 14:37:13+00:00 2019-06-05 14:18:38+00:00 0 days 00:18:35 \n", - "1 2017-02-17 13:32:51+00:00 2017-02-17 13:32:51+00:00 0 days 00:00:00 \n", - "2 2017-03-02 14:36:16+00:00 2017-03-02 14:36:16+00:00 0 days 00:00:00 \n", - "3 2017-03-06 15:16:41+00:00 2017-03-06 15:16:41+00:00 0 days 00:00:00 \n", - "4 2017-03-13 16:07:27+00:00 2017-03-13 16:07:27+00:00 0 days 00:00:00 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_tickets_kpi.head()" - ] - }, { "cell_type": "markdown", "id": "096e47f4-1d65-4575-989d-83227eedad2b", @@ -559,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "id": "baed146a-9d3a-4397-a812-3d50c9a2f038", "metadata": {}, "outputs": [], @@ -588,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "id": "5fbfd88b-b94c-489c-9201-670e96e453e7", "metadata": {}, "outputs": [ @@ -596,7 +433,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/3848597476.py:4: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15815/3848597476.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -610,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09", "metadata": {}, "outputs": [ @@ -677,7 +514,7 @@ "consentement optout b2c 34523" ] }, - "execution_count": 16, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -688,7 +525,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "id": "4417ff51-f501-4ab9-a192-4ab75764a8ed", "metadata": { "scrolled": true @@ -757,7 +594,7 @@ "DDCP MD Procès du Siècle 1684" ] }, - "execution_count": 17, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -777,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85", "metadata": {}, "outputs": [], @@ -802,7 +639,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 16, "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f", "metadata": {}, "outputs": [ @@ -810,19 +647,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15815/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15815/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15815/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -837,7 +674,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 17, "id": "c24457e7-3cad-451a-a65b-7373b656bd6e", "metadata": { "scrolled": true @@ -957,7 +794,7 @@ "4 404 2021-03-27 23:00:00+00:00 " ] }, - "execution_count": 20, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -968,7 +805,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 18, "id": "e2c88552-b863-47a2-be23-8d2898fb28bc", "metadata": {}, "outputs": [], @@ -1002,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "id": "24537647-bc29-4777-9848-ac4120a4aa60", "metadata": {}, "outputs": [ @@ -1010,7 +847,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3658/3700263836.py:11: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15815/3700263836.py:11: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -1024,7 +861,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3", "metadata": {}, "outputs": [ @@ -1104,7 +941,7 @@ "4 6 20 0.0 NaT" ] }, - "execution_count": 23, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1131,7 +968,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 21, "id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d", "metadata": {}, "outputs": [], @@ -1142,7 +979,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "id": "607eb4b4-eed9-4b50-b823-f75c116dd37c", "metadata": {}, "outputs": [], @@ -1213,7 +1050,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 23, "id": "350b09b9-451f-4d47-81fe-f34b892db027", "metadata": {}, "outputs": [], @@ -1301,7 +1138,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 24, "id": "0fccc8ef-e575-4857-a401-94a7274394df", "metadata": {}, "outputs": [ @@ -1454,7 +1291,7 @@ "4 indiv entrées tp " ] }, - "execution_count": 27, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1466,7 +1303,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 25, "id": "779d8aaf-6668-4f66-8852-847304407ea3", "metadata": {}, "outputs": [ @@ -1636,7 +1473,7 @@ "4 spectacle vivant mucem " ] }, - "execution_count": 28, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1648,7 +1485,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 26, "id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0", "metadata": {}, "outputs": [ @@ -1747,7 +1584,7 @@ "4 37 383 269 1" ] }, - "execution_count": 29, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1767,7 +1604,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 27, "id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba", "metadata": {}, "outputs": [], @@ -1789,13 +1626,13 @@ " products_global = order_columns_id(products_global)\n", "\n", " # remove useless columns \n", - " products_global = products_global.drop(columns = ['type_of_id', 'name_events', 'name_seasons', 'name_categories'])\n", + " products_global = products_global.drop(columns = ['type_of_id']) # 'name_events', 'name_seasons', 'name_categories'\n", " return products_global" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 28, "id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951", "metadata": {}, "outputs": [ @@ -1849,12 +1686,15 @@ " id_representation_cap\n", " season_id\n", " facility_id\n", - " event_type_id\n", + " ...\n", " event_type_key_id\n", " facility_key_id\n", " street_id\n", " amount\n", " is_full_price\n", + " name_categories\n", + " name_events\n", + " name_seasons\n", " name_event_types\n", " name_facilities\n", " \n", @@ -1872,12 +1712,15 @@ " 8789\n", " 4\n", " 1\n", - " 2\n", + " ...\n", " 5\n", " 1\n", " 1\n", " 9.0\n", " False\n", + " indiv activité tr\n", + " visite-jeu \"le classico des minots\" (1h30)\n", + " 2017\n", " offre muséale individuel\n", " mucem\n", " \n", @@ -1893,12 +1736,15 @@ " 390\n", " 2\n", " 1\n", - " 2\n", + " ...\n", " 2\n", " 1\n", " 1\n", " 9.5\n", " False\n", + " indiv entrées tp\n", + " billet mucem picasso\n", + " 2016\n", " offre muséale individuel\n", " mucem\n", " \n", @@ -1914,12 +1760,15 @@ " 395\n", " 2\n", " 1\n", - " 2\n", + " ...\n", " 2\n", " 1\n", " 1\n", " 11.5\n", " False\n", + " indiv entrées tp\n", + " billet mucem picasso\n", + " 2016\n", " offre muséale individuel\n", " mucem\n", " \n", @@ -1935,12 +1784,15 @@ " 120199\n", " 1754\n", " 1\n", - " 2\n", + " ...\n", " 4\n", " 1\n", " 1\n", " 8.0\n", " False\n", + " indiv entrées tr\n", + " NaN\n", + " NaN\n", " offre muséale individuel\n", " mucem\n", " \n", @@ -1956,17 +1808,21 @@ " 21\n", " 4\n", " 1\n", - " 3\n", + " ...\n", " 6\n", " 1\n", " 1\n", " 8.5\n", " False\n", + " indiv entrées tp\n", + " non défini\n", + " 2017\n", " non défini\n", " mucem\n", " \n", " \n", "\n", + "

5 rows × 21 columns

\n", "" ], "text/plain": [ @@ -1984,19 +1840,114 @@ "3 156773 1 12365 120199 \n", "4 1175 1 8 21 \n", "\n", - " season_id facility_id event_type_id event_type_key_id facility_key_id \\\n", - "0 4 1 2 5 1 \n", - "1 2 1 2 2 1 \n", - "2 2 1 2 2 1 \n", - "3 1754 1 2 4 1 \n", - "4 4 1 3 6 1 \n", + " season_id facility_id ... event_type_key_id facility_key_id street_id \\\n", + "0 4 1 ... 5 1 1 \n", + "1 2 1 ... 2 1 1 \n", + "2 2 1 ... 2 1 1 \n", + "3 1754 1 ... 4 1 1 \n", + "4 4 1 ... 6 1 1 \n", "\n", - " street_id amount is_full_price name_event_types name_facilities \n", - "0 1 9.0 False offre muséale individuel mucem \n", - "1 1 9.5 False offre muséale individuel mucem \n", - "2 1 11.5 False offre muséale individuel mucem \n", - "3 1 8.0 False offre muséale individuel mucem \n", - "4 1 8.5 False non défini mucem " + " amount is_full_price name_categories \\\n", + "0 9.0 False indiv activité tr \n", + "1 9.5 False indiv entrées tp \n", + "2 11.5 False indiv entrées tp \n", + "3 8.0 False indiv entrées tr \n", + "4 8.5 False indiv entrées tp \n", + "\n", + " name_events name_seasons \\\n", + "0 visite-jeu \"le classico des minots\" (1h30) 2017 \n", + "1 billet mucem picasso 2016 \n", + "2 billet mucem picasso 2016 \n", + "3 NaN NaN \n", + "4 non défini 2017 \n", + "\n", + " name_event_types name_facilities \n", + "0 offre muséale individuel mucem \n", + "1 offre muséale individuel mucem \n", + "2 offre muséale individuel mucem \n", + "3 offre muséale individuel mucem \n", + "4 non défini mucem \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "products_global = uniform_product_df()\n", + "products_global.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "98f78cd5-b694-4cc6-b033-20170aa13e8d", + "metadata": {}, + "outputs": [], + "source": [ + "# Fusion liée au product\n", + "df1_products_purchased = pd.merge(df1_ticket_information, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52db7bcb-3fb7-48e5-b612-4e22bdab4a94", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "d4dcfbe0-c6ce-497e-b75e-dc9e938801b2", + "metadata": {}, + "source": [ + "### KPI tickets" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "665a5925-9c0e-425a-8f11-c33a0a9ec444", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['ticket_id', 'product_id', 'is_from_subscription', 'supplier_name',\n", + " 'type_of_ticket_name', 'children', 'purchase_date', 'customer_id',\n", + " 'id_products', 'representation_id', 'pricing_formula_id', 'category_id',\n", + " 'products_group_id', 'product_pack_id', 'event_id',\n", + " 'id_representation_cap', 'season_id', 'facility_id', 'event_type_id',\n", + " 'event_type_key_id', 'facility_key_id', 'street_id', 'amount',\n", + " 'is_full_price', 'name_categories', 'name_events', 'name_seasons',\n", + " 'name_event_types', 'name_facilities'],\n", + " dtype='object')" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_products_purchased.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "b913a69e-3146-4919-b5f6-a6108532bffa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['spectacle vivant', 'offre muséale individuel', 'formule adhésion',\n", + " 'offre muséale groupe'], dtype=object)" ] }, "execution_count": 31, @@ -2005,8 +1956,819 @@ } ], "source": [ - "products_global = uniform_product_df()\n", - "products_global.head()" + "df1_products_purchased['name_event_types'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "e01e8cf9-1187-4a4b-993d-b7b4321cd8f0", + "metadata": {}, + "outputs": [], + "source": [ + "df1_products_purchased_reduced = df1_products_purchased[['ticket_id', 'customer_id', 'event_type_id', 'supplier_name', 'purchase_date', 'type_of_ticket_name', 'amount', 'children', 'is_full_price', 'name_event_types', 'name_facilities', 'name_categories', 'name_events', 'name_seasons']]" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "3d8b0875-b409-44ce-b688-d9d6758782d3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ticket_idcustomer_idevent_type_idsupplier_namepurchase_datetype_of_ticket_nameamountchildrenis_full_pricename_event_typesname_facilitiesname_categoriesname_eventsname_seasons
013070859481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
113070855481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
213070856481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
313070857481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
413070858481874vente en ligne2018-12-28 14:47:50+00:00Atelier8.0pricing_formulaFalsespectacle vivantmucemindiv prog enfantl'école des magiciens2018
.............................................
182666718643494814vad2022-08-02 12:18:16+00:00Billet en nombre11.0pricing_formulaFalsespectacle vivantmucemen nb entrées trNaN2022
182666818643495814vad2022-08-02 12:18:16+00:00Billet en nombre11.0pricing_formulaFalsespectacle vivantmucemen nb entrées trNaN2022
182666918643496814vad2022-08-02 12:18:16+00:00Billet en nombre11.0pricing_formulaFalsespectacle vivantmucemen nb entrées trNaN2022
182667018643497814vad2022-08-02 12:18:16+00:00Billet en nombre11.0pricing_formulaFalsespectacle vivantmucemen nb entrées trNaN2022
182667119853111627634vad2022-11-04 14:25:42+00:00Billet en nombre0.0pricing_formulaFalsespectacle vivantmucemindiv entrées grNaN2022
\n", + "

1826672 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " ticket_id customer_id event_type_id supplier_name \\\n", + "0 13070859 48187 4 vente en ligne \n", + "1 13070855 48187 4 vente en ligne \n", + "2 13070856 48187 4 vente en ligne \n", + "3 13070857 48187 4 vente en ligne \n", + "4 13070858 48187 4 vente en ligne \n", + "... ... ... ... ... \n", + "1826667 18643494 81 4 vad \n", + "1826668 18643495 81 4 vad \n", + "1826669 18643496 81 4 vad \n", + "1826670 18643497 81 4 vad \n", + "1826671 19853111 62763 4 vad \n", + "\n", + " purchase_date type_of_ticket_name amount \\\n", + "0 2018-12-28 14:47:50+00:00 Atelier 8.0 \n", + "1 2018-12-28 14:47:50+00:00 Atelier 8.0 \n", + "2 2018-12-28 14:47:50+00:00 Atelier 8.0 \n", + "3 2018-12-28 14:47:50+00:00 Atelier 8.0 \n", + "4 2018-12-28 14:47:50+00:00 Atelier 8.0 \n", + "... ... ... ... \n", + "1826667 2022-08-02 12:18:16+00:00 Billet en nombre 11.0 \n", + "1826668 2022-08-02 12:18:16+00:00 Billet en nombre 11.0 \n", + "1826669 2022-08-02 12:18:16+00:00 Billet en nombre 11.0 \n", + "1826670 2022-08-02 12:18:16+00:00 Billet en nombre 11.0 \n", + "1826671 2022-11-04 14:25:42+00:00 Billet en nombre 0.0 \n", + "\n", + " children is_full_price name_event_types name_facilities \\\n", + "0 pricing_formula False spectacle vivant mucem \n", + "1 pricing_formula False spectacle vivant mucem \n", + "2 pricing_formula False spectacle vivant mucem \n", + "3 pricing_formula False spectacle vivant mucem \n", + "4 pricing_formula False spectacle vivant mucem \n", + "... ... ... ... ... \n", + "1826667 pricing_formula False spectacle vivant mucem \n", + "1826668 pricing_formula False spectacle vivant mucem \n", + "1826669 pricing_formula False spectacle vivant mucem \n", + "1826670 pricing_formula False spectacle vivant mucem \n", + "1826671 pricing_formula False spectacle vivant mucem \n", + "\n", + " name_categories name_events name_seasons \n", + "0 indiv prog enfant l'école des magiciens 2018 \n", + "1 indiv prog enfant l'école des magiciens 2018 \n", + "2 indiv prog enfant l'école des magiciens 2018 \n", + "3 indiv prog enfant l'école des magiciens 2018 \n", + "4 indiv prog enfant l'école des magiciens 2018 \n", + "... ... ... ... \n", + "1826667 en nb entrées tr NaN 2022 \n", + "1826668 en nb entrées tr NaN 2022 \n", + "1826669 en nb entrées tr NaN 2022 \n", + "1826670 en nb entrées tr NaN 2022 \n", + "1826671 indiv entrées gr NaN 2022 \n", + "\n", + "[1826672 rows x 14 columns]" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Importance des suppliers\n", + "df1_products_purchased_reduced" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "2bda0b97-b28b-4070-a57d-aeab0e2f7dfe", + "metadata": {}, + "outputs": [], + "source": [ + "# Nombre de client assistant à plus de 2 type d'événement\n", + "nb_event_types = df1_products_purchased_reduced[['customer_id', 'name_event_types']].groupby('customer_id').nunique()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "043303fe-e90f-4689-a2a9-5d690555a045", + "metadata": {}, + "outputs": [], + "source": [ + "def tickets_kpi_function(tickets_information = None):\n", + " tickets_information_copy = tickets_information.copy()\n", + " tickets_information_copy['purchase_date_max'] = tickets_information_copy['purchase_date']\n", + " tickets_kpi = (tickets_information_copy[['event_type_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'purchase_date_max', 'amount']]\n", + " .groupby([ 'customer_id']) # 'event_type_id',\n", + " .agg({'ticket_id': 'count', \n", + " 'amount' : 'sum',\n", + " 'supplier_name': 'nunique',\n", + " 'purchase_date_max' : 'max',\n", + " 'purchase_date' : 'min'})\n", + " .reset_index()\n", + " )\n", + " \n", + " tickets_kpi.rename(columns = {'ticket_id' : 'nb_tickets', \n", + " 'amount' : 'total_amount',\n", + " 'supplier_name' : 'nb_suppliers', \n", + " 'purchase_date' : 'purchase_date_min'}, inplace = True)\n", + " \n", + " tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n", + " \n", + " return tickets_kpi\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "5882234a-1ed5-4269-87a6-0d75613476e3", + "metadata": {}, + "outputs": [], + "source": [ + "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_products_purchased_reduced)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idnb_ticketstotal_amountnb_supplierspurchase_date_maxpurchase_date_mintime_between_purchase
0112565748830567.572023-11-08 15:59:45+00:002013-06-10 10:37:58+00:003803 days 05:21:47
36156733355271188.042023-11-03 09:42:40+00:002015-09-09 13:48:38+00:002976 days 19:54:02
39411626337642.062023-10-25 09:13:16+00:002014-01-23 16:56:57+00:003561 days 16:16:19
1112587138767.022023-11-04 13:46:59+00:002018-04-04 07:46:31+00:002040 days 06:00:28
3280963488585164350.012022-08-25 13:08:38+00:002020-08-18 08:32:57+00:00737 days 04:35:41
37086916548251489.522021-08-26 12:49:17+00:002018-03-26 11:13:43+00:001249 days 01:35:34
3261663194450713232.032022-09-07 12:55:33+00:002017-11-28 13:52:15+00:001743 days 23:03:18
7881356238746.012022-08-30 11:51:34+00:002017-01-05 13:04:58+00:002062 days 22:46:36
3529584002340319830.042023-11-06 15:59:22+00:002021-05-28 10:22:33+00:00892 days 05:36:49
33775618329431684.512022-02-24 07:47:20+00:002018-10-25 11:04:24+00:001217 days 20:42:56
300115925925914350.032023-06-12 14:05:19+00:002019-11-25 08:52:48+00:001295 days 05:12:31
349377487625712600.022023-10-02 08:13:05+00:002018-02-08 12:54:01+00:002061 days 19:19:04
270295257017678.562023-10-16 10:19:22+00:002014-01-24 15:16:17+00:003551 days 19:03:05
866122123209652.022022-09-19 12:55:15+00:002017-03-29 08:00:09+00:002000 days 04:55:06
1022142922493500.042023-11-06 08:30:37+00:002014-12-03 14:56:38+00:003259 days 17:33:59
39227249182713385.012021-10-26 12:28:40+00:002019-05-07 12:34:56+00:00902 days 23:53:44
544251070539180019800.012022-07-25 12:49:27+00:002022-05-02 16:09:03+00:0083 days 20:40:24
695201216801162312562.022023-09-29 16:34:38+00:002023-06-16 14:16:04+00:00105 days 02:18:34
300565933015510.012023-11-06 10:22:14+00:002018-02-02 08:53:51+00:002103 days 01:28:23
32435441154414133.022022-09-22 08:21:47+00:002017-12-14 12:50:23+00:001742 days 19:31:24
551951084435150016500.012022-09-27 14:32:13+00:002022-05-18 08:04:41+00:00132 days 06:27:32
289835781614850.022023-05-22 07:30:55+00:002019-01-21 14:19:18+00:001581 days 17:11:37
223129421307100.022023-06-29 09:33:58+00:002017-10-25 15:06:58+00:002072 days 18:27:00
232412660.022023-10-19 07:20:48+00:002015-09-30 16:07:52+00:002940 days 15:12:56
45139592121162.042023-10-17 09:39:40+00:002018-02-25 07:17:19+00:002060 days 02:22:21
2936505911866308.032023-05-22 13:41:22+00:002018-02-01 11:16:51+00:001936 days 02:24:31
114842510011230.012021-07-13 07:39:57+00:002015-12-21 15:38:05+00:002030 days 16:01:52
93413261098798.032023-02-01 08:39:45+00:002018-02-13 13:13:48+00:001813 days 19:25:57
301565949010880.012023-10-05 08:23:50+00:002019-12-06 12:59:20+00:001398 days 19:24:30
3647825126810860.022023-06-30 07:22:46+00:002018-02-02 09:06:22+00:001973 days 22:16:24
\n", + "
" + ], + "text/plain": [ + " customer_id nb_tickets total_amount nb_suppliers \\\n", + "0 1 1256574 8830567.5 7 \n", + "3615 6733 35527 1188.0 4 \n", + "39 41 16263 37642.0 6 \n", + "11 12 5871 38767.0 2 \n", + "32809 63488 5851 64350.0 1 \n", + "3708 6916 5482 51489.5 2 \n", + "32616 63194 4507 13232.0 3 \n", + "78 81 3562 38746.0 1 \n", + "35295 84002 3403 19830.0 4 \n", + "3377 5618 3294 31684.5 1 \n", + "30011 59259 2591 4350.0 3 \n", + "34937 74876 2571 2600.0 2 \n", + "270 295 2570 17678.5 6 \n", + "866 1221 2320 9652.0 2 \n", + "1022 1429 2249 3500.0 4 \n", + "3922 7249 1827 13385.0 1 \n", + "54425 1070539 1800 19800.0 1 \n", + "69520 1216801 1623 12562.0 2 \n", + "30056 59330 1551 0.0 1 \n", + "3243 5441 1544 14133.0 2 \n", + "55195 1084435 1500 16500.0 1 \n", + "28983 57816 1485 0.0 2 \n", + "2231 2942 1307 100.0 2 \n", + "23 24 1266 0.0 2 \n", + "4513 9592 1211 62.0 4 \n", + "2936 5059 1186 6308.0 3 \n", + "11484 25100 1123 0.0 1 \n", + "934 1326 1098 798.0 3 \n", + "30156 59490 1088 0.0 1 \n", + "36478 251268 1086 0.0 2 \n", + "\n", + " purchase_date_max purchase_date_min \\\n", + "0 2023-11-08 15:59:45+00:00 2013-06-10 10:37:58+00:00 \n", + "3615 2023-11-03 09:42:40+00:00 2015-09-09 13:48:38+00:00 \n", + "39 2023-10-25 09:13:16+00:00 2014-01-23 16:56:57+00:00 \n", + "11 2023-11-04 13:46:59+00:00 2018-04-04 07:46:31+00:00 \n", + "32809 2022-08-25 13:08:38+00:00 2020-08-18 08:32:57+00:00 \n", + "3708 2021-08-26 12:49:17+00:00 2018-03-26 11:13:43+00:00 \n", + "32616 2022-09-07 12:55:33+00:00 2017-11-28 13:52:15+00:00 \n", + "78 2022-08-30 11:51:34+00:00 2017-01-05 13:04:58+00:00 \n", + "35295 2023-11-06 15:59:22+00:00 2021-05-28 10:22:33+00:00 \n", + "3377 2022-02-24 07:47:20+00:00 2018-10-25 11:04:24+00:00 \n", + "30011 2023-06-12 14:05:19+00:00 2019-11-25 08:52:48+00:00 \n", + "34937 2023-10-02 08:13:05+00:00 2018-02-08 12:54:01+00:00 \n", + "270 2023-10-16 10:19:22+00:00 2014-01-24 15:16:17+00:00 \n", + "866 2022-09-19 12:55:15+00:00 2017-03-29 08:00:09+00:00 \n", + "1022 2023-11-06 08:30:37+00:00 2014-12-03 14:56:38+00:00 \n", + "3922 2021-10-26 12:28:40+00:00 2019-05-07 12:34:56+00:00 \n", + "54425 2022-07-25 12:49:27+00:00 2022-05-02 16:09:03+00:00 \n", + "69520 2023-09-29 16:34:38+00:00 2023-06-16 14:16:04+00:00 \n", + "30056 2023-11-06 10:22:14+00:00 2018-02-02 08:53:51+00:00 \n", + "3243 2022-09-22 08:21:47+00:00 2017-12-14 12:50:23+00:00 \n", + "55195 2022-09-27 14:32:13+00:00 2022-05-18 08:04:41+00:00 \n", + "28983 2023-05-22 07:30:55+00:00 2019-01-21 14:19:18+00:00 \n", + "2231 2023-06-29 09:33:58+00:00 2017-10-25 15:06:58+00:00 \n", + "23 2023-10-19 07:20:48+00:00 2015-09-30 16:07:52+00:00 \n", + "4513 2023-10-17 09:39:40+00:00 2018-02-25 07:17:19+00:00 \n", + "2936 2023-05-22 13:41:22+00:00 2018-02-01 11:16:51+00:00 \n", + "11484 2021-07-13 07:39:57+00:00 2015-12-21 15:38:05+00:00 \n", + "934 2023-02-01 08:39:45+00:00 2018-02-13 13:13:48+00:00 \n", + "30156 2023-10-05 08:23:50+00:00 2019-12-06 12:59:20+00:00 \n", + "36478 2023-06-30 07:22:46+00:00 2018-02-02 09:06:22+00:00 \n", + "\n", + " time_between_purchase \n", + "0 3803 days 05:21:47 \n", + "3615 2976 days 19:54:02 \n", + "39 3561 days 16:16:19 \n", + "11 2040 days 06:00:28 \n", + "32809 737 days 04:35:41 \n", + "3708 1249 days 01:35:34 \n", + "32616 1743 days 23:03:18 \n", + "78 2062 days 22:46:36 \n", + "35295 892 days 05:36:49 \n", + "3377 1217 days 20:42:56 \n", + "30011 1295 days 05:12:31 \n", + "34937 2061 days 19:19:04 \n", + "270 3551 days 19:03:05 \n", + "866 2000 days 04:55:06 \n", + "1022 3259 days 17:33:59 \n", + "3922 902 days 23:53:44 \n", + "54425 83 days 20:40:24 \n", + "69520 105 days 02:18:34 \n", + "30056 2103 days 01:28:23 \n", + "3243 1742 days 19:31:24 \n", + "55195 132 days 06:27:32 \n", + "28983 1581 days 17:11:37 \n", + "2231 2072 days 18:27:00 \n", + "23 2940 days 15:12:56 \n", + "4513 2060 days 02:22:21 \n", + "2936 1936 days 02:24:31 \n", + "11484 2030 days 16:01:52 \n", + "934 1813 days 19:25:57 \n", + "30156 1398 days 19:24:30 \n", + "36478 1973 days 22:16:24 " + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_tickets_kpi.sort_values(by='nb_tickets', ascending=False).head(30)" ] }, { @@ -2019,28 +2781,377 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 39, "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b", "metadata": {}, "outputs": [], "source": [ - "# Fusion liée au product\n", - "df1_products_purchased = pd.merge(df1_tickets_kpi, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", - "\n", - "# Fusion liée au customer\n", - "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", - "\n", - "# Fusion product et customer\n", - "df1_customer_product = pd.merge(df1_customer, df1_products_purchased, on = 'customer_id', how = 'left')" + "# Fusion avec KPI liés au customer\n", + "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, + "id": "9740d64a-e5eb-4967-a534-ca6177546465", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguage...average_ticket_baskettotal_pricepurchase_countfirst_buying_datecountryagetenant_idnb_campaignsnb_campaigns_openedtime_to_open
012751NaN2False1TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
112825NaN2False2TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
211261NaN2False1TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
313071NaN2False2TrueTrueNaNNaNNaN...NaNNaN0NaTfrNaN1311NaNNaNNaT
4653061NaN10False2TrueFalseNaNNaNNaN...NaNNaN0NaTNaNNaN131180.02.00 days 19:53:02.500000
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " customer_id birthdate street_id is_partner gender is_email_true \\\n", + "0 12751 NaN 2 False 1 True \n", + "1 12825 NaN 2 False 2 True \n", + "2 11261 NaN 2 False 1 True \n", + "3 13071 NaN 2 False 2 True \n", + "4 653061 NaN 10 False 2 True \n", + "\n", + " opt_in structure_id profession language ... average_ticket_basket \\\n", + "0 True NaN NaN NaN ... NaN \n", + "1 True NaN NaN NaN ... NaN \n", + "2 True NaN NaN NaN ... NaN \n", + "3 True NaN NaN NaN ... NaN \n", + "4 False NaN NaN NaN ... NaN \n", + "\n", + " total_price purchase_count first_buying_date country age tenant_id \\\n", + "0 NaN 0 NaT fr NaN 1311 \n", + "1 NaN 0 NaT fr NaN 1311 \n", + "2 NaN 0 NaT fr NaN 1311 \n", + "3 NaN 0 NaT fr NaN 1311 \n", + "4 NaN 0 NaT NaN NaN 1311 \n", + "\n", + " nb_campaigns nb_campaigns_opened time_to_open \n", + "0 NaN NaN NaT \n", + "1 NaN NaN NaT \n", + "2 NaN NaN NaT \n", + "3 NaN NaN NaT \n", + "4 80.0 2.0 0 days 19:53:02.500000 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_customer.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "b5c4418c-ad2e-4bb9-bd5c-3b769e9c87d4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idbirthdatestreet_idis_partnergenderis_email_trueopt_instructure_idprofessionlanguagemcp_contact_idlast_buying_datemax_priceticket_sumaverage_pricefidelityaverage_purchase_delayaverage_price_basketaverage_ticket_baskettotal_pricepurchase_countfirst_buying_datecountryagetenant_idnb_campaignsnb_campaigns_openedtime_to_open
582011NaN2False2TrueFalseNaNNaNNaNNaN2023-11-08 03:20:0745.012547757.030122330831-67.79096913.751531.9560878821221.56414722013-06-10 10:37:58+00:00frNaN1311NaNNaNNaT
\n", + "
" + ], + "text/plain": [ + " customer_id birthdate street_id is_partner gender is_email_true \\\n", + "58201 1 NaN 2 False 2 True \n", + "\n", + " opt_in structure_id profession language mcp_contact_id \\\n", + "58201 False NaN NaN NaN NaN \n", + "\n", + " last_buying_date max_price ticket_sum average_price fidelity \\\n", + "58201 2023-11-08 03:20:07 45.0 1254775 7.030122 330831 \n", + "\n", + " average_purchase_delay average_price_basket average_ticket_basket \\\n", + "58201 -67.790969 13.75153 1.956087 \n", + "\n", + " total_price purchase_count first_buying_date country age \\\n", + "58201 8821221.5 641472 2013-06-10 10:37:58+00:00 fr NaN \n", + "\n", + " tenant_id nb_campaigns nb_campaigns_opened time_to_open \n", + "58201 1311 NaN NaN NaT " + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.set_option('display.max_columns', None)\n", + "\n", + "\n", + "df1_customer[df1_customer['customer_id'] == 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 41, "id": "1e42a790-b215-4107-a969-85005da06ebd", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Fusion avec KPI liés au comportement d'achat\n", + "# df1_customer_product = pd.merge(df1_products_purchased_reduced, df1_products_purchased, on = 'customer_id', how = 'outer')" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "d950f24d-a5d1-4f1e-aeaa-ca826470365f", + "metadata": {}, + "outputs": [], + "source": [ + "# df1_customer_product" + ] } ], "metadata": { diff --git a/Exploration_billet_AJ.ipynb b/Exploration_billet_AJ.ipynb index 6af213e..344dd7b 100644 --- a/Exploration_billet_AJ.ipynb +++ b/Exploration_billet_AJ.ipynb @@ -143,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 6, "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed", "metadata": {}, "outputs": [ @@ -151,7 +151,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_683/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_15285/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(file_in)\n" ] } @@ -2731,7 +2731,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 8, "id": "da5d4708-7147-4cc8-8686-52d4bcba5a7a", "metadata": {}, "outputs": [ @@ -2739,7 +2739,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_619/2625134041.py:3: SettingWithCopyWarning: \n", + "/tmp/ipykernel_15285/2625134041.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -2795,11 +2795,9 @@ }, { "cell_type": "code", - "execution_count": 57, - "id": "8072bbb7-1360-4882-bb2b-2f43b6beea0d", - "metadata": { - "scrolled": true - }, + "execution_count": 10, + "id": "c74746de-0bf4-4b83-9a75-f1d3183abf1c", + "metadata": {}, "outputs": [ { "data": { @@ -2831,226 +2829,42 @@ " \n", " \n", " \n", - " 8793\n", - " 4584599\n", - " 1\n", - " consentement optin jeune public\n", + " 0\n", + " 1184824\n", + " 645400\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", " \n", - " 13249\n", - " 4567465\n", - " 1\n", - " DDCP rentrée culturelle 2023\n", + " 1\n", + " 210571\n", + " 2412\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", " \n", - " 21424\n", - " 4544805\n", - " 1\n", - " spectateurs cine dimanche_cine concert_2122\n", + " 2\n", + " 210572\n", + " 4536\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", " \n", - " 21665\n", - " 4544911\n", - " 1\n", - " DDCP Cine 2023\n", + " 3\n", + " 210573\n", + " 6736\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", " \n", - " 22811\n", - " 4545766\n", - " 1\n", - " DDCP OLBJ! 2023\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 57305\n", - " 4457909\n", - " 1\n", - " ddcp_promo_visiteurs occasionnels_musee_8mois\n", - " False\n", - " manual_dynamic_filter\n", - " \n", - " \n", - " 58843\n", - " 3688872\n", - " 1\n", - " DDCP promo livemag\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 66813\n", - " 4313646\n", - " 1\n", - " DDCP spectateurs Classique mais pas que 2022\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 68367\n", - " 4547662\n", - " 1\n", - " ddcp_promo_musee_au moins 3 achats_dps8mois\n", - " False\n", - " manual_dynamic_filter\n", - " \n", - " \n", - " 77320\n", - " 4285520\n", - " 1\n", - " DDCP spectateurs Iminente\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 84350\n", - " 4037805\n", - " 1\n", - " DDCP spectateurs Marseille Jazz 18-19-21\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 85383\n", - " 4569504\n", - " 1\n", - " DDCP rendez-vous de septembre offre spéciale\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 92868\n", - " 4433064\n", - " 1\n", - " ddcp_promo_plein air_ateliers_jardins\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 99670\n", - " 3858684\n", - " 1\n", - " Acid Arab\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 105477\n", - " 4321810\n", - " 1\n", - " Arenametrix_bascule tel vers sib\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 169513\n", - " 3697992\n", - " 1\n", - " ddcp_achats billets nb dps 19052021\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 214421\n", - " 2925324\n", - " 1\n", - " consentement optout scolaires\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 234546\n", - " 4575957\n", - " 1\n", - " Portrait de Leila shahid\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 259808\n", - " 3722259\n", - " 1\n", - " consentement optin b2b\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 274380\n", - " 4510423\n", - " 1\n", - " DDCP_marseille_jazz_2023\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 307511\n", - " 5174466\n", - " 1\n", - " ddcp actoral 21-22\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 357509\n", - " 4442526\n", - " 1\n", - " ddcp musique barvalo\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 392920\n", - " 4390642\n", - " 1\n", - " ddcp_md_promo_spectateurs theatre contempo\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 449620\n", - " 4411897\n", - " 1\n", - " FORMATION _ acheteurs optin last year\n", - " False\n", - " manual_dynamic_filter\n", - " \n", - " \n", - " 503809\n", - " 4734591\n", - " 1\n", - " consentement optin mediation specialisee\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 651222\n", - " 3554426\n", - " 1\n", - " consentement optin b2c\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 654246\n", - " 5182212\n", - " 1\n", - " DDCP spectateurs Festival de Marseille 2023\n", - " False\n", - " manual_static_filter\n", - " \n", - " \n", - " 654395\n", - " 5182456\n", - " 1\n", - " rencontres_echelle_spectateurs_2021_2023\n", + " 4\n", + " 210574\n", + " 38210\n", + " DDCP PROMO Réseau livres\n", " False\n", " manual_static_filter\n", " \n", @@ -3059,80 +2873,241 @@ "" ], "text/plain": [ - " id customer_id target_name \\\n", - "8793 4584599 1 consentement optin jeune public \n", - "13249 4567465 1 DDCP rentrée culturelle 2023 \n", - "21424 4544805 1 spectateurs cine dimanche_cine concert_2122 \n", - "21665 4544911 1 DDCP Cine 2023 \n", - "22811 4545766 1 DDCP OLBJ! 2023 \n", - "57305 4457909 1 ddcp_promo_visiteurs occasionnels_musee_8mois \n", - "58843 3688872 1 DDCP promo livemag \n", - "66813 4313646 1 DDCP spectateurs Classique mais pas que 2022 \n", - "68367 4547662 1 ddcp_promo_musee_au moins 3 achats_dps8mois \n", - "77320 4285520 1 DDCP spectateurs Iminente \n", - "84350 4037805 1 DDCP spectateurs Marseille Jazz 18-19-21 \n", - "85383 4569504 1 DDCP rendez-vous de septembre offre spéciale \n", - "92868 4433064 1 ddcp_promo_plein air_ateliers_jardins \n", - "99670 3858684 1 Acid Arab \n", - "105477 4321810 1 Arenametrix_bascule tel vers sib \n", - "169513 3697992 1 ddcp_achats billets nb dps 19052021 \n", - "214421 2925324 1 consentement optout scolaires \n", - "234546 4575957 1 Portrait de Leila shahid \n", - "259808 3722259 1 consentement optin b2b \n", - "274380 4510423 1 DDCP_marseille_jazz_2023 \n", - "307511 5174466 1 ddcp actoral 21-22 \n", - "357509 4442526 1 ddcp musique barvalo \n", - "392920 4390642 1 ddcp_md_promo_spectateurs theatre contempo \n", - "449620 4411897 1 FORMATION _ acheteurs optin last year \n", - "503809 4734591 1 consentement optin mediation specialisee \n", - "651222 3554426 1 consentement optin b2c \n", - "654246 5182212 1 DDCP spectateurs Festival de Marseille 2023 \n", - "654395 5182456 1 rencontres_echelle_spectateurs_2021_2023 \n", + " id customer_id target_name target_type_is_import \\\n", + "0 1184824 645400 DDCP PROMO Réseau livres False \n", + "1 210571 2412 DDCP PROMO Réseau livres False \n", + "2 210572 4536 DDCP PROMO Réseau livres False \n", + "3 210573 6736 DDCP PROMO Réseau livres False \n", + "4 210574 38210 DDCP PROMO Réseau livres False \n", "\n", - " target_type_is_import target_type_name \n", - "8793 False manual_static_filter \n", - "13249 False manual_static_filter \n", - "21424 False manual_static_filter \n", - "21665 False manual_static_filter \n", - "22811 False manual_static_filter \n", - "57305 False manual_dynamic_filter \n", - "58843 False manual_static_filter \n", - "66813 False manual_static_filter \n", - "68367 False manual_dynamic_filter \n", - "77320 False manual_static_filter \n", - "84350 False manual_static_filter \n", - "85383 False manual_static_filter \n", - "92868 False manual_static_filter \n", - "99670 False manual_static_filter \n", - "105477 False manual_static_filter \n", - "169513 False manual_static_filter \n", - "214421 False manual_static_filter \n", - "234546 False manual_static_filter \n", - "259808 False manual_static_filter \n", - "274380 False manual_static_filter \n", - "307511 False manual_static_filter \n", - "357509 False manual_static_filter \n", - "392920 False manual_static_filter \n", - "449620 False manual_dynamic_filter \n", - "503809 False manual_static_filter \n", - "651222 False manual_static_filter \n", - "654246 False manual_static_filter \n", - "654395 False manual_static_filter " + " target_type_name \n", + "0 manual_static_filter \n", + "1 manual_static_filter \n", + "2 manual_static_filter \n", + "3 manual_static_filter \n", + "4 manual_static_filter " ] }, - "execution_count": 57, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df1_targets_full[df1_targets_full['customer_id'] == 1]" + "df1_targets_full.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "47c55fa0-b2f3-46f9-9abf-c4ab66bd9fcb", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n", + "[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Catégorisation des target_name\n", + "import pandas as pd\n", + "import nltk\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.corpus import stopwords\n", + "from nltk.stem import WordNetLemmatizer\n", + "from nltk.probability import FreqDist\n", + "\n", + "# Téléchargement des ressources nécessaires\n", + "nltk.download('punkt')\n", + "nltk.download('stopwords')\n", + "nltk.download('wordnet')\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8af1aeb9-ebdd-4286-a14c-3b7d801ea172", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mots les plus fréquents:\n", + "consentement: 550777\n", + "optin: 463579\n", + "jeune: 155103\n", + "public: 155103\n", + "mediation: 150001\n" + ] + } + ], + "source": [ + "# Définition des fonctions de tokenisation, suppression des mots vides et lemmatisation\n", + "def preprocess_text(texte):\n", + " # Concaténation des éléments de la liste en une seule chaîne de caractères\n", + " texte_concat = ' '.join(texte)\n", + " \n", + " # Tokenisation des mots\n", + " tokens = word_tokenize(texte_concat.lower())\n", + " \n", + " # Suppression des mots vides (stopwords)\n", + " stop_words = set(stopwords.words('french'))\n", + " filtered_tokens = [word for word in tokens if word not in stop_words]\n", + " \n", + " # Lemmatisation des mots\n", + " lemmatizer = WordNetLemmatizer()\n", + " lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n", + " \n", + " return lemmatized_tokens\n", + "\n", + "\n", + "# Appliquer le prétraitement à la colonne de texte\n", + "df1_targets_full['target_name_tokened'] = df1_targets_full['target_name'].apply(preprocess_text)\n", + "\n", + "# Concaténer les listes de mots pour obtenir une liste de tous les mots dans le corpus\n", + "all_words = [word for tokens in df1_targets_full['target_name_tokened'] for word in tokens]\n", + "\n", + "# Calculer la fréquence des mots\n", + "freq_dist = FreqDist(all_words)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "ceb069e5-76c9-46e4-9ea7-8c16eb4ed3cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mots les plus fréquents:\n", + "consentement: 550777\n", + "optin: 463579\n", + "jeune: 155103\n", + "public: 155103\n", + "mediation: 150001\n", + "specialisee: 150001\n", + "b2c: 143432\n", + "optout: 97683\n", + "newsletter: 56022\n", + "(: 46084\n", + "): 46084\n", + "inscrits: 42296\n", + "nl: 42294\n", + "générale: 41037\n", + "generale: 40950\n" + ] + } + ], + "source": [ + "# Affichage des mots les plus fréquents\n", + "print(\"Mots les plus fréquents:\")\n", + "for mot, freq in freq_dist.most_common(15):\n", + " print(f\"{mot}: {freq}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "8bffef87-542e-4775-bc7c-2c0323fda581", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " texte \\\n", + "0 Le chat noir mange une souris. \n", + "1 Le chien blanc aboie. \n", + "\n", + " texte_preprocessed \n", + "0 [e, h, a, o, i, r, a, g, e, u, e, o, u, r, i, .] \n", + "1 [e, h, i, e, b, a, a, b, o, i, e, .] \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /home/onyxia/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package stopwords to /home/onyxia/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n", + "[nltk_data] Downloading package wordnet to /home/onyxia/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import nltk\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.corpus import stopwords\n", + "from nltk.stem import WordNetLemmatizer\n", + "\n", + "# Téléchargement des ressources nécessaires\n", + "nltk.download('punkt')\n", + "nltk.download('stopwords')\n", + "nltk.download('wordnet')\n", + "\n", + "# Création de la DataFrame d'exemple\n", + "data = {'texte': [\"Le chat noir mange une souris.\", \"Le chien blanc aboie.\"]}\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Fonction pour prétraiter le texte\n", + "def preprocess_text(texte):\n", + " # Concaténation des éléments de la liste en une seule chaîne de caractères\n", + " texte_concat = ' '.join(texte)\n", + " \n", + " # Tokenisation des mots\n", + " tokens = word_tokenize(texte_concat.lower())\n", + " \n", + " # Suppression des mots vides (stopwords)\n", + " stop_words = set(stopwords.words('french'))\n", + " filtered_tokens = [word for word in tokens if word not in stop_words]\n", + " \n", + " # Lemmatisation des mots\n", + " lemmatizer = WordNetLemmatizer()\n", + " lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]\n", + " \n", + " return lemmatized_tokens\n", + "\n", + "# Appliquer la fonction de prétraitement à la colonne de texte\n", + "df['texte_preprocessed'] = df['texte'].apply(preprocess_text)\n", + "\n", + "# Afficher le résultat\n", + "print(df)\n" ] }, { "cell_type": "markdown", "id": "2f665824-a026-4acd-8358-b408a61854b4", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "## Campaign area" ] @@ -3902,9 +3877,7 @@ { "cell_type": "markdown", "id": "96ea2523-38dc-47ef-a49e-2c2d9ad0b1c6", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, + "metadata": {}, "source": [ "## Exploration variables" ]