From c96be82ffa1f2d4978fd97413c5c8fda978e3422 Mon Sep 17 00:00:00 2001 From: ajoubrel-ensae Date: Mon, 5 Feb 2024 21:03:49 +0000 Subject: [PATCH] Ajout cleaning customerplus, kpi on tickets and mergers --- 0_Cleaning_and_merge.ipynb | 1419 ++++++++++++++++++++++++++++++++++-- 1 file changed, 1370 insertions(+), 49 deletions(-) diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index a3018ba..a8dfc0f 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 1, "id": "15103481-8d74-404c-aa09-7601fe7730da", "metadata": {}, "outputs": [], @@ -119,10 +119,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2240/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(file_in)\n" + ] + } + ], "source": [ "# loop to create dataframes from liste\n", "files_path = liste_database\n", @@ -149,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d", "metadata": {}, "outputs": [], @@ -190,11 +199,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "7e7b90ce-da54-4f00-bc34-64c543b0858f", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def preprocessing_customerplus(customerplus = None):\n", + "\n", + " customerplus_copy = customerplus.copy()\n", + " \n", + " # Passage en format date\n", + " cleaning_date(customerplus_copy, 'first_buying_date')\n", + " cleaning_date(customerplus_copy, 'last_visiting_date')\n", + " \n", + " # Selection des variables\n", + " customerplus_copy.drop(['lastname', 'firstname', 'email', 'civility', 'note', 'created_at', 'updated_at', 'deleted_at', 'extra', 'reference', 'extra_field', 'identifier', 'need_reload', 'preferred_category', 'preferred_supplier', 'preferred_formula', 'zipcode', 'last_visiting_date'], axis = 1, inplace=True)\n", + " customerplus_copy.rename(columns = {'id' : 'customer_id'}, inplace = True)\n", + "\n", + " return customerplus_copy\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "03329e32-00a5-42c8-9470-75f7b6216ccd", + "metadata": {}, + "outputs": [], + "source": [ + "df1_customerplus_clean = preprocessing_customerplus(df1_customersplus)" + ] }, { "cell_type": "markdown", @@ -206,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "b95464b1-26bc-4aac-84b4-45da83b92251", "metadata": {}, "outputs": [], @@ -222,8 +255,8 @@ " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", "\n", " # Base des types de billets\n", - " # type_ofs = type_ofs[['id', 'name', 'children']]\n", - " # type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n", + " type_ofs = type_ofs[['id', 'name', 'children']]\n", + " type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n", "\n", " # Base des achats\n", " # Nettoyage de la date d'achat\n", @@ -236,9 +269,9 @@ " ticket_information = pd.merge(tickets, suppliers, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n", " ticket_information.drop(['supplier_id', 'id'], axis = 1, inplace=True)\n", " \n", - " # # Fusion avec type de tickets\n", - " # ticket_information = pd.merge(ticket_information, type_ofs, left_on = 'type_of', right_on = 'id', how = 'inner')\n", - " # ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n", + " # Fusion avec type de tickets\n", + " ticket_information = pd.merge(ticket_information, type_ofs, left_on = 'type_of', right_on = 'id', how = 'inner')\n", + " ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n", " \n", " # Fusion avec achats\n", " ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n", @@ -249,22 +282,501 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2240/1591303091.py:5: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n", + "/tmp/ipykernel_2240/1591303091.py:9: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", + "/tmp/ipykernel_2240/1591303091.py:13: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n" + ] + } + ], "source": [ "df1_ticket_information = preprocessing_tickets_area(tickets = df1_tickets, purchases = df1_purchases, suppliers = df1_suppliers, type_ofs = df1_type_ofs)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9", "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ticket_idproduct_idis_from_subscriptionsupplier_nametype_of_ticket_namechildrenpurchase_datecustomer_id
013070859225251Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
113070860224914Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
213070861224914Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
313070862224914Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
413070863224914Falsevente en ligneAtelierpricing_formula2018-12-28 14:47:50+00:0048187
...........................
182666718643847350454FalsevadBillet en nombrepricing_formula2022-08-02 08:59:17+00:0041
182666819853111383564FalsevadBillet en nombrepricing_formula2022-11-04 14:25:42+00:0062763
182666919860514383751FalsevadBillet en nombrepricing_formula2022-11-18 10:47:26+00:001195566
182667019860515383751FalsevadBillet en nombrepricing_formula2022-11-18 10:47:26+00:001195566
182667119860516383751FalsevadBillet en nombrepricing_formula2022-11-18 10:47:26+00:001195566
\n", + "

1826672 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " ticket_id product_id is_from_subscription supplier_name \\\n", + "0 13070859 225251 False vente en ligne \n", + "1 13070860 224914 False vente en ligne \n", + "2 13070861 224914 False vente en ligne \n", + "3 13070862 224914 False vente en ligne \n", + "4 13070863 224914 False vente en ligne \n", + "... ... ... ... ... \n", + "1826667 18643847 350454 False vad \n", + "1826668 19853111 383564 False vad \n", + "1826669 19860514 383751 False vad \n", + "1826670 19860515 383751 False vad \n", + "1826671 19860516 383751 False vad \n", + "\n", + " type_of_ticket_name children purchase_date \\\n", + "0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", + "1 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", + "2 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", + "3 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", + "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", + "... ... ... ... \n", + "1826667 Billet en nombre pricing_formula 2022-08-02 08:59:17+00:00 \n", + "1826668 Billet en nombre pricing_formula 2022-11-04 14:25:42+00:00 \n", + "1826669 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", + "1826670 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", + "1826671 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", + "\n", + " customer_id \n", + "0 48187 \n", + "1 48187 \n", + "2 48187 \n", + "3 48187 \n", + "4 48187 \n", + "... ... \n", + "1826667 41 \n", + "1826668 62763 \n", + "1826669 1195566 \n", + "1826670 1195566 \n", + "1826671 1195566 \n", + "\n", + "[1826672 rows x 8 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_ticket_information" + ] + }, + { + "cell_type": "markdown", + "id": "37499eae-1a7f-4dce-83b0-ff942ccf7a9d", + "metadata": {}, + "source": [ + "### KPI tickets" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "043303fe-e90f-4689-a2a9-5d690555a045", + "metadata": {}, "outputs": [], "source": [ - "df1_ticket_information" + "def tickets_kpi_function(tickets_information = None):\n", + " tickets_information_copy = tickets_information.copy()\n", + " tickets_information_copy['purchase_date_max'] = tickets_information_copy['purchase_date']\n", + " tickets_kpi = (tickets_information_copy[['product_id', 'customer_id', 'ticket_id','supplier_name', 'purchase_date', 'purchase_date_max']]\n", + " .groupby(['product_id', 'customer_id'])\n", + " .agg({'ticket_id': 'count', \n", + " 'supplier_name': 'nunique',\n", + " 'purchase_date_max' : 'max',\n", + " 'purchase_date' : 'min'})\n", + " .reset_index()\n", + " )\n", + " \n", + " tickets_kpi.rename(columns = {'ticket_id' : 'nb_tickets', \n", + " 'supplier_name' : 'nb_suppliers', \n", + " 'purchase_date' : 'purchase_date_min'}, inplace = True)\n", + " \n", + " tickets_kpi['time_between_purchase'] = tickets_kpi['purchase_date_max'] - tickets_kpi['purchase_date_min']\n", + " \n", + " return tickets_kpi\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5882234a-1ed5-4269-87a6-0d75613476e3", + "metadata": {}, + "outputs": [], + "source": [ + "df1_tickets_kpi = tickets_kpi_function(tickets_information = df1_ticket_information)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
product_idcustomer_idnb_ticketsnb_supplierspurchase_date_maxpurchase_date_mintime_between_purchase
01073102805422019-06-05 14:37:13+00:002019-06-05 14:18:38+00:000 days 00:18:35
111008954355112017-02-17 13:32:51+00:002017-02-17 13:32:51+00:000 days 00:00:00
211008954356112017-03-02 14:36:16+00:002017-03-02 14:36:16+00:000 days 00:00:00
311008954357112017-03-06 15:16:41+00:002017-03-06 15:16:41+00:000 days 00:00:00
411008954358112017-03-13 16:07:27+00:002017-03-13 16:07:27+00:000 days 00:00:00
........................
12836040602611122023-11-08 12:53:31+00:002023-11-08 09:30:28+00:000 days 03:23:03
12836140602713122023-11-08 15:59:11+00:002023-11-08 09:15:36+00:000 days 06:43:35
1283624060281212023-11-08 14:56:08+00:002023-11-08 11:18:37+00:000 days 03:37:31
1283634060291256130212023-11-08 10:35:43+00:002023-11-08 10:35:43+00:000 days 00:00:00
1283644060291256133312023-11-08 16:51:19+00:002023-11-08 16:51:19+00:000 days 00:00:00
\n", + "

128365 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " product_id customer_id nb_tickets nb_suppliers \\\n", + "0 107310 2805 4 2 \n", + "1 110089 54355 1 1 \n", + "2 110089 54356 1 1 \n", + "3 110089 54357 1 1 \n", + "4 110089 54358 1 1 \n", + "... ... ... ... ... \n", + "128360 406026 1 11 2 \n", + "128361 406027 1 31 2 \n", + "128362 406028 1 2 1 \n", + "128363 406029 1256130 2 1 \n", + "128364 406029 1256133 3 1 \n", + "\n", + " purchase_date_max purchase_date_min \\\n", + "0 2019-06-05 14:37:13+00:00 2019-06-05 14:18:38+00:00 \n", + "1 2017-02-17 13:32:51+00:00 2017-02-17 13:32:51+00:00 \n", + "2 2017-03-02 14:36:16+00:00 2017-03-02 14:36:16+00:00 \n", + "3 2017-03-06 15:16:41+00:00 2017-03-06 15:16:41+00:00 \n", + "4 2017-03-13 16:07:27+00:00 2017-03-13 16:07:27+00:00 \n", + "... ... ... \n", + "128360 2023-11-08 12:53:31+00:00 2023-11-08 09:30:28+00:00 \n", + "128361 2023-11-08 15:59:11+00:00 2023-11-08 09:15:36+00:00 \n", + "128362 2023-11-08 14:56:08+00:00 2023-11-08 11:18:37+00:00 \n", + "128363 2023-11-08 10:35:43+00:00 2023-11-08 10:35:43+00:00 \n", + "128364 2023-11-08 16:51:19+00:00 2023-11-08 16:51:19+00:00 \n", + "\n", + " time_between_purchase \n", + "0 0 days 00:18:35 \n", + "1 0 days 00:00:00 \n", + "2 0 days 00:00:00 \n", + "3 0 days 00:00:00 \n", + "4 0 days 00:00:00 \n", + "... ... \n", + "128360 0 days 03:23:03 \n", + "128361 0 days 06:43:35 \n", + "128362 0 days 03:37:31 \n", + "128363 0 days 00:00:00 \n", + "128364 0 days 00:00:00 \n", + "\n", + "[128365 rows x 7 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_tickets_kpi" ] }, { @@ -277,7 +789,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "baed146a-9d3a-4397-a812-3d50c9a2f038", "metadata": {}, "outputs": [], @@ -306,32 +818,413 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "5fbfd88b-b94c-489c-9201-670e96e453e7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2240/3848597476.py:4: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " targets.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n" + ] + } + ], "source": [ "df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_id
target_name
consentement optin mediation specialisee150000
consentement optin jeune public149979
consentement optin b2c108909
Arenametrix_bascule tel vers sib35216
consentement optout b2c34523
......
Automation_parrainage_newsletter_handicap_visuel1
consentement optout mediation specialisee1
Inscrits NL LSF formulaire1
Market auto - contacts inactifs post-scénario1
Inactifs - fin du scénario1
\n", + "

283 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " customer_id\n", + "target_name \n", + "consentement optin mediation specialisee 150000\n", + "consentement optin jeune public 149979\n", + "consentement optin b2c 108909\n", + "Arenametrix_bascule tel vers sib 35216\n", + "consentement optout b2c 34523\n", + "... ...\n", + "Automation_parrainage_newsletter_handicap_visuel 1\n", + "consentement optout mediation specialisee 1\n", + "Inscrits NL LSF formulaire 1\n", + "Market auto - contacts inactifs post-scénario 1\n", + "Inactifs - fin du scénario 1\n", + "\n", + "[283 rows x 1 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "4417ff51-f501-4ab9-a192-4ab75764a8ed", "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_id
target_name
Arenametrix_bascule tel vers sib35216
Autres_interet_exposition1021
COM Inscrits NL générale (historique)23005
Contacts_prenomsdoubles11643
DDCP MD Procès du Siècle1684
DDCP Newsletter centres de loisirs1032
DDCP Newsletter enseignants4510
DDCP Newsletter jeune public3862
DDCP Newsletter relais champ social2270
DDCP PROMO Participants ateliers (adultes et enfants)1954
DDCP billets famille3609
DDCP promo MD pass musées dps oct 20181785
DDCP promo Plan B 2019 (concerts)1948
DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers)1293
DDCP rentrée culturelle 20231757
DDCP_marseille_jazz_20231043
DRE Festival Jean Rouch1502
DRE MucemLab2302
DRE chercheurs1557
DRE institutionnels2229
FORMATION _ acheteurs optin last year10485
Inscrits NL générale (export_291019 + operation_videomaton)14086
Inscrits NL générale site web3732
Inscrits NL jeune public site web1249
Votre première liste3715
consentement optin b2b12735
consentement optin b2c108909
consentement optin dre4527
consentement optin jeune public149979
consentement optin mediation specialisee150000
consentement optin newsletter generale22095
consentement optin scolaires4849
consentement optout b2b14219
consentement optout b2c34523
consentement optout dre14328
consentement optout newsletter generale18855
consentement optout scolaires15744
ddcp_md_scene_ouverte_au_talent1577
ddcp_promo_MD_billet_musée_oct_2019_agarder25482
ddcp_promo_md_musée_dps 0110196010
ddcp_promo_visiteurs occasionnels_musee_8mois6640
ddcp_visiteurs dps 01062212355
festival_jean_rouch1502
rappel po barvalo1248
structures_etiquette champ social1488
\n", + "
" + ], + "text/plain": [ + " customer_id\n", + "target_name \n", + "Arenametrix_bascule tel vers sib 35216\n", + "Autres_interet_exposition 1021\n", + "COM Inscrits NL générale (historique) 23005\n", + "Contacts_prenomsdoubles 11643\n", + "DDCP MD Procès du Siècle 1684\n", + "DDCP Newsletter centres de loisirs 1032\n", + "DDCP Newsletter enseignants 4510\n", + "DDCP Newsletter jeune public 3862\n", + "DDCP Newsletter relais champ social 2270\n", + "DDCP PROMO Participants ateliers (adultes et en... 1954\n", + "DDCP billets famille 3609\n", + "DDCP promo MD pass musées dps oct 2018 1785\n", + "DDCP promo Plan B 2019 (concerts) 1948\n", + "DDCP promo spectateurs prog 21-22 (spectacles, ... 1293\n", + "DDCP rentrée culturelle 2023 1757\n", + "DDCP_marseille_jazz_2023 1043\n", + "DRE Festival Jean Rouch 1502\n", + "DRE MucemLab 2302\n", + "DRE chercheurs 1557\n", + "DRE institutionnels 2229\n", + "FORMATION _ acheteurs optin last year 10485\n", + "Inscrits NL générale (export_291019 + operation... 14086\n", + "Inscrits NL générale site web 3732\n", + "Inscrits NL jeune public site web 1249\n", + "Votre première liste 3715\n", + "consentement optin b2b 12735\n", + "consentement optin b2c 108909\n", + "consentement optin dre 4527\n", + "consentement optin jeune public 149979\n", + "consentement optin mediation specialisee 150000\n", + "consentement optin newsletter generale 22095\n", + "consentement optin scolaires 4849\n", + "consentement optout b2b 14219\n", + "consentement optout b2c 34523\n", + "consentement optout dre 14328\n", + "consentement optout newsletter generale 18855\n", + "consentement optout scolaires 15744\n", + "ddcp_md_scene_ouverte_au_talent 1577\n", + "ddcp_promo_MD_billet_musée_oct_2019_agarder2 5482\n", + "ddcp_promo_md_musée_dps 011019 6010\n", + "ddcp_promo_visiteurs occasionnels_musee_8mois 6640\n", + "ddcp_visiteurs dps 010622 12355\n", + "festival_jean_rouch 1502\n", + "rappel po barvalo 1248\n", + "structures_etiquette champ social 1488" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n", "df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000]" @@ -347,7 +1240,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85", "metadata": {}, "outputs": [], @@ -372,32 +1265,278 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2240/1967867975.py:15: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", + "/tmp/ipykernel_2240/1967867975.py:15: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", + "/tmp/ipykernel_2240/1967867975.py:15: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n" + ] + } + ], "source": [ "df1_campaigns_information = preprocessing_campaigns_area(campaign_stats = df1_campaign_stats, campaigns = df1_campaigns)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "c24457e7-3cad-451a-a65b-7373b656bd6e", - "metadata": {}, - "outputs": [], + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcustomer_idopened_atsent_atdelivered_atcampaign_namecampaign_service_idcampaign_sent_at
019793112597NaT2021-03-28 16:01:09+00:002021-03-28 16:24:18+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
114211113666NaT2021-03-28 16:01:09+00:002021-03-28 16:21:02+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
213150280561NaT2021-03-28 16:00:59+00:002021-03-28 16:08:45+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
370731010072021-03-28 18:11:06+00:002021-03-28 16:00:59+00:002021-03-28 16:09:47+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
45175103972NaT2021-03-28 16:01:06+00:002021-03-28 16:05:03+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
...........................
621480383029942661552023-10-23 09:43:25+00:002023-10-23 09:32:33+00:002023-10-23 09:32:34+00:00dre_nov_202313182023-10-23 09:31:17+00:00
62148048303307213552023-10-23 09:44:02+00:002023-10-23 09:32:49+00:002023-10-23 09:32:49+00:00dre_nov_202313182023-10-23 09:31:17+00:00
62148058304346218492023-10-23 09:45:52+00:002023-10-23 09:33:28+00:002023-10-23 09:33:29+00:00dre_nov_202313182023-10-23 09:31:17+00:00
621480683020376677892023-10-23 09:47:32+00:002023-10-23 09:31:53+00:002023-10-23 09:31:54+00:00dre_nov_202313182023-10-23 09:31:17+00:00
62148078304939294154NaT2023-10-23 09:33:54+00:002023-10-23 09:33:55+00:00dre_nov_202313182023-10-23 09:31:17+00:00
\n", + "

6214808 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " id customer_id opened_at \\\n", + "0 19793 112597 NaT \n", + "1 14211 113666 NaT \n", + "2 13150 280561 NaT \n", + "3 7073 101007 2021-03-28 18:11:06+00:00 \n", + "4 5175 103972 NaT \n", + "... ... ... ... \n", + "6214803 8302994 266155 2023-10-23 09:43:25+00:00 \n", + "6214804 8303307 21355 2023-10-23 09:44:02+00:00 \n", + "6214805 8304346 21849 2023-10-23 09:45:52+00:00 \n", + "6214806 8302037 667789 2023-10-23 09:47:32+00:00 \n", + "6214807 8304939 294154 NaT \n", + "\n", + " sent_at delivered_at \\\n", + "0 2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00 \n", + "1 2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00 \n", + "2 2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00 \n", + "3 2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00 \n", + "4 2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00 \n", + "... ... ... \n", + "6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00 \n", + "6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00 \n", + "6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00 \n", + "6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00 \n", + "6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00 \n", + "\n", + " campaign_name campaign_service_id \\\n", + "0 Le Mucem chez vous, gardons le lien #22 404 \n", + "1 Le Mucem chez vous, gardons le lien #22 404 \n", + "2 Le Mucem chez vous, gardons le lien #22 404 \n", + "3 Le Mucem chez vous, gardons le lien #22 404 \n", + "4 Le Mucem chez vous, gardons le lien #22 404 \n", + "... ... ... \n", + "6214803 dre_nov_2023 1318 \n", + "6214804 dre_nov_2023 1318 \n", + "6214805 dre_nov_2023 1318 \n", + "6214806 dre_nov_2023 1318 \n", + "6214807 dre_nov_2023 1318 \n", + "\n", + " campaign_sent_at \n", + "0 2021-03-27 23:00:00+00:00 \n", + "1 2021-03-27 23:00:00+00:00 \n", + "2 2021-03-27 23:00:00+00:00 \n", + "3 2021-03-27 23:00:00+00:00 \n", + "4 2021-03-27 23:00:00+00:00 \n", + "... ... \n", + "6214803 2023-10-23 09:31:17+00:00 \n", + "6214804 2023-10-23 09:31:17+00:00 \n", + "6214805 2023-10-23 09:31:17+00:00 \n", + "6214806 2023-10-23 09:31:17+00:00 \n", + "6214807 2023-10-23 09:31:17+00:00 \n", + "\n", + "[6214808 rows x 8 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df1_campaigns_information" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "e2c88552-b863-47a2-be23-8d2898fb28bc", "metadata": {}, "outputs": [], "source": [ - "def campaigns_kpi(campaigns_information = None):\n", + "def campaigns_kpi_function(campaigns_information = None):\n", " # Nombre de campagnes de mails\n", " nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n", " nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)\n", @@ -426,20 +1565,177 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "24537647-bc29-4777-9848-ac4120a4aa60", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2240/3700263836.py:11: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n" + ] + } + ], "source": [ - "df1_campaigns_kpi = campaigns_kpi(campaigns_information = df1_campaigns_information) " + "df1_campaigns_kpi = campaigns_kpi_function(campaigns_information = df1_campaigns_information) " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idnb_campaignsnb_campaigns_openedtime_to_open
0240.0NaT
13222124.01 days 00:28:30.169354838
2477.01 days 04:31:01.428571428
3540.0NaT
46200.0NaT
...............
130467125609711.00 days 02:11:15
130468125609810.0NaT
130469125609910.0NaT
130470125610010.0NaT
130471125610110.0NaT
\n", + "

130472 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " customer_id nb_campaigns nb_campaigns_opened \\\n", + "0 2 4 0.0 \n", + "1 3 222 124.0 \n", + "2 4 7 7.0 \n", + "3 5 4 0.0 \n", + "4 6 20 0.0 \n", + "... ... ... ... \n", + "130467 1256097 1 1.0 \n", + "130468 1256098 1 0.0 \n", + "130469 1256099 1 0.0 \n", + "130470 1256100 1 0.0 \n", + "130471 1256101 1 0.0 \n", + "\n", + " time_to_open \n", + "0 NaT \n", + "1 1 days 00:28:30.169354838 \n", + "2 1 days 04:31:01.428571428 \n", + "3 NaT \n", + "4 NaT \n", + "... ... \n", + "130467 0 days 02:11:15 \n", + "130468 NaT \n", + "130469 NaT \n", + "130470 NaT \n", + "130471 NaT \n", + "\n", + "[130472 rows x 4 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df1_campaigns_kpi" ] @@ -462,7 +1758,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 24, "id": "30488a40-1b38-4b9a-9d3b-26a0597c5e6d", "metadata": {}, "outputs": [], @@ -473,7 +1769,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 25, "id": "607eb4b4-eed9-4b50-b823-f75c116dd37c", "metadata": {}, "outputs": [], @@ -544,7 +1840,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 26, "id": "350b09b9-451f-4d47-81fe-f34b892db027", "metadata": {}, "outputs": [], @@ -632,7 +1928,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 27, "id": "0fccc8ef-e575-4857-a401-94a7274394df", "metadata": {}, "outputs": [ @@ -785,7 +2081,7 @@ "4 indiv entrées tp " ] }, - "execution_count": 32, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -797,7 +2093,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 28, "id": "779d8aaf-6668-4f66-8852-847304407ea3", "metadata": {}, "outputs": [ @@ -967,7 +2263,7 @@ "4 spectacle vivant mucem " ] }, - "execution_count": 33, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -979,7 +2275,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 29, "id": "7714fa32-303b-4ea7-b174-3fd0fcab5af0", "metadata": {}, "outputs": [ @@ -1078,7 +2374,7 @@ "4 37 383 269 1" ] }, - "execution_count": 34, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1098,7 +2394,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 30, "id": "15a62ed6-35e4-4abc-aeef-a7daeec0a4ba", "metadata": {}, "outputs": [], @@ -1126,7 +2422,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 31, "id": "89dc9685-1de9-4ce3-a6c0-8d7f1931a951", "metadata": {}, "outputs": [ @@ -1330,7 +2626,7 @@ "4 1 8.5 False non défini mucem " ] }, - "execution_count": 36, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1340,10 +2636,35 @@ "products_global.head()" ] }, + { + "cell_type": "markdown", + "id": "7c3211a5-a851-43bc-a1f0-b39d51857fb7", + "metadata": {}, + "source": [ + "# Fusion des bases locales" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b", + "metadata": {}, + "outputs": [], + "source": [ + "# Fusion liée au product\n", + "df1_product_purchased = pd.merge(df1_tickets_kpi, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", + "\n", + "# Fusion liée au customer\n", + "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", + "\n", + "# Fusion product et customer\n", + "df1_customer_product = pd.merge(df1_customer, df1_product_purchased, on = 'customer_id', how = 'left')" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "117d172a-2195-4060-9245-96c6f637ebbd", + "id": "1e42a790-b215-4107-a969-85005da06ebd", "metadata": {}, "outputs": [], "source": []