From b2a6ae0929c7471c80b74017feec6b8bffea14aa Mon Sep 17 00:00:00 2001 From: ajoubrel-ensae Date: Mon, 5 Feb 2024 21:10:07 +0000 Subject: [PATCH] Ajout .head() --- 0_Cleaning_and_merge.ipynb | 809 +++++-------------------------------- 1 file changed, 91 insertions(+), 718 deletions(-) diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb index a8dfc0f..b2c2018 100644 --- a/0_Cleaning_and_merge.ipynb +++ b/0_Cleaning_and_merge.ipynb @@ -72,54 +72,6 @@ { "cell_type": "code", "execution_count": 4, - "id": "aaf64d60-bf92-470c-8210-d09abd6a653e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bdc2324-data/1/1campaign_stats.csv',\n", - " 'bdc2324-data/1/1campaigns.csv',\n", - " 'bdc2324-data/1/1categories.csv',\n", - " 'bdc2324-data/1/1countries.csv',\n", - " 'bdc2324-data/1/1currencies.csv',\n", - " 'bdc2324-data/1/1customer_target_mappings.csv',\n", - " 'bdc2324-data/1/1customersplus.csv',\n", - " 'bdc2324-data/1/1event_types.csv',\n", - " 'bdc2324-data/1/1events.csv',\n", - " 'bdc2324-data/1/1facilities.csv',\n", - " 'bdc2324-data/1/1link_stats.csv',\n", - " 'bdc2324-data/1/1pricing_formulas.csv',\n", - " 'bdc2324-data/1/1product_packs.csv',\n", - " 'bdc2324-data/1/1products.csv',\n", - " 'bdc2324-data/1/1products_groups.csv',\n", - " 'bdc2324-data/1/1purchases.csv',\n", - " 'bdc2324-data/1/1representation_category_capacities.csv',\n", - " 'bdc2324-data/1/1representations.csv',\n", - " 'bdc2324-data/1/1seasons.csv',\n", - " 'bdc2324-data/1/1structure_tag_mappings.csv',\n", - " 'bdc2324-data/1/1suppliers.csv',\n", - " 'bdc2324-data/1/1tags.csv',\n", - " 'bdc2324-data/1/1target_types.csv',\n", - " 'bdc2324-data/1/1targets.csv',\n", - " 'bdc2324-data/1/1tickets.csv',\n", - " 'bdc2324-data/1/1type_of_categories.csv',\n", - " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n", - " 'bdc2324-data/1/1type_ofs.csv']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "liste_database" - ] - }, - { - "cell_type": "code", - "execution_count": 5, "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed", "metadata": {}, "outputs": [ @@ -127,7 +79,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_2240/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_3658/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(file_in)\n" ] } @@ -158,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d", "metadata": {}, "outputs": [], @@ -199,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "7e7b90ce-da54-4f00-bc34-64c543b0858f", "metadata": {}, "outputs": [], @@ -221,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "03329e32-00a5-42c8-9470-75f7b6216ccd", "metadata": {}, "outputs": [], @@ -239,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "b95464b1-26bc-4aac-84b4-45da83b92251", "metadata": {}, "outputs": [], @@ -282,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396", "metadata": {}, "outputs": [ @@ -290,17 +242,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_2240/1591303091.py:5: SettingWithCopyWarning: \n", + "/tmp/ipykernel_3658/1591303091.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n", - "/tmp/ipykernel_2240/1591303091.py:9: SettingWithCopyWarning: \n", + "/tmp/ipykernel_3658/1591303091.py:9: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", - "/tmp/ipykernel_2240/1591303091.py:13: SettingWithCopyWarning: \n", + "/tmp/ipykernel_3658/1591303091.py:13: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -314,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9", "metadata": {}, "outputs": [ @@ -405,127 +357,33 @@ " 2018-12-28 14:47:50+00:00\n", " 48187\n", " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 1826667\n", - " 18643847\n", - " 350454\n", - " False\n", - " vad\n", - " Billet en nombre\n", - " pricing_formula\n", - " 2022-08-02 08:59:17+00:00\n", - " 41\n", - " \n", - " \n", - " 1826668\n", - " 19853111\n", - " 383564\n", - " False\n", - " vad\n", - " Billet en nombre\n", - " pricing_formula\n", - " 2022-11-04 14:25:42+00:00\n", - " 62763\n", - " \n", - " \n", - " 1826669\n", - " 19860514\n", - " 383751\n", - " False\n", - " vad\n", - " Billet en nombre\n", - " pricing_formula\n", - " 2022-11-18 10:47:26+00:00\n", - " 1195566\n", - " \n", - " \n", - " 1826670\n", - " 19860515\n", - " 383751\n", - " False\n", - " vad\n", - " Billet en nombre\n", - " pricing_formula\n", - " 2022-11-18 10:47:26+00:00\n", - " 1195566\n", - " \n", - " \n", - " 1826671\n", - " 19860516\n", - " 383751\n", - " False\n", - " vad\n", - " Billet en nombre\n", - " pricing_formula\n", - " 2022-11-18 10:47:26+00:00\n", - " 1195566\n", - " \n", " \n", "\n", - "

1826672 rows × 8 columns

\n", "" ], "text/plain": [ - " ticket_id product_id is_from_subscription supplier_name \\\n", - "0 13070859 225251 False vente en ligne \n", - "1 13070860 224914 False vente en ligne \n", - "2 13070861 224914 False vente en ligne \n", - "3 13070862 224914 False vente en ligne \n", - "4 13070863 224914 False vente en ligne \n", - "... ... ... ... ... \n", - "1826667 18643847 350454 False vad \n", - "1826668 19853111 383564 False vad \n", - "1826669 19860514 383751 False vad \n", - "1826670 19860515 383751 False vad \n", - "1826671 19860516 383751 False vad \n", + " ticket_id product_id is_from_subscription supplier_name \\\n", + "0 13070859 225251 False vente en ligne \n", + "1 13070860 224914 False vente en ligne \n", + "2 13070861 224914 False vente en ligne \n", + "3 13070862 224914 False vente en ligne \n", + "4 13070863 224914 False vente en ligne \n", "\n", - " type_of_ticket_name children purchase_date \\\n", - "0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "1 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "2 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "3 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "... ... ... ... \n", - "1826667 Billet en nombre pricing_formula 2022-08-02 08:59:17+00:00 \n", - "1826668 Billet en nombre pricing_formula 2022-11-04 14:25:42+00:00 \n", - "1826669 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", - "1826670 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", - "1826671 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", - "\n", - " customer_id \n", - "0 48187 \n", - "1 48187 \n", - "2 48187 \n", - "3 48187 \n", - "4 48187 \n", - "... ... \n", - "1826667 41 \n", - "1826668 62763 \n", - "1826669 1195566 \n", - "1826670 1195566 \n", - "1826671 1195566 \n", - "\n", - "[1826672 rows x 8 columns]" + " type_of_ticket_name children purchase_date customer_id \n", + "0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 \n", + "1 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 \n", + "2 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 \n", + "3 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 \n", + "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 48187 " ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df1_ticket_information" + "df1_ticket_information.head()" ] }, { @@ -538,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "043303fe-e90f-4689-a2a9-5d690555a045", "metadata": {}, "outputs": [], @@ -567,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "5882234a-1ed5-4269-87a6-0d75613476e3", "metadata": {}, "outputs": [], @@ -577,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 13, "id": "a7a452a6-cd5e-4c8b-b250-8a7d26e48fad", "metadata": {}, "outputs": [ @@ -662,121 +520,33 @@ " 2017-03-13 16:07:27+00:00\n", " 0 days 00:00:00\n", " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 128360\n", - " 406026\n", - " 1\n", - " 11\n", - " 2\n", - " 2023-11-08 12:53:31+00:00\n", - " 2023-11-08 09:30:28+00:00\n", - " 0 days 03:23:03\n", - " \n", - " \n", - " 128361\n", - " 406027\n", - " 1\n", - " 31\n", - " 2\n", - " 2023-11-08 15:59:11+00:00\n", - " 2023-11-08 09:15:36+00:00\n", - " 0 days 06:43:35\n", - " \n", - " \n", - " 128362\n", - " 406028\n", - " 1\n", - " 2\n", - " 1\n", - " 2023-11-08 14:56:08+00:00\n", - " 2023-11-08 11:18:37+00:00\n", - " 0 days 03:37:31\n", - " \n", - " \n", - " 128363\n", - " 406029\n", - " 1256130\n", - " 2\n", - " 1\n", - " 2023-11-08 10:35:43+00:00\n", - " 2023-11-08 10:35:43+00:00\n", - " 0 days 00:00:00\n", - " \n", - " \n", - " 128364\n", - " 406029\n", - " 1256133\n", - " 3\n", - " 1\n", - " 2023-11-08 16:51:19+00:00\n", - " 2023-11-08 16:51:19+00:00\n", - " 0 days 00:00:00\n", - " \n", " \n", "\n", - "

128365 rows × 7 columns

\n", "" ], "text/plain": [ - " product_id customer_id nb_tickets nb_suppliers \\\n", - "0 107310 2805 4 2 \n", - "1 110089 54355 1 1 \n", - "2 110089 54356 1 1 \n", - "3 110089 54357 1 1 \n", - "4 110089 54358 1 1 \n", - "... ... ... ... ... \n", - "128360 406026 1 11 2 \n", - "128361 406027 1 31 2 \n", - "128362 406028 1 2 1 \n", - "128363 406029 1256130 2 1 \n", - "128364 406029 1256133 3 1 \n", + " product_id customer_id nb_tickets nb_suppliers \\\n", + "0 107310 2805 4 2 \n", + "1 110089 54355 1 1 \n", + "2 110089 54356 1 1 \n", + "3 110089 54357 1 1 \n", + "4 110089 54358 1 1 \n", "\n", - " purchase_date_max purchase_date_min \\\n", - "0 2019-06-05 14:37:13+00:00 2019-06-05 14:18:38+00:00 \n", - "1 2017-02-17 13:32:51+00:00 2017-02-17 13:32:51+00:00 \n", - "2 2017-03-02 14:36:16+00:00 2017-03-02 14:36:16+00:00 \n", - "3 2017-03-06 15:16:41+00:00 2017-03-06 15:16:41+00:00 \n", - "4 2017-03-13 16:07:27+00:00 2017-03-13 16:07:27+00:00 \n", - "... ... ... \n", - "128360 2023-11-08 12:53:31+00:00 2023-11-08 09:30:28+00:00 \n", - "128361 2023-11-08 15:59:11+00:00 2023-11-08 09:15:36+00:00 \n", - "128362 2023-11-08 14:56:08+00:00 2023-11-08 11:18:37+00:00 \n", - "128363 2023-11-08 10:35:43+00:00 2023-11-08 10:35:43+00:00 \n", - "128364 2023-11-08 16:51:19+00:00 2023-11-08 16:51:19+00:00 \n", - "\n", - " time_between_purchase \n", - "0 0 days 00:18:35 \n", - "1 0 days 00:00:00 \n", - "2 0 days 00:00:00 \n", - "3 0 days 00:00:00 \n", - "4 0 days 00:00:00 \n", - "... ... \n", - "128360 0 days 03:23:03 \n", - "128361 0 days 06:43:35 \n", - "128362 0 days 03:37:31 \n", - "128363 0 days 00:00:00 \n", - "128364 0 days 00:00:00 \n", - "\n", - "[128365 rows x 7 columns]" + " purchase_date_max purchase_date_min time_between_purchase \n", + "0 2019-06-05 14:37:13+00:00 2019-06-05 14:18:38+00:00 0 days 00:18:35 \n", + "1 2017-02-17 13:32:51+00:00 2017-02-17 13:32:51+00:00 0 days 00:00:00 \n", + "2 2017-03-02 14:36:16+00:00 2017-03-02 14:36:16+00:00 0 days 00:00:00 \n", + "3 2017-03-06 15:16:41+00:00 2017-03-06 15:16:41+00:00 0 days 00:00:00 \n", + "4 2017-03-13 16:07:27+00:00 2017-03-13 16:07:27+00:00 0 days 00:00:00 " ] }, - "execution_count": 32, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df1_tickets_kpi" + "df1_tickets_kpi.head()" ] }, { @@ -826,7 +596,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_2240/3848597476.py:4: SettingWithCopyWarning: \n", + "/tmp/ipykernel_3658/3848597476.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -893,51 +663,18 @@ " consentement optout b2c\n", " 34523\n", " \n", - " \n", - " ...\n", - " ...\n", - " \n", - " \n", - " Automation_parrainage_newsletter_handicap_visuel\n", - " 1\n", - " \n", - " \n", - " consentement optout mediation specialisee\n", - " 1\n", - " \n", - " \n", - " Inscrits NL LSF formulaire\n", - " 1\n", - " \n", - " \n", - " Market auto - contacts inactifs post-scénario\n", - " 1\n", - " \n", - " \n", - " Inactifs - fin du scénario\n", - " 1\n", - " \n", " \n", "\n", - "

283 rows × 1 columns

\n", "" ], "text/plain": [ - " customer_id\n", - "target_name \n", - "consentement optin mediation specialisee 150000\n", - "consentement optin jeune public 149979\n", - "consentement optin b2c 108909\n", - "Arenametrix_bascule tel vers sib 35216\n", - "consentement optout b2c 34523\n", - "... ...\n", - "Automation_parrainage_newsletter_handicap_visuel 1\n", - "consentement optout mediation specialisee 1\n", - "Inscrits NL LSF formulaire 1\n", - "Market auto - contacts inactifs post-scénario 1\n", - "Inactifs - fin du scénario 1\n", - "\n", - "[283 rows x 1 columns]" + " customer_id\n", + "target_name \n", + "consentement optin mediation specialisee 150000\n", + "consentement optin jeune public 149979\n", + "consentement optin b2c 108909\n", + "Arenametrix_bascule tel vers sib 35216\n", + "consentement optout b2c 34523" ] }, "execution_count": 16, @@ -946,7 +683,7 @@ } ], "source": [ - "df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False)" + "df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False).head()" ] }, { @@ -1006,218 +743,18 @@ " DDCP MD Procès du Siècle\n", " 1684\n", " \n", - " \n", - " DDCP Newsletter centres de loisirs\n", - " 1032\n", - " \n", - " \n", - " DDCP Newsletter enseignants\n", - " 4510\n", - " \n", - " \n", - " DDCP Newsletter jeune public\n", - " 3862\n", - " \n", - " \n", - " DDCP Newsletter relais champ social\n", - " 2270\n", - " \n", - " \n", - " DDCP PROMO Participants ateliers (adultes et enfants)\n", - " 1954\n", - " \n", - " \n", - " DDCP billets famille\n", - " 3609\n", - " \n", - " \n", - " DDCP promo MD pass musées dps oct 2018\n", - " 1785\n", - " \n", - " \n", - " DDCP promo Plan B 2019 (concerts)\n", - " 1948\n", - " \n", - " \n", - " DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers)\n", - " 1293\n", - " \n", - " \n", - " DDCP rentrée culturelle 2023\n", - " 1757\n", - " \n", - " \n", - " DDCP_marseille_jazz_2023\n", - " 1043\n", - " \n", - " \n", - " DRE Festival Jean Rouch\n", - " 1502\n", - " \n", - " \n", - " DRE MucemLab\n", - " 2302\n", - " \n", - " \n", - " DRE chercheurs\n", - " 1557\n", - " \n", - " \n", - " DRE institutionnels\n", - " 2229\n", - " \n", - " \n", - " FORMATION _ acheteurs optin last year\n", - " 10485\n", - " \n", - " \n", - " Inscrits NL générale (export_291019 + operation_videomaton)\n", - " 14086\n", - " \n", - " \n", - " Inscrits NL générale site web\n", - " 3732\n", - " \n", - " \n", - " Inscrits NL jeune public site web\n", - " 1249\n", - " \n", - " \n", - " Votre première liste\n", - " 3715\n", - " \n", - " \n", - " consentement optin b2b\n", - " 12735\n", - " \n", - " \n", - " consentement optin b2c\n", - " 108909\n", - " \n", - " \n", - " consentement optin dre\n", - " 4527\n", - " \n", - " \n", - " consentement optin jeune public\n", - " 149979\n", - " \n", - " \n", - " consentement optin mediation specialisee\n", - " 150000\n", - " \n", - " \n", - " consentement optin newsletter generale\n", - " 22095\n", - " \n", - " \n", - " consentement optin scolaires\n", - " 4849\n", - " \n", - " \n", - " consentement optout b2b\n", - " 14219\n", - " \n", - " \n", - " consentement optout b2c\n", - " 34523\n", - " \n", - " \n", - " consentement optout dre\n", - " 14328\n", - " \n", - " \n", - " consentement optout newsletter generale\n", - " 18855\n", - " \n", - " \n", - " consentement optout scolaires\n", - " 15744\n", - " \n", - " \n", - " ddcp_md_scene_ouverte_au_talent\n", - " 1577\n", - " \n", - " \n", - " ddcp_promo_MD_billet_musée_oct_2019_agarder2\n", - " 5482\n", - " \n", - " \n", - " ddcp_promo_md_musée_dps 011019\n", - " 6010\n", - " \n", - " \n", - " ddcp_promo_visiteurs occasionnels_musee_8mois\n", - " 6640\n", - " \n", - " \n", - " ddcp_visiteurs dps 010622\n", - " 12355\n", - " \n", - " \n", - " festival_jean_rouch\n", - " 1502\n", - " \n", - " \n", - " rappel po barvalo\n", - " 1248\n", - " \n", - " \n", - " structures_etiquette champ social\n", - " 1488\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " customer_id\n", - "target_name \n", - "Arenametrix_bascule tel vers sib 35216\n", - "Autres_interet_exposition 1021\n", - "COM Inscrits NL générale (historique) 23005\n", - "Contacts_prenomsdoubles 11643\n", - "DDCP MD Procès du Siècle 1684\n", - "DDCP Newsletter centres de loisirs 1032\n", - "DDCP Newsletter enseignants 4510\n", - "DDCP Newsletter jeune public 3862\n", - "DDCP Newsletter relais champ social 2270\n", - "DDCP PROMO Participants ateliers (adultes et en... 1954\n", - "DDCP billets famille 3609\n", - "DDCP promo MD pass musées dps oct 2018 1785\n", - "DDCP promo Plan B 2019 (concerts) 1948\n", - "DDCP promo spectateurs prog 21-22 (spectacles, ... 1293\n", - "DDCP rentrée culturelle 2023 1757\n", - "DDCP_marseille_jazz_2023 1043\n", - "DRE Festival Jean Rouch 1502\n", - "DRE MucemLab 2302\n", - "DRE chercheurs 1557\n", - "DRE institutionnels 2229\n", - "FORMATION _ acheteurs optin last year 10485\n", - "Inscrits NL générale (export_291019 + operation... 14086\n", - "Inscrits NL générale site web 3732\n", - "Inscrits NL jeune public site web 1249\n", - "Votre première liste 3715\n", - "consentement optin b2b 12735\n", - "consentement optin b2c 108909\n", - "consentement optin dre 4527\n", - "consentement optin jeune public 149979\n", - "consentement optin mediation specialisee 150000\n", - "consentement optin newsletter generale 22095\n", - "consentement optin scolaires 4849\n", - "consentement optout b2b 14219\n", - "consentement optout b2c 34523\n", - "consentement optout dre 14328\n", - "consentement optout newsletter generale 18855\n", - "consentement optout scolaires 15744\n", - "ddcp_md_scene_ouverte_au_talent 1577\n", - "ddcp_promo_MD_billet_musée_oct_2019_agarder2 5482\n", - "ddcp_promo_md_musée_dps 011019 6010\n", - "ddcp_promo_visiteurs occasionnels_musee_8mois 6640\n", - "ddcp_visiteurs dps 010622 12355\n", - "festival_jean_rouch 1502\n", - "rappel po barvalo 1248\n", - "structures_etiquette champ social 1488" + " customer_id\n", + "target_name \n", + "Arenametrix_bascule tel vers sib 35216\n", + "Autres_interet_exposition 1021\n", + "COM Inscrits NL générale (historique) 23005\n", + "Contacts_prenomsdoubles 11643\n", + "DDCP MD Procès du Siècle 1684" ] }, "execution_count": 17, @@ -1227,7 +764,7 @@ ], "source": [ "df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n", - "df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000]" + "df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000].head()" ] }, { @@ -1273,19 +810,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_2240/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_2240/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", - "/tmp/ipykernel_2240/1967867975.py:15: SettingWithCopyWarning: \n", + "/tmp/ipykernel_3658/1967867975.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -1393,131 +930,31 @@ " 404\n", " 2021-03-27 23:00:00+00:00\n", " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 6214803\n", - " 8302994\n", - " 266155\n", - " 2023-10-23 09:43:25+00:00\n", - " 2023-10-23 09:32:33+00:00\n", - " 2023-10-23 09:32:34+00:00\n", - " dre_nov_2023\n", - " 1318\n", - " 2023-10-23 09:31:17+00:00\n", - " \n", - " \n", - " 6214804\n", - " 8303307\n", - " 21355\n", - " 2023-10-23 09:44:02+00:00\n", - " 2023-10-23 09:32:49+00:00\n", - " 2023-10-23 09:32:49+00:00\n", - " dre_nov_2023\n", - " 1318\n", - " 2023-10-23 09:31:17+00:00\n", - " \n", - " \n", - " 6214805\n", - " 8304346\n", - " 21849\n", - " 2023-10-23 09:45:52+00:00\n", - " 2023-10-23 09:33:28+00:00\n", - " 2023-10-23 09:33:29+00:00\n", - " dre_nov_2023\n", - " 1318\n", - " 2023-10-23 09:31:17+00:00\n", - " \n", - " \n", - " 6214806\n", - " 8302037\n", - " 667789\n", - " 2023-10-23 09:47:32+00:00\n", - " 2023-10-23 09:31:53+00:00\n", - " 2023-10-23 09:31:54+00:00\n", - " dre_nov_2023\n", - " 1318\n", - " 2023-10-23 09:31:17+00:00\n", - " \n", - " \n", - " 6214807\n", - " 8304939\n", - " 294154\n", - " NaT\n", - " 2023-10-23 09:33:54+00:00\n", - " 2023-10-23 09:33:55+00:00\n", - " dre_nov_2023\n", - " 1318\n", - " 2023-10-23 09:31:17+00:00\n", - " \n", " \n", "\n", - "

6214808 rows × 8 columns

\n", "" ], "text/plain": [ - " id customer_id opened_at \\\n", - "0 19793 112597 NaT \n", - "1 14211 113666 NaT \n", - "2 13150 280561 NaT \n", - "3 7073 101007 2021-03-28 18:11:06+00:00 \n", - "4 5175 103972 NaT \n", - "... ... ... ... \n", - "6214803 8302994 266155 2023-10-23 09:43:25+00:00 \n", - "6214804 8303307 21355 2023-10-23 09:44:02+00:00 \n", - "6214805 8304346 21849 2023-10-23 09:45:52+00:00 \n", - "6214806 8302037 667789 2023-10-23 09:47:32+00:00 \n", - "6214807 8304939 294154 NaT \n", + " id customer_id opened_at sent_at \\\n", + "0 19793 112597 NaT 2021-03-28 16:01:09+00:00 \n", + "1 14211 113666 NaT 2021-03-28 16:01:09+00:00 \n", + "2 13150 280561 NaT 2021-03-28 16:00:59+00:00 \n", + "3 7073 101007 2021-03-28 18:11:06+00:00 2021-03-28 16:00:59+00:00 \n", + "4 5175 103972 NaT 2021-03-28 16:01:06+00:00 \n", "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00 \n", - "1 2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00 \n", - "2 2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00 \n", - "3 2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00 \n", - "4 2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00 \n", - "... ... ... \n", - "6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00 \n", - "6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00 \n", - "6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00 \n", - "6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00 \n", - "6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00 \n", + " delivered_at campaign_name \\\n", + "0 2021-03-28 16:24:18+00:00 Le Mucem chez vous, gardons le lien #22 \n", + "1 2021-03-28 16:21:02+00:00 Le Mucem chez vous, gardons le lien #22 \n", + "2 2021-03-28 16:08:45+00:00 Le Mucem chez vous, gardons le lien #22 \n", + "3 2021-03-28 16:09:47+00:00 Le Mucem chez vous, gardons le lien #22 \n", + "4 2021-03-28 16:05:03+00:00 Le Mucem chez vous, gardons le lien #22 \n", "\n", - " campaign_name campaign_service_id \\\n", - "0 Le Mucem chez vous, gardons le lien #22 404 \n", - "1 Le Mucem chez vous, gardons le lien #22 404 \n", - "2 Le Mucem chez vous, gardons le lien #22 404 \n", - "3 Le Mucem chez vous, gardons le lien #22 404 \n", - "4 Le Mucem chez vous, gardons le lien #22 404 \n", - "... ... ... \n", - "6214803 dre_nov_2023 1318 \n", - "6214804 dre_nov_2023 1318 \n", - "6214805 dre_nov_2023 1318 \n", - "6214806 dre_nov_2023 1318 \n", - "6214807 dre_nov_2023 1318 \n", - "\n", - " campaign_sent_at \n", - "0 2021-03-27 23:00:00+00:00 \n", - "1 2021-03-27 23:00:00+00:00 \n", - "2 2021-03-27 23:00:00+00:00 \n", - "3 2021-03-27 23:00:00+00:00 \n", - "4 2021-03-27 23:00:00+00:00 \n", - "... ... \n", - "6214803 2023-10-23 09:31:17+00:00 \n", - "6214804 2023-10-23 09:31:17+00:00 \n", - "6214805 2023-10-23 09:31:17+00:00 \n", - "6214806 2023-10-23 09:31:17+00:00 \n", - "6214807 2023-10-23 09:31:17+00:00 \n", - "\n", - "[6214808 rows x 8 columns]" + " campaign_service_id campaign_sent_at \n", + "0 404 2021-03-27 23:00:00+00:00 \n", + "1 404 2021-03-27 23:00:00+00:00 \n", + "2 404 2021-03-27 23:00:00+00:00 \n", + "3 404 2021-03-27 23:00:00+00:00 \n", + "4 404 2021-03-27 23:00:00+00:00 " ] }, "execution_count": 20, @@ -1526,7 +963,7 @@ } ], "source": [ - "df1_campaigns_information" + "df1_campaigns_information.head()" ] }, { @@ -1573,7 +1010,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_2240/3700263836.py:11: SettingWithCopyWarning: \n", + "/tmp/ipykernel_3658/3700263836.py:11: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -1654,81 +1091,17 @@ " 0.0\n", " NaT\n", " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 130467\n", - " 1256097\n", - " 1\n", - " 1.0\n", - " 0 days 02:11:15\n", - " \n", - " \n", - " 130468\n", - " 1256098\n", - " 1\n", - " 0.0\n", - " NaT\n", - " \n", - " \n", - " 130469\n", - " 1256099\n", - " 1\n", - " 0.0\n", - " NaT\n", - " \n", - " \n", - " 130470\n", - " 1256100\n", - " 1\n", - " 0.0\n", - " NaT\n", - " \n", - " \n", - " 130471\n", - " 1256101\n", - " 1\n", - " 0.0\n", - " NaT\n", - " \n", " \n", "\n", - "

130472 rows × 4 columns

\n", "" ], "text/plain": [ - " customer_id nb_campaigns nb_campaigns_opened \\\n", - "0 2 4 0.0 \n", - "1 3 222 124.0 \n", - "2 4 7 7.0 \n", - "3 5 4 0.0 \n", - "4 6 20 0.0 \n", - "... ... ... ... \n", - "130467 1256097 1 1.0 \n", - "130468 1256098 1 0.0 \n", - "130469 1256099 1 0.0 \n", - "130470 1256100 1 0.0 \n", - "130471 1256101 1 0.0 \n", - "\n", - " time_to_open \n", - "0 NaT \n", - "1 1 days 00:28:30.169354838 \n", - "2 1 days 04:31:01.428571428 \n", - "3 NaT \n", - "4 NaT \n", - "... ... \n", - "130467 0 days 02:11:15 \n", - "130468 NaT \n", - "130469 NaT \n", - "130470 NaT \n", - "130471 NaT \n", - "\n", - "[130472 rows x 4 columns]" + " customer_id nb_campaigns nb_campaigns_opened time_to_open\n", + "0 2 4 0.0 NaT\n", + "1 3 222 124.0 1 days 00:28:30.169354838\n", + "2 4 7 7.0 1 days 04:31:01.428571428\n", + "3 5 4 0.0 NaT\n", + "4 6 20 0.0 NaT" ] }, "execution_count": 23, @@ -1737,7 +1110,7 @@ } ], "source": [ - "df1_campaigns_kpi" + "df1_campaigns_kpi.head()" ] }, { @@ -2646,19 +2019,19 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 32, "id": "46de1912-4a66-46e5-8b9e-7768b2d2723b", "metadata": {}, "outputs": [], "source": [ "# Fusion liée au product\n", - "df1_product_purchased = pd.merge(df1_tickets_kpi, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", + "df1_products_purchased = pd.merge(df1_tickets_kpi, products_global, left_on = 'product_id', right_on = 'id_products', how = 'inner')\n", "\n", "# Fusion liée au customer\n", "df1_customer = pd.merge(df1_customerplus_clean, df1_campaigns_kpi, on = 'customer_id', how = 'left')\n", "\n", "# Fusion product et customer\n", - "df1_customer_product = pd.merge(df1_customer, df1_product_purchased, on = 'customer_id', how = 'left')" + "df1_customer_product = pd.merge(df1_customer, df1_products_purchased, on = 'customer_id', how = 'left')" ] }, {