diff --git a/Exploration_billet_AJ.ipynb b/Exploration_billet_AJ.ipynb index 14c1b9a..e1802cd 100644 --- a/Exploration_billet_AJ.ipynb +++ b/Exploration_billet_AJ.ipynb @@ -69,6 +69,54 @@ "liste_database = fs.ls(BUCKET)" ] }, + { + "cell_type": "code", + "execution_count": 39, + "id": "aaf64d60-bf92-470c-8210-d09abd6a653e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['bdc2324-data/1/1campaign_stats.csv',\n", + " 'bdc2324-data/1/1campaigns.csv',\n", + " 'bdc2324-data/1/1categories.csv',\n", + " 'bdc2324-data/1/1countries.csv',\n", + " 'bdc2324-data/1/1currencies.csv',\n", + " 'bdc2324-data/1/1customer_target_mappings.csv',\n", + " 'bdc2324-data/1/1customersplus.csv',\n", + " 'bdc2324-data/1/1event_types.csv',\n", + " 'bdc2324-data/1/1events.csv',\n", + " 'bdc2324-data/1/1facilities.csv',\n", + " 'bdc2324-data/1/1link_stats.csv',\n", + " 'bdc2324-data/1/1pricing_formulas.csv',\n", + " 'bdc2324-data/1/1product_packs.csv',\n", + " 'bdc2324-data/1/1products.csv',\n", + " 'bdc2324-data/1/1products_groups.csv',\n", + " 'bdc2324-data/1/1purchases.csv',\n", + " 'bdc2324-data/1/1representation_category_capacities.csv',\n", + " 'bdc2324-data/1/1representations.csv',\n", + " 'bdc2324-data/1/1seasons.csv',\n", + " 'bdc2324-data/1/1structure_tag_mappings.csv',\n", + " 'bdc2324-data/1/1suppliers.csv',\n", + " 'bdc2324-data/1/1tags.csv',\n", + " 'bdc2324-data/1/1target_types.csv',\n", + " 'bdc2324-data/1/1targets.csv',\n", + " 'bdc2324-data/1/1tickets.csv',\n", + " 'bdc2324-data/1/1type_of_categories.csv',\n", + " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n", + " 'bdc2324-data/1/1type_ofs.csv']" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "liste_database" + ] + }, { "cell_type": "code", "execution_count": 4, @@ -103,14 +151,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_864/4135596479.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_9792/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(file_in)\n" ] } ], "source": [ - "# loop to create dataframes from file 2\n", - "files_path = liste_database_filtered\n", + "# loop to create dataframes from liste\n", + "files_path = liste_database\n", "\n", "client_number = files_path[0].split(\"/\")[1]\n", "df_prefix = \"df\" + str(client_number) + \"_\"\n", @@ -127,7 +175,9 @@ { "cell_type": "markdown", "id": "e908f516-2a74-45d6-8492-7dcdc3afbe1f", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "## tickets.csv" ] @@ -474,7 +524,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_864/232847087.py:3: SettingWithCopyWarning: \n", + "/tmp/ipykernel_619/232847087.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -491,7 +541,9 @@ { "cell_type": "markdown", "id": "78453f3c-4f89-44ed-a6c6-2a7443b72b52", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "## suppliers.csv" ] @@ -764,7 +816,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_864/302783287.py:3: SettingWithCopyWarning: \n", + "/tmp/ipykernel_619/302783287.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -884,7 +936,9 @@ { "cell_type": "markdown", "id": "0a6df975-c7fc-45bc-92af-a0bdab17d795", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "## type_ofs.csv" ] @@ -1116,7 +1170,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_864/81842251.py:3: SettingWithCopyWarning: \n", + "/tmp/ipykernel_619/81842251.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -1133,7 +1187,9 @@ { "cell_type": "markdown", "id": "676a9869-9a8b-4cd2-8b1c-0644b5229c72", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "## purchases.csv" ] @@ -1565,58 +1621,58 @@ " \n", " \n", " 1826667\n", - " 18643847\n", - " 350454\n", + " 20662815\n", + " 405689\n", " False\n", - " vad\n", - " Billet en nombre\n", + " vente en ligne\n", + " Atelier\n", " pricing_formula\n", - " 2022-08-02 08:59:17+00:00\n", - " 41\n", + " 2023-11-08 17:23:54+00:00\n", + " 1256135\n", " \n", " \n", " 1826668\n", - " 19853111\n", - " 383564\n", + " 20662816\n", + " 403658\n", " False\n", - " vad\n", - " Billet en nombre\n", + " vente en ligne\n", + " Atelier\n", " pricing_formula\n", - " 2022-11-04 14:25:42+00:00\n", - " 62763\n", + " 2023-11-08 18:32:18+00:00\n", + " 1256136\n", " \n", " \n", " 1826669\n", - " 19860514\n", - " 383751\n", + " 20662817\n", + " 403658\n", " False\n", - " vad\n", - " Billet en nombre\n", + " vente en ligne\n", + " Atelier\n", " pricing_formula\n", - " 2022-11-18 10:47:26+00:00\n", - " 1195566\n", + " 2023-11-08 18:32:18+00:00\n", + " 1256136\n", " \n", " \n", " 1826670\n", - " 19860515\n", - " 383751\n", + " 20662818\n", + " 403658\n", " False\n", - " vad\n", - " Billet en nombre\n", + " vente en ligne\n", + " Atelier\n", " pricing_formula\n", - " 2022-11-18 10:47:26+00:00\n", - " 1195566\n", + " 2023-11-08 19:30:28+00:00\n", + " 1256137\n", " \n", " \n", " 1826671\n", - " 19860516\n", - " 383751\n", + " 20662819\n", + " 403658\n", " False\n", - " vad\n", - " Billet en nombre\n", + " vente en ligne\n", + " Atelier\n", " pricing_formula\n", - " 2022-11-18 10:47:26+00:00\n", - " 1195566\n", + " 2023-11-08 19:30:28+00:00\n", + " 1256137\n", " \n", " \n", "\n", @@ -1631,11 +1687,11 @@ "3 13070862 224914 False vente en ligne \n", "4 13070863 224914 False vente en ligne \n", "... ... ... ... ... \n", - "1826667 18643847 350454 False vad \n", - "1826668 19853111 383564 False vad \n", - "1826669 19860514 383751 False vad \n", - "1826670 19860515 383751 False vad \n", - "1826671 19860516 383751 False vad \n", + "1826667 20662815 405689 False vente en ligne \n", + "1826668 20662816 403658 False vente en ligne \n", + "1826669 20662817 403658 False vente en ligne \n", + "1826670 20662818 403658 False vente en ligne \n", + "1826671 20662819 403658 False vente en ligne \n", "\n", " type_of_ticket_name children purchase_date \\\n", "0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", @@ -1644,11 +1700,11 @@ "3 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", "... ... ... ... \n", - "1826667 Billet en nombre pricing_formula 2022-08-02 08:59:17+00:00 \n", - "1826668 Billet en nombre pricing_formula 2022-11-04 14:25:42+00:00 \n", - "1826669 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", - "1826670 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", - "1826671 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", + "1826667 Atelier pricing_formula 2023-11-08 17:23:54+00:00 \n", + "1826668 Atelier pricing_formula 2023-11-08 18:32:18+00:00 \n", + "1826669 Atelier pricing_formula 2023-11-08 18:32:18+00:00 \n", + "1826670 Atelier pricing_formula 2023-11-08 19:30:28+00:00 \n", + "1826671 Atelier pricing_formula 2023-11-08 19:30:28+00:00 \n", "\n", " customer_id \n", "0 48187 \n", @@ -1657,11 +1713,11 @@ "3 48187 \n", "4 48187 \n", "... ... \n", - "1826667 41 \n", - "1826668 62763 \n", - "1826669 1195566 \n", - "1826670 1195566 \n", - "1826671 1195566 \n", + "1826667 1256135 \n", + "1826668 1256136 \n", + "1826669 1256136 \n", + "1826670 1256137 \n", + "1826671 1256137 \n", "\n", "[1826672 rows x 8 columns]" ] @@ -1683,6 +1739,66 @@ "# Utilisation de fonctions" ] }, + { + "cell_type": "code", + "execution_count": 50, + "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d", + "metadata": {}, + "outputs": [], + "source": [ + "def cleaning_date(df, column_name):\n", + " \"\"\"\n", + " Nettoie la colonne spécifiée du DataFrame en convertissant les valeurs en datetime avec le format ISO8601.\n", + "\n", + " Parameters:\n", + " - df: DataFrame\n", + " Le DataFrame contenant la colonne à nettoyer.\n", + " - column_name: str\n", + " Le nom de la colonne à nettoyer.\n", + "\n", + " Returns:\n", + " - DataFrame\n", + " Le DataFrame modifié avec la colonne nettoyée.\n", + " \"\"\"\n", + " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "c1afe322-ff41-4760-819e-0195fed5b27d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 20 entries, 0 to 19\n", + "Data columns (total 2 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 opened_at 8 non-null object \n", + " 1 opened_at_clean 8 non-null datetime64[ns, UTC]\n", + "dtypes: datetime64[ns, UTC](1), object(1)\n", + "memory usage: 448.0+ bytes\n" + ] + } + ], + "source": [ + "# Créer un DataFrame exemple\n", + "df_not_clean = df1_campaign_stats[['opened_at']].head(20)\n", + "\n", + "# Appliquer la fonction pour nettoyer la colonne 'purchase_date' de manière vectorisée\n", + "df_clean = cleaning_date(df_not_clean, 'opened_at')\n", + "df_clean.rename(columns = {'opened_at' : 'opened_at_clean'}, inplace = True)\n", + "\n", + "test = pd.concat([df1_campaign_stats[['opened_at']].head(20), df_clean], axis=1)\n", + "\n", + "test.info()" + ] + }, { "cell_type": "markdown", "id": "27ecf058-23eb-4018-abbd-68c4ebe7c786", @@ -1693,87 +1809,9 @@ }, { "cell_type": "code", - "execution_count": 25, - "id": "b95464b1-26bc-4aac-84b4-45da83b92251", + "execution_count": 23, + "id": "d887898c-6a21-41ed-901d-4d6fdbca5372", "metadata": {}, - "outputs": [], - "source": [ - "# Fonction de nettoyage et selection\n", - "def preprocessing_tickets_area(tickets = None, purchases = None, suppliers = None, type_ofs = None):\n", - " # Base des tickets\n", - " tickets = tickets[['id', 'purchase_id', 'product_id', 'is_from_subscription', 'type_of', 'supplier_id']]\n", - " tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n", - "\n", - " # Base des fournisseurs\n", - " suppliers = suppliers[['id', 'name']]\n", - " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", - "\n", - " # Base des types de billets\n", - " type_ofs = type_ofs[['id', 'name', 'children']]\n", - " type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n", - "\n", - " # Base des achats\n", - " # Nettoyage de la date d'achat\n", - " purchases['purchase_date'] = pd.to_datetime(purchases['purchase_date'], utc = True)\n", - " purchases['purchase_date'] = pd.to_datetime(purchases['purchase_date'], format = 'ISO8601')\n", - " # Selection des variables\n", - " purchases = purchases[['id', 'purchase_date', 'customer_id']]\n", - "\n", - " # Fusions \n", - " # Fusion avec fournisseurs\n", - " ticket_information = pd.merge(tickets, suppliers, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n", - " ticket_information.drop(['supplier_id', 'id'], axis = 1, inplace=True)\n", - " \n", - " # Fusion avec type de tickets\n", - " ticket_information = pd.merge(ticket_information, type_ofs, left_on = 'type_of', right_on = 'id', how = 'inner')\n", - " ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n", - " \n", - " # Fusion avec achats\n", - " ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n", - " ticket_information.drop(['purchase_id', 'id'], axis = 1, inplace=True)\n", - "\n", - " return ticket_information" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_864/2452826288.py:5: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " tickets_clean.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n", - "/tmp/ipykernel_864/2452826288.py:9: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " suppliers_clean.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n", - "/tmp/ipykernel_864/2452826288.py:13: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " type_ofs_clean.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n" - ] - } - ], - "source": [ - "df1_ticket_information_test = preprocessing_tickets_area(tickets = df1_tickets, purchases = df1_purchases, suppliers = df1_suppliers, type_ofs = df1_type_ofs)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "2877f3de-55d6-42d6-ad94-352d3e107862", - "metadata": { - "scrolled": true - }, "outputs": [ { "data": { @@ -1799,9 +1837,8 @@ " ticket_id\n", " product_id\n", " is_from_subscription\n", + " type_of\n", " supplier_name\n", - " type_of_ticket_name\n", - " children\n", " purchase_date\n", " customer_id\n", " \n", @@ -1812,9 +1849,8 @@ " 13070859\n", " 225251\n", " False\n", + " 1\n", " vente en ligne\n", - " Atelier\n", - " pricing_formula\n", " 2018-12-28 14:47:50+00:00\n", " 48187\n", " \n", @@ -1823,9 +1859,8 @@ " 13070860\n", " 224914\n", " False\n", + " 1\n", " vente en ligne\n", - " Atelier\n", - " pricing_formula\n", " 2018-12-28 14:47:50+00:00\n", " 48187\n", " \n", @@ -1834,9 +1869,8 @@ " 13070861\n", " 224914\n", " False\n", + " 1\n", " vente en ligne\n", - " Atelier\n", - " pricing_formula\n", " 2018-12-28 14:47:50+00:00\n", " 48187\n", " \n", @@ -1845,9 +1879,8 @@ " 13070862\n", " 224914\n", " False\n", + " 1\n", " vente en ligne\n", - " Atelier\n", - " pricing_formula\n", " 2018-12-28 14:47:50+00:00\n", " 48187\n", " \n", @@ -1856,9 +1889,8 @@ " 13070863\n", " 224914\n", " False\n", + " 1\n", " vente en ligne\n", - " Atelier\n", - " pricing_formula\n", " 2018-12-28 14:47:50+00:00\n", " 48187\n", " \n", @@ -1871,131 +1903,1331 @@ " ...\n", " ...\n", " ...\n", + " \n", + " \n", + " 1826667\n", + " 20662815\n", + " 405689\n", + " False\n", + " 1\n", + " vente en ligne\n", + " 2023-11-08 17:23:54+00:00\n", + " 1256135\n", + " \n", + " \n", + " 1826668\n", + " 20662816\n", + " 403658\n", + " False\n", + " 1\n", + " vente en ligne\n", + " 2023-11-08 18:32:18+00:00\n", + " 1256136\n", + " \n", + " \n", + " 1826669\n", + " 20662817\n", + " 403658\n", + " False\n", + " 1\n", + " vente en ligne\n", + " 2023-11-08 18:32:18+00:00\n", + " 1256136\n", + " \n", + " \n", + " 1826670\n", + " 20662818\n", + " 403658\n", + " False\n", + " 1\n", + " vente en ligne\n", + " 2023-11-08 19:30:28+00:00\n", + " 1256137\n", + " \n", + " \n", + " 1826671\n", + " 20662819\n", + " 403658\n", + " False\n", + " 1\n", + " vente en ligne\n", + " 2023-11-08 19:30:28+00:00\n", + " 1256137\n", + " \n", + " \n", + "\n", + "

1826672 rows × 7 columns

\n", + "" + ], + "text/plain": [ + " ticket_id product_id is_from_subscription type_of supplier_name \\\n", + "0 13070859 225251 False 1 vente en ligne \n", + "1 13070860 224914 False 1 vente en ligne \n", + "2 13070861 224914 False 1 vente en ligne \n", + "3 13070862 224914 False 1 vente en ligne \n", + "4 13070863 224914 False 1 vente en ligne \n", + "... ... ... ... ... ... \n", + "1826667 20662815 405689 False 1 vente en ligne \n", + "1826668 20662816 403658 False 1 vente en ligne \n", + "1826669 20662817 403658 False 1 vente en ligne \n", + "1826670 20662818 403658 False 1 vente en ligne \n", + "1826671 20662819 403658 False 1 vente en ligne \n", + "\n", + " purchase_date customer_id \n", + "0 2018-12-28 14:47:50+00:00 48187 \n", + "1 2018-12-28 14:47:50+00:00 48187 \n", + "2 2018-12-28 14:47:50+00:00 48187 \n", + "3 2018-12-28 14:47:50+00:00 48187 \n", + "4 2018-12-28 14:47:50+00:00 48187 \n", + "... ... ... \n", + "1826667 2023-11-08 17:23:54+00:00 1256135 \n", + "1826668 2023-11-08 18:32:18+00:00 1256136 \n", + "1826669 2023-11-08 18:32:18+00:00 1256136 \n", + "1826670 2023-11-08 19:30:28+00:00 1256137 \n", + "1826671 2023-11-08 19:30:28+00:00 1256137 \n", + "\n", + "[1826672 rows x 7 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_ticket_information" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "ac9a6373-c1c6-46b5-873b-dc22f17bcbdb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1826672 entries, 0 to 1826671\n", + "Data columns (total 7 columns):\n", + " # Column Dtype \n", + "--- ------ ----- \n", + " 0 ticket_id int64 \n", + " 1 product_id int64 \n", + " 2 is_from_subscription bool \n", + " 3 type_of int64 \n", + " 4 supplier_name object \n", + " 5 purchase_date datetime64[ns, UTC]\n", + " 6 customer_id int64 \n", + "dtypes: bool(1), datetime64[ns, UTC](1), int64(4), object(1)\n", + "memory usage: 85.4+ MB\n" + ] + } + ], + "source": [ + "df1_ticket_information.info()" + ] + }, + { + "cell_type": "markdown", + "id": "b1719943-89eb-4ba0-a107-2f96d5d01ec9", + "metadata": {}, + "source": [ + "# Customer information" + ] + }, + { + "cell_type": "markdown", + "id": "a2132ee2-3f22-45fd-b65b-72689c8b672c", + "metadata": {}, + "source": [ + "## Target area" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "da5d4708-7147-4cc8-8686-52d4bcba5a7a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_619/2625134041.py:3: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df1_targets_clean.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n" + ] + } + ], + "source": [ + "# Target.csv cleaning\n", + "df1_targets_clean = df1_targets[[\"id\", \"target_type_id\", \"name\"]]\n", + "df1_targets_clean.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n", + "\n", + "# target_type cleaning\n", + "df1_target_types_clean = df1_target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n", + "\n", + "#customer_target_mappings cleaning\n", + "df1_customer_target_mappings_clean = df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n", + "\n", + "# Merge target et target_type\n", + "df1_targets_full = pd.merge(df1_targets_clean, df1_target_types_clean, left_on='target_type_id', right_on='target_type_id', how='inner')\n", + "df1_targets_full.drop(['target_type_id'], axis = 1, inplace=True)\n", + "\n", + "# Merge\n", + "df1_targets_full = pd.merge(df1_customer_target_mappings_clean, df1_targets_full, left_on='target_id', right_on='target_id', how='inner')\n", + "df1_targets_full.drop(['target_id'], axis = 1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "b4fa5fe3-ce8e-4b0a-af94-fb468d241bad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "id 5.080902\n", + "dtype: float64" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_targets_test = df1_targets_full[['id', 'customer_id']].groupby(['customer_id']).count()\n", + "len(df1_targets_test[df1_targets_test['id'] > 1]) / len(df1_targets_test)\n", + "\n", + "# 99,6% des 151 000 client visés sont catégorisés plusieurs fois et en moyenne 5 fois... \n", + "df1_targets_test.mean()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "8072bbb7-1360-4882-bb2b-2f43b6beea0d", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcustomer_idtarget_nametarget_type_is_importtarget_type_name
879345845991consentement optin jeune publicFalsemanual_static_filter
1324945674651DDCP rentrée culturelle 2023Falsemanual_static_filter
2142445448051spectateurs cine dimanche_cine concert_2122Falsemanual_static_filter
2166545449111DDCP Cine 2023Falsemanual_static_filter
2281145457661DDCP OLBJ! 2023Falsemanual_static_filter
5730544579091ddcp_promo_visiteurs occasionnels_musee_8moisFalsemanual_dynamic_filter
5884336888721DDCP promo livemagFalsemanual_static_filter
6681343136461DDCP spectateurs Classique mais pas que 2022Falsemanual_static_filter
6836745476621ddcp_promo_musee_au moins 3 achats_dps8moisFalsemanual_dynamic_filter
7732042855201DDCP spectateurs IminenteFalsemanual_static_filter
8435040378051DDCP spectateurs Marseille Jazz 18-19-21Falsemanual_static_filter
8538345695041DDCP rendez-vous de septembre offre spécialeFalsemanual_static_filter
9286844330641ddcp_promo_plein air_ateliers_jardinsFalsemanual_static_filter
9967038586841Acid ArabFalsemanual_static_filter
10547743218101Arenametrix_bascule tel vers sibFalsemanual_static_filter
16951336979921ddcp_achats billets nb dps 19052021Falsemanual_static_filter
21442129253241consentement optout scolairesFalsemanual_static_filter
23454645759571Portrait de Leila shahidFalsemanual_static_filter
25980837222591consentement optin b2bFalsemanual_static_filter
27438045104231DDCP_marseille_jazz_2023Falsemanual_static_filter
30751151744661ddcp actoral 21-22Falsemanual_static_filter
35750944425261ddcp musique barvaloFalsemanual_static_filter
39292043906421ddcp_md_promo_spectateurs theatre contempoFalsemanual_static_filter
44962044118971FORMATION _ acheteurs optin last yearFalsemanual_dynamic_filter
50380947345911consentement optin mediation specialiseeFalsemanual_static_filter
65122235544261consentement optin b2cFalsemanual_static_filter
65424651822121DDCP spectateurs Festival de Marseille 2023Falsemanual_static_filter
65439551824561rencontres_echelle_spectateurs_2021_2023Falsemanual_static_filter
\n", + "
" + ], + "text/plain": [ + " id customer_id target_name \\\n", + "8793 4584599 1 consentement optin jeune public \n", + "13249 4567465 1 DDCP rentrée culturelle 2023 \n", + "21424 4544805 1 spectateurs cine dimanche_cine concert_2122 \n", + "21665 4544911 1 DDCP Cine 2023 \n", + "22811 4545766 1 DDCP OLBJ! 2023 \n", + "57305 4457909 1 ddcp_promo_visiteurs occasionnels_musee_8mois \n", + "58843 3688872 1 DDCP promo livemag \n", + "66813 4313646 1 DDCP spectateurs Classique mais pas que 2022 \n", + "68367 4547662 1 ddcp_promo_musee_au moins 3 achats_dps8mois \n", + "77320 4285520 1 DDCP spectateurs Iminente \n", + "84350 4037805 1 DDCP spectateurs Marseille Jazz 18-19-21 \n", + "85383 4569504 1 DDCP rendez-vous de septembre offre spéciale \n", + "92868 4433064 1 ddcp_promo_plein air_ateliers_jardins \n", + "99670 3858684 1 Acid Arab \n", + "105477 4321810 1 Arenametrix_bascule tel vers sib \n", + "169513 3697992 1 ddcp_achats billets nb dps 19052021 \n", + "214421 2925324 1 consentement optout scolaires \n", + "234546 4575957 1 Portrait de Leila shahid \n", + "259808 3722259 1 consentement optin b2b \n", + "274380 4510423 1 DDCP_marseille_jazz_2023 \n", + "307511 5174466 1 ddcp actoral 21-22 \n", + "357509 4442526 1 ddcp musique barvalo \n", + "392920 4390642 1 ddcp_md_promo_spectateurs theatre contempo \n", + "449620 4411897 1 FORMATION _ acheteurs optin last year \n", + "503809 4734591 1 consentement optin mediation specialisee \n", + "651222 3554426 1 consentement optin b2c \n", + "654246 5182212 1 DDCP spectateurs Festival de Marseille 2023 \n", + "654395 5182456 1 rencontres_echelle_spectateurs_2021_2023 \n", + "\n", + " target_type_is_import target_type_name \n", + "8793 False manual_static_filter \n", + "13249 False manual_static_filter \n", + "21424 False manual_static_filter \n", + "21665 False manual_static_filter \n", + "22811 False manual_static_filter \n", + "57305 False manual_dynamic_filter \n", + "58843 False manual_static_filter \n", + "66813 False manual_static_filter \n", + "68367 False manual_dynamic_filter \n", + "77320 False manual_static_filter \n", + "84350 False manual_static_filter \n", + "85383 False manual_static_filter \n", + "92868 False manual_static_filter \n", + "99670 False manual_static_filter \n", + "105477 False manual_static_filter \n", + "169513 False manual_static_filter \n", + "214421 False manual_static_filter \n", + "234546 False manual_static_filter \n", + "259808 False manual_static_filter \n", + "274380 False manual_static_filter \n", + "307511 False manual_static_filter \n", + "357509 False manual_static_filter \n", + "392920 False manual_static_filter \n", + "449620 False manual_dynamic_filter \n", + "503809 False manual_static_filter \n", + "651222 False manual_static_filter \n", + "654246 False manual_static_filter \n", + "654395 False manual_static_filter " + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_targets_full[df1_targets_full['customer_id'] == 1]" + ] + }, + { + "cell_type": "markdown", + "id": "2f665824-a026-4acd-8358-b408a61854b4", + "metadata": {}, + "source": [ + "## Campaign area" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "5d05203c-ea30-4208-a29f-fef7737c672e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", + "/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n", + "/tmp/ipykernel_9792/1967867975.py:15: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n" + ] + } + ], + "source": [ + "# campaign_stats cleaning \n", + "df1_campaign_stats_clean = df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n", + "cleaning_date(df1_campaign_stats_clean, 'opened_at')\n", + "cleaning_date(df1_campaign_stats_clean, 'sent_at')\n", + "cleaning_date(df1_campaign_stats_clean, 'delivered_at')\n", + "\n", + "# campaigns cleaning\n", + "df1_campaigns_clean = df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n", + "cleaning_date(df1_campaigns_clean, 'campaign_sent_at')\n", + "\n", + "# Merge \n", + "df1_campaigns_full = pd.merge(df1_campaign_stats_clean, df1_campaigns_clean, on = \"campaign_id\", how = \"left\")\n", + "df1_campaigns_full.drop(['campaign_id'], axis = 1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "8ac634cf-2a30-4ccc-a34d-0fd401a49aaa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 6214808 entries, 0 to 6214807\n", + "Data columns (total 8 columns):\n", + " # Column Dtype \n", + "--- ------ ----- \n", + " 0 id int64 \n", + " 1 customer_id int64 \n", + " 2 opened_at datetime64[ns, UTC]\n", + " 3 sent_at datetime64[ns, UTC]\n", + " 4 delivered_at datetime64[ns, UTC]\n", + " 5 campaign_name object \n", + " 6 campaign_service_id int64 \n", + " 7 campaign_sent_at datetime64[ns, UTC]\n", + "dtypes: datetime64[ns, UTC](4), int64(3), object(1)\n", + "memory usage: 379.3+ MB\n" + ] + } + ], + "source": [ + "df1_campaigns_full.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "7d22cdd5-2060-4922-8e04-27b613d4ee27", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
idcustomer_idopened_atsent_atdelivered_atcampaign_namecampaign_service_idcampaign_sent_at
019793112597NaT2021-03-28 16:01:09+00:002021-03-28 16:24:18+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
114211113666NaT2021-03-28 16:01:09+00:002021-03-28 16:21:02+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
213150280561NaT2021-03-28 16:00:59+00:002021-03-28 16:08:45+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
370731010072021-03-28 18:11:06+00:002021-03-28 16:00:59+00:002021-03-28 16:09:47+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
45175103972NaT2021-03-28 16:01:06+00:002021-03-28 16:05:03+00:00Le Mucem chez vous, gardons le lien #224042021-03-27 23:00:00+00:00
...........................
182666718643847350454FalsevadBillet en nombrepricing_formula2022-08-02 08:59:17+00:0041621480383029942661552023-10-23 09:43:25+00:002023-10-23 09:32:33+00:002023-10-23 09:32:34+00:00dre_nov_202313182023-10-23 09:31:17+00:00
182666819853111383564FalsevadBillet en nombrepricing_formula2022-11-04 14:25:42+00:006276362148048303307213552023-10-23 09:44:02+00:002023-10-23 09:32:49+00:002023-10-23 09:32:49+00:00dre_nov_202313182023-10-23 09:31:17+00:00
182666919860514383751FalsevadBillet en nombrepricing_formula2022-11-18 10:47:26+00:00119556662148058304346218492023-10-23 09:45:52+00:002023-10-23 09:33:28+00:002023-10-23 09:33:29+00:00dre_nov_202313182023-10-23 09:31:17+00:00
182667019860515383751FalsevadBillet en nombrepricing_formula2022-11-18 10:47:26+00:001195566621480683020376677892023-10-23 09:47:32+00:002023-10-23 09:31:53+00:002023-10-23 09:31:54+00:00dre_nov_202313182023-10-23 09:31:17+00:00
182667119860516383751FalsevadBillet en nombrepricing_formula2022-11-18 10:47:26+00:00119556662148078304939294154NaT2023-10-23 09:33:54+00:002023-10-23 09:33:55+00:00dre_nov_202313182023-10-23 09:31:17+00:00
\n", - "

1826672 rows × 8 columns

\n", + "

6214808 rows × 8 columns

\n", "
" ], "text/plain": [ - " ticket_id product_id is_from_subscription supplier_name \\\n", - "0 13070859 225251 False vente en ligne \n", - "1 13070860 224914 False vente en ligne \n", - "2 13070861 224914 False vente en ligne \n", - "3 13070862 224914 False vente en ligne \n", - "4 13070863 224914 False vente en ligne \n", - "... ... ... ... ... \n", - "1826667 18643847 350454 False vad \n", - "1826668 19853111 383564 False vad \n", - "1826669 19860514 383751 False vad \n", - "1826670 19860515 383751 False vad \n", - "1826671 19860516 383751 False vad \n", + " id customer_id opened_at \\\n", + "0 19793 112597 NaT \n", + "1 14211 113666 NaT \n", + "2 13150 280561 NaT \n", + "3 7073 101007 2021-03-28 18:11:06+00:00 \n", + "4 5175 103972 NaT \n", + "... ... ... ... \n", + "6214803 8302994 266155 2023-10-23 09:43:25+00:00 \n", + "6214804 8303307 21355 2023-10-23 09:44:02+00:00 \n", + "6214805 8304346 21849 2023-10-23 09:45:52+00:00 \n", + "6214806 8302037 667789 2023-10-23 09:47:32+00:00 \n", + "6214807 8304939 294154 NaT \n", "\n", - " type_of_ticket_name children purchase_date \\\n", - "0 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "1 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "2 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "3 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "4 Atelier pricing_formula 2018-12-28 14:47:50+00:00 \n", - "... ... ... ... \n", - "1826667 Billet en nombre pricing_formula 2022-08-02 08:59:17+00:00 \n", - "1826668 Billet en nombre pricing_formula 2022-11-04 14:25:42+00:00 \n", - "1826669 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", - "1826670 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", - "1826671 Billet en nombre pricing_formula 2022-11-18 10:47:26+00:00 \n", + " sent_at delivered_at \\\n", + "0 2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00 \n", + "1 2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00 \n", + "2 2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00 \n", + "3 2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00 \n", + "4 2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00 \n", + "... ... ... \n", + "6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00 \n", + "6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00 \n", + "6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00 \n", + "6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00 \n", + "6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00 \n", "\n", - " customer_id \n", - "0 48187 \n", - "1 48187 \n", - "2 48187 \n", - "3 48187 \n", - "4 48187 \n", - "... ... \n", - "1826667 41 \n", - "1826668 62763 \n", - "1826669 1195566 \n", - "1826670 1195566 \n", - "1826671 1195566 \n", + " campaign_name campaign_service_id \\\n", + "0 Le Mucem chez vous, gardons le lien #22 404 \n", + "1 Le Mucem chez vous, gardons le lien #22 404 \n", + "2 Le Mucem chez vous, gardons le lien #22 404 \n", + "3 Le Mucem chez vous, gardons le lien #22 404 \n", + "4 Le Mucem chez vous, gardons le lien #22 404 \n", + "... ... ... \n", + "6214803 dre_nov_2023 1318 \n", + "6214804 dre_nov_2023 1318 \n", + "6214805 dre_nov_2023 1318 \n", + "6214806 dre_nov_2023 1318 \n", + "6214807 dre_nov_2023 1318 \n", "\n", - "[1826672 rows x 8 columns]" + " campaign_sent_at \n", + "0 2021-03-27 23:00:00+00:00 \n", + "1 2021-03-27 23:00:00+00:00 \n", + "2 2021-03-27 23:00:00+00:00 \n", + "3 2021-03-27 23:00:00+00:00 \n", + "4 2021-03-27 23:00:00+00:00 \n", + "... ... \n", + "6214803 2023-10-23 09:31:17+00:00 \n", + "6214804 2023-10-23 09:31:17+00:00 \n", + "6214805 2023-10-23 09:31:17+00:00 \n", + "6214806 2023-10-23 09:31:17+00:00 \n", + "6214807 2023-10-23 09:31:17+00:00 \n", + "\n", + "[6214808 rows x 8 columns]" ] }, - "execution_count": 29, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df1_ticket_information" + "df1_campaigns_information" + ] + }, + { + "cell_type": "markdown", + "id": "0a5b24f0-4bca-4cde-a6ba-eb130b38cac4", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## Link area" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "bc63bc4e-6cc1-4d35-9635-faf55339e186", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
01319613newsletter enseignants janvier 20227212022-01-14 16:06:42.586321+01:002022-02-03 14:17:27.112963+01:00NaNNaN0.0Falseaba3b6fd5d186d28e06ff97135cade7f2022-01-14 00:00:00+01:00
11319586lsf_janvier_20227172022-01-07 11:30:35.315895+01:002022-02-03 14:17:27.116171+01:00NaNNaN0.0False788d986905533aba051261497ecffcbb2022-01-07 00:00:00+01:00
21319282Invitation à déjeuner au Mucem | Vernissage « ...5912021-09-28 12:50:24.448752+02:002022-02-03 14:17:27.119582+01:00NaNNaN0.0False3493894fa4ea036cfc6433c3e2ee63b02021-09-28 00:00:00+02:00
31319283Vacances de la Toussaint - centres des loisirs5902021-09-28 18:01:04.692073+02:002022-02-03 14:17:27.124408+01:00NaNNaN0.0False08b255a5d42b89b0585260b6f2360bdd2021-09-28 00:00:00+02:00
41319636ddcp_promo_md_livemag7302022-01-27 18:00:41.053069+01:002022-02-03 14:17:27.127607+01:00NaNNaN0.0Falsed5cfead94f5350c12c322b5b664544c12022-01-27 00:00:00+01:00
....................................
9521320072dre_gaza01068812022-05-26 09:01:35.523639+02:002022-12-02 17:51:22.614046+01:00NaNNaN0.0False7504adad8bb96320eb3afdd4df6e1f602022-05-26 00:00:00+02:00
953661398DDCP Plan Bis 4 - Marketing direct - MJ5C1832021-06-18 10:30:01.259578+02:002021-09-24 11:56:09.082785+02:00NaNNaN0.0Falsecedebb6e872f539bef8c3f919874e9d72020-07-27 00:00:00+02:00
9541320487Invitation portes ouvertes amitiés9882022-09-29 18:01:33.834090+02:002022-12-02 17:51:23.258324+01:00NaNNaN0.0False9908279ebbf1f9b250ba689db6a0222b2022-09-29 00:00:00+02:00
955906903DDCP PROMO La méditerranée des philosophes #3 ...3102021-07-19 14:07:16.177390+02:002021-09-24 11:56:09.086101+02:00NaNNaN0.0False06eb61b839a0cefee4967c67ccb099dc2020-12-23 00:00:00+01:00
956579313ddcp_promo_automation_manuel_pre_visit4812021-06-08 17:38:54.041310+02:002021-09-24 11:56:09.089394+02:00NaNNaN0.0False9461cce28ebe3e76fb4b931c35a169b02021-06-08 00:00:00+02:00
\n", + "

957 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " id name service_id \\\n", + "0 1319613 newsletter enseignants janvier 2022 721 \n", + "1 1319586 lsf_janvier_2022 717 \n", + "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n", + "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n", + "4 1319636 ddcp_promo_md_livemag 730 \n", + ".. ... ... ... \n", + "952 1320072 dre_gaza0106 881 \n", + "953 661398 DDCP Plan Bis 4 - Marketing direct - MJ5C 183 \n", + "954 1320487 Invitation portes ouvertes amitiés 988 \n", + "955 906903 DDCP PROMO La méditerranée des philosophes #3 ... 310 \n", + "956 579313 ddcp_promo_automation_manuel_pre_visit 481 \n", + "\n", + " created_at updated_at \\\n", + "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n", + "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n", + "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n", + "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n", + "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n", + ".. ... ... \n", + "952 2022-05-26 09:01:35.523639+02:00 2022-12-02 17:51:22.614046+01:00 \n", + "953 2021-06-18 10:30:01.259578+02:00 2021-09-24 11:56:09.082785+02:00 \n", + "954 2022-09-29 18:01:33.834090+02:00 2022-12-02 17:51:23.258324+01:00 \n", + "955 2021-07-19 14:07:16.177390+02:00 2021-09-24 11:56:09.086101+02:00 \n", + "956 2021-06-08 17:38:54.041310+02:00 2021-09-24 11:56:09.089394+02:00 \n", + "\n", + " process_id report_url category to_be_synced \\\n", + "0 NaN NaN 0.0 False \n", + "1 NaN NaN 0.0 False \n", + "2 NaN NaN 0.0 False \n", + "3 NaN NaN 0.0 False \n", + "4 NaN NaN 0.0 False \n", + ".. ... ... ... ... \n", + "952 NaN NaN 0.0 False \n", + "953 NaN NaN 0.0 False \n", + "954 NaN NaN 0.0 False \n", + "955 NaN NaN 0.0 False \n", + "956 NaN NaN 0.0 False \n", + "\n", + " identifier sent_at \n", + "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n", + "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n", + "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n", + "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n", + "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 \n", + ".. ... ... \n", + "952 7504adad8bb96320eb3afdd4df6e1f60 2022-05-26 00:00:00+02:00 \n", + "953 cedebb6e872f539bef8c3f919874e9d7 2020-07-27 00:00:00+02:00 \n", + "954 9908279ebbf1f9b250ba689db6a0222b 2022-09-29 00:00:00+02:00 \n", + "955 06eb61b839a0cefee4967c67ccb099dc 2020-12-23 00:00:00+01:00 \n", + "956 9461cce28ebe3e76fb4b931c35a169b0 2021-06-08 00:00:00+02:00 \n", + "\n", + "[957 rows x 11 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_campaigns" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "c19b321f-65f9-4d6c-8c1f-edb2eb9d70e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idclicked_atlink_idcustomer_idcreated_atupdated_at
012021-03-26 16:30:36+01:0012840332021-03-26 15:30:37.050161+01:002021-03-26 15:30:37.050161+01:00
122021-03-26 17:16:34+01:0021197682021-03-26 16:16:34.950871+01:002021-03-26 16:16:34.950871+01:00
22722021-03-28 20:03:32+02:00421131052021-03-28 18:03:32.736394+02:002021-03-28 18:03:32.736394+02:00
342021-03-26 17:43:19+01:0032722802021-03-26 16:43:19.338321+01:002021-03-26 16:43:19.338321+01:00
452021-03-26 17:46:00+01:0031050952021-03-26 16:46:00.502945+01:002021-03-26 16:46:00.502945+01:00
.....................
1510462435532023-11-09 16:34:27+01:00146669982023-11-09 15:34:29.425425+01:002023-11-09 15:34:29.425425+01:00
1510472435542023-11-09 16:34:35+01:00146709982023-11-09 15:34:37.505505+01:002023-11-09 15:34:37.505505+01:00
1510482435592023-11-09 16:51:15+01:0014686829232023-11-09 15:51:17.439518+01:002023-11-09 15:51:17.439518+01:00
1510492435612023-11-09 16:59:42+01:0014677829232023-11-09 15:59:44.030922+01:002023-11-09 15:59:44.030922+01:00
1510502435642023-11-09 17:16:41+01:001469112543552023-11-09 16:16:43.012932+01:002023-11-09 16:16:43.012932+01:00
\n", + "

151051 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " id clicked_at link_id customer_id \\\n", + "0 1 2021-03-26 16:30:36+01:00 1 284033 \n", + "1 2 2021-03-26 17:16:34+01:00 2 119768 \n", + "2 272 2021-03-28 20:03:32+02:00 42 113105 \n", + "3 4 2021-03-26 17:43:19+01:00 3 272280 \n", + "4 5 2021-03-26 17:46:00+01:00 3 105095 \n", + "... ... ... ... ... \n", + "151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n", + "151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n", + "151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n", + "151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n", + "151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n", + "\n", + " created_at updated_at \n", + "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n", + "1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n", + "2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n", + "3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n", + "4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n", + "... ... ... \n", + "151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n", + "151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n", + "151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n", + "151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n", + "151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n", + "\n", + "[151051 rows x 6 columns]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1_link_stats" ] }, { "cell_type": "markdown", "id": "96ea2523-38dc-47ef-a49e-2c2d9ad0b1c6", - "metadata": {}, + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "## Exploration variables" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 28, "id": "aaa41688-ea7e-4dba-851c-1f0b0ec43c71", "metadata": {}, "outputs": [], @@ -2018,7 +3250,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 29, "id": "2fecc2e1-113f-46ed-9065-0b9ee416166e", "metadata": {}, "outputs": [], @@ -2028,7 +3260,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 30, "id": "55f6170a-36fb-4efb-9810-f982883660cf", "metadata": {}, "outputs": [ @@ -2076,7 +3308,7 @@ "0 9 100.0 100.0 100.0" ] }, - "execution_count": 43, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2087,7 +3319,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 31, "id": "0030fd02-09e3-42f5-9c83-290458a38c29", "metadata": {}, "outputs": [], @@ -2102,7 +3334,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 32, "id": "6b1736d1-8fd7-4fcc-9431-b8bf0c7b4f2b", "metadata": {}, "outputs": [ @@ -2126,7 +3358,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "226b694b-0b00-4167-b69f-3178902254eb", "metadata": {}, "outputs": [],