diff --git a/0_Cleaning_and_merge.ipynb b/0_Cleaning_and_merge.ipynb
new file mode 100644
index 0000000..3f3b639
--- /dev/null
+++ b/0_Cleaning_and_merge.ipynb
@@ -0,0 +1,1465 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ad414c84-be46-4d2c-be8b-9fc4d24cc672",
+ "metadata": {},
+ "source": [
+ "# Business Data Challenge - Team 1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "15103481-8d74-404c-aa09-7601fe7730da",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import s3fs\n",
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ee97665c-39af-4c1c-a62b-c9c79feae18f",
+ "metadata": {},
+ "source": [
+ "Configuration de l'accès aux données"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the S3 endpoint URL from the AWS_S3_ENDPOINT environment variable\n",
+ "# (a KeyError is raised if it is not set) and open a filesystem handle on it.\n",
+ "S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
+ "fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9cbd72c5-6f8e-4366-ab66-96c32c6e963a",
+ "metadata": {},
+ "source": [
+ "# Exemple sur Company 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "db26e59a-927c-407e-b54b-1815473b0b34",
+ "metadata": {},
+ "source": [
+ "## Chargement données"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "699664b9-eee4-4f8d-a207-e524526560c5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Bucket prefix that holds the CSV files of company 1\n",
+ "BUCKET = \"bdc2324-data/1\"\n",
+ "# List every object path found under that prefix\n",
+ "liste_database = fs.ls(BUCKET)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "aaf64d60-bf92-470c-8210-d09abd6a653e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data/1/1campaign_stats.csv',\n",
+ " 'bdc2324-data/1/1campaigns.csv',\n",
+ " 'bdc2324-data/1/1categories.csv',\n",
+ " 'bdc2324-data/1/1countries.csv',\n",
+ " 'bdc2324-data/1/1currencies.csv',\n",
+ " 'bdc2324-data/1/1customer_target_mappings.csv',\n",
+ " 'bdc2324-data/1/1customersplus.csv',\n",
+ " 'bdc2324-data/1/1event_types.csv',\n",
+ " 'bdc2324-data/1/1events.csv',\n",
+ " 'bdc2324-data/1/1facilities.csv',\n",
+ " 'bdc2324-data/1/1link_stats.csv',\n",
+ " 'bdc2324-data/1/1pricing_formulas.csv',\n",
+ " 'bdc2324-data/1/1product_packs.csv',\n",
+ " 'bdc2324-data/1/1products.csv',\n",
+ " 'bdc2324-data/1/1products_groups.csv',\n",
+ " 'bdc2324-data/1/1purchases.csv',\n",
+ " 'bdc2324-data/1/1representation_category_capacities.csv',\n",
+ " 'bdc2324-data/1/1representations.csv',\n",
+ " 'bdc2324-data/1/1seasons.csv',\n",
+ " 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
+ " 'bdc2324-data/1/1suppliers.csv',\n",
+ " 'bdc2324-data/1/1tags.csv',\n",
+ " 'bdc2324-data/1/1target_types.csv',\n",
+ " 'bdc2324-data/1/1targets.csv',\n",
+ " 'bdc2324-data/1/1tickets.csv',\n",
+ " 'bdc2324-data/1/1type_of_categories.csv',\n",
+ " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
+ " 'bdc2324-data/1/1type_ofs.csv']"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Display the object paths found in the bucket\n",
+ "liste_database"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_50143/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " df = pd.read_csv(file_in)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load every CSV file of the bucket into a global DataFrame named df<client>_<table>,\n",
+ "# e.g. 'bdc2324-data/1/1tickets.csv' becomes df1_tickets.\n",
+ "files_path = liste_database\n",
+ "\n",
+ "# The client number is the second component of the object path ('bdc2324-data/1/...')\n",
+ "client_number = files_path[0].split(\"/\")[1]\n",
+ "df_prefix = f\"df{client_number}_\"\n",
+ "\n",
+ "# Group 3 captures the table name that follows the numeric file prefix\n",
+ "table_name_pattern = re.compile(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$')\n",
+ "\n",
+ "for current_path in files_path:\n",
+ "    name_match = table_name_pattern.search(current_path)\n",
+ "    if name_match is None:\n",
+ "        # Fail before downloading anything, with a message naming the offending path\n",
+ "        raise ValueError(f\"Unexpected file name in bucket: {current_path}\")\n",
+ "    with fs.open(current_path, mode=\"rb\") as file_in:\n",
+ "        # low_memory=False parses the file in one pass, so each column gets a single\n",
+ "        # inferred dtype instead of the per-chunk inference behind the DtypeWarning\n",
+ "        df = pd.read_csv(file_in, low_memory=False)\n",
+ "    # Later cells refer to these frames by name (df1_tickets, ...), hence globals()\n",
+ "    nom_dataframe = df_prefix + name_match.group(3)\n",
+ "    globals()[nom_dataframe] = df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4004c8bf-11d9-413d-bb42-2cb8ddde7716",
+ "metadata": {},
+ "source": [
+ "## Cleaning functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "d237be96-8c86-4a91-b7a1-487e87a16c3d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def cleaning_date(df, column_name):\n",
+ "    \"\"\"\n",
+ "    Parse one column of a DataFrame as timezone-aware (UTC) datetimes.\n",
+ "\n",
+ "    The column is overwritten in place on ``df``; values are parsed with\n",
+ "    ``pd.to_datetime(..., utc=True, format='ISO8601')``.\n",
+ "\n",
+ "    Parameters:\n",
+ "    - df: DataFrame\n",
+ "        Frame holding the column to convert (modified in place).\n",
+ "    - column_name: str\n",
+ "        Name of the column to convert.\n",
+ "\n",
+ "    Returns:\n",
+ "    - DataFrame\n",
+ "        The same ``df`` object, with the column converted.\n",
+ "    \"\"\"\n",
+ "    parsed_column = pd.to_datetime(df[column_name], format = 'ISO8601', utc = True)\n",
+ "    df[column_name] = parsed_column\n",
+ "    return df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "398804d8-2225-4fd3-bceb-75ab1588e359",
+ "metadata": {},
+ "source": [
+ "## Preprocessing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "568cb180-0dd9-4b27-aecb-05e4c3775ba6",
+ "metadata": {},
+ "source": [
+ "## customer_plus"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e7b90ce-da54-4f00-bc34-64c543b0858f",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bade04b1-0cdf-4d10-bcca-7dc7e4831656",
+ "metadata": {},
+ "source": [
+ "## Ticket area"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "b95464b1-26bc-4aac-84b4-45da83b92251",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def preprocessing_tickets_area(tickets = None, purchases = None, suppliers = None, type_ofs = None):\n",
+ "    \"\"\"\n",
+ "    Build one row per ticket, enriched with its supplier name and purchase details.\n",
+ "\n",
+ "    Parameters:\n",
+ "    - tickets: DataFrame\n",
+ "        Needs the columns 'id', 'purchase_id', 'product_id', 'is_from_subscription',\n",
+ "        'type_of' and 'supplier_id'.\n",
+ "    - purchases: DataFrame\n",
+ "        Needs the columns 'id', 'purchase_date' and 'customer_id'.\n",
+ "    - suppliers: DataFrame\n",
+ "        Needs the columns 'id' and 'name'.\n",
+ "    - type_ofs: DataFrame\n",
+ "        Currently unused; kept so that existing calls keep working.\n",
+ "\n",
+ "    Returns:\n",
+ "    - DataFrame\n",
+ "        Columns 'ticket_id', 'product_id', 'is_from_subscription', 'type_of',\n",
+ "        'supplier_name', 'purchase_date' (UTC datetime) and 'customer_id'.\n",
+ "        Both merges are inner joins, so a ticket without a matching supplier or\n",
+ "        purchase is dropped.\n",
+ "\n",
+ "    The input frames are left untouched: every step works on a new frame.\n",
+ "    \"\"\"\n",
+ "    # Tickets: a non-inplace rename returns a new frame, which avoids the\n",
+ "    # SettingWithCopyWarning raised when renaming a column slice in place\n",
+ "    tickets = tickets[['id', 'purchase_id', 'product_id', 'is_from_subscription', 'type_of', 'supplier_id']]\n",
+ "    tickets = tickets.rename(columns = {'id' : 'ticket_id'})\n",
+ "\n",
+ "    # Suppliers\n",
+ "    suppliers = suppliers[['id', 'name']].rename(columns = {'name' : 'supplier_name'})\n",
+ "\n",
+ "    # Ticket types: not merged yet.\n",
+ "    # TODO: select ['id', 'name', 'children'] from type_ofs, rename 'name' to\n",
+ "    # 'type_of_ticket_name' and inner-merge on tickets.type_of == type_ofs.id.\n",
+ "\n",
+ "    # Purchases: select the columns first, then parse the date on that copy so the\n",
+ "    # caller's purchases frame is not modified\n",
+ "    purchases = purchases[['id', 'purchase_date', 'customer_id']].copy()\n",
+ "    purchases = cleaning_date(purchases, 'purchase_date')\n",
+ "\n",
+ "    # Merge with suppliers, then drop the join keys\n",
+ "    ticket_information = pd.merge(tickets, suppliers, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
+ "    ticket_information = ticket_information.drop(columns = ['supplier_id', 'id'])\n",
+ "\n",
+ "    # Merge with purchases, then drop the join keys\n",
+ "    ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
+ "    ticket_information = ticket_information.drop(columns = ['purchase_id', 'id'])\n",
+ "\n",
+ "    return ticket_information"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_50143/1320335767.py:5: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
+ "/tmp/ipykernel_50143/1320335767.py:9: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Build the ticket-level table of company 1 from its raw tickets, purchases and suppliers tables\n",
+ "df1_ticket_information = preprocessing_tickets_area(tickets = df1_tickets, purchases = df1_purchases, suppliers = df1_suppliers, type_ofs = df1_type_ofs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ticket_id | \n",
+ " product_id | \n",
+ " is_from_subscription | \n",
+ " type_of | \n",
+ " supplier_name | \n",
+ " purchase_date | \n",
+ " customer_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 13070859 | \n",
+ " 225251 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 13070860 | \n",
+ " 224914 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 13070861 | \n",
+ " 224914 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 13070862 | \n",
+ " 224914 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 13070863 | \n",
+ " 224914 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2018-12-28 14:47:50+00:00 | \n",
+ " 48187 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1826667 | \n",
+ " 20662815 | \n",
+ " 405689 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2023-11-08 17:23:54+00:00 | \n",
+ " 1256135 | \n",
+ "
\n",
+ " \n",
+ " | 1826668 | \n",
+ " 20662816 | \n",
+ " 403658 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2023-11-08 18:32:18+00:00 | \n",
+ " 1256136 | \n",
+ "
\n",
+ " \n",
+ " | 1826669 | \n",
+ " 20662817 | \n",
+ " 403658 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2023-11-08 18:32:18+00:00 | \n",
+ " 1256136 | \n",
+ "
\n",
+ " \n",
+ " | 1826670 | \n",
+ " 20662818 | \n",
+ " 403658 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2023-11-08 19:30:28+00:00 | \n",
+ " 1256137 | \n",
+ "
\n",
+ " \n",
+ " | 1826671 | \n",
+ " 20662819 | \n",
+ " 403658 | \n",
+ " False | \n",
+ " 1 | \n",
+ " vente en ligne | \n",
+ " 2023-11-08 19:30:28+00:00 | \n",
+ " 1256137 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1826672 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ticket_id product_id is_from_subscription type_of supplier_name \\\n",
+ "0 13070859 225251 False 1 vente en ligne \n",
+ "1 13070860 224914 False 1 vente en ligne \n",
+ "2 13070861 224914 False 1 vente en ligne \n",
+ "3 13070862 224914 False 1 vente en ligne \n",
+ "4 13070863 224914 False 1 vente en ligne \n",
+ "... ... ... ... ... ... \n",
+ "1826667 20662815 405689 False 1 vente en ligne \n",
+ "1826668 20662816 403658 False 1 vente en ligne \n",
+ "1826669 20662817 403658 False 1 vente en ligne \n",
+ "1826670 20662818 403658 False 1 vente en ligne \n",
+ "1826671 20662819 403658 False 1 vente en ligne \n",
+ "\n",
+ " purchase_date customer_id \n",
+ "0 2018-12-28 14:47:50+00:00 48187 \n",
+ "1 2018-12-28 14:47:50+00:00 48187 \n",
+ "2 2018-12-28 14:47:50+00:00 48187 \n",
+ "3 2018-12-28 14:47:50+00:00 48187 \n",
+ "4 2018-12-28 14:47:50+00:00 48187 \n",
+ "... ... ... \n",
+ "1826667 2023-11-08 17:23:54+00:00 1256135 \n",
+ "1826668 2023-11-08 18:32:18+00:00 1256136 \n",
+ "1826669 2023-11-08 18:32:18+00:00 1256136 \n",
+ "1826670 2023-11-08 19:30:28+00:00 1256137 \n",
+ "1826671 2023-11-08 19:30:28+00:00 1256137 \n",
+ "\n",
+ "[1826672 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Display the merged ticket table (pandas only renders the first and last rows)\n",
+ "df1_ticket_information"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "096e47f4-1d65-4575-989d-83227eedad2b",
+ "metadata": {},
+ "source": [
+ "## Target area"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "baed146a-9d3a-4397-a812-3d50c9a2f038",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def preprocessing_target_area(targets = None, target_types = None, customer_target_mappings = None):\n",
+ "    \"\"\"\n",
+ "    Attach to each customer/target mapping the name and type of its target.\n",
+ "\n",
+ "    Parameters:\n",
+ "    - targets: DataFrame\n",
+ "        Needs the columns 'id', 'target_type_id' and 'name'.\n",
+ "    - target_types: DataFrame\n",
+ "        Needs the columns 'id', 'is_import' and 'name'.\n",
+ "    - customer_target_mappings: DataFrame\n",
+ "        Needs the columns 'id', 'customer_id' and 'target_id'.\n",
+ "\n",
+ "    Returns:\n",
+ "    - DataFrame\n",
+ "        Columns 'id' (mapping id), 'customer_id', 'target_name',\n",
+ "        'target_type_is_import' and 'target_type_name'. Both merges are inner\n",
+ "        joins, so a mapping whose target or target type is unknown is dropped.\n",
+ "\n",
+ "    The input frames are left untouched.\n",
+ "    \"\"\"\n",
+ "    # Targets: a non-inplace rename returns a new frame, which avoids the\n",
+ "    # SettingWithCopyWarning raised when renaming a column slice in place\n",
+ "    targets = targets[['id', 'target_type_id', 'name']]\n",
+ "    targets = targets.rename(columns = {'id' : 'target_id', 'name' : 'target_name'})\n",
+ "\n",
+ "    # Target types: prefixing turns 'id' into 'target_type_id', the join key used below\n",
+ "    target_types = target_types[['id', 'is_import', 'name']].add_prefix('target_type_')\n",
+ "\n",
+ "    # Customer/target mappings\n",
+ "    customer_target_mappings = customer_target_mappings[['id', 'customer_id', 'target_id']]\n",
+ "\n",
+ "    # Targets with their type\n",
+ "    targets_full = pd.merge(targets, target_types, on = 'target_type_id', how = 'inner')\n",
+ "    targets_full = targets_full.drop(columns = ['target_type_id'])\n",
+ "\n",
+ "    # Mappings with their target\n",
+ "    targets_full = pd.merge(customer_target_mappings, targets_full, on = 'target_id', how = 'inner')\n",
+ "    targets_full = targets_full.drop(columns = ['target_id'])\n",
+ "\n",
+ "    return targets_full"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "5fbfd88b-b94c-489c-9201-670e96e453e7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_50143/3848597476.py:4: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " targets.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Build the customer/target table of company 1 from its targets, target types and mappings tables\n",
+ "df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ "
\n",
+ " \n",
+ " | target_name | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | consentement optin mediation specialisee | \n",
+ " 150000 | \n",
+ "
\n",
+ " \n",
+ " | consentement optin jeune public | \n",
+ " 149979 | \n",
+ "
\n",
+ " \n",
+ " | consentement optin b2c | \n",
+ " 108909 | \n",
+ "
\n",
+ " \n",
+ " | Arenametrix_bascule tel vers sib | \n",
+ " 35216 | \n",
+ "
\n",
+ " \n",
+ " | consentement optout b2c | \n",
+ " 34523 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | Automation_parrainage_newsletter_handicap_visuel | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | consentement optout mediation specialisee | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | Inscrits NL LSF formulaire | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | Market auto - contacts inactifs post-scénario | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | Inactifs - fin du scénario | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
283 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id\n",
+ "target_name \n",
+ "consentement optin mediation specialisee 150000\n",
+ "consentement optin jeune public 149979\n",
+ "consentement optin b2c 108909\n",
+ "Arenametrix_bascule tel vers sib 35216\n",
+ "consentement optout b2c 34523\n",
+ "... ...\n",
+ "Automation_parrainage_newsletter_handicap_visuel 1\n",
+ "consentement optout mediation specialisee 1\n",
+ "Inscrits NL LSF formulaire 1\n",
+ "Market auto - contacts inactifs post-scénario 1\n",
+ "Inactifs - fin du scénario 1\n",
+ "\n",
+ "[283 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of rows with a customer_id for each target name, largest first\n",
+ "df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "4417ff51-f501-4ab9-a192-4ab75764a8ed",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ "
\n",
+ " \n",
+ " | target_name | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Arenametrix_bascule tel vers sib | \n",
+ " 35216 | \n",
+ "
\n",
+ " \n",
+ " | Autres_interet_exposition | \n",
+ " 1021 | \n",
+ "
\n",
+ " \n",
+ " | COM Inscrits NL générale (historique) | \n",
+ " 23005 | \n",
+ "
\n",
+ " \n",
+ " | Contacts_prenomsdoubles | \n",
+ " 11643 | \n",
+ "
\n",
+ " \n",
+ " | DDCP MD Procès du Siècle | \n",
+ " 1684 | \n",
+ "
\n",
+ " \n",
+ " | DDCP Newsletter centres de loisirs | \n",
+ " 1032 | \n",
+ "
\n",
+ " \n",
+ " | DDCP Newsletter enseignants | \n",
+ " 4510 | \n",
+ "
\n",
+ " \n",
+ " | DDCP Newsletter jeune public | \n",
+ " 3862 | \n",
+ "
\n",
+ " \n",
+ " | DDCP Newsletter relais champ social | \n",
+ " 2270 | \n",
+ "
\n",
+ " \n",
+ " | DDCP PROMO Participants ateliers (adultes et enfants) | \n",
+ " 1954 | \n",
+ "
\n",
+ " \n",
+ " | DDCP billets famille | \n",
+ " 3609 | \n",
+ "
\n",
+ " \n",
+ " | DDCP promo MD pass musées dps oct 2018 | \n",
+ " 1785 | \n",
+ "
\n",
+ " \n",
+ " | DDCP promo Plan B 2019 (concerts) | \n",
+ " 1948 | \n",
+ "
\n",
+ " \n",
+ " | DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers) | \n",
+ " 1293 | \n",
+ "
\n",
+ " \n",
+ " | DDCP rentrée culturelle 2023 | \n",
+ " 1757 | \n",
+ "
\n",
+ " \n",
+ " | DDCP_marseille_jazz_2023 | \n",
+ " 1043 | \n",
+ "
\n",
+ " \n",
+ " | DRE Festival Jean Rouch | \n",
+ " 1502 | \n",
+ "
\n",
+ " \n",
+ " | DRE MucemLab | \n",
+ " 2302 | \n",
+ "
\n",
+ " \n",
+ " | DRE chercheurs | \n",
+ " 1557 | \n",
+ "
\n",
+ " \n",
+ " | DRE institutionnels | \n",
+ " 2229 | \n",
+ "
\n",
+ " \n",
+ " | FORMATION _ acheteurs optin last year | \n",
+ " 10485 | \n",
+ "
\n",
+ " \n",
+ " | Inscrits NL générale (export_291019 + operation_videomaton) | \n",
+ " 14086 | \n",
+ "
\n",
+ " \n",
+ " | Inscrits NL générale site web | \n",
+ " 3732 | \n",
+ "
\n",
+ " \n",
+ " | Inscrits NL jeune public site web | \n",
+ " 1249 | \n",
+ "
\n",
+ " \n",
+ " | Votre première liste | \n",
+ " 3715 | \n",
+ "
\n",
+ " \n",
+ " | consentement optin b2b | \n",
+ " 12735 | \n",
+ "
\n",
+ " \n",
+ " | consentement optin b2c | \n",
+ " 108909 | \n",
+ "
\n",
+ " \n",
+ " | consentement optin dre | \n",
+ " 4527 | \n",
+ "
\n",
+ " \n",
+ " | consentement optin jeune public | \n",
+ " 149979 | \n",
+ "
\n",
+ " \n",
+ " | consentement optin mediation specialisee | \n",
+ " 150000 | \n",
+ "
\n",
+ " \n",
+ " | consentement optin newsletter generale | \n",
+ " 22095 | \n",
+ "
\n",
+ " \n",
+ " | consentement optin scolaires | \n",
+ " 4849 | \n",
+ "
\n",
+ " \n",
+ " | consentement optout b2b | \n",
+ " 14219 | \n",
+ "
\n",
+ " \n",
+ " | consentement optout b2c | \n",
+ " 34523 | \n",
+ "
\n",
+ " \n",
+ " | consentement optout dre | \n",
+ " 14328 | \n",
+ "
\n",
+ " \n",
+ " | consentement optout newsletter generale | \n",
+ " 18855 | \n",
+ "
\n",
+ " \n",
+ " | consentement optout scolaires | \n",
+ " 15744 | \n",
+ "
\n",
+ " \n",
+ " | ddcp_md_scene_ouverte_au_talent | \n",
+ " 1577 | \n",
+ "
\n",
+ " \n",
+ " | ddcp_promo_MD_billet_musée_oct_2019_agarder2 | \n",
+ " 5482 | \n",
+ "
\n",
+ " \n",
+ " | ddcp_promo_md_musée_dps 011019 | \n",
+ " 6010 | \n",
+ "
\n",
+ " \n",
+ " | ddcp_promo_visiteurs occasionnels_musee_8mois | \n",
+ " 6640 | \n",
+ "
\n",
+ " \n",
+ " | ddcp_visiteurs dps 010622 | \n",
+ " 12355 | \n",
+ "
\n",
+ " \n",
+ " | festival_jean_rouch | \n",
+ " 1502 | \n",
+ "
\n",
+ " \n",
+ " | rappel po barvalo | \n",
+ " 1248 | \n",
+ "
\n",
+ " \n",
+ " | structures_etiquette champ social | \n",
+ " 1488 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id\n",
+ "target_name \n",
+ "Arenametrix_bascule tel vers sib 35216\n",
+ "Autres_interet_exposition 1021\n",
+ "COM Inscrits NL générale (historique) 23005\n",
+ "Contacts_prenomsdoubles 11643\n",
+ "DDCP MD Procès du Siècle 1684\n",
+ "DDCP Newsletter centres de loisirs 1032\n",
+ "DDCP Newsletter enseignants 4510\n",
+ "DDCP Newsletter jeune public 3862\n",
+ "DDCP Newsletter relais champ social 2270\n",
+ "DDCP PROMO Participants ateliers (adultes et en... 1954\n",
+ "DDCP billets famille 3609\n",
+ "DDCP promo MD pass musées dps oct 2018 1785\n",
+ "DDCP promo Plan B 2019 (concerts) 1948\n",
+ "DDCP promo spectateurs prog 21-22 (spectacles, ... 1293\n",
+ "DDCP rentrée culturelle 2023 1757\n",
+ "DDCP_marseille_jazz_2023 1043\n",
+ "DRE Festival Jean Rouch 1502\n",
+ "DRE MucemLab 2302\n",
+ "DRE chercheurs 1557\n",
+ "DRE institutionnels 2229\n",
+ "FORMATION _ acheteurs optin last year 10485\n",
+ "Inscrits NL générale (export_291019 + operation... 14086\n",
+ "Inscrits NL générale site web 3732\n",
+ "Inscrits NL jeune public site web 1249\n",
+ "Votre première liste 3715\n",
+ "consentement optin b2b 12735\n",
+ "consentement optin b2c 108909\n",
+ "consentement optin dre 4527\n",
+ "consentement optin jeune public 149979\n",
+ "consentement optin mediation specialisee 150000\n",
+ "consentement optin newsletter generale 22095\n",
+ "consentement optin scolaires 4849\n",
+ "consentement optout b2b 14219\n",
+ "consentement optout b2c 34523\n",
+ "consentement optout dre 14328\n",
+ "consentement optout newsletter generale 18855\n",
+ "consentement optout scolaires 15744\n",
+ "ddcp_md_scene_ouverte_au_talent 1577\n",
+ "ddcp_promo_MD_billet_musée_oct_2019_agarder2 5482\n",
+ "ddcp_promo_md_musée_dps 011019 6010\n",
+ "ddcp_promo_visiteurs occasionnels_musee_8mois 6640\n",
+ "ddcp_visiteurs dps 010622 12355\n",
+ "festival_jean_rouch 1502\n",
+ "rappel po barvalo 1248\n",
+ "structures_etiquette champ social 1488"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count the rows with a customer_id for each target name, then show only the\n",
+ "# targets that reach at least 1000 of them\n",
+ "df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n",
+ "df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cdbb48b4-5e16-4ef4-8791-ed213d68d52f",
+ "metadata": {},
+ "source": [
+ "## Campaigns area"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def preprocessing_campaigns_area(campaign_stats = None, campaigns = None):\n",
+ "    \"\"\"\n",
+ "    Attach to each campaign statistic row the description of its campaign.\n",
+ "\n",
+ "    Parameters:\n",
+ "    - campaign_stats: DataFrame\n",
+ "        Needs the columns 'id', 'campaign_id', 'customer_id', 'opened_at',\n",
+ "        'sent_at' and 'delivered_at'.\n",
+ "    - campaigns: DataFrame\n",
+ "        Needs the columns 'id', 'name', 'service_id' and 'sent_at'.\n",
+ "\n",
+ "    Returns:\n",
+ "    - DataFrame\n",
+ "        One row per campaign_stats row (left join), with the columns 'id',\n",
+ "        'customer_id', 'opened_at', 'sent_at', 'delivered_at', 'campaign_name',\n",
+ "        'campaign_service_id' and 'campaign_sent_at'. The four date columns are\n",
+ "        converted to UTC datetimes.\n",
+ "\n",
+ "    The input frames are left untouched.\n",
+ "    \"\"\"\n",
+ "    # campaign_stats: take an explicit copy of the selected columns, so that the\n",
+ "    # in-place date conversion does not raise SettingWithCopyWarning\n",
+ "    campaign_stats = campaign_stats[['id', 'campaign_id', 'customer_id', 'opened_at', 'sent_at', 'delivered_at']].copy()\n",
+ "    for date_column in ('opened_at', 'sent_at', 'delivered_at'):\n",
+ "        cleaning_date(campaign_stats, date_column)\n",
+ "\n",
+ "    # campaigns: add_prefix returns a new frame and turns 'id' into 'campaign_id',\n",
+ "    # the join key used below\n",
+ "    campaigns = campaigns[['id', 'name', 'service_id', 'sent_at']].add_prefix('campaign_')\n",
+ "    cleaning_date(campaigns, 'campaign_sent_at')\n",
+ "\n",
+ "    # Merge, then drop the join key\n",
+ "    campaigns_full = pd.merge(campaign_stats, campaigns, on = 'campaign_id', how = 'left')\n",
+ "    campaigns_full = campaigns_full.drop(columns = ['campaign_id'])\n",
+ "\n",
+ "    return campaigns_full"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_50143/1967867975.py:15: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
+ "/tmp/ipykernel_50143/1967867975.py:15: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
+ "/tmp/ipykernel_50143/1967867975.py:15: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Build the campaign-level table of company 1 from its campaign statistics and campaigns tables\n",
+ "df1_campaigns_information = preprocessing_campaigns_area(campaign_stats = df1_campaign_stats, campaigns = df1_campaigns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 19793 | \n",
+ " 112597 | \n",
+ " NaT | \n",
+ " 2021-03-28 16:01:09+00:00 | \n",
+ " 2021-03-28 16:24:18+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 14211 | \n",
+ " 113666 | \n",
+ " NaT | \n",
+ " 2021-03-28 16:01:09+00:00 | \n",
+ " 2021-03-28 16:21:02+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 13150 | \n",
+ " 280561 | \n",
+ " NaT | \n",
+ " 2021-03-28 16:00:59+00:00 | \n",
+ " 2021-03-28 16:08:45+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7073 | \n",
+ " 101007 | \n",
+ " 2021-03-28 18:11:06+00:00 | \n",
+ " 2021-03-28 16:00:59+00:00 | \n",
+ " 2021-03-28 16:09:47+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5175 | \n",
+ " 103972 | \n",
+ " NaT | \n",
+ " 2021-03-28 16:01:06+00:00 | \n",
+ " 2021-03-28 16:05:03+00:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-27 23:00:00+00:00 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 6214803 | \n",
+ " 8302994 | \n",
+ " 266155 | \n",
+ " 2023-10-23 09:43:25+00:00 | \n",
+ " 2023-10-23 09:32:33+00:00 | \n",
+ " 2023-10-23 09:32:34+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ " | 6214804 | \n",
+ " 8303307 | \n",
+ " 21355 | \n",
+ " 2023-10-23 09:44:02+00:00 | \n",
+ " 2023-10-23 09:32:49+00:00 | \n",
+ " 2023-10-23 09:32:49+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ " | 6214805 | \n",
+ " 8304346 | \n",
+ " 21849 | \n",
+ " 2023-10-23 09:45:52+00:00 | \n",
+ " 2023-10-23 09:33:28+00:00 | \n",
+ " 2023-10-23 09:33:29+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ " | 6214806 | \n",
+ " 8302037 | \n",
+ " 667789 | \n",
+ " 2023-10-23 09:47:32+00:00 | \n",
+ " 2023-10-23 09:31:53+00:00 | \n",
+ " 2023-10-23 09:31:54+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ " | 6214807 | \n",
+ " 8304939 | \n",
+ " 294154 | \n",
+ " NaT | \n",
+ " 2023-10-23 09:33:54+00:00 | \n",
+ " 2023-10-23 09:33:55+00:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:17+00:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6214808 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id opened_at \\\n",
+ "0 19793 112597 NaT \n",
+ "1 14211 113666 NaT \n",
+ "2 13150 280561 NaT \n",
+ "3 7073 101007 2021-03-28 18:11:06+00:00 \n",
+ "4 5175 103972 NaT \n",
+ "... ... ... ... \n",
+ "6214803 8302994 266155 2023-10-23 09:43:25+00:00 \n",
+ "6214804 8303307 21355 2023-10-23 09:44:02+00:00 \n",
+ "6214805 8304346 21849 2023-10-23 09:45:52+00:00 \n",
+ "6214806 8302037 667789 2023-10-23 09:47:32+00:00 \n",
+ "6214807 8304939 294154 NaT \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00 \n",
+ "1 2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00 \n",
+ "2 2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00 \n",
+ "3 2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00 \n",
+ "4 2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00 \n",
+ "... ... ... \n",
+ "6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00 \n",
+ "6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00 \n",
+ "6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00 \n",
+ "6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00 \n",
+ "6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "0 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "1 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "2 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "3 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "... ... ... \n",
+ "6214803 dre_nov_2023 1318 \n",
+ "6214804 dre_nov_2023 1318 \n",
+ "6214805 dre_nov_2023 1318 \n",
+ "6214806 dre_nov_2023 1318 \n",
+ "6214807 dre_nov_2023 1318 \n",
+ "\n",
+ " campaign_sent_at \n",
+ "0 2021-03-27 23:00:00+00:00 \n",
+ "1 2021-03-27 23:00:00+00:00 \n",
+ "2 2021-03-27 23:00:00+00:00 \n",
+ "3 2021-03-27 23:00:00+00:00 \n",
+ "4 2021-03-27 23:00:00+00:00 \n",
+ "... ... \n",
+ "6214803 2023-10-23 09:31:17+00:00 \n",
+ "6214804 2023-10-23 09:31:17+00:00 \n",
+ "6214805 2023-10-23 09:31:17+00:00 \n",
+ "6214806 2023-10-23 09:31:17+00:00 \n",
+ "6214807 2023-10-23 09:31:17+00:00 \n",
+ "\n",
+ "[6214808 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Display the merged campaigns table (pandas only renders the first and last rows)\n",
+ "df1_campaigns_information"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def campaigns_kpi(campaigns_information = None):\n",
+ "    \"\"\"\n",
+ "    Compute per-customer e-mail campaign indicators.\n",
+ "\n",
+ "    Parameters:\n",
+ "    - campaigns_information: DataFrame\n",
+ "        Needs the columns 'customer_id', 'campaign_name', 'opened_at' and\n",
+ "        'delivered_at' (the last two as datetimes). It is not modified.\n",
+ "\n",
+ "    Returns:\n",
+ "    - DataFrame\n",
+ "        One row per customer_id, with the columns:\n",
+ "        - 'nb_campaigns': number of rows with a campaign name;\n",
+ "        - 'nb_campaigns_opened': number of those rows that have an 'opened_at'\n",
+ "          value (0 when the customer opened nothing);\n",
+ "        - 'time_to_open': mean of opened_at - delivered_at, as a Timedelta\n",
+ "          (NaT when the customer opened nothing).\n",
+ "    \"\"\"\n",
+ "    # Number of e-mail campaigns received\n",
+ "    nb_campaigns = (\n",
+ "        campaigns_information.groupby('customer_id')['campaign_name']\n",
+ "        .count()\n",
+ "        .rename('nb_campaigns')\n",
+ "        .reset_index()\n",
+ "    )\n",
+ "\n",
+ "    # Mean time to open. The delay is computed on a separate frame so that the\n",
+ "    # caller's DataFrame does not gain a 'time_to_open' column as a side effect\n",
+ "    opening_delay = campaigns_information[['customer_id']].assign(\n",
+ "        time_to_open = campaigns_information['opened_at'] - campaigns_information['delivered_at']\n",
+ "    )\n",
+ "    time_to_open = opening_delay.groupby('customer_id').mean().reset_index()\n",
+ "\n",
+ "    # Number of e-mails opened: dropna returns a new frame, which avoids the\n",
+ "    # SettingWithCopyWarning raised by an in-place dropna on a column slice\n",
+ "    opened_campaign = (\n",
+ "        campaigns_information.dropna(subset = ['opened_at'])\n",
+ "        .groupby('customer_id')['campaign_name']\n",
+ "        .count()\n",
+ "        .rename('nb_campaigns_opened')\n",
+ "        .reset_index()\n",
+ "    )\n",
+ "\n",
+ "    # Merge the indicators\n",
+ "    campaigns_reduced = pd.merge(nb_campaigns, opened_campaign, on = 'customer_id', how = 'left')\n",
+ "    campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n",
+ "\n",
+ "    # Customers who opened nothing have no row in opened_campaign, hence NaN after\n",
+ "    # the left merge. Assign the result back: a chained fillna(..., inplace=True)\n",
+ "    # acts on a temporary object and stops filling anything under pandas 3.0\n",
+ "    campaigns_reduced['nb_campaigns_opened'] = campaigns_reduced['nb_campaigns_opened'].fillna(0)\n",
+ "\n",
+ "    # TODO: decide how to fill the NaT values of time_to_open\n",
+ "\n",
+ "    return campaigns_reduced"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "24537647-bc29-4777-9848-ac4120a4aa60",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_50143/2679359833.py:11: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " opened_campaign.dropna(subset=['opened_at'], inplace=True)\n",
+ "/tmp/ipykernel_50143/2679359833.py:20: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+ "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+ "\n",
+ "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+ "\n",
+ "\n",
+ " campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Reduce the campaign events to one row of KPIs per customer\n",
+ "df1_campaigns_kpi = campaigns_kpi(df1_campaigns_information)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_id | \n",
+ " nb_campaigns | \n",
+ " nb_campaigns_opened | \n",
+ " time_to_open | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3 | \n",
+ " 222 | \n",
+ " 124.0 | \n",
+ " 1 days 00:28:30.169354838 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 4 | \n",
+ " 7 | \n",
+ " 7.0 | \n",
+ " 1 days 04:31:01.428571428 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 6 | \n",
+ " 20 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 130467 | \n",
+ " 1256097 | \n",
+ " 1 | \n",
+ " 1.0 | \n",
+ " 0 days 02:11:15 | \n",
+ "
\n",
+ " \n",
+ " | 130468 | \n",
+ " 1256098 | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " | 130469 | \n",
+ " 1256099 | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " | 130470 | \n",
+ " 1256100 | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ " | 130471 | \n",
+ " 1256101 | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " NaT | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
130472 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_id nb_campaigns nb_campaigns_opened \\\n",
+ "0 2 4 0.0 \n",
+ "1 3 222 124.0 \n",
+ "2 4 7 7.0 \n",
+ "3 5 4 0.0 \n",
+ "4 6 20 0.0 \n",
+ "... ... ... ... \n",
+ "130467 1256097 1 1.0 \n",
+ "130468 1256098 1 0.0 \n",
+ "130469 1256099 1 0.0 \n",
+ "130470 1256100 1 0.0 \n",
+ "130471 1256101 1 0.0 \n",
+ "\n",
+ " time_to_open \n",
+ "0 NaT \n",
+ "1 1 days 00:28:30.169354838 \n",
+ "2 1 days 04:31:01.428571428 \n",
+ "3 NaT \n",
+ "4 NaT \n",
+ "... ... \n",
+ "130467 0 days 02:11:15 \n",
+ "130468 NaT \n",
+ "130469 NaT \n",
+ "130470 NaT \n",
+ "130471 NaT \n",
+ "\n",
+ "[130472 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_campaigns_kpi"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}