BDC-team-1/0_Cleaning_and_merge.ipynb

1466 lines
53 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "ad414c84-be46-4d2c-be8b-9fc4d24cc672",
"metadata": {},
"source": [
"# Business Data Challenge - Team 1"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "15103481-8d74-404c-aa09-7601fe7730da",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re"
]
},
{
"cell_type": "markdown",
"id": "ee97665c-39af-4c1c-a62b-c9c79feae18f",
"metadata": {},
"source": [
"Configuration de l'accès aux données"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5d83bb1a-d341-446e-91f6-1c428607f6d4",
"metadata": {},
"outputs": [],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
]
},
{
"cell_type": "markdown",
"id": "9cbd72c5-6f8e-4366-ab66-96c32c6e963a",
"metadata": {},
"source": [
"# Exemple sur Company 1"
]
},
{
"cell_type": "markdown",
"id": "db26e59a-927c-407e-b54b-1815473b0b34",
"metadata": {},
"source": [
"## Chargement données"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "699664b9-eee4-4f8d-a207-e524526560c5",
"metadata": {},
"outputs": [],
"source": [
"BUCKET = \"bdc2324-data/1\"\n",
"liste_database = fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "aaf64d60-bf92-470c-8210-d09abd6a653e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/1/1campaign_stats.csv',\n",
" 'bdc2324-data/1/1campaigns.csv',\n",
" 'bdc2324-data/1/1categories.csv',\n",
" 'bdc2324-data/1/1countries.csv',\n",
" 'bdc2324-data/1/1currencies.csv',\n",
" 'bdc2324-data/1/1customer_target_mappings.csv',\n",
" 'bdc2324-data/1/1customersplus.csv',\n",
" 'bdc2324-data/1/1event_types.csv',\n",
" 'bdc2324-data/1/1events.csv',\n",
" 'bdc2324-data/1/1facilities.csv',\n",
" 'bdc2324-data/1/1link_stats.csv',\n",
" 'bdc2324-data/1/1pricing_formulas.csv',\n",
" 'bdc2324-data/1/1product_packs.csv',\n",
" 'bdc2324-data/1/1products.csv',\n",
" 'bdc2324-data/1/1products_groups.csv',\n",
" 'bdc2324-data/1/1purchases.csv',\n",
" 'bdc2324-data/1/1representation_category_capacities.csv',\n",
" 'bdc2324-data/1/1representations.csv',\n",
" 'bdc2324-data/1/1seasons.csv',\n",
" 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
" 'bdc2324-data/1/1suppliers.csv',\n",
" 'bdc2324-data/1/1tags.csv',\n",
" 'bdc2324-data/1/1target_types.csv',\n",
" 'bdc2324-data/1/1targets.csv',\n",
" 'bdc2324-data/1/1tickets.csv',\n",
" 'bdc2324-data/1/1type_of_categories.csv',\n",
" 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
" 'bdc2324-data/1/1type_ofs.csv']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"liste_database"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dd6a3518-b752-4a1e-b77b-9e03e853c3ed",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_50143/4081512283.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in)\n"
]
}
],
"source": [
"# loop to create dataframes from liste\n",
"files_path = liste_database\n",
"\n",
"client_number = files_path[0].split(\"/\")[1]\n",
"df_prefix = \"df\" + str(client_number) + \"_\"\n",
"\n",
"for i in range(len(files_path)) :\n",
" current_path = files_path[i]\n",
" with fs.open(current_path, mode=\"rb\") as file_in:\n",
" df = pd.read_csv(file_in)\n",
" # the pattern of the name is df1xxx\n",
" nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n",
" globals()[nom_dataframe] = df"
]
},
{
"cell_type": "markdown",
"id": "4004c8bf-11d9-413d-bb42-2cb8ddde7716",
"metadata": {},
"source": [
"## Cleaning functions"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d237be96-8c86-4a91-b7a1-487e87a16c3d",
"metadata": {},
"outputs": [],
"source": [
"def cleaning_date(df, column_name):\n",
" \"\"\"\n",
" Nettoie la colonne spécifiée du DataFrame en convertissant les valeurs en datetime avec le format ISO8601.\n",
"\n",
" Parameters:\n",
" - df: DataFrame\n",
" Le DataFrame contenant la colonne à nettoyer.\n",
" - column_name: str\n",
" Le nom de la colonne à nettoyer.\n",
"\n",
" Returns:\n",
" - DataFrame\n",
" Le DataFrame modifié avec la colonne nettoyée.\n",
" \"\"\"\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
" return df"
]
},
{
"cell_type": "markdown",
"id": "398804d8-2225-4fd3-bceb-75ab1588e359",
"metadata": {},
"source": [
"## Preprocessing"
]
},
{
"cell_type": "markdown",
"id": "568cb180-0dd9-4b27-aecb-05e4c3775ba6",
"metadata": {},
"source": [
"## customer_plus"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e7b90ce-da54-4f00-bc34-64c543b0858f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "bade04b1-0cdf-4d10-bcca-7dc7e4831656",
"metadata": {},
"source": [
"## Ticket area"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b95464b1-26bc-4aac-84b4-45da83b92251",
"metadata": {},
"outputs": [],
"source": [
"# Fonction de nettoyage et selection\n",
"def preprocessing_tickets_area(tickets = None, purchases = None, suppliers = None, type_ofs = None):\n",
" # Base des tickets\n",
" tickets = tickets[['id', 'purchase_id', 'product_id', 'is_from_subscription', 'type_of', 'supplier_id']]\n",
" tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
"\n",
" # Base des fournisseurs\n",
" suppliers = suppliers[['id', 'name']]\n",
" suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n",
"\n",
" # Base des types de billets\n",
" # type_ofs = type_ofs[['id', 'name', 'children']]\n",
" # type_ofs.rename(columns = {'name' : 'type_of_ticket_name'}, inplace = True)\n",
"\n",
" # Base des achats\n",
" # Nettoyage de la date d'achat\n",
" cleaning_date(purchases, 'purchase_date')\n",
" # Selection des variables\n",
" purchases = purchases[['id', 'purchase_date', 'customer_id']]\n",
"\n",
" # Fusions \n",
" # Fusion avec fournisseurs\n",
" ticket_information = pd.merge(tickets, suppliers, left_on = 'supplier_id', right_on = 'id', how = 'inner')\n",
" ticket_information.drop(['supplier_id', 'id'], axis = 1, inplace=True)\n",
" \n",
" # # Fusion avec type de tickets\n",
" # ticket_information = pd.merge(ticket_information, type_ofs, left_on = 'type_of', right_on = 'id', how = 'inner')\n",
" # ticket_information.drop(['type_of', 'id'], axis = 1, inplace=True)\n",
" \n",
" # Fusion avec achats\n",
" ticket_information = pd.merge(ticket_information, purchases, left_on = 'purchase_id', right_on = 'id', how = 'inner')\n",
" ticket_information.drop(['purchase_id', 'id'], axis = 1, inplace=True)\n",
"\n",
" return ticket_information"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "3e1d2ba7-ff4f-48eb-93a8-2bb648c70396",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_50143/1320335767.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" tickets.rename(columns = {'id' : 'ticket_id'}, inplace = True)\n",
"/tmp/ipykernel_50143/1320335767.py:9: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" suppliers.rename(columns = {'name' : 'supplier_name'}, inplace = True)\n"
]
}
],
"source": [
"df1_ticket_information = preprocessing_tickets_area(tickets = df1_tickets, purchases = df1_purchases, suppliers = df1_suppliers, type_ofs = df1_type_ofs)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "4b18edfc-6450-4c6a-9e7b-ee5a5808c8c9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>product_id</th>\n",
" <th>is_from_subscription</th>\n",
" <th>type_of</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>customer_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13070859</td>\n",
" <td>225251</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>48187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13070860</td>\n",
" <td>224914</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>48187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13070861</td>\n",
" <td>224914</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>48187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13070862</td>\n",
" <td>224914</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>48187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13070863</td>\n",
" <td>224914</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>48187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826667</th>\n",
" <td>20662815</td>\n",
" <td>405689</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 17:23:54+00:00</td>\n",
" <td>1256135</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826668</th>\n",
" <td>20662816</td>\n",
" <td>403658</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>1256136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826669</th>\n",
" <td>20662817</td>\n",
" <td>403658</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>1256136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826670</th>\n",
" <td>20662818</td>\n",
" <td>403658</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>1256137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826671</th>\n",
" <td>20662819</td>\n",
" <td>403658</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>1256137</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1826672 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id product_id is_from_subscription type_of supplier_name \\\n",
"0 13070859 225251 False 1 vente en ligne \n",
"1 13070860 224914 False 1 vente en ligne \n",
"2 13070861 224914 False 1 vente en ligne \n",
"3 13070862 224914 False 1 vente en ligne \n",
"4 13070863 224914 False 1 vente en ligne \n",
"... ... ... ... ... ... \n",
"1826667 20662815 405689 False 1 vente en ligne \n",
"1826668 20662816 403658 False 1 vente en ligne \n",
"1826669 20662817 403658 False 1 vente en ligne \n",
"1826670 20662818 403658 False 1 vente en ligne \n",
"1826671 20662819 403658 False 1 vente en ligne \n",
"\n",
" purchase_date customer_id \n",
"0 2018-12-28 14:47:50+00:00 48187 \n",
"1 2018-12-28 14:47:50+00:00 48187 \n",
"2 2018-12-28 14:47:50+00:00 48187 \n",
"3 2018-12-28 14:47:50+00:00 48187 \n",
"4 2018-12-28 14:47:50+00:00 48187 \n",
"... ... ... \n",
"1826667 2023-11-08 17:23:54+00:00 1256135 \n",
"1826668 2023-11-08 18:32:18+00:00 1256136 \n",
"1826669 2023-11-08 18:32:18+00:00 1256136 \n",
"1826670 2023-11-08 19:30:28+00:00 1256137 \n",
"1826671 2023-11-08 19:30:28+00:00 1256137 \n",
"\n",
"[1826672 rows x 7 columns]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_ticket_information"
]
},
{
"cell_type": "markdown",
"id": "096e47f4-1d65-4575-989d-83227eedad2b",
"metadata": {},
"source": [
"## Target area"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "baed146a-9d3a-4397-a812-3d50c9a2f038",
"metadata": {},
"outputs": [],
"source": [
"def preprocessing_target_area(targets = None, target_types = None, customer_target_mappings = None):\n",
" # Target.csv cleaning\n",
" targets = targets[[\"id\", \"target_type_id\", \"name\"]]\n",
" targets.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n",
" \n",
" # target_type cleaning\n",
" target_types = target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\")\n",
" \n",
" #customer_target_mappings cleaning\n",
" customer_target_mappings = customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n",
" \n",
" # Merge target et target_type\n",
" targets_full = pd.merge(targets, target_types, left_on='target_type_id', right_on='target_type_id', how='inner')\n",
" targets_full.drop(['target_type_id'], axis = 1, inplace=True)\n",
" \n",
" # Merge\n",
" targets_full = pd.merge(customer_target_mappings, targets_full, left_on='target_id', right_on='target_id', how='inner')\n",
" targets_full.drop(['target_id'], axis = 1, inplace=True)\n",
"\n",
" return targets_full"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "5fbfd88b-b94c-489c-9201-670e96e453e7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_50143/3848597476.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" targets.rename(columns = {'id' : 'target_id' , 'name' : 'target_name'}, inplace = True)\n"
]
}
],
"source": [
"df1_target_information = preprocessing_target_area(targets = df1_targets, target_types = df1_target_types, customer_target_mappings = df1_customer_target_mappings)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "b4f05142-2a22-42ef-a60d-f23cc4b5cb09",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>target_name</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>consentement optin mediation specialisee</th>\n",
" <td>150000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optin jeune public</th>\n",
" <td>149979</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optin b2c</th>\n",
" <td>108909</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Arenametrix_bascule tel vers sib</th>\n",
" <td>35216</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optout b2c</th>\n",
" <td>34523</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Automation_parrainage_newsletter_handicap_visuel</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optout mediation specialisee</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Inscrits NL LSF formulaire</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Market auto - contacts inactifs post-scénario</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Inactifs - fin du scénario</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>283 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id\n",
"target_name \n",
"consentement optin mediation specialisee 150000\n",
"consentement optin jeune public 149979\n",
"consentement optin b2c 108909\n",
"Arenametrix_bascule tel vers sib 35216\n",
"consentement optout b2c 34523\n",
"... ...\n",
"Automation_parrainage_newsletter_handicap_visuel 1\n",
"consentement optout mediation specialisee 1\n",
"Inscrits NL LSF formulaire 1\n",
"Market auto - contacts inactifs post-scénario 1\n",
"Inactifs - fin du scénario 1\n",
"\n",
"[283 rows x 1 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_target_information[['target_name', 'customer_id']].groupby('target_name').count().sort_values(by='customer_id', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "4417ff51-f501-4ab9-a192-4ab75764a8ed",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>target_name</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Arenametrix_bascule tel vers sib</th>\n",
" <td>35216</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Autres_interet_exposition</th>\n",
" <td>1021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>COM Inscrits NL générale (historique)</th>\n",
" <td>23005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Contacts_prenomsdoubles</th>\n",
" <td>11643</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP MD Procès du Siècle</th>\n",
" <td>1684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP Newsletter centres de loisirs</th>\n",
" <td>1032</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP Newsletter enseignants</th>\n",
" <td>4510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP Newsletter jeune public</th>\n",
" <td>3862</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP Newsletter relais champ social</th>\n",
" <td>2270</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP PROMO Participants ateliers (adultes et enfants)</th>\n",
" <td>1954</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP billets famille</th>\n",
" <td>3609</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP promo MD pass musées dps oct 2018</th>\n",
" <td>1785</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP promo Plan B 2019 (concerts)</th>\n",
" <td>1948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP promo spectateurs prog 21-22 (spectacles, ciné, ateliers)</th>\n",
" <td>1293</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP rentrée culturelle 2023</th>\n",
" <td>1757</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DDCP_marseille_jazz_2023</th>\n",
" <td>1043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DRE Festival Jean Rouch</th>\n",
" <td>1502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DRE MucemLab</th>\n",
" <td>2302</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DRE chercheurs</th>\n",
" <td>1557</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DRE institutionnels</th>\n",
" <td>2229</td>\n",
" </tr>\n",
" <tr>\n",
" <th>FORMATION _ acheteurs optin last year</th>\n",
" <td>10485</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Inscrits NL générale (export_291019 + operation_videomaton)</th>\n",
" <td>14086</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Inscrits NL générale site web</th>\n",
" <td>3732</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Inscrits NL jeune public site web</th>\n",
" <td>1249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Votre première liste</th>\n",
" <td>3715</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optin b2b</th>\n",
" <td>12735</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optin b2c</th>\n",
" <td>108909</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optin dre</th>\n",
" <td>4527</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optin jeune public</th>\n",
" <td>149979</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optin mediation specialisee</th>\n",
" <td>150000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optin newsletter generale</th>\n",
" <td>22095</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optin scolaires</th>\n",
" <td>4849</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optout b2b</th>\n",
" <td>14219</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optout b2c</th>\n",
" <td>34523</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optout dre</th>\n",
" <td>14328</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optout newsletter generale</th>\n",
" <td>18855</td>\n",
" </tr>\n",
" <tr>\n",
" <th>consentement optout scolaires</th>\n",
" <td>15744</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ddcp_md_scene_ouverte_au_talent</th>\n",
" <td>1577</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ddcp_promo_MD_billet_musée_oct_2019_agarder2</th>\n",
" <td>5482</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ddcp_promo_md_musée_dps 011019</th>\n",
" <td>6010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ddcp_promo_visiteurs occasionnels_musee_8mois</th>\n",
" <td>6640</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ddcp_visiteurs dps 010622</th>\n",
" <td>12355</td>\n",
" </tr>\n",
" <tr>\n",
" <th>festival_jean_rouch</th>\n",
" <td>1502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rappel po barvalo</th>\n",
" <td>1248</td>\n",
" </tr>\n",
" <tr>\n",
" <th>structures_etiquette champ social</th>\n",
" <td>1488</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id\n",
"target_name \n",
"Arenametrix_bascule tel vers sib 35216\n",
"Autres_interet_exposition 1021\n",
"COM Inscrits NL générale (historique) 23005\n",
"Contacts_prenomsdoubles 11643\n",
"DDCP MD Procès du Siècle 1684\n",
"DDCP Newsletter centres de loisirs 1032\n",
"DDCP Newsletter enseignants 4510\n",
"DDCP Newsletter jeune public 3862\n",
"DDCP Newsletter relais champ social 2270\n",
"DDCP PROMO Participants ateliers (adultes et en... 1954\n",
"DDCP billets famille 3609\n",
"DDCP promo MD pass musées dps oct 2018 1785\n",
"DDCP promo Plan B 2019 (concerts) 1948\n",
"DDCP promo spectateurs prog 21-22 (spectacles, ... 1293\n",
"DDCP rentrée culturelle 2023 1757\n",
"DDCP_marseille_jazz_2023 1043\n",
"DRE Festival Jean Rouch 1502\n",
"DRE MucemLab 2302\n",
"DRE chercheurs 1557\n",
"DRE institutionnels 2229\n",
"FORMATION _ acheteurs optin last year 10485\n",
"Inscrits NL générale (export_291019 + operation... 14086\n",
"Inscrits NL générale site web 3732\n",
"Inscrits NL jeune public site web 1249\n",
"Votre première liste 3715\n",
"consentement optin b2b 12735\n",
"consentement optin b2c 108909\n",
"consentement optin dre 4527\n",
"consentement optin jeune public 149979\n",
"consentement optin mediation specialisee 150000\n",
"consentement optin newsletter generale 22095\n",
"consentement optin scolaires 4849\n",
"consentement optout b2b 14219\n",
"consentement optout b2c 34523\n",
"consentement optout dre 14328\n",
"consentement optout newsletter generale 18855\n",
"consentement optout scolaires 15744\n",
"ddcp_md_scene_ouverte_au_talent 1577\n",
"ddcp_promo_MD_billet_musée_oct_2019_agarder2 5482\n",
"ddcp_promo_md_musée_dps 011019 6010\n",
"ddcp_promo_visiteurs occasionnels_musee_8mois 6640\n",
"ddcp_visiteurs dps 010622 12355\n",
"festival_jean_rouch 1502\n",
"rappel po barvalo 1248\n",
"structures_etiquette champ social 1488"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_target_information_reduced = df1_target_information[['target_name', 'customer_id']].groupby('target_name').count()\n",
"df1_target_information_reduced[df1_target_information_reduced['customer_id'] >= 1000]"
]
},
{
"cell_type": "markdown",
"id": "cdbb48b4-5e16-4ef4-8791-ed213d68d52f",
"metadata": {},
"source": [
"## Campaings area"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "d883cc7b-ac43-4485-b86f-eaf595fbad85",
"metadata": {},
"outputs": [],
"source": [
"def preprocessing_campaigns_area(campaign_stats = None, campaigns = None):\n",
" # campaign_stats cleaning \n",
" campaign_stats = campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n",
" cleaning_date(campaign_stats, 'opened_at')\n",
" cleaning_date(campaign_stats, 'sent_at')\n",
" cleaning_date(campaign_stats, 'delivered_at')\n",
" \n",
" # campaigns cleaning\n",
" campaigns = campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
" cleaning_date(campaigns, 'campaign_sent_at')\n",
" \n",
" # Merge \n",
" campaigns_full = pd.merge(campaign_stats, campaigns, on = \"campaign_id\", how = \"left\")\n",
" campaigns_full.drop(['campaign_id'], axis = 1, inplace=True)\n",
"\n",
" return campaigns_full"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c8552dd6-52c5-4431-b43d-3cd6c578fd9f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_50143/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
"/tmp/ipykernel_50143/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n",
"/tmp/ipykernel_50143/1967867975.py:15: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df[column_name] = pd.to_datetime(df[column_name], utc = True, format = 'ISO8601')\n"
]
}
],
"source": [
"df1_campaigns_information = preprocessing_campaigns_area(campaign_stats = df1_campaign_stats, campaigns = df1_campaigns)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "c24457e7-3cad-451a-a65b-7373b656bd6e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>opened_at</th>\n",
" <th>sent_at</th>\n",
" <th>delivered_at</th>\n",
" <th>campaign_name</th>\n",
" <th>campaign_service_id</th>\n",
" <th>campaign_sent_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19793</td>\n",
" <td>112597</td>\n",
" <td>NaT</td>\n",
" <td>2021-03-28 16:01:09+00:00</td>\n",
" <td>2021-03-28 16:24:18+00:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-27 23:00:00+00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>14211</td>\n",
" <td>113666</td>\n",
" <td>NaT</td>\n",
" <td>2021-03-28 16:01:09+00:00</td>\n",
" <td>2021-03-28 16:21:02+00:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-27 23:00:00+00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13150</td>\n",
" <td>280561</td>\n",
" <td>NaT</td>\n",
" <td>2021-03-28 16:00:59+00:00</td>\n",
" <td>2021-03-28 16:08:45+00:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-27 23:00:00+00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7073</td>\n",
" <td>101007</td>\n",
" <td>2021-03-28 18:11:06+00:00</td>\n",
" <td>2021-03-28 16:00:59+00:00</td>\n",
" <td>2021-03-28 16:09:47+00:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-27 23:00:00+00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5175</td>\n",
" <td>103972</td>\n",
" <td>NaT</td>\n",
" <td>2021-03-28 16:01:06+00:00</td>\n",
" <td>2021-03-28 16:05:03+00:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-27 23:00:00+00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6214803</th>\n",
" <td>8302994</td>\n",
" <td>266155</td>\n",
" <td>2023-10-23 09:43:25+00:00</td>\n",
" <td>2023-10-23 09:32:33+00:00</td>\n",
" <td>2023-10-23 09:32:34+00:00</td>\n",
" <td>dre_nov_2023</td>\n",
" <td>1318</td>\n",
" <td>2023-10-23 09:31:17+00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6214804</th>\n",
" <td>8303307</td>\n",
" <td>21355</td>\n",
" <td>2023-10-23 09:44:02+00:00</td>\n",
" <td>2023-10-23 09:32:49+00:00</td>\n",
" <td>2023-10-23 09:32:49+00:00</td>\n",
" <td>dre_nov_2023</td>\n",
" <td>1318</td>\n",
" <td>2023-10-23 09:31:17+00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6214805</th>\n",
" <td>8304346</td>\n",
" <td>21849</td>\n",
" <td>2023-10-23 09:45:52+00:00</td>\n",
" <td>2023-10-23 09:33:28+00:00</td>\n",
" <td>2023-10-23 09:33:29+00:00</td>\n",
" <td>dre_nov_2023</td>\n",
" <td>1318</td>\n",
" <td>2023-10-23 09:31:17+00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6214806</th>\n",
" <td>8302037</td>\n",
" <td>667789</td>\n",
" <td>2023-10-23 09:47:32+00:00</td>\n",
" <td>2023-10-23 09:31:53+00:00</td>\n",
" <td>2023-10-23 09:31:54+00:00</td>\n",
" <td>dre_nov_2023</td>\n",
" <td>1318</td>\n",
" <td>2023-10-23 09:31:17+00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6214807</th>\n",
" <td>8304939</td>\n",
" <td>294154</td>\n",
" <td>NaT</td>\n",
" <td>2023-10-23 09:33:54+00:00</td>\n",
" <td>2023-10-23 09:33:55+00:00</td>\n",
" <td>dre_nov_2023</td>\n",
" <td>1318</td>\n",
" <td>2023-10-23 09:31:17+00:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6214808 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id opened_at \\\n",
"0 19793 112597 NaT \n",
"1 14211 113666 NaT \n",
"2 13150 280561 NaT \n",
"3 7073 101007 2021-03-28 18:11:06+00:00 \n",
"4 5175 103972 NaT \n",
"... ... ... ... \n",
"6214803 8302994 266155 2023-10-23 09:43:25+00:00 \n",
"6214804 8303307 21355 2023-10-23 09:44:02+00:00 \n",
"6214805 8304346 21849 2023-10-23 09:45:52+00:00 \n",
"6214806 8302037 667789 2023-10-23 09:47:32+00:00 \n",
"6214807 8304939 294154 NaT \n",
"\n",
" sent_at delivered_at \\\n",
"0 2021-03-28 16:01:09+00:00 2021-03-28 16:24:18+00:00 \n",
"1 2021-03-28 16:01:09+00:00 2021-03-28 16:21:02+00:00 \n",
"2 2021-03-28 16:00:59+00:00 2021-03-28 16:08:45+00:00 \n",
"3 2021-03-28 16:00:59+00:00 2021-03-28 16:09:47+00:00 \n",
"4 2021-03-28 16:01:06+00:00 2021-03-28 16:05:03+00:00 \n",
"... ... ... \n",
"6214803 2023-10-23 09:32:33+00:00 2023-10-23 09:32:34+00:00 \n",
"6214804 2023-10-23 09:32:49+00:00 2023-10-23 09:32:49+00:00 \n",
"6214805 2023-10-23 09:33:28+00:00 2023-10-23 09:33:29+00:00 \n",
"6214806 2023-10-23 09:31:53+00:00 2023-10-23 09:31:54+00:00 \n",
"6214807 2023-10-23 09:33:54+00:00 2023-10-23 09:33:55+00:00 \n",
"\n",
" campaign_name campaign_service_id \\\n",
"0 Le Mucem chez vous, gardons le lien #22 404 \n",
"1 Le Mucem chez vous, gardons le lien #22 404 \n",
"2 Le Mucem chez vous, gardons le lien #22 404 \n",
"3 Le Mucem chez vous, gardons le lien #22 404 \n",
"4 Le Mucem chez vous, gardons le lien #22 404 \n",
"... ... ... \n",
"6214803 dre_nov_2023 1318 \n",
"6214804 dre_nov_2023 1318 \n",
"6214805 dre_nov_2023 1318 \n",
"6214806 dre_nov_2023 1318 \n",
"6214807 dre_nov_2023 1318 \n",
"\n",
" campaign_sent_at \n",
"0 2021-03-27 23:00:00+00:00 \n",
"1 2021-03-27 23:00:00+00:00 \n",
"2 2021-03-27 23:00:00+00:00 \n",
"3 2021-03-27 23:00:00+00:00 \n",
"4 2021-03-27 23:00:00+00:00 \n",
"... ... \n",
"6214803 2023-10-23 09:31:17+00:00 \n",
"6214804 2023-10-23 09:31:17+00:00 \n",
"6214805 2023-10-23 09:31:17+00:00 \n",
"6214806 2023-10-23 09:31:17+00:00 \n",
"6214807 2023-10-23 09:31:17+00:00 \n",
"\n",
"[6214808 rows x 8 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_campaigns_information"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "e2c88552-b863-47a2-be23-8d2898fb28bc",
"metadata": {},
"outputs": [],
"source": [
"def campaigns_kpi(campaigns_information = None):\n",
" # Nombre de campagnes de mails\n",
" nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n",
" nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)\n",
" # Temps d'ouverture en min moyen \n",
" campaigns_information['time_to_open'] = campaigns_information['opened_at'] - campaigns_information['delivered_at']\n",
" time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()\n",
"\n",
" # Nombre de mail ouvert \n",
" opened_campaign = campaigns_information[['customer_id', 'campaign_name', 'opened_at']]\n",
" opened_campaign.dropna(subset=['opened_at'], inplace=True)\n",
" opened_campaign = opened_campaign[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()\n",
" opened_campaign.rename(columns = {'campaign_name' : 'nb_campaigns_opened' }, inplace = True)\n",
"\n",
" # Fusion des indicateurs\n",
" campaigns_reduced = pd.merge(nb_campaigns, opened_campaign, on = 'customer_id', how = 'left')\n",
" campaigns_reduced = pd.merge(campaigns_reduced, time_to_open, on = 'customer_id', how = 'left')\n",
"\n",
" # Remplir les NaN : nb_campaigns_opened\n",
" campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n",
"\n",
" # Remplir les NaT : time_to_open (??)\n",
"\n",
" return campaigns_reduced\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "24537647-bc29-4777-9848-ac4120a4aa60",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_50143/2679359833.py:11: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" opened_campaign.dropna(subset=['opened_at'], inplace=True)\n",
"/tmp/ipykernel_50143/2679359833.py:20: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
"\n",
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
"\n",
"\n",
" campaigns_reduced['nb_campaigns_opened'].fillna(0, inplace=True)\n"
]
}
],
"source": [
"df1_campaigns_kpi = campaigns_kpi(campaigns_information = df1_campaigns_information) "
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "6be2a9a6-056b-4e19-8c26-a18ba3df36b3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>222</td>\n",
" <td>124.0</td>\n",
" <td>1 days 00:28:30.169354838</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>7.0</td>\n",
" <td>1 days 04:31:01.428571428</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130467</th>\n",
" <td>1256097</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 02:11:15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130468</th>\n",
" <td>1256098</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130469</th>\n",
" <td>1256099</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130470</th>\n",
" <td>1256100</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130471</th>\n",
" <td>1256101</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>130472 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_campaigns nb_campaigns_opened \\\n",
"0 2 4 0.0 \n",
"1 3 222 124.0 \n",
"2 4 7 7.0 \n",
"3 5 4 0.0 \n",
"4 6 20 0.0 \n",
"... ... ... ... \n",
"130467 1256097 1 1.0 \n",
"130468 1256098 1 0.0 \n",
"130469 1256099 1 0.0 \n",
"130470 1256100 1 0.0 \n",
"130471 1256101 1 0.0 \n",
"\n",
" time_to_open \n",
"0 NaT \n",
"1 1 days 00:28:30.169354838 \n",
"2 1 days 04:31:01.428571428 \n",
"3 NaT \n",
"4 NaT \n",
"... ... \n",
"130467 0 days 02:11:15 \n",
"130468 NaT \n",
"130469 NaT \n",
"130470 NaT \n",
"130471 NaT \n",
"\n",
"[130472 rows x 4 columns]"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_campaigns_kpi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}