BDC-team-1/TP_merge_tables_clean.ipynb

1761 lines
62 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "8c56d518-3634-4492-b249-0d8ef33dd527",
"metadata": {},
"source": [
"## First steps : package importations, set up working environment and import data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "dede42d9-1262-45f7-bd7a-586ae800092a",
"metadata": {},
"outputs": [],
"source": [
"# importations\n",
"\n",
"import os \n",
"import s3fs\n",
"import pandas as pd\n",
"import re\n",
"from datetime import datetime, timezone, timedelta\n",
"import math\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6ce34b58-b5ba-4b54-ba4d-fc82ef01b09c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/1',\n",
" 'bdc2324-data/10',\n",
" 'bdc2324-data/101',\n",
" 'bdc2324-data/11',\n",
" 'bdc2324-data/12',\n",
" 'bdc2324-data/13',\n",
" 'bdc2324-data/14',\n",
" 'bdc2324-data/2',\n",
" 'bdc2324-data/3',\n",
" 'bdc2324-data/4',\n",
" 'bdc2324-data/5',\n",
" 'bdc2324-data/6',\n",
" 'bdc2324-data/7',\n",
" 'bdc2324-data/8',\n",
" 'bdc2324-data/9']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# bucket for accessing the data\n",
"\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"\n",
"fs = s3fs.S3FileSystem(client_kwargs = {\"endpoint_url\" : S3_ENDPOINT_URL})\n",
"BUCKET = \"bdc2324-data\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8eb13dd3-53c7-4a70-94a4-846168473aa1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['bdc2324-data/1/1campaign_stats.csv',\n",
" 'bdc2324-data/1/1campaigns.csv',\n",
" 'bdc2324-data/1/1categories.csv',\n",
" 'bdc2324-data/1/1countries.csv',\n",
" 'bdc2324-data/1/1currencies.csv',\n",
" 'bdc2324-data/1/1customer_target_mappings.csv',\n",
" 'bdc2324-data/1/1customersplus.csv',\n",
" 'bdc2324-data/1/1event_types.csv',\n",
" 'bdc2324-data/1/1events.csv',\n",
" 'bdc2324-data/1/1facilities.csv',\n",
" 'bdc2324-data/1/1link_stats.csv',\n",
" 'bdc2324-data/1/1pricing_formulas.csv',\n",
" 'bdc2324-data/1/1product_packs.csv',\n",
" 'bdc2324-data/1/1products.csv',\n",
" 'bdc2324-data/1/1products_groups.csv',\n",
" 'bdc2324-data/1/1purchases.csv',\n",
" 'bdc2324-data/1/1representation_category_capacities.csv',\n",
" 'bdc2324-data/1/1representations.csv',\n",
" 'bdc2324-data/1/1seasons.csv',\n",
" 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
" 'bdc2324-data/1/1suppliers.csv',\n",
" 'bdc2324-data/1/1tags.csv',\n",
" 'bdc2324-data/1/1target_types.csv',\n",
" 'bdc2324-data/1/1targets.csv',\n",
" 'bdc2324-data/1/1tickets.csv',\n",
" 'bdc2324-data/1/1type_of_categories.csv',\n",
" 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
" 'bdc2324-data/1/1type_ofs.csv']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"FILE_PATH_S3 = fs.ls(BUCKET)[0] # focus on the company number 1\n",
"files_path = fs.ls(FILE_PATH_S3)\n",
"files_path"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1ea66c4e-1307-4f19-836e-3104fba2ff41",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_487/2894332003.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in)\n"
]
}
],
"source": [
"# loop to create dataframes related to company 1\n",
"\n",
"client_number = files_path[0].split(\"/\")[1]\n",
"print(client_number)\n",
"df_prefix = \"df\" + str(client_number) + \"_\"\n",
"\n",
"for i in range(len(files_path)) :\n",
" current_path = files_path[i]\n",
" with fs.open(current_path, mode=\"rb\") as file_in:\n",
" df = pd.read_csv(file_in)\n",
" # the pattern of the name is df1xxx\n",
" nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n",
" globals()[nom_dataframe] = df"
]
},
{
"cell_type": "markdown",
"id": "13d70b2c-6580-4caf-b839-10f72b2e0b39",
"metadata": {},
"source": [
"## Target, target types and customer target mapping"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "4dbc7fea-ac3b-4348-83fb-dfb1a460f936",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>is_import</th>\n",
" <th>name</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>identifier</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>69</td>\n",
" <td>False</td>\n",
" <td>manual_dynamic_filter</td>\n",
" <td>2020-11-30 09:46:18.881030+01:00</td>\n",
" <td>2020-11-30 09:46:18.881030+01:00</td>\n",
" <td>e0f4b8693184850fefd6d2a38f10584e</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>48</td>\n",
" <td>True</td>\n",
" <td>manual_structure</td>\n",
" <td>2020-11-04 17:16:19.548275+01:00</td>\n",
" <td>2020-11-04 17:16:19.548275+01:00</td>\n",
" <td>382bca214204a2d3462f5ec2728d5d1e</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>manual_import</td>\n",
" <td>2020-10-14 18:37:40.521623+02:00</td>\n",
" <td>2020-10-14 18:37:40.521623+02:00</td>\n",
" <td>12213df2ce68a624e4c0070521437bac</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>56</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>2020-11-04 18:08:37.233486+01:00</td>\n",
" <td>2020-11-04 18:08:37.233486+01:00</td>\n",
" <td>fb27e81baa4debc6a4e1a8639c20e808</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id is_import name created_at \\\n",
"0 69 False manual_dynamic_filter 2020-11-30 09:46:18.881030+01:00 \n",
"1 48 True manual_structure 2020-11-04 17:16:19.548275+01:00 \n",
"2 1 True manual_import 2020-10-14 18:37:40.521623+02:00 \n",
"3 56 False manual_static_filter 2020-11-04 18:08:37.233486+01:00 \n",
"\n",
" updated_at identifier \n",
"0 2020-11-30 09:46:18.881030+01:00 e0f4b8693184850fefd6d2a38f10584e \n",
"1 2020-11-04 17:16:19.548275+01:00 382bca214204a2d3462f5ec2728d5d1e \n",
"2 2020-10-14 18:37:40.521623+02:00 12213df2ce68a624e4c0070521437bac \n",
"3 2020-11-04 18:08:37.233486+01:00 fb27e81baa4debc6a4e1a8639c20e808 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 1. target types\n",
"df1_target_types.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0e9f5dcb-0dc3-4052-b866-e5c4cb954a1f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>target_type_id</th>\n",
" <th>name</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>217</td>\n",
" <td>56</td>\n",
" <td>DDCP PROMO Art contemporain - salle de chauffe...</td>\n",
" <td>2021-01-04 15:00:05.401899+01:00</td>\n",
" <td>2021-03-02 18:38:19.025969+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>701</td>\n",
" <td>56</td>\n",
" <td>consentement optin scolaires</td>\n",
" <td>2021-12-21 16:03:59.840785+01:00</td>\n",
" <td>2022-02-18 17:23:44.761388+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>134</td>\n",
" <td>56</td>\n",
" <td>DDCP Newsletter jeune public</td>\n",
" <td>2020-11-10 09:43:19.667471+01:00</td>\n",
" <td>2021-03-02 18:38:19.052304+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>700</td>\n",
" <td>56</td>\n",
" <td>consentement optout scolaires</td>\n",
" <td>2021-12-21 16:01:57.524946+01:00</td>\n",
" <td>2022-02-18 17:23:44.807776+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>964</td>\n",
" <td>56</td>\n",
" <td>DDCP achat billet nbr dep 19052021</td>\n",
" <td>2022-04-14 10:58:17.142834+02:00</td>\n",
" <td>2022-04-14 10:58:23.677264+02:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id target_type_id name \\\n",
"0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
"1 701 56 consentement optin scolaires \n",
"2 134 56 DDCP Newsletter jeune public \n",
"3 700 56 consentement optout scolaires \n",
"4 964 56 DDCP achat billet nbr dep 19052021 \n",
"\n",
" created_at updated_at \n",
"0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n",
"1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n",
"2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n",
"3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n",
"4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2. targets\n",
"df1_targets.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c5c62302-370a-462f-bd79-eac31593f65c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>name</th>\n",
" <th>extra_field</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1184824</td>\n",
" <td>645400</td>\n",
" <td>130</td>\n",
" <td>2021-09-23 09:35:47.617275+02:00</td>\n",
" <td>2021-09-23 09:35:47.617275+02:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1184825</td>\n",
" <td>645400</td>\n",
" <td>345</td>\n",
" <td>2021-09-23 09:35:47.668846+02:00</td>\n",
" <td>2021-09-23 09:35:47.668846+02:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1184828</td>\n",
" <td>645402</td>\n",
" <td>126</td>\n",
" <td>2021-09-23 12:02:51.253269+02:00</td>\n",
" <td>2021-09-23 12:02:51.253269+02:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1184829</td>\n",
" <td>645403</td>\n",
" <td>126</td>\n",
" <td>2021-09-23 12:20:47.394480+02:00</td>\n",
" <td>2021-09-23 12:20:47.394480+02:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1295770</td>\n",
" <td>647301</td>\n",
" <td>346</td>\n",
" <td>2021-09-28 16:02:29.372608+02:00</td>\n",
" <td>2021-09-28 16:02:29.372608+02:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id customer_id target_id created_at \\\n",
"0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n",
"1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n",
"2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n",
"3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n",
"4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n",
"\n",
" updated_at name extra_field \n",
"0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n",
"1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n",
"2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n",
"3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n",
"4 2021-09-28 16:02:29.372608+02:00 NaN NaN "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 3. customer target mapping\n",
"\n",
"df1_customer_target_mappings.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1a87cebf-c1dd-408d-a523-26633419da1e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>target_type_id</th>\n",
" <th>name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>217</td>\n",
" <td>56</td>\n",
" <td>DDCP PROMO Art contemporain - salle de chauffe...</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>701</td>\n",
" <td>56</td>\n",
" <td>consentement optin scolaires</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>134</td>\n",
" <td>56</td>\n",
" <td>DDCP Newsletter jeune public</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>700</td>\n",
" <td>56</td>\n",
" <td>consentement optout scolaires</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>964</td>\n",
" <td>56</td>\n",
" <td>DDCP achat billet nbr dep 19052021</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id target_type_id name \\\n",
"0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
"1 701 56 consentement optin scolaires \n",
"2 134 56 DDCP Newsletter jeune public \n",
"3 700 56 consentement optout scolaires \n",
"4 964 56 DDCP achat billet nbr dep 19052021 \n",
"\n",
" target_type_is_import target_type_name \n",
"0 False manual_static_filter \n",
"1 False manual_static_filter \n",
"2 False manual_static_filter \n",
"3 False manual_static_filter \n",
"4 False manual_static_filter "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 4.1. merge target with target type\n",
"\n",
"df1_targets_full = pd.merge(df1_targets[[\"id\", \"target_type_id\", \"name\"]], df1_target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\"), left_on='target_type_id', right_on='target_type_id', how='left')\n",
"df1_targets_full.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "d48c1fff-73c2-4e75-8799-da2b80694be7",
"metadata": {},
"outputs": [],
"source": [
"# 4.2. merge df1_customer_target_mappings with df1_targets_full\n",
"\n",
"# change the position of the column target type id\n",
"\n",
"# Spécifiez le nom de la colonne à déplacer et la colonne après laquelle vous souhaitez la placer\n",
"column_to_move = 'target_type_id'\n",
"\n",
"# Récupérez l'index de la colonne de référence\n",
"reference_index = df1_targets_full.columns.get_loc(\"target_type_name\")\n",
"\n",
"# Créez une copie de la colonne que vous voulez déplacer\n",
"column_copy = df1_targets_full[column_to_move].copy()\n",
"\n",
"# Supprimez la colonne d'origine\n",
"df1_targets_full = df1_targets_full.drop(column_to_move, axis=1)\n",
"\n",
"# Utilisez la méthode insert pour déplacer la colonne à la nouvelle position\n",
"df1_targets_full.insert(reference_index - 1, column_to_move, column_copy)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "a874514a-c7dc-42d4-a440-dedd3a270e24",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_id</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>217</td>\n",
" <td>DDCP PROMO Art contemporain - salle de chauffe...</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>701</td>\n",
" <td>consentement optin scolaires</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>134</td>\n",
" <td>DDCP Newsletter jeune public</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>700</td>\n",
" <td>consentement optout scolaires</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>964</td>\n",
" <td>DDCP achat billet nbr dep 19052021</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" target_id target_name \\\n",
"0 217 DDCP PROMO Art contemporain - salle de chauffe... \n",
"1 701 consentement optin scolaires \n",
"2 134 DDCP Newsletter jeune public \n",
"3 700 consentement optout scolaires \n",
"4 964 DDCP achat billet nbr dep 19052021 \n",
"\n",
" target_type_is_import target_type_id target_type_name \n",
"0 False 56 manual_static_filter \n",
"1 False 56 manual_static_filter \n",
"2 False 56 manual_static_filter \n",
"3 False 56 manual_static_filter \n",
"4 False 56 manual_static_filter "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_targets_full = df1_targets_full.rename(columns=lambda x: 'target_' + x if not x.startswith('target_') else x)\n",
"df1_targets_full.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "0db0172a-5119-4b7f-97f8-36fc5c985205",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_id</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1184824</td>\n",
" <td>645400</td>\n",
" <td>130</td>\n",
" <td>DDCP PROMO Réseau livres</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1184825</td>\n",
" <td>645400</td>\n",
" <td>345</td>\n",
" <td>Inscrits NL générale site web</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1184828</td>\n",
" <td>645402</td>\n",
" <td>126</td>\n",
" <td>DDCP PROMO Art contemporain</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1184829</td>\n",
" <td>645403</td>\n",
" <td>126</td>\n",
" <td>DDCP PROMO Art contemporain</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1295770</td>\n",
" <td>647301</td>\n",
" <td>346</td>\n",
" <td>Votre première liste</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>768019</th>\n",
" <td>2737545</td>\n",
" <td>666983</td>\n",
" <td>345</td>\n",
" <td>Inscrits NL générale site web</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>768020</th>\n",
" <td>2737546</td>\n",
" <td>666983</td>\n",
" <td>346</td>\n",
" <td>Votre première liste</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>768021</th>\n",
" <td>2737575</td>\n",
" <td>666986</td>\n",
" <td>346</td>\n",
" <td>Votre première liste</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>768022</th>\n",
" <td>2737576</td>\n",
" <td>666987</td>\n",
" <td>345</td>\n",
" <td>Inscrits NL générale site web</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>768023</th>\n",
" <td>2737577</td>\n",
" <td>666987</td>\n",
" <td>346</td>\n",
" <td>Votre première liste</td>\n",
" <td>False</td>\n",
" <td>56</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>768024 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id target_id target_name \\\n",
"0 1184824 645400 130 DDCP PROMO Réseau livres \n",
"1 1184825 645400 345 Inscrits NL générale site web \n",
"2 1184828 645402 126 DDCP PROMO Art contemporain \n",
"3 1184829 645403 126 DDCP PROMO Art contemporain \n",
"4 1295770 647301 346 Votre première liste \n",
"... ... ... ... ... \n",
"768019 2737545 666983 345 Inscrits NL générale site web \n",
"768020 2737546 666983 346 Votre première liste \n",
"768021 2737575 666986 346 Votre première liste \n",
"768022 2737576 666987 345 Inscrits NL générale site web \n",
"768023 2737577 666987 346 Votre première liste \n",
"\n",
" target_type_is_import target_type_id target_type_name \n",
"0 False 56 manual_static_filter \n",
"1 False 56 manual_static_filter \n",
"2 False 56 manual_static_filter \n",
"3 False 56 manual_static_filter \n",
"4 False 56 manual_static_filter \n",
"... ... ... ... \n",
"768019 False 56 manual_static_filter \n",
"768020 False 56 manual_static_filter \n",
"768021 False 56 manual_static_filter \n",
"768022 False 56 manual_static_filter \n",
"768023 False 56 manual_static_filter \n",
"\n",
"[768024 rows x 7 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# finally, merge\n",
"\n",
"# pour df1_customer_target_mappings on enlève les colonnes name, extra_field, et updated_at (valeur égale à created_at)\n",
"# note : by making a left join on df1_customer_target_mappings, we suppress 2 targets that have no customer associated\n",
"\n",
"df1_customer_targets = pd.merge(df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]], \n",
" df1_targets_full, left_on='target_id', right_on='target_id', how='left')\n",
"df1_customer_targets"
]
},
{
"cell_type": "markdown",
"id": "52326267-c5ba-4e21-b8ab-4b4c62de75d1",
"metadata": {},
"source": [
"## Campaign stats, campaigns"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "06dca910-5c07-4ee1-bbf2-3b11b48ba1f2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>service_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>process_id</th>\n",
" <th>report_url</th>\n",
" <th>category</th>\n",
" <th>to_be_synced</th>\n",
" <th>identifier</th>\n",
" <th>sent_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1319613</td>\n",
" <td>newsletter enseignants janvier 2022</td>\n",
" <td>721</td>\n",
" <td>2022-01-14 16:06:42.586321+01:00</td>\n",
" <td>2022-02-03 14:17:27.112963+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>aba3b6fd5d186d28e06ff97135cade7f</td>\n",
" <td>2022-01-14 00:00:00+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1319586</td>\n",
" <td>lsf_janvier_2022</td>\n",
" <td>717</td>\n",
" <td>2022-01-07 11:30:35.315895+01:00</td>\n",
" <td>2022-02-03 14:17:27.116171+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>788d986905533aba051261497ecffcbb</td>\n",
" <td>2022-01-07 00:00:00+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1319282</td>\n",
" <td>Invitation à déjeuner au Mucem | Vernissage « ...</td>\n",
" <td>591</td>\n",
" <td>2021-09-28 12:50:24.448752+02:00</td>\n",
" <td>2022-02-03 14:17:27.119582+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>3493894fa4ea036cfc6433c3e2ee63b0</td>\n",
" <td>2021-09-28 00:00:00+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1319283</td>\n",
" <td>Vacances de la Toussaint - centres des loisirs</td>\n",
" <td>590</td>\n",
" <td>2021-09-28 18:01:04.692073+02:00</td>\n",
" <td>2022-02-03 14:17:27.124408+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>08b255a5d42b89b0585260b6f2360bdd</td>\n",
" <td>2021-09-28 00:00:00+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1319636</td>\n",
" <td>ddcp_promo_md_livemag</td>\n",
" <td>730</td>\n",
" <td>2022-01-27 18:00:41.053069+01:00</td>\n",
" <td>2022-02-03 14:17:27.127607+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>False</td>\n",
" <td>d5cfead94f5350c12c322b5b664544c1</td>\n",
" <td>2022-01-27 00:00:00+01:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id name service_id \\\n",
"0 1319613 newsletter enseignants janvier 2022 721 \n",
"1 1319586 lsf_janvier_2022 717 \n",
"2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n",
"3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n",
"4 1319636 ddcp_promo_md_livemag 730 \n",
"\n",
" created_at updated_at \\\n",
"0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n",
"1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n",
"2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n",
"3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n",
"4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n",
"\n",
" process_id report_url category to_be_synced \\\n",
"0 NaN NaN 0.0 False \n",
"1 NaN NaN 0.0 False \n",
"2 NaN NaN 0.0 False \n",
"3 NaN NaN 0.0 False \n",
"4 NaN NaN 0.0 False \n",
"\n",
" identifier sent_at \n",
"0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n",
"1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n",
"2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n",
"3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n",
"4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 "
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 1. campaigns\n",
"df1_campaigns.head()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "83eaa447-9144-41ed-9e26-f0f23799a8fd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>campaign_id</th>\n",
" <th>customer_id</th>\n",
" <th>opened_at</th>\n",
" <th>sent_at</th>\n",
" <th>delivered_at</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19793</td>\n",
" <td>58</td>\n",
" <td>112597</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-28 18:01:09+02:00</td>\n",
" <td>2021-03-28 18:24:18+02:00</td>\n",
" <td>2021-03-28 18:34:20.616136+02:00</td>\n",
" <td>2022-04-15 22:52:04.397693+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>14211</td>\n",
" <td>58</td>\n",
" <td>113666</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-28 18:01:09+02:00</td>\n",
" <td>2021-03-28 18:21:02+02:00</td>\n",
" <td>2021-03-28 18:21:04.297213+02:00</td>\n",
" <td>2022-04-15 22:52:04.397693+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13150</td>\n",
" <td>58</td>\n",
" <td>280561</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-28 18:00:59+02:00</td>\n",
" <td>2021-03-28 18:08:45+02:00</td>\n",
" <td>2021-03-28 18:18:49.991042+02:00</td>\n",
" <td>2022-04-15 22:52:04.397693+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7073</td>\n",
" <td>58</td>\n",
" <td>101007</td>\n",
" <td>2021-03-28 20:11:06+02:00</td>\n",
" <td>2021-03-28 18:00:59+02:00</td>\n",
" <td>2021-03-28 18:09:47+02:00</td>\n",
" <td>2021-03-28 18:09:50.915354+02:00</td>\n",
" <td>2022-04-15 22:52:04.397693+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5175</td>\n",
" <td>58</td>\n",
" <td>103972</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-28 18:01:06+02:00</td>\n",
" <td>2021-03-28 18:05:03+02:00</td>\n",
" <td>2021-03-28 18:05:08.507398+02:00</td>\n",
" <td>2022-04-15 22:52:04.397693+02:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id campaign_id customer_id opened_at \\\n",
"0 19793 58 112597 NaN \n",
"1 14211 58 113666 NaN \n",
"2 13150 58 280561 NaN \n",
"3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
"4 5175 58 103972 NaN \n",
"\n",
" sent_at delivered_at \\\n",
"0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
"1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
"2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
"3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
"4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
"\n",
" created_at updated_at \n",
"0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n",
"1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n",
"2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n",
"3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n",
"4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2. campaigns stats\n",
"df1_campaign_stats.head()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "7f25eb1b-e7c8-4715-bc30-7ac29a7181ac",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>campaign_id</th>\n",
" <th>customer_id</th>\n",
" <th>opened_at</th>\n",
" <th>sent_at</th>\n",
" <th>delivered_at</th>\n",
" <th>campaign_name</th>\n",
" <th>campaign_service_id</th>\n",
" <th>campaign_sent_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19793</td>\n",
" <td>58</td>\n",
" <td>112597</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-28 18:01:09+02:00</td>\n",
" <td>2021-03-28 18:24:18+02:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-28 00:00:00+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>14211</td>\n",
" <td>58</td>\n",
" <td>113666</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-28 18:01:09+02:00</td>\n",
" <td>2021-03-28 18:21:02+02:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-28 00:00:00+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13150</td>\n",
" <td>58</td>\n",
" <td>280561</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-28 18:00:59+02:00</td>\n",
" <td>2021-03-28 18:08:45+02:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-28 00:00:00+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7073</td>\n",
" <td>58</td>\n",
" <td>101007</td>\n",
" <td>2021-03-28 20:11:06+02:00</td>\n",
" <td>2021-03-28 18:00:59+02:00</td>\n",
" <td>2021-03-28 18:09:47+02:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-28 00:00:00+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5175</td>\n",
" <td>58</td>\n",
" <td>103972</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-28 18:01:06+02:00</td>\n",
" <td>2021-03-28 18:05:03+02:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #22</td>\n",
" <td>404</td>\n",
" <td>2021-03-28 00:00:00+01:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id campaign_id customer_id opened_at \\\n",
"0 19793 58 112597 NaN \n",
"1 14211 58 113666 NaN \n",
"2 13150 58 280561 NaN \n",
"3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
"4 5175 58 103972 NaN \n",
"\n",
" sent_at delivered_at \\\n",
"0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
"1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
"2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
"3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
"4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
"\n",
" campaign_name campaign_service_id \\\n",
"0 Le Mucem chez vous, gardons le lien #22 404 \n",
"1 Le Mucem chez vous, gardons le lien #22 404 \n",
"2 Le Mucem chez vous, gardons le lien #22 404 \n",
"3 Le Mucem chez vous, gardons le lien #22 404 \n",
"4 Le Mucem chez vous, gardons le lien #22 404 \n",
"\n",
" campaign_sent_at \n",
"0 2021-03-28 00:00:00+01:00 \n",
"1 2021-03-28 00:00:00+01:00 \n",
"2 2021-03-28 00:00:00+01:00 \n",
"3 2021-03-28 00:00:00+01:00 \n",
"4 2021-03-28 00:00:00+01:00 "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 3. merge campaigns and campaigns stats\n",
"\n",
"df1_campaigns_full = pd.merge(df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]], \n",
" df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\"),\n",
" on = \"campaign_id\", how = \"left\")\n",
"df1_campaigns_full.head()"
]
},
{
"cell_type": "markdown",
"id": "87fc686a-4a80-40ab-9987-20d2774f3055",
"metadata": {},
"source": [
"## Link stats"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "2f9df2d0-8a23-496b-8e92-617285f64530",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>clicked_at</th>\n",
" <th>link_id</th>\n",
" <th>customer_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2021-03-26 16:30:36+01:00</td>\n",
" <td>1</td>\n",
" <td>284033</td>\n",
" <td>2021-03-26 15:30:37.050161+01:00</td>\n",
" <td>2021-03-26 15:30:37.050161+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>2021-03-26 17:16:34+01:00</td>\n",
" <td>2</td>\n",
" <td>119768</td>\n",
" <td>2021-03-26 16:16:34.950871+01:00</td>\n",
" <td>2021-03-26 16:16:34.950871+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>272</td>\n",
" <td>2021-03-28 20:03:32+02:00</td>\n",
" <td>42</td>\n",
" <td>113105</td>\n",
" <td>2021-03-28 18:03:32.736394+02:00</td>\n",
" <td>2021-03-28 18:03:32.736394+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>2021-03-26 17:43:19+01:00</td>\n",
" <td>3</td>\n",
" <td>272280</td>\n",
" <td>2021-03-26 16:43:19.338321+01:00</td>\n",
" <td>2021-03-26 16:43:19.338321+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>2021-03-26 17:46:00+01:00</td>\n",
" <td>3</td>\n",
" <td>105095</td>\n",
" <td>2021-03-26 16:46:00.502945+01:00</td>\n",
" <td>2021-03-26 16:46:00.502945+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151046</th>\n",
" <td>243553</td>\n",
" <td>2023-11-09 16:34:27+01:00</td>\n",
" <td>14666</td>\n",
" <td>998</td>\n",
" <td>2023-11-09 15:34:29.425425+01:00</td>\n",
" <td>2023-11-09 15:34:29.425425+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151047</th>\n",
" <td>243554</td>\n",
" <td>2023-11-09 16:34:35+01:00</td>\n",
" <td>14670</td>\n",
" <td>998</td>\n",
" <td>2023-11-09 15:34:37.505505+01:00</td>\n",
" <td>2023-11-09 15:34:37.505505+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151048</th>\n",
" <td>243559</td>\n",
" <td>2023-11-09 16:51:15+01:00</td>\n",
" <td>14686</td>\n",
" <td>82923</td>\n",
" <td>2023-11-09 15:51:17.439518+01:00</td>\n",
" <td>2023-11-09 15:51:17.439518+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151049</th>\n",
" <td>243561</td>\n",
" <td>2023-11-09 16:59:42+01:00</td>\n",
" <td>14677</td>\n",
" <td>82923</td>\n",
" <td>2023-11-09 15:59:44.030922+01:00</td>\n",
" <td>2023-11-09 15:59:44.030922+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151050</th>\n",
" <td>243564</td>\n",
" <td>2023-11-09 17:16:41+01:00</td>\n",
" <td>14691</td>\n",
" <td>1254355</td>\n",
" <td>2023-11-09 16:16:43.012932+01:00</td>\n",
" <td>2023-11-09 16:16:43.012932+01:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>151051 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" id clicked_at link_id customer_id \\\n",
"0 1 2021-03-26 16:30:36+01:00 1 284033 \n",
"1 2 2021-03-26 17:16:34+01:00 2 119768 \n",
"2 272 2021-03-28 20:03:32+02:00 42 113105 \n",
"3 4 2021-03-26 17:43:19+01:00 3 272280 \n",
"4 5 2021-03-26 17:46:00+01:00 3 105095 \n",
"... ... ... ... ... \n",
"151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n",
"151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n",
"151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n",
"151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n",
"151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n",
"\n",
" created_at updated_at \n",
"0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n",
"1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n",
"2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n",
"3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n",
"4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n",
"... ... ... \n",
"151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n",
"151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n",
"151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n",
"151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n",
"151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n",
"\n",
"[151051 rows x 6 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_link_stats"
]
},
{
"cell_type": "markdown",
"id": "aad6fb14-9694-4c1e-9885-1ebe0f38afe3",
"metadata": {},
"source": [
"## Bonus : peut-on lier link stats et campaign ? Non, les dates à laquelle le client clique sur le lie/ouvre la campagne ne permettent pas de faire coincider link_id et campaign_id"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "8be7c974-72c9-4e31-a874-d7e5d2719fb3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>clicked_at</th>\n",
" <th>link_id</th>\n",
" <th>customer_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2021-03-26 16:30:36+01:00</td>\n",
" <td>1</td>\n",
" <td>284033</td>\n",
" <td>2021-03-26 15:30:37.050161+01:00</td>\n",
" <td>2021-03-26 15:30:37.050161+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7526</th>\n",
" <td>14018</td>\n",
" <td>2021-05-10 18:07:59+02:00</td>\n",
" <td>312</td>\n",
" <td>284033</td>\n",
" <td>2021-05-10 16:08:00.541322+02:00</td>\n",
" <td>2021-05-10 16:08:00.541322+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96848</th>\n",
" <td>133449</td>\n",
" <td>2021-03-25 08:42:22+01:00</td>\n",
" <td>4</td>\n",
" <td>284033</td>\n",
" <td>2022-04-15 22:51:01.994343+02:00</td>\n",
" <td>2022-04-15 22:51:01.994343+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115728</th>\n",
" <td>207544</td>\n",
" <td>2022-08-23 10:33:04+02:00</td>\n",
" <td>12365</td>\n",
" <td>284033</td>\n",
" <td>2022-08-23 08:33:06.498908+02:00</td>\n",
" <td>2022-08-23 08:33:06.498908+02:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id clicked_at link_id customer_id \\\n",
"0 1 2021-03-26 16:30:36+01:00 1 284033 \n",
"7526 14018 2021-05-10 18:07:59+02:00 312 284033 \n",
"96848 133449 2021-03-25 08:42:22+01:00 4 284033 \n",
"115728 207544 2022-08-23 10:33:04+02:00 12365 284033 \n",
"\n",
" created_at updated_at \n",
"0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n",
"7526 2021-05-10 16:08:00.541322+02:00 2021-05-10 16:08:00.541322+02:00 \n",
"96848 2022-04-15 22:51:01.994343+02:00 2022-04-15 22:51:01.994343+02:00 \n",
"115728 2022-08-23 08:33:06.498908+02:00 2022-08-23 08:33:06.498908+02:00 "
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_link_stats[df1_link_stats[\"customer_id\"] == 284033]"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "902e9947-58e1-44f4-b634-1239b0e4df02",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>campaign_id</th>\n",
" <th>customer_id</th>\n",
" <th>opened_at</th>\n",
" <th>sent_at</th>\n",
" <th>delivered_at</th>\n",
" <th>campaign_name</th>\n",
" <th>campaign_service_id</th>\n",
" <th>campaign_sent_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4030643</th>\n",
" <td>4036376</td>\n",
" <td>4</td>\n",
" <td>284033</td>\n",
" <td>NaN</td>\n",
" <td>2021-03-21 18:01:22+01:00</td>\n",
" <td>2021-03-21 18:08:04+01:00</td>\n",
" <td>Le Mucem chez vous, gardons le lien #21</td>\n",
" <td>398</td>\n",
" <td>2021-03-21 00:00:00+01:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id campaign_id customer_id opened_at \\\n",
"4030643 4036376 4 284033 NaN \n",
"\n",
" sent_at delivered_at \\\n",
"4030643 2021-03-21 18:01:22+01:00 2021-03-21 18:08:04+01:00 \n",
"\n",
" campaign_name campaign_service_id \\\n",
"4030643 Le Mucem chez vous, gardons le lien #21 398 \n",
"\n",
" campaign_sent_at \n",
"4030643 2021-03-21 00:00:00+01:00 "
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1_campaigns_full[ (df1_campaigns_full[\"customer_id\"] == 284033) & (df1_campaigns_full[\"campaign_id\"] == 4)]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}