diff --git a/TP_merge_tables_clean.ipynb b/TP_merge_tables_clean.ipynb
new file mode 100644
index 0000000..66b5228
--- /dev/null
+++ b/TP_merge_tables_clean.ipynb
@@ -0,0 +1,1760 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "8c56d518-3634-4492-b249-0d8ef33dd527",
+ "metadata": {},
+ "source": [
+ "## First steps: import packages, set up the working environment and load the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "dede42d9-1262-45f7-bd7a-586ae800092a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# importations\n",
+ "\n",
+ "import os \n",
+ "import s3fs\n",
+ "import pandas as pd\n",
+ "import re\n",
+ "from datetime import datetime, timezone, timedelta\n",
+ "import math\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "6ce34b58-b5ba-4b54-ba4d-fc82ef01b09c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data/1',\n",
+ " 'bdc2324-data/10',\n",
+ " 'bdc2324-data/101',\n",
+ " 'bdc2324-data/11',\n",
+ " 'bdc2324-data/12',\n",
+ " 'bdc2324-data/13',\n",
+ " 'bdc2324-data/14',\n",
+ " 'bdc2324-data/2',\n",
+ " 'bdc2324-data/3',\n",
+ " 'bdc2324-data/4',\n",
+ " 'bdc2324-data/5',\n",
+ " 'bdc2324-data/6',\n",
+ " 'bdc2324-data/7',\n",
+ " 'bdc2324-data/8',\n",
+ " 'bdc2324-data/9']"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# S3 filesystem handle and name of the bucket that stores the raw data\n",
+ "BUCKET = \"bdc2324-data\"\n",
+ "S3_ENDPOINT_URL = \"https://{}\".format(os.environ[\"AWS_S3_ENDPOINT\"])\n",
+ "\n",
+ "fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))\n",
+ "# last expression of the cell: list the top-level entries of the bucket\n",
+ "fs.ls(BUCKET)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8eb13dd3-53c7-4a70-94a4-846168473aa1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data/1/1campaign_stats.csv',\n",
+ " 'bdc2324-data/1/1campaigns.csv',\n",
+ " 'bdc2324-data/1/1categories.csv',\n",
+ " 'bdc2324-data/1/1countries.csv',\n",
+ " 'bdc2324-data/1/1currencies.csv',\n",
+ " 'bdc2324-data/1/1customer_target_mappings.csv',\n",
+ " 'bdc2324-data/1/1customersplus.csv',\n",
+ " 'bdc2324-data/1/1event_types.csv',\n",
+ " 'bdc2324-data/1/1events.csv',\n",
+ " 'bdc2324-data/1/1facilities.csv',\n",
+ " 'bdc2324-data/1/1link_stats.csv',\n",
+ " 'bdc2324-data/1/1pricing_formulas.csv',\n",
+ " 'bdc2324-data/1/1product_packs.csv',\n",
+ " 'bdc2324-data/1/1products.csv',\n",
+ " 'bdc2324-data/1/1products_groups.csv',\n",
+ " 'bdc2324-data/1/1purchases.csv',\n",
+ " 'bdc2324-data/1/1representation_category_capacities.csv',\n",
+ " 'bdc2324-data/1/1representations.csv',\n",
+ " 'bdc2324-data/1/1seasons.csv',\n",
+ " 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
+ " 'bdc2324-data/1/1suppliers.csv',\n",
+ " 'bdc2324-data/1/1tags.csv',\n",
+ " 'bdc2324-data/1/1target_types.csv',\n",
+ " 'bdc2324-data/1/1targets.csv',\n",
+ " 'bdc2324-data/1/1tickets.csv',\n",
+ " 'bdc2324-data/1/1type_of_categories.csv',\n",
+ " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
+ " 'bdc2324-data/1/1type_ofs.csv']"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "FILE_PATH_S3 = fs.ls(BUCKET)[0] # focus on the company number 1\n",
+ "files_path = fs.ls(FILE_PATH_S3)\n",
+ "files_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "1ea66c4e-1307-4f19-836e-3104fba2ff41",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_487/2894332003.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " df = pd.read_csv(file_in)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# load every CSV of company 1 into a global dataframe named df<company>_<table>\n",
+ "client_number = files_path[0].split(\"/\")[1]\n",
+ "print(client_number)\n",
+ "df_prefix = \"df\" + str(client_number) + \"_\"\n",
+ "table_name_pattern = re.compile(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$')\n",
+ "for current_path in files_path:\n",
+ "    name_match = table_name_pattern.search(current_path)\n",
+ "    if name_match is None:  # not a <company>/<company><table>.csv path: no dataframe name can be built\n",
+ "        print(\"skipped (unexpected file name):\", current_path)\n",
+ "        continue\n",
+ "    with fs.open(current_path, mode=\"rb\") as file_in:\n",
+ "        df = pd.read_csv(file_in, low_memory=False)  # whole-column dtype inference: no DtypeWarning on mixed columns\n",
+ "    globals()[df_prefix + name_match.group(3)] = df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "13d70b2c-6580-4caf-b839-10f72b2e0b39",
+ "metadata": {},
+ "source": [
+ "## Targets, target types and customer-target mappings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "4dbc7fea-ac3b-4348-83fb-dfb1a460f936",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " is_import | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 69 | \n",
+ " False | \n",
+ " manual_dynamic_filter | \n",
+ " 2020-11-30 09:46:18.881030+01:00 | \n",
+ " 2020-11-30 09:46:18.881030+01:00 | \n",
+ " e0f4b8693184850fefd6d2a38f10584e | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 48 | \n",
+ " True | \n",
+ " manual_structure | \n",
+ " 2020-11-04 17:16:19.548275+01:00 | \n",
+ " 2020-11-04 17:16:19.548275+01:00 | \n",
+ " 382bca214204a2d3462f5ec2728d5d1e | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " True | \n",
+ " manual_import | \n",
+ " 2020-10-14 18:37:40.521623+02:00 | \n",
+ " 2020-10-14 18:37:40.521623+02:00 | \n",
+ " 12213df2ce68a624e4c0070521437bac | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 56 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " 2020-11-04 18:08:37.233486+01:00 | \n",
+ " 2020-11-04 18:08:37.233486+01:00 | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id is_import name created_at \\\n",
+ "0 69 False manual_dynamic_filter 2020-11-30 09:46:18.881030+01:00 \n",
+ "1 48 True manual_structure 2020-11-04 17:16:19.548275+01:00 \n",
+ "2 1 True manual_import 2020-10-14 18:37:40.521623+02:00 \n",
+ "3 56 False manual_static_filter 2020-11-04 18:08:37.233486+01:00 \n",
+ "\n",
+ " updated_at identifier \n",
+ "0 2020-11-30 09:46:18.881030+01:00 e0f4b8693184850fefd6d2a38f10584e \n",
+ "1 2020-11-04 17:16:19.548275+01:00 382bca214204a2d3462f5ec2728d5d1e \n",
+ "2 2020-10-14 18:37:40.521623+02:00 12213df2ce68a624e4c0070521437bac \n",
+ "3 2020-11-04 18:08:37.233486+01:00 fb27e81baa4debc6a4e1a8639c20e808 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1. target types: preview of the first rows\n",
+ "df1_target_types.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "0e9f5dcb-0dc3-4052-b866-e5c4cb954a1f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " target_type_id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " 56 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " 2021-01-04 15:00:05.401899+01:00 | \n",
+ " 2021-03-02 18:38:19.025969+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " 56 | \n",
+ " consentement optin scolaires | \n",
+ " 2021-12-21 16:03:59.840785+01:00 | \n",
+ " 2022-02-18 17:23:44.761388+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " 56 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " 2020-11-10 09:43:19.667471+01:00 | \n",
+ " 2021-03-02 18:38:19.052304+01:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " 56 | \n",
+ " consentement optout scolaires | \n",
+ " 2021-12-21 16:01:57.524946+01:00 | \n",
+ " 2022-02-18 17:23:44.807776+01:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " 56 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " 2022-04-14 10:58:17.142834+02:00 | \n",
+ " 2022-04-14 10:58:23.677264+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id target_type_id name \\\n",
+ "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 56 consentement optin scolaires \n",
+ "2 134 56 DDCP Newsletter jeune public \n",
+ "3 700 56 consentement optout scolaires \n",
+ "4 964 56 DDCP achat billet nbr dep 19052021 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n",
+ "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n",
+ "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n",
+ "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n",
+ "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 2. targets: preview of the first rows\n",
+ "df1_targets.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "c5c62302-370a-462f-bd79-eac31593f65c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " name | \n",
+ " extra_field | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at \\\n",
+ "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n",
+ "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n",
+ "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n",
+ "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n",
+ "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n",
+ "\n",
+ " updated_at name extra_field \n",
+ "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n",
+ "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n",
+ "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n",
+ "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n",
+ "4 2021-09-28 16:02:29.372608+02:00 NaN NaN "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3. customer target mapping: preview of the first rows\n",
+ "\n",
+ "df1_customer_target_mappings.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "1a87cebf-c1dd-408d-a523-26633419da1e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " target_type_id | \n",
+ " name | \n",
+ " target_type_is_import | \n",
+ " target_type_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " 56 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " 56 | \n",
+ " consentement optin scolaires | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " 56 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " 56 | \n",
+ " consentement optout scolaires | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " 56 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id target_type_id name \\\n",
+ "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 56 consentement optin scolaires \n",
+ "2 134 56 DDCP Newsletter jeune public \n",
+ "3 700 56 consentement optout scolaires \n",
+ "4 964 56 DDCP achat billet nbr dep 19052021 \n",
+ "\n",
+ " target_type_is_import target_type_name \n",
+ "0 False manual_static_filter \n",
+ "1 False manual_static_filter \n",
+ "2 False manual_static_filter \n",
+ "3 False manual_static_filter \n",
+ "4 False manual_static_filter "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 4.1. attach to each target the attributes of its target type (left join on target_type_id)\n",
+ "target_type_cols = df1_target_types.loc[:, [\"id\", \"is_import\", \"name\"]].add_prefix(\"target_type_\")\n",
+ "df1_targets_full = df1_targets.loc[:, [\"id\", \"target_type_id\", \"name\"]].merge(target_type_cols, how=\"left\", on=\"target_type_id\")\n",
+ "df1_targets_full.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "d48c1fff-73c2-4e75-8799-da2b80694be7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 4.2. prepare df1_targets_full for the merge with df1_customer_target_mappings\n",
+ "\n",
+ "# reorder the columns: move target_type_id next to target_type_name\n",
+ "\n",
+ "# name of the column to move\n",
+ "column_to_move = 'target_type_id'\n",
+ "\n",
+ "# position of the reference column in the current column order\n",
+ "reference_index = df1_targets_full.columns.get_loc(\"target_type_name\")\n",
+ "\n",
+ "# column order once the moved column has been taken out\n",
+ "remaining_columns = [col for col in df1_targets_full.columns if col != column_to_move]\n",
+ "\n",
+ "# same order with the moved column put back at position reference_index - 1\n",
+ "target_position = reference_index - 1\n",
+ "new_column_order = remaining_columns[:target_position] + [column_to_move] + remaining_columns[target_position:]\n",
+ "# selecting the columns in the new order yields the reordered frame\n",
+ "df1_targets_full = df1_targets_full[new_column_order]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "a874514a-c7dc-42d4-a440-dedd3a270e24",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " target_id | \n",
+ " target_name | \n",
+ " target_type_is_import | \n",
+ " target_type_id | \n",
+ " target_type_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " consentement optin scolaires | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " consentement optout scolaires | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " target_id target_name \\\n",
+ "0 217 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 consentement optin scolaires \n",
+ "2 134 DDCP Newsletter jeune public \n",
+ "3 700 consentement optout scolaires \n",
+ "4 964 DDCP achat billet nbr dep 19052021 \n",
+ "\n",
+ " target_type_is_import target_type_id target_type_name \n",
+ "0 False 56 manual_static_filter \n",
+ "1 False 56 manual_static_filter \n",
+ "2 False 56 manual_static_filter \n",
+ "3 False 56 manual_static_filter \n",
+ "4 False 56 manual_static_filter "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_targets_full = df1_targets_full.rename(columns=lambda col: col if col.startswith('target_') else 'target_' + col)  # prefix the columns that lack target_\n",
+ "df1_targets_full.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "0db0172a-5119-4b7f-97f8-36fc5c985205",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " target_name | \n",
+ " target_type_is_import | \n",
+ " target_type_id | \n",
+ " target_type_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " DDCP PROMO Réseau livres | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " Inscrits NL générale site web | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " Votre première liste | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 768019 | \n",
+ " 2737545 | \n",
+ " 666983 | \n",
+ " 345 | \n",
+ " Inscrits NL générale site web | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 768020 | \n",
+ " 2737546 | \n",
+ " 666983 | \n",
+ " 346 | \n",
+ " Votre première liste | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 768021 | \n",
+ " 2737575 | \n",
+ " 666986 | \n",
+ " 346 | \n",
+ " Votre première liste | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 768022 | \n",
+ " 2737576 | \n",
+ " 666987 | \n",
+ " 345 | \n",
+ " Inscrits NL générale site web | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 768023 | \n",
+ " 2737577 | \n",
+ " 666987 | \n",
+ " 346 | \n",
+ " Votre première liste | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
768024 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id target_name \\\n",
+ "0 1184824 645400 130 DDCP PROMO Réseau livres \n",
+ "1 1184825 645400 345 Inscrits NL générale site web \n",
+ "2 1184828 645402 126 DDCP PROMO Art contemporain \n",
+ "3 1184829 645403 126 DDCP PROMO Art contemporain \n",
+ "4 1295770 647301 346 Votre première liste \n",
+ "... ... ... ... ... \n",
+ "768019 2737545 666983 345 Inscrits NL générale site web \n",
+ "768020 2737546 666983 346 Votre première liste \n",
+ "768021 2737575 666986 346 Votre première liste \n",
+ "768022 2737576 666987 345 Inscrits NL générale site web \n",
+ "768023 2737577 666987 346 Votre première liste \n",
+ "\n",
+ " target_type_is_import target_type_id target_type_name \n",
+ "0 False 56 manual_static_filter \n",
+ "1 False 56 manual_static_filter \n",
+ "2 False 56 manual_static_filter \n",
+ "3 False 56 manual_static_filter \n",
+ "4 False 56 manual_static_filter \n",
+ "... ... ... ... \n",
+ "768019 False 56 manual_static_filter \n",
+ "768020 False 56 manual_static_filter \n",
+ "768021 False 56 manual_static_filter \n",
+ "768022 False 56 manual_static_filter \n",
+ "768023 False 56 manual_static_filter \n",
+ "\n",
+ "[768024 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# finally, merge the customer/target mapping with the enriched targets\n",
+ "# from df1_customer_target_mappings only id, customer_id and target_id are kept (name, extra_field and the timestamps are left out)\n",
+ "# note (from the original analysis): with a left join on df1_customer_target_mappings, the 2 targets that have no customer associated are dropped\n",
+ "customer_target_keys = df1_customer_target_mappings.loc[:, [\"id\", \"customer_id\", \"target_id\"]]\n",
+ "df1_customer_targets = customer_target_keys.merge(df1_targets_full, how=\"left\", on=\"target_id\")\n",
+ "\n",
+ "# last expression of the cell: display the merged table\n",
+ "df1_customer_targets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "52326267-c5ba-4e21-b8ab-4b4c62de75d1",
+ "metadata": {},
+ "source": [
+ "## Campaign stats, campaigns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "06dca910-5c07-4ee1-bbf2-3b11b48ba1f2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " service_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " process_id | \n",
+ " report_url | \n",
+ " category | \n",
+ " to_be_synced | \n",
+ " identifier | \n",
+ " sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1319613 | \n",
+ " newsletter enseignants janvier 2022 | \n",
+ " 721 | \n",
+ " 2022-01-14 16:06:42.586321+01:00 | \n",
+ " 2022-02-03 14:17:27.112963+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " aba3b6fd5d186d28e06ff97135cade7f | \n",
+ " 2022-01-14 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1319586 | \n",
+ " lsf_janvier_2022 | \n",
+ " 717 | \n",
+ " 2022-01-07 11:30:35.315895+01:00 | \n",
+ " 2022-02-03 14:17:27.116171+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 788d986905533aba051261497ecffcbb | \n",
+ " 2022-01-07 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1319282 | \n",
+ " Invitation à déjeuner au Mucem | Vernissage « ... | \n",
+ " 591 | \n",
+ " 2021-09-28 12:50:24.448752+02:00 | \n",
+ " 2022-02-03 14:17:27.119582+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 3493894fa4ea036cfc6433c3e2ee63b0 | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1319283 | \n",
+ " Vacances de la Toussaint - centres des loisirs | \n",
+ " 590 | \n",
+ " 2021-09-28 18:01:04.692073+02:00 | \n",
+ " 2022-02-03 14:17:27.124408+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 08b255a5d42b89b0585260b6f2360bdd | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1319636 | \n",
+ " ddcp_promo_md_livemag | \n",
+ " 730 | \n",
+ " 2022-01-27 18:00:41.053069+01:00 | \n",
+ " 2022-02-03 14:17:27.127607+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " d5cfead94f5350c12c322b5b664544c1 | \n",
+ " 2022-01-27 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name service_id \\\n",
+ "0 1319613 newsletter enseignants janvier 2022 721 \n",
+ "1 1319586 lsf_janvier_2022 717 \n",
+ "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n",
+ "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n",
+ "4 1319636 ddcp_promo_md_livemag 730 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n",
+ "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n",
+ "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n",
+ "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n",
+ "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n",
+ "\n",
+ " process_id report_url category to_be_synced \\\n",
+ "0 NaN NaN 0.0 False \n",
+ "1 NaN NaN 0.0 False \n",
+ "2 NaN NaN 0.0 False \n",
+ "3 NaN NaN 0.0 False \n",
+ "4 NaN NaN 0.0 False \n",
+ "\n",
+ " identifier sent_at \n",
+ "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n",
+ "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n",
+ "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n",
+ "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n",
+ "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1. campaigns: preview of the first rows\n",
+ "df1_campaigns.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "83eaa447-9144-41ed-9e26-f0f23799a8fd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " 2021-03-28 18:34:20.616136+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " 2021-03-28 18:21:04.297213+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " 2021-03-28 18:18:49.991042+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " 2021-03-28 18:09:50.915354+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " 2021-03-28 18:05:08.507398+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 "
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 2. campaign stats: preview of the first rows\n",
+ "df1_campaign_stats.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "7f25eb1b-e7c8-4715-bc30-7ac29a7181ac",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "0 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "1 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "2 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "3 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "\n",
+ " campaign_sent_at \n",
+ "0 2021-03-28 00:00:00+01:00 \n",
+ "1 2021-03-28 00:00:00+01:00 \n",
+ "2 2021-03-28 00:00:00+01:00 \n",
+ "3 2021-03-28 00:00:00+01:00 \n",
+ "4 2021-03-28 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3. add to each campaign stat row the name, service id and sent_at of its campaign (left join on campaign_id)\n",
+ "campaign_stat_cols = [\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]\n",
+ "campaign_info = df1_campaigns.loc[:, [\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n",
+ "df1_campaigns_full = df1_campaign_stats.loc[:, campaign_stat_cols].merge(campaign_info, how=\"left\", on=\"campaign_id\")\n",
+ "\n",
+ "df1_campaigns_full.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "87fc686a-4a80-40ab-9987-20d2774f3055",
+ "metadata": {},
+ "source": [
+ "## Link stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "2f9df2d0-8a23-496b-8e92-617285f64530",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " clicked_at | \n",
+ " link_id | \n",
+ " customer_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2021-03-26 16:30:36+01:00 | \n",
+ " 1 | \n",
+ " 284033 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2021-03-26 17:16:34+01:00 | \n",
+ " 2 | \n",
+ " 119768 | \n",
+ " 2021-03-26 16:16:34.950871+01:00 | \n",
+ " 2021-03-26 16:16:34.950871+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 272 | \n",
+ " 2021-03-28 20:03:32+02:00 | \n",
+ " 42 | \n",
+ " 113105 | \n",
+ " 2021-03-28 18:03:32.736394+02:00 | \n",
+ " 2021-03-28 18:03:32.736394+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 2021-03-26 17:43:19+01:00 | \n",
+ " 3 | \n",
+ " 272280 | \n",
+ " 2021-03-26 16:43:19.338321+01:00 | \n",
+ " 2021-03-26 16:43:19.338321+01:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 2021-03-26 17:46:00+01:00 | \n",
+ " 3 | \n",
+ " 105095 | \n",
+ " 2021-03-26 16:46:00.502945+01:00 | \n",
+ " 2021-03-26 16:46:00.502945+01:00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 151046 | \n",
+ " 243553 | \n",
+ " 2023-11-09 16:34:27+01:00 | \n",
+ " 14666 | \n",
+ " 998 | \n",
+ " 2023-11-09 15:34:29.425425+01:00 | \n",
+ " 2023-11-09 15:34:29.425425+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151047 | \n",
+ " 243554 | \n",
+ " 2023-11-09 16:34:35+01:00 | \n",
+ " 14670 | \n",
+ " 998 | \n",
+ " 2023-11-09 15:34:37.505505+01:00 | \n",
+ " 2023-11-09 15:34:37.505505+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151048 | \n",
+ " 243559 | \n",
+ " 2023-11-09 16:51:15+01:00 | \n",
+ " 14686 | \n",
+ " 82923 | \n",
+ " 2023-11-09 15:51:17.439518+01:00 | \n",
+ " 2023-11-09 15:51:17.439518+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151049 | \n",
+ " 243561 | \n",
+ " 2023-11-09 16:59:42+01:00 | \n",
+ " 14677 | \n",
+ " 82923 | \n",
+ " 2023-11-09 15:59:44.030922+01:00 | \n",
+ " 2023-11-09 15:59:44.030922+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151050 | \n",
+ " 243564 | \n",
+ " 2023-11-09 17:16:41+01:00 | \n",
+ " 14691 | \n",
+ " 1254355 | \n",
+ " 2023-11-09 16:16:43.012932+01:00 | \n",
+ " 2023-11-09 16:16:43.012932+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
151051 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id clicked_at link_id customer_id \\\n",
+ "0 1 2021-03-26 16:30:36+01:00 1 284033 \n",
+ "1 2 2021-03-26 17:16:34+01:00 2 119768 \n",
+ "2 272 2021-03-28 20:03:32+02:00 42 113105 \n",
+ "3 4 2021-03-26 17:43:19+01:00 3 272280 \n",
+ "4 5 2021-03-26 17:46:00+01:00 3 105095 \n",
+ "... ... ... ... ... \n",
+ "151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n",
+ "151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n",
+ "151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n",
+ "151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n",
+ "151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n",
+ "1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n",
+ "2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n",
+ "3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n",
+ "4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n",
+ "... ... ... \n",
+ "151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n",
+ "151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n",
+ "151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n",
+ "151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n",
+ "151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n",
+ "\n",
+ "[151051 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_link_stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aad6fb14-9694-4c1e-9885-1ebe0f38afe3",
+ "metadata": {},
+ "source": [
+ "## Bonus : peut-on lier link stats et campaign ? Non, les dates auxquelles le client clique sur le lien / ouvre la campagne ne permettent pas de faire coïncider link_id et campaign_id."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "8be7c974-72c9-4e31-a874-d7e5d2719fb3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " clicked_at | \n",
+ " link_id | \n",
+ " customer_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2021-03-26 16:30:36+01:00 | \n",
+ " 1 | \n",
+ " 284033 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ "
\n",
+ " \n",
+ " 7526 | \n",
+ " 14018 | \n",
+ " 2021-05-10 18:07:59+02:00 | \n",
+ " 312 | \n",
+ " 284033 | \n",
+ " 2021-05-10 16:08:00.541322+02:00 | \n",
+ " 2021-05-10 16:08:00.541322+02:00 | \n",
+ "
\n",
+ " \n",
+ " 96848 | \n",
+ " 133449 | \n",
+ " 2021-03-25 08:42:22+01:00 | \n",
+ " 4 | \n",
+ " 284033 | \n",
+ " 2022-04-15 22:51:01.994343+02:00 | \n",
+ " 2022-04-15 22:51:01.994343+02:00 | \n",
+ "
\n",
+ " \n",
+ " 115728 | \n",
+ " 207544 | \n",
+ " 2022-08-23 10:33:04+02:00 | \n",
+ " 12365 | \n",
+ " 284033 | \n",
+ " 2022-08-23 08:33:06.498908+02:00 | \n",
+ " 2022-08-23 08:33:06.498908+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id clicked_at link_id customer_id \\\n",
+ "0 1 2021-03-26 16:30:36+01:00 1 284033 \n",
+ "7526 14018 2021-05-10 18:07:59+02:00 312 284033 \n",
+ "96848 133449 2021-03-25 08:42:22+01:00 4 284033 \n",
+ "115728 207544 2022-08-23 10:33:04+02:00 12365 284033 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n",
+ "7526 2021-05-10 16:08:00.541322+02:00 2021-05-10 16:08:00.541322+02:00 \n",
+ "96848 2022-04-15 22:51:01.994343+02:00 2022-04-15 22:51:01.994343+02:00 \n",
+ "115728 2022-08-23 08:33:06.498908+02:00 2022-08-23 08:33:06.498908+02:00 "
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# All rows of link_stats recorded for customer 284033\n",
+ "df1_link_stats.loc[df1_link_stats[\"customer_id\"].eq(284033)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "902e9947-58e1-44f4-b634-1239b0e4df02",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 4030643 | \n",
+ " 4036376 | \n",
+ " 4 | \n",
+ " 284033 | \n",
+ " NaN | \n",
+ " 2021-03-21 18:01:22+01:00 | \n",
+ " 2021-03-21 18:08:04+01:00 | \n",
+ " Le Mucem chez vous, gardons le lien #21 | \n",
+ " 398 | \n",
+ " 2021-03-21 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "4030643 4036376 4 284033 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "4030643 2021-03-21 18:01:22+01:00 2021-03-21 18:08:04+01:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "4030643 Le Mucem chez vous, gardons le lien #21 398 \n",
+ "\n",
+ " campaign_sent_at \n",
+ "4030643 2021-03-21 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Campaign-stats rows for customer 284033 restricted to campaign_id 4\n",
+ "# (presumably to compare with this customer's link_id 4 click shown above - TODO confirm).\n",
+ "df1_campaigns_full.loc[\n",
+ "    df1_campaigns_full[\"customer_id\"].eq(284033)\n",
+ "    & df1_campaigns_full[\"campaign_id\"].eq(4)\n",
+ "]"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}