diff --git a/exploratory_analysis/TP_exploratory_analysis-Copy1.ipynb b/exploratory_analysis/TP_exploratory_analysis-Copy1.ipynb
new file mode 100644
index 0000000..021b463
--- /dev/null
+++ b/exploratory_analysis/TP_exploratory_analysis-Copy1.ipynb
@@ -0,0 +1,7990 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "6c0589ab-924f-4706-bef7-65500f0c4dd5",
+ "metadata": {},
+ "source": [
+ "# Exploratory study of variables: targets, campaigns and link stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "83319f84-427f-43aa-af26-06797244e89c",
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true
+ },
+ "source": [
+ "## First steps: package imports, environment setup and data loading"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 253,
+ "id": "a26f3f09-3961-43fe-b4d9-1abe3b906a2c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# importations\n",
+ "\n",
+ "import os \n",
+ "import s3fs\n",
+ "import pandas as pd\n",
+ "import re\n",
+ "from datetime import datetime, timezone, timedelta\n",
+ "import math\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 188,
+ "id": "78478dbf-bd91-45e0-9f2b-2d9e6b0f648c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data/1',\n",
+ " 'bdc2324-data/10',\n",
+ " 'bdc2324-data/101',\n",
+ " 'bdc2324-data/11',\n",
+ " 'bdc2324-data/12',\n",
+ " 'bdc2324-data/13',\n",
+ " 'bdc2324-data/14',\n",
+ " 'bdc2324-data/2',\n",
+ " 'bdc2324-data/3',\n",
+ " 'bdc2324-data/4',\n",
+ " 'bdc2324-data/5',\n",
+ " 'bdc2324-data/6',\n",
+ " 'bdc2324-data/7',\n",
+ " 'bdc2324-data/8',\n",
+ " 'bdc2324-data/9']"
+ ]
+ },
+ "execution_count": 188,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# S3 connection: the endpoint host is read from the AWS_S3_ENDPOINT environment variable\n",
+ "\n",
+ "BUCKET = \"bdc2324-data\"\n",
+ "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
+ "fs = s3fs.S3FileSystem(client_kwargs={\"endpoint_url\": S3_ENDPOINT_URL})\n",
+ "\n",
+ "# list the top-level entries of the bucket\n",
+ "fs.ls(BUCKET)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "a7e1b277-4381-45c0-b1ec-4050af54a3b6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data/1/1campaign_stats.csv',\n",
+ " 'bdc2324-data/1/1campaigns.csv',\n",
+ " 'bdc2324-data/1/1categories.csv',\n",
+ " 'bdc2324-data/1/1countries.csv',\n",
+ " 'bdc2324-data/1/1currencies.csv',\n",
+ " 'bdc2324-data/1/1customer_target_mappings.csv',\n",
+ " 'bdc2324-data/1/1customersplus.csv',\n",
+ " 'bdc2324-data/1/1event_types.csv',\n",
+ " 'bdc2324-data/1/1events.csv',\n",
+ " 'bdc2324-data/1/1facilities.csv',\n",
+ " 'bdc2324-data/1/1link_stats.csv',\n",
+ " 'bdc2324-data/1/1pricing_formulas.csv',\n",
+ " 'bdc2324-data/1/1product_packs.csv',\n",
+ " 'bdc2324-data/1/1products.csv',\n",
+ " 'bdc2324-data/1/1products_groups.csv',\n",
+ " 'bdc2324-data/1/1purchases.csv',\n",
+ " 'bdc2324-data/1/1representation_category_capacities.csv',\n",
+ " 'bdc2324-data/1/1representations.csv',\n",
+ " 'bdc2324-data/1/1seasons.csv',\n",
+ " 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
+ " 'bdc2324-data/1/1suppliers.csv',\n",
+ " 'bdc2324-data/1/1tags.csv',\n",
+ " 'bdc2324-data/1/1target_types.csv',\n",
+ " 'bdc2324-data/1/1targets.csv',\n",
+ " 'bdc2324-data/1/1tickets.csv',\n",
+ " 'bdc2324-data/1/1type_of_categories.csv',\n",
+ " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
+ " 'bdc2324-data/1/1type_ofs.csv']"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Company explored in this notebook (the bucket listing shows one numbered sub-folder per company).\n",
+ "COMPANY_NUMBER = \"1\"\n",
+ "\n",
+ "# Build the folder path explicitly rather than taking the first entry of the\n",
+ "# bucket listing: no dependence on listing order and one remote call fewer.\n",
+ "FILE_PATH_S3 = BUCKET + \"/\" + COMPANY_NUMBER\n",
+ "files_path = fs.ls(FILE_PATH_S3)\n",
+ "files_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b26f7d2b-391f-4326-a60b-5b379186b4e8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_624/107044352.py:9: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " df = pd.read_csv(file_in)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load every CSV table of the selected company (company 1 here) into a module-level\n",
+ "# DataFrame named df<company>_<table> (e.g. df1_targets); later cells use these names directly.\n",
+ "\n",
+ "client_number = files_path[0].split(\"/\")[1]\n",
+ "df_prefix = \"df\" + str(client_number) + \"_\"\n",
+ "\n",
+ "# paths look like <bucket>/<company>/<company><table>.csv ; group 3 captures <table>\n",
+ "table_name_pattern = re.compile(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$')\n",
+ "\n",
+ "for current_path in files_path:\n",
+ "    name_match = table_name_pattern.search(current_path)\n",
+ "    if name_match is None:\n",
+ "        # unexpected file name: report and skip it instead of failing on .group()\n",
+ "        print(f\"skipped (unexpected file name): {current_path}\")\n",
+ "        continue\n",
+ "    with fs.open(current_path, mode=\"rb\") as file_in:\n",
+ "        # low_memory=False infers each column dtype from the whole file, avoiding\n",
+ "        # the mixed-type DtypeWarning that chunked inference raises on this data\n",
+ "        df = pd.read_csv(file_in, low_memory=False)\n",
+ "    globals()[df_prefix + name_match.group(3)] = df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5cb3e9dc-ba6e-408c-b1a6-a2c5a2215f71",
+ "metadata": {},
+ "source": [
+ "## Target, target types and customer target mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "c6dbd777-b6da-485f-a650-b0a12f3d90c4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "is_import bool\n",
+ "name object\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "identifier object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1. target types\n",
+ "df1_target_types.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "04d625e8-b077-450f-a654-1a3b05fc1325",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "str"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(df1_target_types[\"created_at\"][0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "607441b9-33a8-41a7-a089-120dfe266de0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " is_import | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 69 | \n",
+ " False | \n",
+ " manual_dynamic_filter | \n",
+ " 2020-11-30 09:46:18.881030+01:00 | \n",
+ " 2020-11-30 09:46:18.881030+01:00 | \n",
+ " e0f4b8693184850fefd6d2a38f10584e | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 48 | \n",
+ " True | \n",
+ " manual_structure | \n",
+ " 2020-11-04 17:16:19.548275+01:00 | \n",
+ " 2020-11-04 17:16:19.548275+01:00 | \n",
+ " 382bca214204a2d3462f5ec2728d5d1e | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " True | \n",
+ " manual_import | \n",
+ " 2020-10-14 18:37:40.521623+02:00 | \n",
+ " 2020-10-14 18:37:40.521623+02:00 | \n",
+ " 12213df2ce68a624e4c0070521437bac | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 56 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " 2020-11-04 18:08:37.233486+01:00 | \n",
+ " 2020-11-04 18:08:37.233486+01:00 | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id is_import name created_at \\\n",
+ "0 69 False manual_dynamic_filter 2020-11-30 09:46:18.881030+01:00 \n",
+ "1 48 True manual_structure 2020-11-04 17:16:19.548275+01:00 \n",
+ "2 1 True manual_import 2020-10-14 18:37:40.521623+02:00 \n",
+ "3 56 False manual_static_filter 2020-11-04 18:08:37.233486+01:00 \n",
+ "\n",
+ " updated_at identifier \n",
+ "0 2020-11-30 09:46:18.881030+01:00 e0f4b8693184850fefd6d2a38f10584e \n",
+ "1 2020-11-04 17:16:19.548275+01:00 382bca214204a2d3462f5ec2728d5d1e \n",
+ "2 2020-10-14 18:37:40.521623+02:00 12213df2ce68a624e4c0070521437bac \n",
+ "3 2020-11-04 18:08:37.233486+01:00 fb27e81baa4debc6a4e1a8639c20e808 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_target_types"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "6c036742-3069-438d-82af-62acc89aa000",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " target_type_id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " 56 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " 2021-01-04 15:00:05.401899+01:00 | \n",
+ " 2021-03-02 18:38:19.025969+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " 56 | \n",
+ " consentement optin scolaires | \n",
+ " 2021-12-21 16:03:59.840785+01:00 | \n",
+ " 2022-02-18 17:23:44.761388+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " 56 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " 2020-11-10 09:43:19.667471+01:00 | \n",
+ " 2021-03-02 18:38:19.052304+01:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " 56 | \n",
+ " consentement optout scolaires | \n",
+ " 2021-12-21 16:01:57.524946+01:00 | \n",
+ " 2022-02-18 17:23:44.807776+01:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " 56 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " 2022-04-14 10:58:17.142834+02:00 | \n",
+ " 2022-04-14 10:58:23.677264+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id target_type_id name \\\n",
+ "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 56 consentement optin scolaires \n",
+ "2 134 56 DDCP Newsletter jeune public \n",
+ "3 700 56 consentement optout scolaires \n",
+ "4 964 56 DDCP achat billet nbr dep 19052021 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n",
+ "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n",
+ "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n",
+ "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n",
+ "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 2. targets\n",
+ "\n",
+ "df1_targets.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "6bcde543-3eea-4584-82a2-903a1007c4ee",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "target_type_id int64\n",
+ "name object\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_targets.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "bc9acebd-a030-4a40-bd1f-2ff0ab3f59d2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "str"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(df1_targets[\"created_at\"][0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "9e1b38d3-220c-4a20-a60b-a8f87dfd5bff",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0\n",
+ "target_type_id 0\n",
+ "name 0\n",
+ "created_at 0\n",
+ "updated_at 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# missing values per column\n",
+ "\n",
+ "df1_targets.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "bf660284-974f-40aa-a914-100d45fceafc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "287"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_targets[\"name\"].nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "6589e11c-9c7a-4bd8-8953-3c5a23fa0ba2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " target_type_id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " 56 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " 2021-01-04 15:00:05.401899+01:00 | \n",
+ " 2021-03-02 18:38:19.025969+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " 56 | \n",
+ " consentement optin scolaires | \n",
+ " 2021-12-21 16:03:59.840785+01:00 | \n",
+ " 2022-02-18 17:23:44.761388+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " 56 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " 2020-11-10 09:43:19.667471+01:00 | \n",
+ " 2021-03-02 18:38:19.052304+01:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " 56 | \n",
+ " consentement optout scolaires | \n",
+ " 2021-12-21 16:01:57.524946+01:00 | \n",
+ " 2022-02-18 17:23:44.807776+01:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " 56 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " 2022-04-14 10:58:17.142834+02:00 | \n",
+ " 2022-04-14 10:58:23.677264+02:00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 282 | \n",
+ " 1811 | \n",
+ " 1 | \n",
+ " ddcp_promo_ribambelle_2022_mapado_naikko_opt in | \n",
+ " 2022-11-30 15:57:05.681956+01:00 | \n",
+ " 2022-11-30 16:00:32.649210+01:00 | \n",
+ "
\n",
+ " \n",
+ " 283 | \n",
+ " 2006 | \n",
+ " 1 | \n",
+ " cp 14 mars | \n",
+ " 2023-03-03 18:07:00.223750+01:00 | \n",
+ " 2023-03-03 18:15:01.390970+01:00 | \n",
+ "
\n",
+ " \n",
+ " 284 | \n",
+ " 2193 | \n",
+ " 1 | \n",
+ " ddcp fichier musique 2 | \n",
+ " 2023-04-14 14:33:53.628142+02:00 | \n",
+ " 2023-04-14 15:00:35.608210+02:00 | \n",
+ "
\n",
+ " \n",
+ " 285 | \n",
+ " 2429 | \n",
+ " 1 | \n",
+ " import_mucem | \n",
+ " 2023-06-26 18:32:40.146757+02:00 | \n",
+ " 2023-06-26 18:45:02.614668+02:00 | \n",
+ "
\n",
+ " \n",
+ " 286 | \n",
+ " 2485 | \n",
+ " 1 | \n",
+ " po_au salon_2e envoi | \n",
+ " 2023-07-03 13:09:48.598072+02:00 | \n",
+ " 2023-07-03 13:15:03.634600+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
287 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id target_type_id name \\\n",
+ "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 56 consentement optin scolaires \n",
+ "2 134 56 DDCP Newsletter jeune public \n",
+ "3 700 56 consentement optout scolaires \n",
+ "4 964 56 DDCP achat billet nbr dep 19052021 \n",
+ ".. ... ... ... \n",
+ "282 1811 1 ddcp_promo_ribambelle_2022_mapado_naikko_opt in \n",
+ "283 2006 1 cp 14 mars \n",
+ "284 2193 1 ddcp fichier musique 2 \n",
+ "285 2429 1 import_mucem \n",
+ "286 2485 1 po_au salon_2e envoi \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n",
+ "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n",
+ "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n",
+ "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n",
+ "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 \n",
+ ".. ... ... \n",
+ "282 2022-11-30 15:57:05.681956+01:00 2022-11-30 16:00:32.649210+01:00 \n",
+ "283 2023-03-03 18:07:00.223750+01:00 2023-03-03 18:15:01.390970+01:00 \n",
+ "284 2023-04-14 14:33:53.628142+02:00 2023-04-14 15:00:35.608210+02:00 \n",
+ "285 2023-06-26 18:32:40.146757+02:00 2023-06-26 18:45:02.614668+02:00 \n",
+ "286 2023-07-03 13:09:48.598072+02:00 2023-07-03 13:15:03.634600+02:00 \n",
+ "\n",
+ "[287 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_targets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "ef56e8ec-0429-475e-9c28-07983654c37b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " name | \n",
+ " extra_field | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at \\\n",
+ "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n",
+ "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n",
+ "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n",
+ "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n",
+ "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n",
+ "\n",
+ " updated_at name extra_field \n",
+ "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n",
+ "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n",
+ "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n",
+ "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n",
+ "4 2021-09-28 16:02:29.372608+02:00 NaN NaN "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3. customer target mapping\n",
+ "\n",
+ "df1_customer_target_mappings.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "5244543f-1948-4769-be1f-691ad13174a8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0.000000\n",
+ "customer_id 0.000000\n",
+ "target_id 0.000000\n",
+ "created_at 0.000022\n",
+ "updated_at 0.000022\n",
+ "name 1.000000\n",
+ "extra_field 1.000000\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# share of missing values in each column (mean of the boolean NaN mask)\n",
+ "df1_customer_target_mappings.isna().mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "1c59e2ae-ee24-4195-bfea-ae55b92368ec",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "768024"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_customer_target_mappings[\"id\"].nunique()\n",
+ "# df1_customer_target_mappings.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "4ed49f39-e6d3-4785-ba7d-bce918d423ee",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# are the (customer_id, target_id) pairs unique?\n",
+ "df1_customer_target_mappings.duplicated(subset = [\"customer_id\", \"target_id\"]).sum() # no duplicates"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "f8cb1740-2cb0-4b3a-bfb0-d35423dc2cc7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " target_type_id | \n",
+ " target_type_is_import | \n",
+ " target_type_name | \n",
+ " target_type_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 69 | \n",
+ " False | \n",
+ " manual_dynamic_filter | \n",
+ " e0f4b8693184850fefd6d2a38f10584e | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 48 | \n",
+ " True | \n",
+ " manual_structure | \n",
+ " 382bca214204a2d3462f5ec2728d5d1e | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " True | \n",
+ " manual_import | \n",
+ " 12213df2ce68a624e4c0070521437bac | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 56 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " target_type_id target_type_is_import target_type_name \\\n",
+ "0 69 False manual_dynamic_filter \n",
+ "1 48 True manual_structure \n",
+ "2 1 True manual_import \n",
+ "3 56 False manual_static_filter \n",
+ "\n",
+ " target_type_identifier \n",
+ "0 e0f4b8693184850fefd6d2a38f10584e \n",
+ "1 382bca214204a2d3462f5ec2728d5d1e \n",
+ "2 12213df2ce68a624e4c0070521437bac \n",
+ "3 fb27e81baa4debc6a4e1a8639c20e808 "
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 4.1. merge target with target type\n",
+ "\n",
+ "df1_target_types[[\"id\",\"is_import\",\"name\",\"identifier\"]].add_prefix(\"target_type_\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "id": "ebabdebd-3d75-4048-b65d-4cbd69bee390",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " target_type_id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " target_type_is_import | \n",
+ " target_type_name | \n",
+ " target_type_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " 56 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " 2021-01-04 15:00:05.401899+01:00 | \n",
+ " 2021-03-02 18:38:19.025969+01:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " 56 | \n",
+ " consentement optin scolaires | \n",
+ " 2021-12-21 16:03:59.840785+01:00 | \n",
+ " 2022-02-18 17:23:44.761388+01:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " 56 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " 2020-11-10 09:43:19.667471+01:00 | \n",
+ " 2021-03-02 18:38:19.052304+01:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " 56 | \n",
+ " consentement optout scolaires | \n",
+ " 2021-12-21 16:01:57.524946+01:00 | \n",
+ " 2022-02-18 17:23:44.807776+01:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " 56 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " 2022-04-14 10:58:17.142834+02:00 | \n",
+ " 2022-04-14 10:58:23.677264+02:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 282 | \n",
+ " 1811 | \n",
+ " 1 | \n",
+ " ddcp_promo_ribambelle_2022_mapado_naikko_opt in | \n",
+ " 2022-11-30 15:57:05.681956+01:00 | \n",
+ " 2022-11-30 16:00:32.649210+01:00 | \n",
+ " True | \n",
+ " manual_import | \n",
+ " 12213df2ce68a624e4c0070521437bac | \n",
+ "
\n",
+ " \n",
+ " 283 | \n",
+ " 2006 | \n",
+ " 1 | \n",
+ " cp 14 mars | \n",
+ " 2023-03-03 18:07:00.223750+01:00 | \n",
+ " 2023-03-03 18:15:01.390970+01:00 | \n",
+ " True | \n",
+ " manual_import | \n",
+ " 12213df2ce68a624e4c0070521437bac | \n",
+ "
\n",
+ " \n",
+ " 284 | \n",
+ " 2193 | \n",
+ " 1 | \n",
+ " ddcp fichier musique 2 | \n",
+ " 2023-04-14 14:33:53.628142+02:00 | \n",
+ " 2023-04-14 15:00:35.608210+02:00 | \n",
+ " True | \n",
+ " manual_import | \n",
+ " 12213df2ce68a624e4c0070521437bac | \n",
+ "
\n",
+ " \n",
+ " 285 | \n",
+ " 2429 | \n",
+ " 1 | \n",
+ " import_mucem | \n",
+ " 2023-06-26 18:32:40.146757+02:00 | \n",
+ " 2023-06-26 18:45:02.614668+02:00 | \n",
+ " True | \n",
+ " manual_import | \n",
+ " 12213df2ce68a624e4c0070521437bac | \n",
+ "
\n",
+ " \n",
+ " 286 | \n",
+ " 2485 | \n",
+ " 1 | \n",
+ " po_au salon_2e envoi | \n",
+ " 2023-07-03 13:09:48.598072+02:00 | \n",
+ " 2023-07-03 13:15:03.634600+02:00 | \n",
+ " True | \n",
+ " manual_import | \n",
+ " 12213df2ce68a624e4c0070521437bac | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
287 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id target_type_id name \\\n",
+ "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 56 consentement optin scolaires \n",
+ "2 134 56 DDCP Newsletter jeune public \n",
+ "3 700 56 consentement optout scolaires \n",
+ "4 964 56 DDCP achat billet nbr dep 19052021 \n",
+ ".. ... ... ... \n",
+ "282 1811 1 ddcp_promo_ribambelle_2022_mapado_naikko_opt in \n",
+ "283 2006 1 cp 14 mars \n",
+ "284 2193 1 ddcp fichier musique 2 \n",
+ "285 2429 1 import_mucem \n",
+ "286 2485 1 po_au salon_2e envoi \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n",
+ "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n",
+ "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n",
+ "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n",
+ "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 \n",
+ ".. ... ... \n",
+ "282 2022-11-30 15:57:05.681956+01:00 2022-11-30 16:00:32.649210+01:00 \n",
+ "283 2023-03-03 18:07:00.223750+01:00 2023-03-03 18:15:01.390970+01:00 \n",
+ "284 2023-04-14 14:33:53.628142+02:00 2023-04-14 15:00:35.608210+02:00 \n",
+ "285 2023-06-26 18:32:40.146757+02:00 2023-06-26 18:45:02.614668+02:00 \n",
+ "286 2023-07-03 13:09:48.598072+02:00 2023-07-03 13:15:03.634600+02:00 \n",
+ "\n",
+ " target_type_is_import target_type_name \\\n",
+ "0 False manual_static_filter \n",
+ "1 False manual_static_filter \n",
+ "2 False manual_static_filter \n",
+ "3 False manual_static_filter \n",
+ "4 False manual_static_filter \n",
+ ".. ... ... \n",
+ "282 True manual_import \n",
+ "283 True manual_import \n",
+ "284 True manual_import \n",
+ "285 True manual_import \n",
+ "286 True manual_import \n",
+ "\n",
+ " target_type_identifier \n",
+ "0 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "1 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "2 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "3 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "4 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ ".. ... \n",
+ "282 12213df2ce68a624e4c0070521437bac \n",
+ "283 12213df2ce68a624e4c0070521437bac \n",
+ "284 12213df2ce68a624e4c0070521437bac \n",
+ "285 12213df2ce68a624e4c0070521437bac \n",
+ "286 12213df2ce68a624e4c0070521437bac \n",
+ "\n",
+ "[287 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 94,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# left join: attach the attributes of its target type to every target\n",
+ "\n",
+ "target_type_columns = [\"id\", \"is_import\", \"name\", \"identifier\"]\n",
+ "target_types_prefixed = df1_target_types[target_type_columns].add_prefix(\"target_type_\")\n",
+ "df1_targets_full = df1_targets.merge(target_types_prefixed, on=\"target_type_id\", how=\"left\")\n",
+ "df1_targets_full"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "f0b03a5d-b622-496a-bc71-ef92e91f9e51",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " name | \n",
+ " extra_field | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at \\\n",
+ "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n",
+ "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n",
+ "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n",
+ "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n",
+ "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n",
+ "\n",
+ " updated_at name extra_field \n",
+ "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n",
+ "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n",
+ "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n",
+ "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n",
+ "4 2021-09-28 16:02:29.372608+02:00 NaN NaN "
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 4.2. merge df1_customer_target_mappings with df1_targets_full\n",
+ "\n",
+ "df1_customer_target_mappings.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "906e01fd-23b3-4da7-bc5e-6618599fbb05",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "17"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Q: are the creation and update dates of the customer target mapping table always equal?\n",
+ "\n",
+ "# 17 observations for which creation date != update date, but these are only rows where both dates are NaN (NaN != NaN is True), so OK!\n",
+ "(df1_customer_target_mappings[\"created_at\"] != df1_customer_target_mappings[\"updated_at\"]).sum() "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "c9265d2f-b636-415e-bc2d-99b932b89424",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " name | \n",
+ " extra_field | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 605484 | \n",
+ " 1691570 | \n",
+ " 661701 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 654549 | \n",
+ " 1832071 | \n",
+ " 651594 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 654550 | \n",
+ " 1832072 | \n",
+ " 663061 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 654551 | \n",
+ " 1832073 | \n",
+ " 663114 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 655162 | \n",
+ " 1949466 | \n",
+ " 663865 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 754038 | \n",
+ " 2154438 | \n",
+ " 664300 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 760929 | \n",
+ " 2282079 | \n",
+ " 665557 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 760930 | \n",
+ " 2282080 | \n",
+ " 665563 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 761787 | \n",
+ " 2675293 | \n",
+ " 661492 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 761798 | \n",
+ " 2721237 | \n",
+ " 665931 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 761799 | \n",
+ " 2721238 | \n",
+ " 665932 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 761800 | \n",
+ " 2721239 | \n",
+ " 665938 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 761801 | \n",
+ " 2721240 | \n",
+ " 665956 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 767918 | \n",
+ " 2736960 | \n",
+ " 666466 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 767919 | \n",
+ " 2736961 | \n",
+ " 666468 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 767968 | \n",
+ " 2737357 | \n",
+ " 666824 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 767984 | \n",
+ " 2737489 | \n",
+ " 107743 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at updated_at name \\\n",
+ "605484 1691570 661701 264 NaN NaN NaN \n",
+ "654549 1832071 651594 264 NaN NaN NaN \n",
+ "654550 1832072 663061 264 NaN NaN NaN \n",
+ "654551 1832073 663114 264 NaN NaN NaN \n",
+ "655162 1949466 663865 264 NaN NaN NaN \n",
+ "754038 2154438 664300 264 NaN NaN NaN \n",
+ "760929 2282079 665557 264 NaN NaN NaN \n",
+ "760930 2282080 665563 264 NaN NaN NaN \n",
+ "761787 2675293 661492 264 NaN NaN NaN \n",
+ "761798 2721237 665931 264 NaN NaN NaN \n",
+ "761799 2721238 665932 264 NaN NaN NaN \n",
+ "761800 2721239 665938 264 NaN NaN NaN \n",
+ "761801 2721240 665956 264 NaN NaN NaN \n",
+ "767918 2736960 666466 264 NaN NaN NaN \n",
+ "767919 2736961 666468 264 NaN NaN NaN \n",
+ "767968 2737357 666824 264 NaN NaN NaN \n",
+ "767984 2737489 107743 264 NaN NaN NaN \n",
+ "\n",
+ " extra_field \n",
+ "605484 NaN \n",
+ "654549 NaN \n",
+ "654550 NaN \n",
+ "654551 NaN \n",
+ "655162 NaN \n",
+ "754038 NaN \n",
+ "760929 NaN \n",
+ "760930 NaN \n",
+ "761787 NaN \n",
+ "761798 NaN \n",
+ "761799 NaN \n",
+ "761800 NaN \n",
+ "761801 NaN \n",
+ "767918 NaN \n",
+ "767919 NaN \n",
+ "767968 NaN \n",
+ "767984 NaN "
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Rows whose created_at differs from updated_at. NaN != NaN evaluates to True, so rows where both\n",
+ "# timestamps are missing are selected too; in the saved output every returned row is such a case.\n",
+ "df1_customer_target_mappings[df1_customer_target_mappings[\"created_at\"] != df1_customer_target_mappings[\"updated_at\"]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "63e4ce23-ce13-46fc-82c5-9065a774b4b5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " name | \n",
+ " extra_field | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 140 | \n",
+ " 3416265 | \n",
+ " 1751 | \n",
+ " 264 | \n",
+ " 2022-01-28 20:00:16.448920+01:00 | \n",
+ " 2022-01-28 20:00:16.448920+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 149 | \n",
+ " 3416274 | \n",
+ " 2213 | \n",
+ " 264 | \n",
+ " 2022-01-28 20:30:17.323634+01:00 | \n",
+ " 2022-01-28 20:30:17.323634+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1120 | \n",
+ " 4292054 | \n",
+ " 1156059 | \n",
+ " 264 | \n",
+ " 2022-09-29 07:00:43.003440+02:00 | \n",
+ " 2022-09-29 07:00:43.003440+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1121 | \n",
+ " 4292055 | \n",
+ " 1156063 | \n",
+ " 264 | \n",
+ " 2022-09-29 07:00:43.003440+02:00 | \n",
+ " 2022-09-29 07:00:43.003440+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4006 | \n",
+ " 4428048 | \n",
+ " 34916 | \n",
+ " 264 | \n",
+ " 2023-03-14 07:01:27.868349+01:00 | \n",
+ " 2023-03-14 07:01:27.868349+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 761801 | \n",
+ " 2721240 | \n",
+ " 665956 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 767918 | \n",
+ " 2736960 | \n",
+ " 666466 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 767919 | \n",
+ " 2736961 | \n",
+ " 666468 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 767968 | \n",
+ " 2737357 | \n",
+ " 666824 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 767984 | \n",
+ " 2737489 | \n",
+ " 107743 | \n",
+ " 264 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1954 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at \\\n",
+ "140 3416265 1751 264 2022-01-28 20:00:16.448920+01:00 \n",
+ "149 3416274 2213 264 2022-01-28 20:30:17.323634+01:00 \n",
+ "1120 4292054 1156059 264 2022-09-29 07:00:43.003440+02:00 \n",
+ "1121 4292055 1156063 264 2022-09-29 07:00:43.003440+02:00 \n",
+ "4006 4428048 34916 264 2023-03-14 07:01:27.868349+01:00 \n",
+ "... ... ... ... ... \n",
+ "761801 2721240 665956 264 NaN \n",
+ "767918 2736960 666466 264 NaN \n",
+ "767919 2736961 666468 264 NaN \n",
+ "767968 2737357 666824 264 NaN \n",
+ "767984 2737489 107743 264 NaN \n",
+ "\n",
+ " updated_at name extra_field \n",
+ "140 2022-01-28 20:00:16.448920+01:00 NaN NaN \n",
+ "149 2022-01-28 20:30:17.323634+01:00 NaN NaN \n",
+ "1120 2022-09-29 07:00:43.003440+02:00 NaN NaN \n",
+ "1121 2022-09-29 07:00:43.003440+02:00 NaN NaN \n",
+ "4006 2023-03-14 07:01:27.868349+01:00 NaN NaN \n",
+ "... ... ... ... \n",
+ "761801 NaN NaN NaN \n",
+ "767918 NaN NaN NaN \n",
+ "767919 NaN NaN NaN \n",
+ "767968 NaN NaN NaN \n",
+ "767984 NaN NaN NaN \n",
+ "\n",
+ "[1954 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# These missing timestamps all belong to the target with id 264, yet the other rows for that\n",
+ "# same target do have created_at / updated_at filled in.\n",
+ "df1_customer_target_mappings[df1_customer_target_mappings[\"target_id\"]==264]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "0681b3e6-71bb-4132-b11a-646382f78de6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'2021-10-28 11:30:42.717180+02:00'"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Q: are the creation / update dates unique per customer or per target?\n",
+ "\n",
+ "# NOTE(review): the saved output is a plain str, so max() here looks like a lexicographic comparison of\n",
+ "# timestamp strings rather than a chronological one - confirm, or parse the column with pd.to_datetime first.\n",
+ "df1_customer_target_mappings[df1_customer_target_mappings[\"target_id\"]==217][\"updated_at\"].max()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "93e4a125-08dd-42ba-baa6-0dc5996a76af",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " target_type_id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " target_type_is_import | \n",
+ " target_type_name | \n",
+ " target_type_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " 56 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " 2021-01-04 15:00:05.401899+01:00 | \n",
+ " 2021-03-02 18:38:19.025969+01:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id target_type_id name \\\n",
+ "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n",
+ "\n",
+ " target_type_is_import target_type_name \\\n",
+ "0 False manual_static_filter \n",
+ "\n",
+ " target_type_identifier \n",
+ "0 fb27e81baa4debc6a4e1a8639c20e808 "
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show the row(s) of df1_targets_full describing the target whose id is 217.\n",
+ "is_target_217 = df1_targets_full[\"id\"] == 217\n",
+ "df1_targets_full.loc[is_target_217]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "88eac1a6-74b1-4ce1-91a1-c1c69e7a9264",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " target_type_id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " target_type_is_import | \n",
+ " target_type_name | \n",
+ " target_type_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " 56 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " 2021-01-04 15:00:05.401899+01:00 | \n",
+ " 2021-03-02 18:38:19.025969+01:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " 56 | \n",
+ " consentement optin scolaires | \n",
+ " 2021-12-21 16:03:59.840785+01:00 | \n",
+ " 2022-02-18 17:23:44.761388+01:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " 56 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " 2020-11-10 09:43:19.667471+01:00 | \n",
+ " 2021-03-02 18:38:19.052304+01:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " 56 | \n",
+ " consentement optout scolaires | \n",
+ " 2021-12-21 16:01:57.524946+01:00 | \n",
+ " 2022-02-18 17:23:44.807776+01:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " 56 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " 2022-04-14 10:58:17.142834+02:00 | \n",
+ " 2022-04-14 10:58:23.677264+02:00 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id target_type_id name \\\n",
+ "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 56 consentement optin scolaires \n",
+ "2 134 56 DDCP Newsletter jeune public \n",
+ "3 700 56 consentement optout scolaires \n",
+ "4 964 56 DDCP achat billet nbr dep 19052021 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n",
+ "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n",
+ "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n",
+ "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n",
+ "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 \n",
+ "\n",
+ " target_type_is_import target_type_name \\\n",
+ "0 False manual_static_filter \n",
+ "1 False manual_static_filter \n",
+ "2 False manual_static_filter \n",
+ "3 False manual_static_filter \n",
+ "4 False manual_static_filter \n",
+ "\n",
+ " target_type_identifier \n",
+ "0 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "1 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "2 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "3 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "4 fb27e81baa4debc6a4e1a8639c20e808 "
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the first five rows of the targets table.\n",
+ "df1_targets_full.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "9af4066e-97d8-4066-a7ef-094807e33ba3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " name | \n",
+ " extra_field | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 768019 | \n",
+ " 2737545 | \n",
+ " 666983 | \n",
+ " 345 | \n",
+ " 2021-12-14 14:48:05.456842+01:00 | \n",
+ " 2021-12-14 14:48:05.456842+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 768020 | \n",
+ " 2737546 | \n",
+ " 666983 | \n",
+ " 346 | \n",
+ " 2021-12-14 14:48:05.465830+01:00 | \n",
+ " 2021-12-14 14:48:05.465830+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 768021 | \n",
+ " 2737575 | \n",
+ " 666986 | \n",
+ " 346 | \n",
+ " 2021-12-14 23:15:42.757832+01:00 | \n",
+ " 2021-12-14 23:15:42.757832+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 768022 | \n",
+ " 2737576 | \n",
+ " 666987 | \n",
+ " 345 | \n",
+ " 2021-12-15 00:14:59.018215+01:00 | \n",
+ " 2021-12-15 00:14:59.018215+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 768023 | \n",
+ " 2737577 | \n",
+ " 666987 | \n",
+ " 346 | \n",
+ " 2021-12-15 00:14:59.029434+01:00 | \n",
+ " 2021-12-15 00:14:59.029434+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
768024 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at \\\n",
+ "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n",
+ "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n",
+ "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n",
+ "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n",
+ "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n",
+ "... ... ... ... ... \n",
+ "768019 2737545 666983 345 2021-12-14 14:48:05.456842+01:00 \n",
+ "768020 2737546 666983 346 2021-12-14 14:48:05.465830+01:00 \n",
+ "768021 2737575 666986 346 2021-12-14 23:15:42.757832+01:00 \n",
+ "768022 2737576 666987 345 2021-12-15 00:14:59.018215+01:00 \n",
+ "768023 2737577 666987 346 2021-12-15 00:14:59.029434+01:00 \n",
+ "\n",
+ " updated_at name extra_field \n",
+ "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n",
+ "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n",
+ "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n",
+ "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n",
+ "4 2021-09-28 16:02:29.372608+02:00 NaN NaN \n",
+ "... ... ... ... \n",
+ "768019 2021-12-14 14:48:05.456842+01:00 NaN NaN \n",
+ "768020 2021-12-14 14:48:05.465830+01:00 NaN NaN \n",
+ "768021 2021-12-14 23:15:42.757832+01:00 NaN NaN \n",
+ "768022 2021-12-15 00:14:59.018215+01:00 NaN NaN \n",
+ "768023 2021-12-15 00:14:59.029434+01:00 NaN NaN \n",
+ "\n",
+ "[768024 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Display the customer-to-target mapping table (pandas abbreviates the repr of this large frame).\n",
+ "df1_customer_target_mappings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "id": "bcb53207-017c-4c62-ae05-56fbbfbeb3e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Move the target_type_id column so that it sits immediately before target_type_name.\n",
+ "\n",
+ "# Column to relocate, and the reference column it must end up in front of\n",
+ "column_to_move = 'target_type_id'\n",
+ "reference_column = 'target_type_name'\n",
+ "\n",
+ "# Keep a copy of the column, then remove it from the frame\n",
+ "column_copy = df1_targets_full[column_to_move].copy()\n",
+ "df1_targets_full = df1_targets_full.drop(column_to_move, axis=1)\n",
+ "\n",
+ "# Locate the reference column only AFTER the drop: its position is then exactly the insertion point.\n",
+ "# Looking it up before the drop would need an off-by-one correction that depends on which side of the\n",
+ "# reference the moved column started on.\n",
+ "reference_index = df1_targets_full.columns.get_loc(reference_column)\n",
+ "df1_targets_full.insert(reference_index, column_to_move, column_copy)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "id": "e3e2b729-c661-44dd-acf3-afdb85353bce",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " target_id | \n",
+ " target_name | \n",
+ " target_created_at | \n",
+ " target_updated_at | \n",
+ " target_type_is_import | \n",
+ " target_type_id | \n",
+ " target_type_name | \n",
+ " target_type_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " 2021-01-04 15:00:05.401899+01:00 | \n",
+ " 2021-03-02 18:38:19.025969+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " consentement optin scolaires | \n",
+ " 2021-12-21 16:03:59.840785+01:00 | \n",
+ " 2022-02-18 17:23:44.761388+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " 2020-11-10 09:43:19.667471+01:00 | \n",
+ " 2021-03-02 18:38:19.052304+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " consentement optout scolaires | \n",
+ " 2021-12-21 16:01:57.524946+01:00 | \n",
+ " 2022-02-18 17:23:44.807776+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " 2022-04-14 10:58:17.142834+02:00 | \n",
+ " 2022-04-14 10:58:23.677264+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " target_id target_name \\\n",
+ "0 217 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 consentement optin scolaires \n",
+ "2 134 DDCP Newsletter jeune public \n",
+ "3 700 consentement optout scolaires \n",
+ "4 964 DDCP achat billet nbr dep 19052021 \n",
+ "\n",
+ " target_created_at target_updated_at \\\n",
+ "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n",
+ "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n",
+ "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n",
+ "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n",
+ "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 \n",
+ "\n",
+ " target_type_is_import target_type_id target_type_name \\\n",
+ "0 False 56 manual_static_filter \n",
+ "1 False 56 manual_static_filter \n",
+ "2 False 56 manual_static_filter \n",
+ "3 False 56 manual_static_filter \n",
+ "4 False 56 manual_static_filter \n",
+ "\n",
+ " target_type_identifier \n",
+ "0 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "1 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "2 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "3 fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "4 fb27e81baa4debc6a4e1a8639c20e808 "
+ ]
+ },
+ "execution_count": 109,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Prefix every column name with 'target_', leaving names that already start with it untouched.\n",
+ "prefixed_names = {\n",
+ "    column: column if column.startswith('target_') else 'target_' + column\n",
+ "    for column in df1_targets_full.columns\n",
+ "}\n",
+ "df1_targets_full = df1_targets_full.rename(columns=prefixed_names)\n",
+ "df1_targets_full.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "id": "cda50294-e9f3-4c0e-9172-85fde93efa70",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at\n",
+ "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00\n",
+ "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00\n",
+ "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00\n",
+ "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00\n",
+ "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the first rows of the mapping table, restricted to its id, customer, target and creation-date columns.\n",
+ "mapping_columns = [\"id\", \"customer_id\", \"target_id\", \"created_at\"]\n",
+ "df1_customer_target_mappings[mapping_columns].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "id": "1aaac887-5ea9-4651-8628-920c7d80f120",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " target_name | \n",
+ " target_created_at | \n",
+ " target_updated_at | \n",
+ " target_type_is_import | \n",
+ " target_type_id | \n",
+ " target_type_name | \n",
+ " target_type_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " DDCP PROMO Réseau livres | \n",
+ " 2020-11-04 18:40:49.500866+01:00 | \n",
+ " 2021-03-02 18:38:19.084287+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " Inscrits NL générale site web | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " 2020-11-04 18:38:53.016572+01:00 | \n",
+ " 2021-04-16 17:17:25.850107+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " 2020-11-04 18:38:53.016572+01:00 | \n",
+ " 2021-04-16 17:17:25.850107+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " Votre première liste | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 768019 | \n",
+ " 2737545 | \n",
+ " 666983 | \n",
+ " 345 | \n",
+ " 2021-12-14 14:48:05.456842+01:00 | \n",
+ " Inscrits NL générale site web | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 768020 | \n",
+ " 2737546 | \n",
+ " 666983 | \n",
+ " 346 | \n",
+ " 2021-12-14 14:48:05.465830+01:00 | \n",
+ " Votre première liste | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 768021 | \n",
+ " 2737575 | \n",
+ " 666986 | \n",
+ " 346 | \n",
+ " 2021-12-14 23:15:42.757832+01:00 | \n",
+ " Votre première liste | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 768022 | \n",
+ " 2737576 | \n",
+ " 666987 | \n",
+ " 345 | \n",
+ " 2021-12-15 00:14:59.018215+01:00 | \n",
+ " Inscrits NL générale site web | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 768023 | \n",
+ " 2737577 | \n",
+ " 666987 | \n",
+ " 346 | \n",
+ " 2021-12-15 00:14:59.029434+01:00 | \n",
+ " Votre première liste | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
768024 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at \\\n",
+ "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n",
+ "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n",
+ "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n",
+ "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n",
+ "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n",
+ "... ... ... ... ... \n",
+ "768019 2737545 666983 345 2021-12-14 14:48:05.456842+01:00 \n",
+ "768020 2737546 666983 346 2021-12-14 14:48:05.465830+01:00 \n",
+ "768021 2737575 666986 346 2021-12-14 23:15:42.757832+01:00 \n",
+ "768022 2737576 666987 345 2021-12-15 00:14:59.018215+01:00 \n",
+ "768023 2737577 666987 346 2021-12-15 00:14:59.029434+01:00 \n",
+ "\n",
+ " target_name target_created_at \\\n",
+ "0 DDCP PROMO Réseau livres 2020-11-04 18:40:49.500866+01:00 \n",
+ "1 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n",
+ "2 DDCP PROMO Art contemporain 2020-11-04 18:38:53.016572+01:00 \n",
+ "3 DDCP PROMO Art contemporain 2020-11-04 18:38:53.016572+01:00 \n",
+ "4 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n",
+ "... ... ... \n",
+ "768019 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n",
+ "768020 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n",
+ "768021 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n",
+ "768022 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n",
+ "768023 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n",
+ "\n",
+ " target_updated_at target_type_is_import \\\n",
+ "0 2021-03-02 18:38:19.084287+01:00 False \n",
+ "1 2021-04-16 17:17:26.069199+02:00 False \n",
+ "2 2021-04-16 17:17:25.850107+02:00 False \n",
+ "3 2021-04-16 17:17:25.850107+02:00 False \n",
+ "4 2021-04-16 17:17:26.080378+02:00 False \n",
+ "... ... ... \n",
+ "768019 2021-04-16 17:17:26.069199+02:00 False \n",
+ "768020 2021-04-16 17:17:26.080378+02:00 False \n",
+ "768021 2021-04-16 17:17:26.080378+02:00 False \n",
+ "768022 2021-04-16 17:17:26.069199+02:00 False \n",
+ "768023 2021-04-16 17:17:26.080378+02:00 False \n",
+ "\n",
+ " target_type_id target_type_name target_type_identifier \n",
+ "0 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "1 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "2 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "3 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "4 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "... ... ... ... \n",
+ "768019 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "768020 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "768021 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "768022 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "768023 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "\n",
+ "[768024 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 111,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Finally, merge the customer/target mappings with the target details.\n",
+ "\n",
+ "# From df1_customer_target_mappings, the name, extra_field and updated_at columns are left out\n",
+ "# (updated_at holds the same value as created_at).\n",
+ "# Note: with a left join on df1_customer_target_mappings, the 2 targets that have no associated customer are dropped.\n",
+ "\n",
+ "df1_customer_targets = pd.merge(df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\", \"created_at\"]],\n",
+ "                                df1_targets_full, on='target_id', how='left')\n",
+ "\n",
+ "# A left join must keep exactly one row per mapping; a duplicated target_id in df1_targets_full would\n",
+ "# silently multiply rows, so fail loudly if that ever happens.\n",
+ "assert len(df1_customer_targets) == len(df1_customer_target_mappings), \"merge changed the number of mapping rows\"\n",
+ "df1_customer_targets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "id": "95657bda-d060-48ca-8217-3e3f119028c1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " target_name | \n",
+ " target_created_at | \n",
+ " target_updated_at | \n",
+ " target_type_is_import | \n",
+ " target_type_id | \n",
+ " target_type_name | \n",
+ " target_type_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " DDCP PROMO Réseau livres | \n",
+ " 2020-11-04 18:40:49.500866+01:00 | \n",
+ " 2021-03-02 18:38:19.084287+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " Inscrits NL générale site web | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " 2020-11-04 18:38:53.016572+01:00 | \n",
+ " 2021-04-16 17:17:25.850107+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " 2020-11-04 18:38:53.016572+01:00 | \n",
+ " 2021-04-16 17:17:25.850107+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " Votre première liste | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 768019 | \n",
+ " 2737545 | \n",
+ " 666983 | \n",
+ " 345 | \n",
+ " 2021-12-14 14:48:05.456842+01:00 | \n",
+ " Inscrits NL générale site web | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 768020 | \n",
+ " 2737546 | \n",
+ " 666983 | \n",
+ " 346 | \n",
+ " 2021-12-14 14:48:05.465830+01:00 | \n",
+ " Votre première liste | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 768021 | \n",
+ " 2737575 | \n",
+ " 666986 | \n",
+ " 346 | \n",
+ " 2021-12-14 23:15:42.757832+01:00 | \n",
+ " Votre première liste | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 768022 | \n",
+ " 2737576 | \n",
+ " 666987 | \n",
+ " 345 | \n",
+ " 2021-12-15 00:14:59.018215+01:00 | \n",
+ " Inscrits NL générale site web | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ " 768023 | \n",
+ " 2737577 | \n",
+ " 666987 | \n",
+ " 346 | \n",
+ " 2021-12-15 00:14:59.029434+01:00 | \n",
+ " Votre première liste | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
768024 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at \\\n",
+ "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n",
+ "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n",
+ "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n",
+ "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n",
+ "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n",
+ "... ... ... ... ... \n",
+ "768019 2737545 666983 345 2021-12-14 14:48:05.456842+01:00 \n",
+ "768020 2737546 666983 346 2021-12-14 14:48:05.465830+01:00 \n",
+ "768021 2737575 666986 346 2021-12-14 23:15:42.757832+01:00 \n",
+ "768022 2737576 666987 345 2021-12-15 00:14:59.018215+01:00 \n",
+ "768023 2737577 666987 346 2021-12-15 00:14:59.029434+01:00 \n",
+ "\n",
+ " target_name target_created_at \\\n",
+ "0 DDCP PROMO Réseau livres 2020-11-04 18:40:49.500866+01:00 \n",
+ "1 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n",
+ "2 DDCP PROMO Art contemporain 2020-11-04 18:38:53.016572+01:00 \n",
+ "3 DDCP PROMO Art contemporain 2020-11-04 18:38:53.016572+01:00 \n",
+ "4 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n",
+ "... ... ... \n",
+ "768019 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n",
+ "768020 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n",
+ "768021 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n",
+ "768022 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n",
+ "768023 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n",
+ "\n",
+ " target_updated_at target_type_is_import \\\n",
+ "0 2021-03-02 18:38:19.084287+01:00 False \n",
+ "1 2021-04-16 17:17:26.069199+02:00 False \n",
+ "2 2021-04-16 17:17:25.850107+02:00 False \n",
+ "3 2021-04-16 17:17:25.850107+02:00 False \n",
+ "4 2021-04-16 17:17:26.080378+02:00 False \n",
+ "... ... ... \n",
+ "768019 2021-04-16 17:17:26.069199+02:00 False \n",
+ "768020 2021-04-16 17:17:26.080378+02:00 False \n",
+ "768021 2021-04-16 17:17:26.080378+02:00 False \n",
+ "768022 2021-04-16 17:17:26.069199+02:00 False \n",
+ "768023 2021-04-16 17:17:26.080378+02:00 False \n",
+ "\n",
+ " target_type_id target_type_name target_type_identifier \n",
+ "0 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "1 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "2 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "3 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "4 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "... ... ... ... \n",
+ "768019 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "768020 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "768021 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "768022 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "768023 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n",
+ "\n",
+ "[768024 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 138,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Observation: a target's creation date looks only slightly earlier than the earliest date at which it was attached to a customer.\n",
+ "# Hypothesis: targets are created and then sent to customers, hence a small delay.\n",
+ "# Open question: why do customers not all receive the target at the same time?\n",
+ "\n",
+ "# Goal: check that a target's creation date is always earlier than the earliest creation date among the customers who received it.\n",
+ "\n",
+ "# First step: convert the timestamp strings into real datetimes.\n",
+ "# pd.to_datetime is vectorised (no per-row Python strptime call) and maps missing values to NaT.\n",
+ "# utc=True normalises the mixed +01:00 / +02:00 offsets to UTC, so the columns get a timezone-aware datetime dtype instead of object.\n",
+ "# Timestamps are therefore displayed in UTC from here on; differences between them are unaffected.\n",
+ "\n",
+ "date_columns = [\"created_at\", \"target_created_at\", \"target_updated_at\"]\n",
+ "df1_customer_targets[date_columns] = df1_customer_targets[date_columns].apply(pd.to_datetime, utc=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "id": "58b22fab-d13d-456a-8250-1da035572fe9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "target_id\n",
+ "116 0 days 00:00:00.949028\n",
+ "117 0 days 00:00:00.037337\n",
+ "119 0 days 00:00:00.024423\n",
+ "120 0 days 00:00:00.058732\n",
+ "122 0 days 00:00:00.027283\n",
+ " ... \n",
+ "2779 0 days 00:00:19.087958\n",
+ "2788 0 days 00:01:36.372927\n",
+ "2825 0 days 00:00:00.028771\n",
+ "2830 0 days 00:00:01.587058\n",
+ "2833 0 days 00:00:00.031071\n",
+ "Name: creation_delay, Length: 283, dtype: object"
+ ]
+ },
+ "execution_count": 144,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Second step: delay between the target's creation and each customer-level creation, then the smallest delay per target.\n",
+ "\n",
+ "df1_customer_targets[\"creation_delay\"] = df1_customer_targets[\"created_at\"].sub(\n",
+ "    df1_customer_targets[\"target_created_at\"]\n",
+ ")\n",
+ "\n",
+ "df1_customer_targets.groupby(\"target_id\")[\"creation_delay\"].min()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 148,
+ "id": "4b5c8f3e-9227-466c-a4c0-2280864a5036",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 days 00:00:00.009293\n",
+ "686 days 23:14:10.435866\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Smallest and largest of the per-target minimum delays (the grouping is computed once and reused).\n",
+ "per_target_min_delay = df1_customer_targets.groupby(\"target_id\")[\"creation_delay\"].min()\n",
+ "print(per_target_min_delay.min())\n",
+ "print(per_target_min_delay.max())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 153,
+ "id": "41e4040c-45a0-41ac-be91-4c86ef5ab1a8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "target_id\n",
+ "335 285 days 22:56:30.356536\n",
+ "339 86 days 21:34:19.282253\n",
+ "469 7 days 07:24:03.446563\n",
+ "490 3 days 16:28:38.068677\n",
+ "502 7 days 20:15:19.326651\n",
+ "515 1 days 22:49:33.761856\n",
+ "517 76 days 00:41:25.366394\n",
+ "528 26 days 06:17:44.689111\n",
+ "529 6 days 02:41:29.617761\n",
+ "530 1 days 04:34:33.843116\n",
+ "642 219 days 16:50:10.816034\n",
+ "695 668 days 03:31:22.896313\n",
+ "697 58 days 20:26:26.744823\n",
+ "699 686 days 23:14:10.435866\n",
+ "786 625 days 14:47:48.797084\n",
+ "1747 14 days 04:08:24.295840\n",
+ "2094 239 days 15:13:18.681637\n",
+ "2321 167 days 21:19:37.490219\n",
+ "Name: creation_delay, dtype: object"
+ ]
+ },
+ "execution_count": 153,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# In general the delay between a target's creation and its first customer is very short (almost instantaneous),\n",
+ "# but it is sometimes very long: more than a year in the most extreme cases.\n",
+ "\n",
+ "min_target_delay = df1_customer_targets.groupby(\"target_id\")[\"creation_delay\"].min()\n",
+ "min_target_delay.loc[min_target_delay.gt(timedelta(days=1))]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 155,
+ "id": "ffb2d1be-b1cb-4285-9584-d96ffeee146e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "target_type_id\n",
+ "1 0 days 00:00:06.490151\n",
+ "56 0 days 00:00:00.009293\n",
+ "69 0 days 00:00:00.032269\n",
+ "Name: creation_delay, dtype: object"
+ ]
+ },
+ "execution_count": 155,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_customer_targets.groupby(\"target_type_id\")[\"creation_delay\"].min() # les target de type 1 ont un plus grd délai"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 159,
+ "id": "44d5a1f5-0691-43de-bb9f-9915830bbb77",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[56 69 1]\n",
+ "[56 69 1]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(df1_customer_targets[\"target_type_id\"].unique())\n",
+ "print(df1_targets[\"target_type_id\"].unique()) # rq : slt 3 types de target sur les 4 sont dans la table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 165,
+ "id": "3a21df0d-0199-45d7-9019-e69dab67c9a8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " target_name | \n",
+ " target_created_at | \n",
+ " target_updated_at | \n",
+ " target_type_is_import | \n",
+ " target_type_id | \n",
+ " target_type_name | \n",
+ " target_type_identifier | \n",
+ " creation_delay | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " DDCP PROMO Réseau livres | \n",
+ " 2020-11-04 18:40:49.500866+01:00 | \n",
+ " 2021-03-02 18:38:19.084287+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ " 322 days, 13:54:58.116409 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " Inscrits NL générale site web | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " 2021-04-16 17:17:26.069199+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ " 159 days, 16:18:21.599647 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " 2020-11-04 18:38:53.016572+01:00 | \n",
+ " 2021-04-16 17:17:25.850107+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ " 322 days, 16:23:58.236697 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " 2020-11-04 18:38:53.016572+01:00 | \n",
+ " 2021-04-16 17:17:25.850107+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ " 322 days, 16:41:54.377908 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " Votre première liste | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " 2021-04-16 17:17:26.080378+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ " 164 days, 22:45:03.292230 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 1184833 | \n",
+ " 645627 | \n",
+ " 398 | \n",
+ " 2021-09-24 18:16:33.432760+02:00 | \n",
+ " DDCP PROMO MD participants ateliers yoga | \n",
+ " 2021-05-26 10:54:12.232999+02:00 | \n",
+ " 2021-05-26 10:54:22.378253+02:00 | \n",
+ " False | \n",
+ " 69 | \n",
+ " manual_dynamic_filter | \n",
+ " e0f4b8693184850fefd6d2a38f10584e | \n",
+ " 121 days, 7:22:21.199761 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 4452818 | \n",
+ " 1208736 | \n",
+ " 631 | \n",
+ " 2023-05-06 03:29:43.875970+02:00 | \n",
+ " consentement optin b2b | \n",
+ " 2021-11-30 10:03:37.430645+01:00 | \n",
+ " 2022-02-18 17:21:30.653027+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ " 521 days, 16:26:06.445325 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 4291702 | \n",
+ " 1155845 | \n",
+ " 502 | \n",
+ " 2022-09-28 12:55:36.843316+02:00 | \n",
+ " Automation_parrainage_newsletter_générale | \n",
+ " 2021-08-10 15:25:56.142538+02:00 | \n",
+ " 2021-08-10 15:26:06.275964+02:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ " 413 days, 21:29:40.700778 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 4096406 | \n",
+ " 1121651 | \n",
+ " 469 | \n",
+ " 2022-07-31 11:45:19.694236+02:00 | \n",
+ " RI Newsletter Alexandrie (inscriptions formula... | \n",
+ " 2021-07-08 11:31:10.246495+02:00 | \n",
+ " 2022-01-26 12:14:17.941253+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ " 388 days, 0:14:09.447741 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 4452824 | \n",
+ " 1208742 | \n",
+ " 631 | \n",
+ " 2023-05-06 03:29:43.901323+02:00 | \n",
+ " consentement optin b2b | \n",
+ " 2021-11-30 10:03:37.430645+01:00 | \n",
+ " 2022-02-18 17:21:30.653027+01:00 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ " 521 days, 16:26:06.470678 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at \\\n",
+ "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n",
+ "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n",
+ "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n",
+ "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n",
+ "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n",
+ "5 1184833 645627 398 2021-09-24 18:16:33.432760+02:00 \n",
+ "6 4452818 1208736 631 2023-05-06 03:29:43.875970+02:00 \n",
+ "7 4291702 1155845 502 2022-09-28 12:55:36.843316+02:00 \n",
+ "8 4096406 1121651 469 2022-07-31 11:45:19.694236+02:00 \n",
+ "9 4452824 1208742 631 2023-05-06 03:29:43.901323+02:00 \n",
+ "\n",
+ " target_name \\\n",
+ "0 DDCP PROMO Réseau livres \n",
+ "1 Inscrits NL générale site web \n",
+ "2 DDCP PROMO Art contemporain \n",
+ "3 DDCP PROMO Art contemporain \n",
+ "4 Votre première liste \n",
+ "5 DDCP PROMO MD participants ateliers yoga \n",
+ "6 consentement optin b2b \n",
+ "7 Automation_parrainage_newsletter_générale \n",
+ "8 RI Newsletter Alexandrie (inscriptions formula... \n",
+ "9 consentement optin b2b \n",
+ "\n",
+ " target_created_at target_updated_at \\\n",
+ "0 2020-11-04 18:40:49.500866+01:00 2021-03-02 18:38:19.084287+01:00 \n",
+ "1 2021-04-16 17:17:26.069199+02:00 2021-04-16 17:17:26.069199+02:00 \n",
+ "2 2020-11-04 18:38:53.016572+01:00 2021-04-16 17:17:25.850107+02:00 \n",
+ "3 2020-11-04 18:38:53.016572+01:00 2021-04-16 17:17:25.850107+02:00 \n",
+ "4 2021-04-16 17:17:26.080378+02:00 2021-04-16 17:17:26.080378+02:00 \n",
+ "5 2021-05-26 10:54:12.232999+02:00 2021-05-26 10:54:22.378253+02:00 \n",
+ "6 2021-11-30 10:03:37.430645+01:00 2022-02-18 17:21:30.653027+01:00 \n",
+ "7 2021-08-10 15:25:56.142538+02:00 2021-08-10 15:26:06.275964+02:00 \n",
+ "8 2021-07-08 11:31:10.246495+02:00 2022-01-26 12:14:17.941253+01:00 \n",
+ "9 2021-11-30 10:03:37.430645+01:00 2022-02-18 17:21:30.653027+01:00 \n",
+ "\n",
+ " target_type_is_import target_type_id target_type_name \\\n",
+ "0 False 56 manual_static_filter \n",
+ "1 False 56 manual_static_filter \n",
+ "2 False 56 manual_static_filter \n",
+ "3 False 56 manual_static_filter \n",
+ "4 False 56 manual_static_filter \n",
+ "5 False 69 manual_dynamic_filter \n",
+ "6 False 56 manual_static_filter \n",
+ "7 False 56 manual_static_filter \n",
+ "8 False 56 manual_static_filter \n",
+ "9 False 56 manual_static_filter \n",
+ "\n",
+ " target_type_identifier creation_delay \n",
+ "0 fb27e81baa4debc6a4e1a8639c20e808 322 days, 13:54:58.116409 \n",
+ "1 fb27e81baa4debc6a4e1a8639c20e808 159 days, 16:18:21.599647 \n",
+ "2 fb27e81baa4debc6a4e1a8639c20e808 322 days, 16:23:58.236697 \n",
+ "3 fb27e81baa4debc6a4e1a8639c20e808 322 days, 16:41:54.377908 \n",
+ "4 fb27e81baa4debc6a4e1a8639c20e808 164 days, 22:45:03.292230 \n",
+ "5 e0f4b8693184850fefd6d2a38f10584e 121 days, 7:22:21.199761 \n",
+ "6 fb27e81baa4debc6a4e1a8639c20e808 521 days, 16:26:06.445325 \n",
+ "7 fb27e81baa4debc6a4e1a8639c20e808 413 days, 21:29:40.700778 \n",
+ "8 fb27e81baa4debc6a4e1a8639c20e808 388 days, 0:14:09.447741 \n",
+ "9 fb27e81baa4debc6a4e1a8639c20e808 521 days, 16:26:06.470678 "
+ ]
+ },
+ "execution_count": 165,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# final visu : nice table for targets\n",
+ "\n",
+ "# pour la suite, on peut supprimer la colonne creation delay, \n",
+ "# était juste utile pour vérifier que la date de création était postérieure à la date de création de la target\n",
+ "\n",
+ "df1_customer_targets.head(10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d762394b-3aee-4284-a472-40a6b6f4308a",
+ "metadata": {},
+ "source": [
+ "## Campaign stats, campaigns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 189,
+ "id": "9d338a1a-52a5-49c4-a277-37be3f190e81",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " service_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " process_id | \n",
+ " report_url | \n",
+ " category | \n",
+ " to_be_synced | \n",
+ " identifier | \n",
+ " sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1319613 | \n",
+ " newsletter enseignants janvier 2022 | \n",
+ " 721 | \n",
+ " 2022-01-14 16:06:42.586321+01:00 | \n",
+ " 2022-02-03 14:17:27.112963+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " aba3b6fd5d186d28e06ff97135cade7f | \n",
+ " 2022-01-14 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1319586 | \n",
+ " lsf_janvier_2022 | \n",
+ " 717 | \n",
+ " 2022-01-07 11:30:35.315895+01:00 | \n",
+ " 2022-02-03 14:17:27.116171+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 788d986905533aba051261497ecffcbb | \n",
+ " 2022-01-07 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1319282 | \n",
+ " Invitation à déjeuner au Mucem | Vernissage « ... | \n",
+ " 591 | \n",
+ " 2021-09-28 12:50:24.448752+02:00 | \n",
+ " 2022-02-03 14:17:27.119582+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 3493894fa4ea036cfc6433c3e2ee63b0 | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1319283 | \n",
+ " Vacances de la Toussaint - centres des loisirs | \n",
+ " 590 | \n",
+ " 2021-09-28 18:01:04.692073+02:00 | \n",
+ " 2022-02-03 14:17:27.124408+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 08b255a5d42b89b0585260b6f2360bdd | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1319636 | \n",
+ " ddcp_promo_md_livemag | \n",
+ " 730 | \n",
+ " 2022-01-27 18:00:41.053069+01:00 | \n",
+ " 2022-02-03 14:17:27.127607+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " d5cfead94f5350c12c322b5b664544c1 | \n",
+ " 2022-01-27 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name service_id \\\n",
+ "0 1319613 newsletter enseignants janvier 2022 721 \n",
+ "1 1319586 lsf_janvier_2022 717 \n",
+ "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n",
+ "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n",
+ "4 1319636 ddcp_promo_md_livemag 730 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n",
+ "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n",
+ "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n",
+ "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n",
+ "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n",
+ "\n",
+ " process_id report_url category to_be_synced \\\n",
+ "0 NaN NaN 0.0 False \n",
+ "1 NaN NaN 0.0 False \n",
+ "2 NaN NaN 0.0 False \n",
+ "3 NaN NaN 0.0 False \n",
+ "4 NaN NaN 0.0 False \n",
+ "\n",
+ " identifier sent_at \n",
+ "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n",
+ "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n",
+ "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n",
+ "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n",
+ "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 189,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1. campaigns\n",
+ "\n",
+ "df1_campaigns.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 171,
+ "id": "fad1a58c-cece-45f9-a44f-ca46884a9a81",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0.000000\n",
+ "name 0.000000\n",
+ "service_id 0.000000\n",
+ "created_at 0.000000\n",
+ "updated_at 0.000000\n",
+ "process_id 1.000000\n",
+ "report_url 1.000000\n",
+ "category 0.002090\n",
+ "to_be_synced 0.000000\n",
+ "identifier 0.000000\n",
+ "sent_at 0.003135\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 171,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Share of missing values (NaN) in each column: the mean of the boolean mask is the fraction of True values.\n",
+ "\n",
+ "df1_campaigns.isna().mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 185,
+ "id": "cdeebf18-a3a4-4131-ad88-d45c39ec5786",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "name object\n",
+ "service_id int64\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "process_id float64\n",
+ "report_url float64\n",
+ "category float64\n",
+ "to_be_synced bool\n",
+ "identifier object\n",
+ "sent_at object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 185,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_campaigns.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 186,
+ "id": "5c9b669a-477b-4f33-86df-b22ff2c21382",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "str"
+ ]
+ },
+ "execution_count": 186,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(df1_campaigns[\"identifier\"][0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 187,
+ "id": "b5b0af8d-b9a0-4224-a229-d74d90ac2686",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 0., nan])"
+ ]
+ },
+ "execution_count": 187,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# category: list the distinct values taken by the column (NaN included).\n",
+ "# .unique() is what the saved output array([0., nan]) corresponds to; .isna() would only dump a per-row boolean mask.\n",
+ "\n",
+ "df1_campaigns[\"category\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 191,
+ "id": "4cc618ae-063f-48fc-bce7-8b72d30ad4ca",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "957\n",
+ "957\n"
+ ]
+ }
+ ],
+ "source": [
+ "# identifier: compare the number of distinct values with the number of rows\n",
+ "\n",
+ "print(df1_campaigns[\"identifier\"].nunique())\n",
+ "print(len(df1_campaigns))  # both counts match, so identifier is unique"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 194,
+ "id": "d13c3f21-ebd7-4e9b-baca-1f3a10ac24a9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "id 957\n",
+ "name 855\n",
+ "service_id 957\n",
+ "created_at 957\n",
+ "updated_at 957\n",
+ "process_id 0\n",
+ "report_url 0\n",
+ "category 1\n",
+ "to_be_synced 2\n",
+ "identifier 957\n",
+ "sent_at 737\n",
+ "dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# service id\n",
+ "\n",
+ "print(df1_campaigns.nunique()) # on a un identifiant de service par campagne, mais pas un nom unique"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 211,
+ "id": "aea65b10-8a7f-472e-a7f5-455a90d3cfef",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " service_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " process_id | \n",
+ " report_url | \n",
+ " category | \n",
+ " to_be_synced | \n",
+ " identifier | \n",
+ " sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 777 | \n",
+ " 1319239 | \n",
+ " \"L'Orient sonore\" au Mucem à partir du 22 juillet | \n",
+ " 184 | \n",
+ " 2021-09-24 11:56:09.277085+02:00 | \n",
+ " 2021-09-24 11:56:09.277085+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 6cdd60ea0045eb7a6ec44c54d29ed402 | \n",
+ " 2020-07-15 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 778 | \n",
+ " 1319240 | \n",
+ " \"L'Orient sonore\" au Mucem à partir du 22 juillet | \n",
+ " 181 | \n",
+ " 2021-09-24 11:56:09.284647+02:00 | \n",
+ " 2021-09-24 11:56:09.284647+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " fc221309746013ac554571fbd180e1c8 | \n",
+ " 2020-07-09 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 255 | \n",
+ " 1320926 | \n",
+ " Alexandrie NL2 | \n",
+ " 1116 | \n",
+ " 2023-01-31 11:08:55.915268+01:00 | \n",
+ " 2023-01-31 11:08:56.286044+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " dd77279f7d325eec933f05b1672f6a1f | \n",
+ " 2023-01-31 12:08:54+01:00 | \n",
+ "
\n",
+ " \n",
+ " 161 | \n",
+ " 1320910 | \n",
+ " Alexandrie NL2 | \n",
+ " 1077 | \n",
+ " 2023-01-24 09:01:00.250855+01:00 | \n",
+ " 2023-01-24 09:01:00.271292+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 062ddb6c727310e76b6200b7c71f63b5 | \n",
+ " 2023-01-24 10:00:58+01:00 | \n",
+ "
\n",
+ " \n",
+ " 241 | \n",
+ " 1320574 | \n",
+ " Alexandrie NL2 | \n",
+ " 731 | \n",
+ " 2022-10-11 07:00:50.971513+02:00 | \n",
+ " 2022-12-02 17:51:21.670983+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 59c33016884a62116be975a9bb8257e3 | \n",
+ " 2022-10-11 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 317 | \n",
+ " 1320972 | \n",
+ " Centres_loisirs _vacances de février | \n",
+ " 1124 | \n",
+ " 2023-02-08 12:01:16.732961+01:00 | \n",
+ " 2023-02-08 12:01:16.808008+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " c7635bfd99248a2cdef8249ef7bfbef4 | \n",
+ " 2023-02-08 13:01:15+01:00 | \n",
+ "
\n",
+ " \n",
+ " 166 | \n",
+ " 1320954 | \n",
+ " Centres_loisirs _vacances de février | \n",
+ " 1110 | \n",
+ " 2023-02-01 09:30:41.267232+01:00 | \n",
+ " 2023-02-01 09:30:41.354117+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 2cbca44843a864533ec05b321ae1f9d1 | \n",
+ " 2023-02-01 10:30:40+01:00 | \n",
+ "
\n",
+ " \n",
+ " 672 | \n",
+ " 148 | \n",
+ " Champ social décembre 2020 | \n",
+ " 283 | \n",
+ " 2021-04-03 18:24:42.186026+02:00 | \n",
+ " 2021-09-24 11:56:08.182818+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 0f49c89d1e7298bb9930789c8ed59d48 | \n",
+ " 2020-12-03 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 569 | \n",
+ " 72 | \n",
+ " Champ social décembre 2020 | \n",
+ " 284 | \n",
+ " 2021-03-29 15:41:53.631952+02:00 | \n",
+ " 2021-09-24 11:56:07.748770+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 46ba9f2a6976570b0353203ec4474217 | \n",
+ " 2020-12-04 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 175 | \n",
+ " 1319881 | \n",
+ " Champ social mars 2022 | \n",
+ " 833 | \n",
+ " 2022-04-25 10:00:26.029871+02:00 | \n",
+ " 2022-12-02 17:51:22.319899+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 013a006f03dbc5392effeb8f18fda755 | \n",
+ " 2022-04-25 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 316 | \n",
+ " 1319760 | \n",
+ " Champ social mars 2022 | \n",
+ " 785 | \n",
+ " 2022-03-11 13:00:28.333251+01:00 | \n",
+ " 2022-12-02 17:51:21.991906+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 4b04a686b0ad13dce35fa99fa4161c65 | \n",
+ " 2022-03-11 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 326 | \n",
+ " 1319798 | \n",
+ " DDCP Newsletter Destination Mucem Est 2 | \n",
+ " 804 | \n",
+ " 2022-03-22 10:21:02.122363+01:00 | \n",
+ " 2022-12-02 17:51:22.119041+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " dc5689792e08eb2e219dce49e64c885b | \n",
+ " 2022-03-22 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 177 | \n",
+ " 1319882 | \n",
+ " DDCP Newsletter Destination Mucem Est 2 | \n",
+ " 843 | \n",
+ " 2022-04-26 09:00:44.083713+02:00 | \n",
+ " 2022-12-02 17:51:22.454684+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 3d8e28caf901313a554cebc7d32e67e5 | \n",
+ " 2022-04-26 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 347 | \n",
+ " 1319883 | \n",
+ " DDCP Newsletter Destination Mucem Nord 2 | \n",
+ " 845 | \n",
+ " 2022-04-26 09:00:46.020370+02:00 | \n",
+ " 2022-12-02 17:51:22.463986+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " b86e8d03fe992d1b0e19656875ee557c | \n",
+ " 2022-04-26 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 319 | \n",
+ " 1319768 | \n",
+ " DDCP Newsletter Destination Mucem Nord 2 | \n",
+ " 789 | \n",
+ " 2022-03-17 10:20:51.757178+01:00 | \n",
+ " 2022-12-02 17:51:22.064760+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 68053af2923e00204c3ca7c6a3150cf7 | \n",
+ " 2022-03-17 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 176 | \n",
+ " 1319885 | \n",
+ " DDCP Newsletter Destination Mucem Nord Est 2 | \n",
+ " 842 | \n",
+ " 2022-04-26 09:30:57.232149+02:00 | \n",
+ " 2022-12-02 17:51:22.447304+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " fc3cf452d3da8402bebb765225ce8c0e | \n",
+ " 2022-04-26 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 324 | \n",
+ " 1319769 | \n",
+ " DDCP Newsletter Destination Mucem Nord Est 2 | \n",
+ " 800 | \n",
+ " 2022-03-17 10:22:58.736431+01:00 | \n",
+ " 2022-12-02 17:51:22.107694+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 7a53928fa4dd31e82c6ef826f341daec | \n",
+ " 2022-03-17 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 243 | \n",
+ " 1319884 | \n",
+ " DDCP Newsletter Destination Mucem Sud 2 | \n",
+ " 844 | \n",
+ " 2022-04-26 09:00:46.894528+02:00 | \n",
+ " 2022-12-02 17:51:22.459272+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " e97ee2054defb209c35fe4dc94599061 | \n",
+ " 2022-04-26 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 327 | \n",
+ " 1319799 | \n",
+ " DDCP Newsletter Destination Mucem Sud 2 | \n",
+ " 805 | \n",
+ " 2022-03-22 10:24:05.787335+01:00 | \n",
+ " 2022-12-02 17:51:22.123726+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 846c260d715e5b854ffad5f70a516c88 | \n",
+ " 2022-03-22 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 620 | \n",
+ " 2681 | \n",
+ " DDCP PROMO programmation Orient sonore Pass mu... | \n",
+ " 226 | \n",
+ " 2021-04-08 21:10:40.634455+02:00 | \n",
+ " 2021-09-24 11:56:07.922243+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 9cfdf10e8fc047a44b08ed031e1f0ed1 | \n",
+ " 2020-10-09 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name service_id \\\n",
+ "777 1319239 \"L'Orient sonore\" au Mucem à partir du 22 juillet 184 \n",
+ "778 1319240 \"L'Orient sonore\" au Mucem à partir du 22 juillet 181 \n",
+ "255 1320926 Alexandrie NL2 1116 \n",
+ "161 1320910 Alexandrie NL2 1077 \n",
+ "241 1320574 Alexandrie NL2 731 \n",
+ "317 1320972 Centres_loisirs _vacances de février 1124 \n",
+ "166 1320954 Centres_loisirs _vacances de février 1110 \n",
+ "672 148 Champ social décembre 2020 283 \n",
+ "569 72 Champ social décembre 2020 284 \n",
+ "175 1319881 Champ social mars 2022 833 \n",
+ "316 1319760 Champ social mars 2022 785 \n",
+ "326 1319798 DDCP Newsletter Destination Mucem Est 2 804 \n",
+ "177 1319882 DDCP Newsletter Destination Mucem Est 2 843 \n",
+ "347 1319883 DDCP Newsletter Destination Mucem Nord 2 845 \n",
+ "319 1319768 DDCP Newsletter Destination Mucem Nord 2 789 \n",
+ "176 1319885 DDCP Newsletter Destination Mucem Nord Est 2 842 \n",
+ "324 1319769 DDCP Newsletter Destination Mucem Nord Est 2 800 \n",
+ "243 1319884 DDCP Newsletter Destination Mucem Sud 2 844 \n",
+ "327 1319799 DDCP Newsletter Destination Mucem Sud 2 805 \n",
+ "620 2681 DDCP PROMO programmation Orient sonore Pass mu... 226 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "777 2021-09-24 11:56:09.277085+02:00 2021-09-24 11:56:09.277085+02:00 \n",
+ "778 2021-09-24 11:56:09.284647+02:00 2021-09-24 11:56:09.284647+02:00 \n",
+ "255 2023-01-31 11:08:55.915268+01:00 2023-01-31 11:08:56.286044+01:00 \n",
+ "161 2023-01-24 09:01:00.250855+01:00 2023-01-24 09:01:00.271292+01:00 \n",
+ "241 2022-10-11 07:00:50.971513+02:00 2022-12-02 17:51:21.670983+01:00 \n",
+ "317 2023-02-08 12:01:16.732961+01:00 2023-02-08 12:01:16.808008+01:00 \n",
+ "166 2023-02-01 09:30:41.267232+01:00 2023-02-01 09:30:41.354117+01:00 \n",
+ "672 2021-04-03 18:24:42.186026+02:00 2021-09-24 11:56:08.182818+02:00 \n",
+ "569 2021-03-29 15:41:53.631952+02:00 2021-09-24 11:56:07.748770+02:00 \n",
+ "175 2022-04-25 10:00:26.029871+02:00 2022-12-02 17:51:22.319899+01:00 \n",
+ "316 2022-03-11 13:00:28.333251+01:00 2022-12-02 17:51:21.991906+01:00 \n",
+ "326 2022-03-22 10:21:02.122363+01:00 2022-12-02 17:51:22.119041+01:00 \n",
+ "177 2022-04-26 09:00:44.083713+02:00 2022-12-02 17:51:22.454684+01:00 \n",
+ "347 2022-04-26 09:00:46.020370+02:00 2022-12-02 17:51:22.463986+01:00 \n",
+ "319 2022-03-17 10:20:51.757178+01:00 2022-12-02 17:51:22.064760+01:00 \n",
+ "176 2022-04-26 09:30:57.232149+02:00 2022-12-02 17:51:22.447304+01:00 \n",
+ "324 2022-03-17 10:22:58.736431+01:00 2022-12-02 17:51:22.107694+01:00 \n",
+ "243 2022-04-26 09:00:46.894528+02:00 2022-12-02 17:51:22.459272+01:00 \n",
+ "327 2022-03-22 10:24:05.787335+01:00 2022-12-02 17:51:22.123726+01:00 \n",
+ "620 2021-04-08 21:10:40.634455+02:00 2021-09-24 11:56:07.922243+02:00 \n",
+ "\n",
+ " process_id report_url category to_be_synced \\\n",
+ "777 NaN NaN 0.0 False \n",
+ "778 NaN NaN 0.0 False \n",
+ "255 NaN NaN 0.0 False \n",
+ "161 NaN NaN 0.0 False \n",
+ "241 NaN NaN 0.0 False \n",
+ "317 NaN NaN 0.0 False \n",
+ "166 NaN NaN 0.0 False \n",
+ "672 NaN NaN 0.0 False \n",
+ "569 NaN NaN 0.0 False \n",
+ "175 NaN NaN 0.0 False \n",
+ "316 NaN NaN 0.0 False \n",
+ "326 NaN NaN 0.0 False \n",
+ "177 NaN NaN 0.0 False \n",
+ "347 NaN NaN 0.0 False \n",
+ "319 NaN NaN 0.0 False \n",
+ "176 NaN NaN 0.0 False \n",
+ "324 NaN NaN 0.0 False \n",
+ "243 NaN NaN 0.0 False \n",
+ "327 NaN NaN 0.0 False \n",
+ "620 NaN NaN 0.0 False \n",
+ "\n",
+ " identifier sent_at \n",
+ "777 6cdd60ea0045eb7a6ec44c54d29ed402 2020-07-15 00:00:00+02:00 \n",
+ "778 fc221309746013ac554571fbd180e1c8 2020-07-09 00:00:00+02:00 \n",
+ "255 dd77279f7d325eec933f05b1672f6a1f 2023-01-31 12:08:54+01:00 \n",
+ "161 062ddb6c727310e76b6200b7c71f63b5 2023-01-24 10:00:58+01:00 \n",
+ "241 59c33016884a62116be975a9bb8257e3 2022-10-11 00:00:00+02:00 \n",
+ "317 c7635bfd99248a2cdef8249ef7bfbef4 2023-02-08 13:01:15+01:00 \n",
+ "166 2cbca44843a864533ec05b321ae1f9d1 2023-02-01 10:30:40+01:00 \n",
+ "672 0f49c89d1e7298bb9930789c8ed59d48 2020-12-03 00:00:00+01:00 \n",
+ "569 46ba9f2a6976570b0353203ec4474217 2020-12-04 00:00:00+01:00 \n",
+ "175 013a006f03dbc5392effeb8f18fda755 2022-04-25 00:00:00+02:00 \n",
+ "316 4b04a686b0ad13dce35fa99fa4161c65 2022-03-11 00:00:00+01:00 \n",
+ "326 dc5689792e08eb2e219dce49e64c885b 2022-03-22 00:00:00+01:00 \n",
+ "177 3d8e28caf901313a554cebc7d32e67e5 2022-04-26 00:00:00+02:00 \n",
+ "347 b86e8d03fe992d1b0e19656875ee557c 2022-04-26 00:00:00+02:00 \n",
+ "319 68053af2923e00204c3ca7c6a3150cf7 2022-03-17 00:00:00+01:00 \n",
+ "176 fc3cf452d3da8402bebb765225ce8c0e 2022-04-26 00:00:00+02:00 \n",
+ "324 7a53928fa4dd31e82c6ef826f341daec 2022-03-17 00:00:00+01:00 \n",
+ "243 e97ee2054defb209c35fe4dc94599061 2022-04-26 00:00:00+02:00 \n",
+ "327 846c260d715e5b854ffad5f70a516c88 2022-03-22 00:00:00+01:00 \n",
+ "620 9cfdf10e8fc047a44b08ed031e1f0ed1 2020-10-09 00:00:00+02:00 "
+ ]
+ },
+ "execution_count": 211,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# name: campaigns whose name is shared with at least one other campaign\n",
+ "# (first 20 rows, ordered by name)\n",
+ "\n",
+ "(\n",
+ "    df1_campaigns\n",
+ "    .loc[lambda frame: frame.duplicated(subset=[\"name\"], keep=False)]\n",
+ "    .sort_values(\"name\")\n",
+ "    .head(20)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 207,
+ "id": "35ea834e-01a3-4841-a9a9-351c25c5af37",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "175 True\n",
+ "316 True\n",
+ "dtype: bool"
+ ]
+ },
+ "execution_count": 207,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Spot check on a single duplicated name: flag every row carrying that name\n",
+ "\n",
+ "(\n",
+ "    df1_campaigns\n",
+ "    .loc[lambda frame: frame[\"name\"].eq(\"Champ social mars 2022\")]\n",
+ "    .duplicated(subset=\"name\", keep=False)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 226,
+ "id": "5e16bf37-c2e0-48c9-8a90-6713f7c6206c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Share of campaigns to synce : 0.52 % \n"
+ ]
+ }
+ ],
+ "source": [
+ "# to_be_synced: percentage of campaigns flagged to_be_synced, rounded to 2 decimals\n",
+ "\n",
+ "share_campaigns_to_be_synced = round(df1_campaigns[\"to_be_synced\"].mean() * 100, 2)\n",
+ "# about 0.5% of campaigns are flagged\n",
+ "print(f\"Share of campaigns to synce : {share_campaigns_to_be_synced} % \")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 235,
+ "id": "88a6f9d4-ddd2-4288-9bba-7d9e76c66f51",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " service_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " process_id | \n",
+ " report_url | \n",
+ " category | \n",
+ " to_be_synced | \n",
+ " identifier | \n",
+ " sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 43 | \n",
+ " 1320752 | \n",
+ " dre_alors_on_sort0712_tech&cult1212_lesreveill... | \n",
+ " 1019 | \n",
+ " 2022-11-28 09:30:31.189207+01:00 | \n",
+ " 2022-12-02 17:51:23.474745+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " 03e0704b5690a2dee1861dc3ad3316c9 | \n",
+ " 2022-11-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " 1320755 | \n",
+ " News hebdo du 5 au 4 décembre 2022 | \n",
+ " 1060 | \n",
+ " 2022-12-04 18:01:29.971417+01:00 | \n",
+ " 2022-12-04 18:01:30.037656+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " 299a23a2291e2126b91d54f3601ec162 | \n",
+ " 2022-12-04 19:01:27+01:00 | \n",
+ "
\n",
+ " \n",
+ " 464 | \n",
+ " 1320749 | \n",
+ " dre_le_sel_241122 | \n",
+ " 1054 | \n",
+ " 2022-11-24 09:01:37.467710+01:00 | \n",
+ " 2022-12-02 17:51:23.622812+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " db576a7d2453575f29eab4bac787b919 | \n",
+ " 2022-11-24 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 465 | \n",
+ " 1320751 | \n",
+ " News hebdo du 28 novembre au 4 décembre | \n",
+ " 1057 | \n",
+ " 2022-11-27 18:01:44.546081+01:00 | \n",
+ " 2022-12-02 17:51:23.627178+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " d8700cbd38cc9f30cecb34f0c195b137 | \n",
+ " 2022-11-27 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 888 | \n",
+ " 1319474 | \n",
+ " ddcp_promo_temps fort salammbo | \n",
+ " 670 | \n",
+ " 2021-11-25 13:19:41.547780+01:00 | \n",
+ " 2022-02-03 14:17:27.728648+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " 17c276c8e723eb46aef576537e9d56d0 | \n",
+ " 2021-11-25 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name service_id \\\n",
+ "43 1320752 dre_alors_on_sort0712_tech&cult1212_lesreveill... 1019 \n",
+ "79 1320755 News hebdo du 5 au 4 décembre 2022 1060 \n",
+ "464 1320749 dre_le_sel_241122 1054 \n",
+ "465 1320751 News hebdo du 28 novembre au 4 décembre 1057 \n",
+ "888 1319474 ddcp_promo_temps fort salammbo 670 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "43 2022-11-28 09:30:31.189207+01:00 2022-12-02 17:51:23.474745+01:00 \n",
+ "79 2022-12-04 18:01:29.971417+01:00 2022-12-04 18:01:30.037656+01:00 \n",
+ "464 2022-11-24 09:01:37.467710+01:00 2022-12-02 17:51:23.622812+01:00 \n",
+ "465 2022-11-27 18:01:44.546081+01:00 2022-12-02 17:51:23.627178+01:00 \n",
+ "888 2021-11-25 13:19:41.547780+01:00 2022-02-03 14:17:27.728648+01:00 \n",
+ "\n",
+ " process_id report_url category to_be_synced \\\n",
+ "43 NaN NaN 0.0 True \n",
+ "79 NaN NaN 0.0 True \n",
+ "464 NaN NaN 0.0 True \n",
+ "465 NaN NaN 0.0 True \n",
+ "888 NaN NaN 0.0 True \n",
+ "\n",
+ " identifier sent_at \n",
+ "43 03e0704b5690a2dee1861dc3ad3316c9 2022-11-28 00:00:00+01:00 \n",
+ "79 299a23a2291e2126b91d54f3601ec162 2022-12-04 19:01:27+01:00 \n",
+ "464 db576a7d2453575f29eab4bac787b919 2022-11-24 00:00:00+01:00 \n",
+ "465 d8700cbd38cc9f30cecb34f0c195b137 2022-11-27 00:00:00+01:00 \n",
+ "888 17c276c8e723eb46aef576537e9d56d0 2021-11-25 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 235,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Focus: campaigns flagged to_be_synced (5 cases)\n",
+ "# The sending date looks consistent; nothing else noteworthy about these cases.\n",
+ "\n",
+ "df1_campaigns.loc[df1_campaigns[\"to_be_synced\"]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 234,
+ "id": "cf9dedd6-2554-4f9e-a09b-f1465718a18d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " service_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " process_id | \n",
+ " report_url | \n",
+ " category | \n",
+ " to_be_synced | \n",
+ " identifier | \n",
+ " sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 43 | \n",
+ " 1320752 | \n",
+ " dre_alors_on_sort0712_tech&cult1212_lesreveill... | \n",
+ " 1019 | \n",
+ " 2022-11-28 09:30:31.189207+01:00 | \n",
+ " 2022-12-02 17:51:23.474745+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " 03e0704b5690a2dee1861dc3ad3316c9 | \n",
+ " 2022-11-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " 1320755 | \n",
+ " News hebdo du 5 au 4 décembre 2022 | \n",
+ " 1060 | \n",
+ " 2022-12-04 18:01:29.971417+01:00 | \n",
+ " 2022-12-04 18:01:30.037656+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " 299a23a2291e2126b91d54f3601ec162 | \n",
+ " 2022-12-04 19:01:27+01:00 | \n",
+ "
\n",
+ " \n",
+ " 464 | \n",
+ " 1320749 | \n",
+ " dre_le_sel_241122 | \n",
+ " 1054 | \n",
+ " 2022-11-24 09:01:37.467710+01:00 | \n",
+ " 2022-12-02 17:51:23.622812+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " db576a7d2453575f29eab4bac787b919 | \n",
+ " 2022-11-24 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 465 | \n",
+ " 1320751 | \n",
+ " News hebdo du 28 novembre au 4 décembre | \n",
+ " 1057 | \n",
+ " 2022-11-27 18:01:44.546081+01:00 | \n",
+ " 2022-12-02 17:51:23.627178+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " d8700cbd38cc9f30cecb34f0c195b137 | \n",
+ " 2022-11-27 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 888 | \n",
+ " 1319474 | \n",
+ " ddcp_promo_temps fort salammbo | \n",
+ " 670 | \n",
+ " 2021-11-25 13:19:41.547780+01:00 | \n",
+ " 2022-02-03 14:17:27.728648+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " True | \n",
+ " 17c276c8e723eb46aef576537e9d56d0 | \n",
+ " 2021-11-25 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name service_id \\\n",
+ "43 1320752 dre_alors_on_sort0712_tech&cult1212_lesreveill... 1019 \n",
+ "79 1320755 News hebdo du 5 au 4 décembre 2022 1060 \n",
+ "464 1320749 dre_le_sel_241122 1054 \n",
+ "465 1320751 News hebdo du 28 novembre au 4 décembre 1057 \n",
+ "888 1319474 ddcp_promo_temps fort salammbo 670 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "43 2022-11-28 09:30:31.189207+01:00 2022-12-02 17:51:23.474745+01:00 \n",
+ "79 2022-12-04 18:01:29.971417+01:00 2022-12-04 18:01:30.037656+01:00 \n",
+ "464 2022-11-24 09:01:37.467710+01:00 2022-12-02 17:51:23.622812+01:00 \n",
+ "465 2022-11-27 18:01:44.546081+01:00 2022-12-02 17:51:23.627178+01:00 \n",
+ "888 2021-11-25 13:19:41.547780+01:00 2022-02-03 14:17:27.728648+01:00 \n",
+ "\n",
+ " process_id report_url category to_be_synced \\\n",
+ "43 NaN NaN 0.0 True \n",
+ "79 NaN NaN 0.0 True \n",
+ "464 NaN NaN 0.0 True \n",
+ "465 NaN NaN 0.0 True \n",
+ "888 NaN NaN 0.0 True \n",
+ "\n",
+ " identifier sent_at \n",
+ "43 03e0704b5690a2dee1861dc3ad3316c9 2022-11-28 00:00:00+01:00 \n",
+ "79 299a23a2291e2126b91d54f3601ec162 2022-12-04 19:01:27+01:00 \n",
+ "464 db576a7d2453575f29eab4bac787b919 2022-11-24 00:00:00+01:00 \n",
+ "465 d8700cbd38cc9f30cecb34f0c195b137 2022-11-27 00:00:00+01:00 \n",
+ "888 17c276c8e723eb46aef576537e9d56d0 2021-11-25 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 234,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Every campaign whose name matches the name of a campaign flagged to_be_synced\n",
+ "\n",
+ "df1_campaigns.loc[\n",
+ "    lambda frame: frame[\"name\"].isin(frame.loc[frame[\"to_be_synced\"], \"name\"].unique())\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 237,
+ "id": "ba2f188f-be49-4e19-9cb3-0ec54e58d0c1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " service_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " process_id | \n",
+ " report_url | \n",
+ " category | \n",
+ " to_be_synced | \n",
+ " identifier | \n",
+ " sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1319613 | \n",
+ " newsletter enseignants janvier 2022 | \n",
+ " 721 | \n",
+ " 2022-01-14 16:06:42.586321+01:00 | \n",
+ " 2022-02-03 14:17:27.112963+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " aba3b6fd5d186d28e06ff97135cade7f | \n",
+ " 2022-01-14 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1319586 | \n",
+ " lsf_janvier_2022 | \n",
+ " 717 | \n",
+ " 2022-01-07 11:30:35.315895+01:00 | \n",
+ " 2022-02-03 14:17:27.116171+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 788d986905533aba051261497ecffcbb | \n",
+ " 2022-01-07 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1319282 | \n",
+ " Invitation à déjeuner au Mucem | Vernissage « ... | \n",
+ " 591 | \n",
+ " 2021-09-28 12:50:24.448752+02:00 | \n",
+ " 2022-02-03 14:17:27.119582+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 3493894fa4ea036cfc6433c3e2ee63b0 | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1319283 | \n",
+ " Vacances de la Toussaint - centres des loisirs | \n",
+ " 590 | \n",
+ " 2021-09-28 18:01:04.692073+02:00 | \n",
+ " 2022-02-03 14:17:27.124408+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 08b255a5d42b89b0585260b6f2360bdd | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1319636 | \n",
+ " ddcp_promo_md_livemag | \n",
+ " 730 | \n",
+ " 2022-01-27 18:00:41.053069+01:00 | \n",
+ " 2022-02-03 14:17:27.127607+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " d5cfead94f5350c12c322b5b664544c1 | \n",
+ " 2022-01-27 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 1319614 | \n",
+ " News hebdo du 17 janv au 23 janv 2022 | \n",
+ " 712 | \n",
+ " 2022-01-16 18:01:28.974157+01:00 | \n",
+ " 2022-02-03 14:17:27.130944+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 19bc916108fc6938f52cb96f7e087941 | \n",
+ " 2022-01-16 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1319263 | \n",
+ " ddcp_promo_automne_littérature_relance_nn_ouverts | \n",
+ " 586 | \n",
+ " 2021-09-24 15:00:04.174247+02:00 | \n",
+ " 2021-09-24 16:13:10.505400+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 605ff764c617d3cd28dbbdd72be8f9a2 | \n",
+ " 2021-09-24 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 1319284 | \n",
+ " Invitation au vernissage de l'exposition \"La C... | \n",
+ " 593 | \n",
+ " 2021-09-30 14:47:18.135394+02:00 | \n",
+ " 2022-02-03 14:17:27.134073+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " acc3e0404646c57502b480dc052c4fe1 | \n",
+ " 2021-09-30 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 1319625 | \n",
+ " dre_mobilisations_artistiques_et_politiques | \n",
+ " 704 | \n",
+ " 2022-01-27 10:01:16.716706+01:00 | \n",
+ " 2022-02-03 14:17:27.172039+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " f64eac11f2cd8f0efa196f8ad173178e | \n",
+ " 2022-01-27 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1319285 | \n",
+ " ddcp_promo_soyinka_taubira_infos_pratiques | \n",
+ " 594 | \n",
+ " 2021-10-01 12:16:57.031796+02:00 | \n",
+ " 2022-02-03 14:17:27.137444+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 076a0c97d09cf1a0ec3e19c7f2529f2b | \n",
+ " 2021-10-01 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name service_id \\\n",
+ "0 1319613 newsletter enseignants janvier 2022 721 \n",
+ "1 1319586 lsf_janvier_2022 717 \n",
+ "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n",
+ "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n",
+ "4 1319636 ddcp_promo_md_livemag 730 \n",
+ "5 1319614 News hebdo du 17 janv au 23 janv 2022 712 \n",
+ "6 1319263 ddcp_promo_automne_littérature_relance_nn_ouverts 586 \n",
+ "7 1319284 Invitation au vernissage de l'exposition \"La C... 593 \n",
+ "8 1319625 dre_mobilisations_artistiques_et_politiques 704 \n",
+ "9 1319285 ddcp_promo_soyinka_taubira_infos_pratiques 594 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n",
+ "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n",
+ "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n",
+ "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n",
+ "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n",
+ "5 2022-01-16 18:01:28.974157+01:00 2022-02-03 14:17:27.130944+01:00 \n",
+ "6 2021-09-24 15:00:04.174247+02:00 2021-09-24 16:13:10.505400+02:00 \n",
+ "7 2021-09-30 14:47:18.135394+02:00 2022-02-03 14:17:27.134073+01:00 \n",
+ "8 2022-01-27 10:01:16.716706+01:00 2022-02-03 14:17:27.172039+01:00 \n",
+ "9 2021-10-01 12:16:57.031796+02:00 2022-02-03 14:17:27.137444+01:00 \n",
+ "\n",
+ " process_id report_url category to_be_synced \\\n",
+ "0 NaN NaN 0.0 False \n",
+ "1 NaN NaN 0.0 False \n",
+ "2 NaN NaN 0.0 False \n",
+ "3 NaN NaN 0.0 False \n",
+ "4 NaN NaN 0.0 False \n",
+ "5 NaN NaN 0.0 False \n",
+ "6 NaN NaN 0.0 False \n",
+ "7 NaN NaN 0.0 False \n",
+ "8 NaN NaN 0.0 False \n",
+ "9 NaN NaN 0.0 False \n",
+ "\n",
+ " identifier sent_at \n",
+ "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n",
+ "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n",
+ "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n",
+ "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n",
+ "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 \n",
+ "5 19bc916108fc6938f52cb96f7e087941 2022-01-16 00:00:00+01:00 \n",
+ "6 605ff764c617d3cd28dbbdd72be8f9a2 2021-09-24 00:00:00+02:00 \n",
+ "7 acc3e0404646c57502b480dc052c4fe1 2021-09-30 00:00:00+02:00 \n",
+ "8 f64eac11f2cd8f0efa196f8ad173178e 2022-01-27 00:00:00+01:00 \n",
+ "9 076a0c97d09cf1a0ec3e19c7f2529f2b 2021-10-01 00:00:00+02:00 "
+ ]
+ },
+ "execution_count": 237,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# For comparison: first 10 campaigns that are not flagged to_be_synced\n",
+ "\n",
+ "df1_campaigns.loc[~df1_campaigns[\"to_be_synced\"]].head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 240,
+ "id": "4bf2cbdd-6236-43b8-9a13-74f2803a6ac5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " 2021-03-28 18:34:20.616136+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " 2021-03-28 18:21:04.297213+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " 2021-03-28 18:18:49.991042+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " 2021-03-28 18:09:50.915354+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " 2021-03-28 18:05:08.507398+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 4809 | \n",
+ " 58 | \n",
+ " 104599 | \n",
+ " 2021-03-28 18:12:12+02:00 | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:04:18+02:00 | \n",
+ " 2021-03-28 18:04:19.662496+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 11605 | \n",
+ " 58 | \n",
+ " 280579 | \n",
+ " 2021-03-28 18:16:14+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:16:09+02:00 | \n",
+ " 2021-03-28 18:16:10.974208+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 18714 | \n",
+ " 58 | \n",
+ " 34173 | \n",
+ " 2021-03-29 05:31:37+02:00 | \n",
+ " 2021-03-28 18:00:58+02:00 | \n",
+ " 2021-03-28 18:31:02+02:00 | \n",
+ " 2021-03-28 18:31:07.619032+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 17119 | \n",
+ " 58 | \n",
+ " 34992 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:58+02:00 | \n",
+ " 2021-03-28 18:28:00+02:00 | \n",
+ " 2021-03-28 18:28:03.574600+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 14001 | \n",
+ " 58 | \n",
+ " 35343 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:58+02:00 | \n",
+ " 2021-03-28 18:20:48+02:00 | \n",
+ " 2021-03-28 18:20:49.258826+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "5 4809 58 104599 2021-03-28 18:12:12+02:00 \n",
+ "6 11605 58 280579 2021-03-28 18:16:14+02:00 \n",
+ "7 18714 58 34173 2021-03-29 05:31:37+02:00 \n",
+ "8 17119 58 34992 NaN \n",
+ "9 14001 58 35343 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "5 2021-03-28 18:01:06+02:00 2021-03-28 18:04:18+02:00 \n",
+ "6 2021-03-28 18:00:59+02:00 2021-03-28 18:16:09+02:00 \n",
+ "7 2021-03-28 18:00:58+02:00 2021-03-28 18:31:02+02:00 \n",
+ "8 2021-03-28 18:00:58+02:00 2021-03-28 18:28:00+02:00 \n",
+ "9 2021-03-28 18:00:58+02:00 2021-03-28 18:20:48+02:00 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "5 2021-03-28 18:04:19.662496+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "6 2021-03-28 18:16:10.974208+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "7 2021-03-28 18:31:07.619032+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "8 2021-03-28 18:28:03.574600+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "9 2021-03-28 18:20:49.258826+02:00 2022-04-15 22:52:04.397693+02:00 "
+ ]
+ },
+ "execution_count": 240,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 2. Campaign stats table: preview of the first 10 rows\n",
+ "\n",
+ "df1_campaign_stats.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 242,
+ "id": "0bf6f3d8-40f3-4268-a89d-fc962acd6c4a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0.000000\n",
+ "campaign_id 0.000000\n",
+ "customer_id 0.000000\n",
+ "opened_at 0.807672\n",
+ "sent_at 0.000969\n",
+ "delivered_at 0.021495\n",
+ "created_at 0.000000\n",
+ "updated_at 0.000000\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 242,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Share of missing values in each column of the campaign stats table\n",
+ "\n",
+ "df1_campaign_stats.isna().mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 243,
+ "id": "2d3140db-fa86-41dd-81c9-2c6ca1e9402e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "campaign_id int64\n",
+ "customer_id int64\n",
+ "opened_at object\n",
+ "sent_at object\n",
+ "delivered_at object\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 243,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_campaign_stats.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 244,
+ "id": "e4cc1b7c-5956-41c3-ad59-2738c5f2778c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 6214808\n",
+ "campaign_id 949\n",
+ "customer_id 130472\n",
+ "opened_at 1102699\n",
+ "sent_at 152184\n",
+ "delivered_at 380248\n",
+ "created_at 4295988\n",
+ "updated_at 2176478\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 244,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Total number of rows, followed by the number of distinct values in each column\n",
+ "\n",
+ "print(len(df1_campaign_stats), df1_campaign_stats.nunique(), sep=\"\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 262,
+ "id": "8735c5dd-1d02-4dae-804e-70ee1be08df8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " 2021-03-28 18:34:20.616136+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " 2021-03-28 18:21:04.297213+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " 2021-03-28 18:18:49.991042+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " 2021-03-28 18:09:50.915354+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " 2021-03-28 18:05:08.507398+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 "
+ ]
+ },
+ "execution_count": 262,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3. Merge campaigns and campaign stats (preview of the stats table before merging)\n",
+ "\n",
+ "df1_campaign_stats.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 273,
+ "id": "1e88efca-96b1-4977-b633-25d13830633e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1319243, 1319245, 1319247, 1319248, 1319250, 1319259, 1319260,\n",
+ " 1319262])"
+ ]
+ },
+ "execution_count": 273,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Step before the merge: do both tables hold the same campaign identifiers?\n",
+ "# The result lists the campaign ids found in campaigns but absent from campaign stats.\n",
+ "# NOTE(review): only this direction is checked; ids present in campaign stats\n",
+ "# but missing from campaigns would not show up here.\n",
+ "\n",
+ "id_campaigns = np.unique(df1_campaigns[\"id\"])\n",
+ "id_campaigns_stats = np.unique(df1_campaign_stats[\"campaign_id\"])\n",
+ "np.setdiff1d(id_campaigns, id_campaigns_stats)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 275,
+ "id": "43440e38-b141-43f1-9e0c-fa8559218e76",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " service_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " process_id | \n",
+ " report_url | \n",
+ " category | \n",
+ " to_be_synced | \n",
+ " identifier | \n",
+ " sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 789 | \n",
+ " 1319243 | \n",
+ " DRE Exposer le récit 13 mars | \n",
+ " 111 | \n",
+ " 2021-09-24 11:56:09.307905+02:00 | \n",
+ " 2021-09-24 11:56:09.307905+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 698d51a19d8a121ce581499d7b701668 | \n",
+ " 2020-03-03 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 791 | \n",
+ " 1319245 | \n",
+ " SDR Relance invit petit dej voyage voyages | \n",
+ " 109 | \n",
+ " 2021-09-24 11:56:09.323919+02:00 | \n",
+ " 2021-09-24 11:56:09.323919+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 2723d092b63885e0d7c260cc007e8b9d | \n",
+ " 2020-02-24 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 793 | \n",
+ " 1319247 | \n",
+ " Au Mucem en 2020 | \n",
+ " 97 | \n",
+ " 2021-09-24 11:56:09.339127+02:00 | \n",
+ " 2021-09-24 11:56:09.339127+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " e2ef524fbf3d9fe611d5a8e90fefdc9c | \n",
+ " 2020-01-31 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 794 | \n",
+ " 1319248 | \n",
+ " DRE Giono | \n",
+ " 92 | \n",
+ " 2021-09-24 11:56:09.346887+02:00 | \n",
+ " 2021-09-24 11:56:09.346887+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 92cc227532d17e56e07902b254dfad10 | \n",
+ " 2020-01-29 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 796 | \n",
+ " 1319250 | \n",
+ " Portes ouvertes \"Voyage, voyages\" au Mucem | M... | \n",
+ " 77 | \n",
+ " 2021-09-24 11:56:09.362114+02:00 | \n",
+ " 2021-09-24 11:56:09.362114+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 28dd2c7955ce926456240b2ff0100bde | \n",
+ " 2020-01-13 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 805 | \n",
+ " 1319259 | \n",
+ " Save the date | Vernissage \"Voyage, voyages\" a... | \n",
+ " 38 | \n",
+ " 2021-09-24 11:56:09.432720+02:00 | \n",
+ " 2021-09-24 11:56:09.432720+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " a5771bce93e200c36f7cd9dfd0e5deaa | \n",
+ " 2019-11-20 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 806 | \n",
+ " 1319260 | \n",
+ " Portes ouvertes \"Massilia Toy\" au Mucem | Merc... | \n",
+ " 37 | \n",
+ " 2021-09-24 11:56:09.440465+02:00 | \n",
+ " 2021-09-24 11:56:09.440465+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " a5bfc9e07964f8dddeb95fc584cd965d | \n",
+ " 2019-11-20 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 808 | \n",
+ " 1319262 | \n",
+ " TENK S-1 Corse | \n",
+ " 17 | \n",
+ " 2021-09-24 11:56:09.456460+02:00 | \n",
+ " 2021-09-24 11:56:09.456460+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 70efdf2ec9b086079795c442636b55fb | \n",
+ " 2019-11-07 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name service_id \\\n",
+ "789 1319243 DRE Exposer le récit 13 mars 111 \n",
+ "791 1319245 SDR Relance invit petit dej voyage voyages 109 \n",
+ "793 1319247 Au Mucem en 2020 97 \n",
+ "794 1319248 DRE Giono 92 \n",
+ "796 1319250 Portes ouvertes \"Voyage, voyages\" au Mucem | M... 77 \n",
+ "805 1319259 Save the date | Vernissage \"Voyage, voyages\" a... 38 \n",
+ "806 1319260 Portes ouvertes \"Massilia Toy\" au Mucem | Merc... 37 \n",
+ "808 1319262 TENK S-1 Corse 17 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "789 2021-09-24 11:56:09.307905+02:00 2021-09-24 11:56:09.307905+02:00 \n",
+ "791 2021-09-24 11:56:09.323919+02:00 2021-09-24 11:56:09.323919+02:00 \n",
+ "793 2021-09-24 11:56:09.339127+02:00 2021-09-24 11:56:09.339127+02:00 \n",
+ "794 2021-09-24 11:56:09.346887+02:00 2021-09-24 11:56:09.346887+02:00 \n",
+ "796 2021-09-24 11:56:09.362114+02:00 2021-09-24 11:56:09.362114+02:00 \n",
+ "805 2021-09-24 11:56:09.432720+02:00 2021-09-24 11:56:09.432720+02:00 \n",
+ "806 2021-09-24 11:56:09.440465+02:00 2021-09-24 11:56:09.440465+02:00 \n",
+ "808 2021-09-24 11:56:09.456460+02:00 2021-09-24 11:56:09.456460+02:00 \n",
+ "\n",
+ " process_id report_url category to_be_synced \\\n",
+ "789 NaN NaN 0.0 False \n",
+ "791 NaN NaN 0.0 False \n",
+ "793 NaN NaN 0.0 False \n",
+ "794 NaN NaN 0.0 False \n",
+ "796 NaN NaN 0.0 False \n",
+ "805 NaN NaN 0.0 False \n",
+ "806 NaN NaN 0.0 False \n",
+ "808 NaN NaN 0.0 False \n",
+ "\n",
+ " identifier sent_at \n",
+ "789 698d51a19d8a121ce581499d7b701668 2020-03-03 00:00:00+01:00 \n",
+ "791 2723d092b63885e0d7c260cc007e8b9d 2020-02-24 00:00:00+01:00 \n",
+ "793 e2ef524fbf3d9fe611d5a8e90fefdc9c 2020-01-31 00:00:00+01:00 \n",
+ "794 92cc227532d17e56e07902b254dfad10 2020-01-29 00:00:00+01:00 \n",
+ "796 28dd2c7955ce926456240b2ff0100bde 2020-01-13 00:00:00+01:00 \n",
+ "805 a5771bce93e200c36f7cd9dfd0e5deaa 2019-11-20 00:00:00+01:00 \n",
+ "806 a5bfc9e07964f8dddeb95fc584cd965d 2019-11-20 00:00:00+01:00 \n",
+ "808 70efdf2ec9b086079795c442636b55fb 2019-11-07 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 275,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# below are campaigns with no associated customer in the table\n",
+ "# they will be dropped during the merge because they are not useful to our study\n",
+ "# we will do a left merge based on campaign_stats\n",
+ "\n",
+ "# Derive the unmatched ids from the data instead of hard-coding them, so that the\n",
+ "# selection stays correct if the underlying tables change.\n",
+ "df1_campaigns[~df1_campaigns[\"id\"].isin(df1_campaign_stats[\"campaign_id\"])]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 338,
+ "id": "6cbcd261-a6ba-497c-929b-29a714e1812d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_created_at | \n",
+ " campaign_updated_at | \n",
+ " campaign_sent_at | \n",
+ " campaign_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " 2021-03-28 18:34:20.616136+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " 2021-03-28 18:21:04.297213+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " 2021-03-28 18:18:49.991042+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " 2021-03-28 18:09:50.915354+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " 2021-03-28 18:05:08.507398+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "0 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "1 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "2 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "3 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "\n",
+ " campaign_created_at campaign_updated_at \\\n",
+ "0 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "1 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "2 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "3 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "4 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "\n",
+ " campaign_sent_at campaign_identifier \n",
+ "0 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "1 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "2 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "3 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "4 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a "
+ ]
+ },
+ "execution_count": 338,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Merge campaign stats with campaigns.\n",
+ "\n",
+ "# From campaigns, drop the variables that are always NaN, as well as to_be_synced,\n",
+ "# which does not look very informative.\n",
+ "# Campaign columns get a \"campaign_\" prefix, which turns `id` into the\n",
+ "# `campaign_id` join key; the left join keeps every row of campaign stats.\n",
+ "\n",
+ "df1_campaigns_full = df1_campaign_stats.merge(\n",
+ "    df1_campaigns.loc[\n",
+ "        :, [\"id\", \"name\", \"service_id\", \"created_at\", \"updated_at\", \"sent_at\", \"identifier\"]\n",
+ "    ].add_prefix(\"campaign_\"),\n",
+ "    how=\"left\",\n",
+ "    on=\"campaign_id\",\n",
+ ")\n",
+ "df1_campaigns_full.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 328,
+ "id": "81e549e9-d165-439a-a824-17f053a33983",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0\n",
+ "campaign_id 0\n",
+ "customer_id 0\n",
+ "opened_at 5019527\n",
+ "sent_at 6023\n",
+ "delivered_at 133590\n",
+ "created_at 0\n",
+ "updated_at 0\n",
+ "campaign_name 0\n",
+ "campaign_service_id 0\n",
+ "campaign_created_at 0\n",
+ "campaign_updated_at 0\n",
+ "campaign_sent_at 6\n",
+ "campaign_identifier 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 328,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of missing values in each column of the merged table\n",
+ "df1_campaigns_full.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 297,
+ "id": "aa249cdc-e0ac-41ec-b6f8-b9459f31eca3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# How do `sent_at` and `campaign_sent_at` relate?\n",
+ "# What does the campaign date correspond to: is it the first send to a customer?\n",
+ "\n",
+ "# First step: give the date columns a proper datetime dtype.\n",
+ "# The table has about 6M rows, so the parsing is vectorized with pd.to_datetime;\n",
+ "# parsing every value individually with dateutil is very time-consuming.\n",
+ "\n",
+ "from dateutil import parser  # not used by this cell anymore; kept in case later cells expect `parser`\n",
+ "\n",
+ "def convert_to_datetime(column):\n",
+ "    \"\"\"Parse a column of timestamps into timezone-aware UTC datetimes.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    column : pd.Series\n",
+ "        Timestamps as strings carrying a UTC offset, possibly with missing values.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pd.Series\n",
+ "        The same instants expressed in UTC; missing values become NaT.\n",
+ "\n",
+ "    Raises\n",
+ "    ------\n",
+ "    ValueError\n",
+ "        If a value cannot be parsed as a date.\n",
+ "    \"\"\"\n",
+ "    # utc=True: offsets differ between values (e.g. +01:00 and +02:00), so everything\n",
+ "    # is normalized to UTC to obtain a single datetime64 dtype.\n",
+ "    return pd.to_datetime(column, utc=True)\n",
+ "\n",
+ "# Columns to convert\n",
+ "# NOTE(review): `opened_at` is not in this list - confirm that this is intentional.\n",
+ "columns_to_convert = [\"sent_at\", \"delivered_at\", \"created_at\", \"updated_at\",\n",
+ "                      \"campaign_sent_at\", \"campaign_created_at\", \"campaign_updated_at\"]\n",
+ "\n",
+ "# Apply the conversion to each of the listed columns\n",
+ "df1_campaigns_full[columns_to_convert] = df1_campaigns_full[columns_to_convert].apply(convert_to_datetime)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 329,
+ "id": "f2b05227-e8d8-4ca8-8359-dc3471841763",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "UTC: 2021-03-28 16:01:09+00:00\n",
+ "Local: 2021-03-28 18:01:09+02:00\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Exemple d'élément\n",
+ "date_string = '2021-03-28 18:01:09+02:00'\n",
+ "\n",
+ "# Convertir en datetime en utilisant pd.to_datetime avec utc=True\n",
+ "datetime_object_utc = pd.to_datetime(date_string, utc=True)\n",
+ "print(\"UTC:\", datetime_object_utc)\n",
+ "\n",
+ "# Convertir en datetime en utilisant pd.to_datetime avec utc=False (ou sans spécifier utc)\n",
+ "datetime_object_local = pd.to_datetime(date_string, utc=False)\n",
+ "print(\"Local:\", datetime_object_local)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 332,
+ "id": "63fa4af8-0c28-4b20-97e2-560da4d4b77e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "UTC: 2021-03-28 16:00:00+00:00\n",
+ "Différence en heures: 1.5\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Exemple d'élément\n",
+ "date_string = '2021-03-28 18:00:00+02:00'\n",
+ "\n",
+ "# Convertir en datetime en utilisant pd.to_datetime avec utc=True\n",
+ "datetime_object_utc = pd.to_datetime(date_string, utc=True)\n",
+ "\n",
+ "# Afficher l'objet datetime en UTC\n",
+ "print(\"UTC:\", datetime_object_utc)\n",
+ "\n",
+ "# Effectuer un calcul de différence entre deux dates en UTC\n",
+ "other_date_string = '2021-03-28 20:30:00+03:00'\n",
+ "other_datetime_object_utc = pd.to_datetime(other_date_string, utc=True)\n",
+ "\n",
+ "# Calculer la différence entre les dates\n",
+ "time_difference = other_datetime_object_utc - datetime_object_utc\n",
+ "\n",
+ "# Afficher la différence\n",
+ "print(\"Différence en heures:\", time_difference.total_seconds() / 3600)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 321,
+ "id": "9388c008-e2a5-463d-95d2-8f5fea0d6a5a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_created_at | \n",
+ " campaign_updated_at | \n",
+ " campaign_sent_at | \n",
+ " campaign_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " 2021-03-28 18:34:20.616136+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " 2021-03-28 18:21:04.297213+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " 2021-03-28 18:18:49.991042+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " 2021-03-28 18:09:50.915354+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " 2021-03-28 18:05:08.507398+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "0 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "1 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "2 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "3 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "\n",
+ " campaign_created_at campaign_updated_at \\\n",
+ "0 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "1 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "2 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "3 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "4 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "\n",
+ " campaign_sent_at campaign_identifier \n",
+ "0 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "1 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "2 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "3 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "4 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a "
+ ]
+ },
+ "execution_count": 321,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# extra step to make sure that values which cannot be converted to dates end up as NaT\n",
+ "\n",
+ "# utc=True is needed because the timestamps carry different UTC offsets\n",
+ "# (e.g. +01:00 and +02:00): without it pandas warns about mixed time zones and\n",
+ "# the columns do not get a datetime64 dtype.\n",
+ "df1_campaigns_full[columns_to_convert] = df1_campaigns_full[columns_to_convert].apply(\n",
+ "    pd.to_datetime, errors='coerce', utc=True\n",
+ ")\n",
+ "df1_campaigns_full.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 333,
+ "id": "edb2f622-bf19-4c51-8213-1b8a3dacf72e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_624/1309539541.py:3: FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`\n",
+ " df1_campaigns_full[\"sent_at\"] = pd.to_datetime(df1_campaigns_full[\"sent_at\"] , utc=False).astype('datetime64[ns]')\n"
+ ]
+ },
+ {
+ "ename": "ValueError",
+ "evalue": "Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True, at position 18",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[333], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# autre methode\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m df1_campaigns_full[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msent_at\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf1_campaigns_full\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msent_at\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdatetime64[ns]\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/generic.py:6534\u001b[0m, in \u001b[0;36mNDFrame.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 6530\u001b[0m results \u001b[38;5;241m=\u001b[39m [ser\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy) \u001b[38;5;28;01mfor\u001b[39;00m _, ser \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mitems()]\n\u001b[1;32m 6532\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6533\u001b[0m \u001b[38;5;66;03m# else, only a single dtype is given\u001b[39;00m\n\u001b[0;32m-> 6534\u001b[0m new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6535\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor_from_mgr(new_data, axes\u001b[38;5;241m=\u001b[39mnew_data\u001b[38;5;241m.\u001b[39maxes)\n\u001b[1;32m 6536\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mastype\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/internals/managers.py:414\u001b[0m, in \u001b[0;36mBaseBlockManager.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 412\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mastype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43musing_cow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43musing_copy_on_write\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/internals/managers.py:354\u001b[0m, in \u001b[0;36mBaseBlockManager.apply\u001b[0;34m(self, f, align_keys, **kwargs)\u001b[0m\n\u001b[1;32m 352\u001b[0m applied \u001b[38;5;241m=\u001b[39m b\u001b[38;5;241m.\u001b[39mapply(f, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 353\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 354\u001b[0m applied \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 355\u001b[0m result_blocks \u001b[38;5;241m=\u001b[39m extend_blocks(applied, result_blocks)\n\u001b[1;32m 357\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39mfrom_blocks(result_blocks, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes)\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/internals/blocks.py:616\u001b[0m, in \u001b[0;36mBlock.astype\u001b[0;34m(self, dtype, copy, errors, using_cow)\u001b[0m\n\u001b[1;32m 596\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;124;03mCoerce to the new dtype.\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 612\u001b[0m \u001b[38;5;124;03mBlock\u001b[39;00m\n\u001b[1;32m 613\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 614\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m--> 616\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array_safe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 618\u001b[0m new_values \u001b[38;5;241m=\u001b[39m maybe_coerce_values(new_values)\n\u001b[1;32m 620\u001b[0m refs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:238\u001b[0m, in \u001b[0;36mastype_array_safe\u001b[0;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 235\u001b[0m dtype \u001b[38;5;241m=\u001b[39m dtype\u001b[38;5;241m.\u001b[39mnumpy_dtype\n\u001b[1;32m 237\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 238\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m 240\u001b[0m \u001b[38;5;66;03m# e.g. _astype_nansafe can fail on object-dtype of strings\u001b[39;00m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# trying to convert to float\u001b[39;00m\n\u001b[1;32m 242\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:183\u001b[0m, in \u001b[0;36mastype_array\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m 180\u001b[0m values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 183\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[43m_astype_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[38;5;66;03m# in pandas we don't store numpy str dtypes, so convert to object\u001b[39;00m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(dtype, np\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:110\u001b[0m, in \u001b[0;36m_astype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mis_np_dtype(dtype, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mM\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m to_datetime\n\u001b[0;32m--> 110\u001b[0m dti \u001b[38;5;241m=\u001b[39m \u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mravel\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 111\u001b[0m dta \u001b[38;5;241m=\u001b[39m dti\u001b[38;5;241m.\u001b[39m_data\u001b[38;5;241m.\u001b[39mreshape(arr\u001b[38;5;241m.\u001b[39mshape)\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dta\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\u001b[38;5;241m.\u001b[39m_ndarray\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:1131\u001b[0m, in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 1123\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1124\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_maybe_cache\" has incompatible type\u001b[39;00m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;66;03m# \"Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,\u001b[39;00m\n\u001b[1;32m 1126\u001b[0m \u001b[38;5;66;03m# ndarray[Any, Any], Series]\"; expected \"Union[List[Any], Tuple[Any, ...],\u001b[39;00m\n\u001b[1;32m 1127\u001b[0m \u001b[38;5;66;03m# Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]\"\u001b[39;00m\n\u001b[1;32m 1128\u001b[0m argc \u001b[38;5;241m=\u001b[39m cast(\n\u001b[1;32m 1129\u001b[0m Union[\u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m, ExtensionArray, np\u001b[38;5;241m.\u001b[39mndarray, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSeries\u001b[39m\u001b[38;5;124m\"\u001b[39m, Index], arg\n\u001b[1;32m 1130\u001b[0m )\n\u001b[0;32m-> 1131\u001b[0m cache_array \u001b[38;5;241m=\u001b[39m \u001b[43m_maybe_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43margc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcache\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert_listlike\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m OutOfBoundsDatetime:\n\u001b[1;32m 1133\u001b[0m \u001b[38;5;66;03m# caching attempts to create a DatetimeIndex, which may raise\u001b[39;00m\n\u001b[1;32m 1134\u001b[0m \u001b[38;5;66;03m# an OOB. 
If that's the desired behavior, then just reraise...\u001b[39;00m\n\u001b[1;32m 1135\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:254\u001b[0m, in \u001b[0;36m_maybe_cache\u001b[0;34m(arg, format, cache, convert_listlike)\u001b[0m\n\u001b[1;32m 252\u001b[0m unique_dates \u001b[38;5;241m=\u001b[39m unique(arg)\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(unique_dates) \u001b[38;5;241m<\u001b[39m \u001b[38;5;28mlen\u001b[39m(arg):\n\u001b[0;32m--> 254\u001b[0m cache_dates \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_listlike\u001b[49m\u001b[43m(\u001b[49m\u001b[43munique_dates\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;66;03m# GH#45319\u001b[39;00m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:490\u001b[0m, in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmixed\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 488\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _array_strptime_with_fallback(arg, name, utc, \u001b[38;5;28mformat\u001b[39m, exact, errors)\n\u001b[0;32m--> 490\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m \u001b[43mobjects_to_datetime64ns\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43mdayfirst\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdayfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43myearfirst\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43myearfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_object\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 499\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 500\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;66;03m# is in UTC\u001b[39;00m\n\u001b[1;32m 502\u001b[0m dta \u001b[38;5;241m=\u001b[39m DatetimeArray(result, dtype\u001b[38;5;241m=\u001b[39mtz_to_dtype(tz_parsed))\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/arrays/datetimes.py:2346\u001b[0m, in \u001b[0;36mobjects_to_datetime64ns\u001b[0;34m(data, dayfirst, yearfirst, utc, errors, allow_object)\u001b[0m\n\u001b[1;32m 2343\u001b[0m \u001b[38;5;66;03m# if str-dtype, convert\u001b[39;00m\n\u001b[1;32m 2344\u001b[0m data \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray(data, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mobject_)\n\u001b[0;32m-> 2346\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m \u001b[43mtslib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray_to_datetime\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2347\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2348\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2349\u001b[0m \u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2350\u001b[0m \u001b[43m \u001b[49m\u001b[43mdayfirst\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdayfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2351\u001b[0m \u001b[43m \u001b[49m\u001b[43myearfirst\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43myearfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2352\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2354\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 2355\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 2356\u001b[0m \u001b[38;5;66;03m# is in UTC\u001b[39;00m\n\u001b[1;32m 2357\u001b[0m \u001b[38;5;66;03m# Return i8 values to denote unix 
timestamps\u001b[39;00m\n\u001b[1;32m 2358\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\u001b[38;5;241m.\u001b[39mview(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mi8\u001b[39m\u001b[38;5;124m\"\u001b[39m), tz_parsed\n",
+ "File \u001b[0;32mtslib.pyx:403\u001b[0m, in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mtslib.pyx:552\u001b[0m, in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mtslib.pyx:480\u001b[0m, in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mconversion.pyx:716\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.conversion.convert_timezone\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True, at position 18"
+ ]
+ }
+ ],
+ "source": [
+ "# Alternative method: parse `sent_at` into a single timezone-aware datetime column.\n",
+ "# The column holds tz-aware datetime objects, so pandas can only build a datetime64\n",
+ "# column from them when every value is normalised to UTC (utc=True).\n",
+ "# With utc=False the follow-up .astype('datetime64[ns]') raises\n",
+ "# \"Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True\".\n",
+ "\n",
+ "df1_campaigns_full[\"sent_at\"] = pd.to_datetime(df1_campaigns_full[\"sent_at\"], utc=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 334,
+ "id": "92bbdf80-e34b-4146-864a-b0dd4e04c5e9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " sent_at\n",
+ "0 2022-01-01 10:34:56+00:00\n",
+ "1 2022-02-01 13:45:30+00:00\n",
+ "2 2022-03-01 16:30:00+00:00\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Toy example: parsing offset-aware timestamp strings with pd.to_datetime.\n",
+ "# A dedicated name is used so the real `df1_campaigns_full` frame is not overwritten.\n",
+ "sent_at_example = pd.DataFrame({\n",
+ "    'sent_at': ['2022-01-01 12:34:56+02:00', '2022-02-01 15:45:30+02:00', '2022-03-01 18:30:00+02:00']\n",
+ "})\n",
+ "\n",
+ "# utc=True converts every value to UTC: the +02:00 offset is applied, not preserved,\n",
+ "# so 12:34:56+02:00 is shown as 10:34:56+00:00.\n",
+ "sent_at_example['sent_at'] = pd.to_datetime(sent_at_example['sent_at'], utc=True)\n",
+ "\n",
+ "# Show the converted frame\n",
+ "print(sent_at_example)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 324,
+ "id": "a8ad41ed-433c-4f7e-9f67-888dcb54d24e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "campaign_id\n",
+ "1 2021-03-24 00:00:00+01:00\n",
+ "2 2021-03-14 00:00:00+01:00\n",
+ "3 2021-03-15 00:00:00+01:00\n",
+ "4 2021-03-21 00:00:00+01:00\n",
+ "5 2021-03-10 00:00:00+01:00\n",
+ " ... \n",
+ "1321501 2023-11-06 13:30:12+01:00\n",
+ "1321503 2023-11-07 17:31:16+01:00\n",
+ "1321505 2023-11-08 11:15:52+01:00\n",
+ "1321506 2023-11-08 19:00:25+01:00\n",
+ "1321507 2023-11-08 19:00:37+01:00\n",
+ "Name: campaign_sent_at, Length: 949, dtype: datetime64[ns, tzoffset(None, 3600)]\n"
+ ]
+ },
+ {
+ "ename": "TypeError",
+ "evalue": "'bool' object is not callable",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[324], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# comparison \u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(df1_campaigns_full\u001b[38;5;241m.\u001b[39mgroupby(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_id\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_sent_at\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mfirst()) \u001b[38;5;66;03m# envoi des campagnes\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mdf1_campaigns_full\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcampaign_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msent_at\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mmin())\n",
+ "\u001b[0;31mTypeError\u001b[0m: 'bool' object is not callable"
+ ]
+ }
+ ],
+ "source": [
+ "# Compare the campaign-level send date with the earliest per-recipient send date.\n",
+ "\n",
+ "print(df1_campaigns_full.groupby(\"campaign_id\")[\"campaign_sent_at\"].first())  # campaign send date\n",
+ "# A SeriesGroupBy has no dropna() method: `.dropna` is the boolean groupby option, which is\n",
+ "# why calling it raised \"'bool' object is not callable\". Drop missing rows before grouping.\n",
+ "print(df1_campaigns_full.dropna(subset=[\"sent_at\"]).groupby(\"campaign_id\")[\"sent_at\"].min())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 325,
+ "id": "1771adeb-bbc9-40ef-afb6-49a6b3ff2e79",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0\n",
+ "campaign_id 0\n",
+ "customer_id 0\n",
+ "opened_at 5019527\n",
+ "sent_at 2741358\n",
+ "delivered_at 2807002\n",
+ "created_at 1547090\n",
+ "updated_at 766803\n",
+ "campaign_name 0\n",
+ "campaign_service_id 0\n",
+ "campaign_created_at 2216183\n",
+ "campaign_updated_at 2561268\n",
+ "campaign_sent_at 3504140\n",
+ "campaign_identifier 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 325,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of missing values in each column of df1_campaigns_full\n",
+ "df1_campaigns_full.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 326,
+ "id": "1a5a1d98-a076-4988-aaf3-e753c117e518",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0\n",
+ "name 0\n",
+ "service_id 0\n",
+ "created_at 0\n",
+ "updated_at 0\n",
+ "process_id 957\n",
+ "report_url 957\n",
+ "category 2\n",
+ "to_be_synced 0\n",
+ "identifier 0\n",
+ "sent_at 3\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 326,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of missing values in each column of df1_campaigns\n",
+ "df1_campaigns.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 320,
+ "id": "749df9f0-8a18-49f0-a820-05cc674a5fce",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2020-06-02 10:24:08+02:00\n",
+ "2020-06-02 10:24:08+02:00\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Earliest `sent_at`, computed two ways: after coercing the column with pd.to_datetime,\n",
+ "# and directly on the raw column with missing values dropped. Printed one per line.\n",
+ "print(\n",
+ "    pd.to_datetime(df1_campaigns_full[\"sent_at\"], errors='coerce').min(),\n",
+ "    df1_campaigns_full[\"sent_at\"].dropna().min(),\n",
+ "    sep=\"\\n\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 313,
+ "id": "f46000b8-4b7b-4121-b0af-8e8a388ce33c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6214808"
+ ]
+ },
+ "execution_count": 313,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count the entries of `sent_at` that are datetime instances.\n",
+ "# NOTE(review): pd.NaT appears to be a datetime subclass, so missing values stored as NaT\n",
+ "# would be counted too - confirm before reading this as \"no missing values\".\n",
+ "df1_campaigns_full[\"sent_at\"].map(lambda value: isinstance(value, datetime)).sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 314,
+ "id": "0ae4aeca-6edc-44e8-bc72-74f19b62a8f3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6214808"
+ ]
+ },
+ "execution_count": 314,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Total number of rows in df1_campaigns_full\n",
+ "len(df1_campaigns_full)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 340,
+ "id": "4ef4d3d5-5f0a-4798-86d1-1b56641fcce4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "campaign_id int64\n",
+ "customer_id int64\n",
+ "opened_at object\n",
+ "sent_at object\n",
+ "delivered_at object\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "campaign_name object\n",
+ "campaign_service_id int64\n",
+ "campaign_created_at object\n",
+ "campaign_updated_at object\n",
+ "campaign_sent_at object\n",
+ "campaign_identifier object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 340,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_campaigns_full.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 341,
+ "id": "8de270ac-c205-4686-8d53-6cd52d8239d0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_created_at | \n",
+ " campaign_updated_at | \n",
+ " campaign_sent_at | \n",
+ " campaign_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " 2021-03-28 18:34:20.616136+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " 2021-03-28 18:21:04.297213+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " 2021-03-28 18:18:49.991042+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " 2021-03-28 18:09:50.915354+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " 2021-03-28 18:05:08.507398+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "0 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "1 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "2 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "3 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "\n",
+ " campaign_created_at campaign_updated_at \\\n",
+ "0 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "1 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "2 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "3 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "4 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "\n",
+ " campaign_sent_at campaign_identifier \n",
+ "0 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "1 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "2 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "3 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "4 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a "
+ ]
+ },
+ "execution_count": 341,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the first five rows of df1_campaigns_full\n",
+ "df1_campaigns_full.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 342,
+ "id": "e2d81bd1-9fd6-40c7-96f9-998771a4fd77",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "time data '2022-05-06 12:00:23+02:00' does not match format '%Y-%m-%d %H:%M:%S.%f%z'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[342], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# conversion colonne par colonne\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# precision a la Ns\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m df1_campaigns_full[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf1_campaigns_full\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcreated_at\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatetime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrptime\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mY-\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mm-\u001b[39;49m\u001b[38;5;132;43;01m%d\u001b[39;49;00m\u001b[38;5;124;43m \u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mH:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mM:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mS.\u001b[39;49m\u001b[38;5;132;43;01m%f\u001b[39;49;00m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mz\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnotna\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mNaT\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"updated_at\"] = df1_campaigns_full[\"updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_created_at\"] = df1_campaigns_full[\"campaign_created_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_updated_at\"] = df1_campaigns_full[\"campaign_updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"delivered_at\"] = df1_campaigns_full[\"delivered_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_sent_at\"] = df1_campaigns_full[\"campaign_sent_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/series.py:4764\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 4630\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 4631\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4636\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 4637\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m 4638\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4639\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4640\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4755\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[1;32m 4756\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 4757\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4758\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4759\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4760\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4761\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4762\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4763\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 4764\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/apply.py:1209\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m 1208\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1209\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/apply.py:1289\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1288\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1289\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1290\u001b[0m \u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m 1291\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], 
ABCSeries):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1815\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n",
+ "File \u001b[0;32mlib.pyx:2926\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
+ "Cell \u001b[0;32mIn[342], line 4\u001b[0m, in \u001b[0;36m\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# conversion colonne par colonne\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# precision a la Ns\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m df1_campaigns_full[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m df1_campaigns_full[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x : \u001b[43mdatetime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrptime\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mY-\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mm-\u001b[39;49m\u001b[38;5;132;43;01m%d\u001b[39;49;00m\u001b[38;5;124;43m \u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mH:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mM:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mS.\u001b[39;49m\u001b[38;5;132;43;01m%f\u001b[39;49;00m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mz\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mnotna(x) \u001b[38;5;28;01melse\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mNaT)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"updated_at\"] = df1_campaigns_full[\"updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_created_at\"] = 
df1_campaigns_full[\"campaign_created_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_updated_at\"] = df1_campaigns_full[\"campaign_updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"delivered_at\"] = df1_campaigns_full[\"delivered_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_sent_at\"] = df1_campaigns_full[\"campaign_sent_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/_strptime.py:568\u001b[0m, in \u001b[0;36m_strptime_datetime\u001b[0;34m(cls, data_string, format)\u001b[0m\n\u001b[1;32m 565\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_strptime_datetime\u001b[39m(\u001b[38;5;28mcls\u001b[39m, data_string, \u001b[38;5;28mformat\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%a\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mb \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mH:\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mM:\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mS \u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mY\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 566\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Return a class cls instance based on the input string and the\u001b[39;00m\n\u001b[1;32m 567\u001b[0m \u001b[38;5;124;03m format string.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 568\u001b[0m tt, fraction, gmtoff_fraction \u001b[38;5;241m=\u001b[39m \u001b[43m_strptime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_string\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 569\u001b[0m tzname, gmtoff \u001b[38;5;241m=\u001b[39m tt[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m:]\n\u001b[1;32m 570\u001b[0m args \u001b[38;5;241m=\u001b[39m tt[:\u001b[38;5;241m6\u001b[39m] \u001b[38;5;241m+\u001b[39m (fraction,)\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/_strptime.py:349\u001b[0m, in \u001b[0;36m_strptime\u001b[0;34m(data_string, format)\u001b[0m\n\u001b[1;32m 347\u001b[0m found \u001b[38;5;241m=\u001b[39m format_regex\u001b[38;5;241m.\u001b[39mmatch(data_string)\n\u001b[1;32m 348\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m found:\n\u001b[0;32m--> 349\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtime data \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m does not match format \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 350\u001b[0m (data_string, \u001b[38;5;28mformat\u001b[39m))\n\u001b[1;32m 351\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data_string) \u001b[38;5;241m!=\u001b[39m found\u001b[38;5;241m.\u001b[39mend():\n\u001b[1;32m 352\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munconverted data remains: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 353\u001b[0m data_string[found\u001b[38;5;241m.\u001b[39mend():])\n",
+ "\u001b[0;31mValueError\u001b[0m: time data '2022-05-06 12:00:23+02:00' does not match format '%Y-%m-%d %H:%M:%S.%f%z'"
+ ]
+ }
+ ],
+ "source": [
+ "# Column-by-column conversion of the timestamp columns.\n",
+ "#\n",
+ "# The raw values mix two layouts, with and without fractional seconds\n",
+ "# (e.g. \"2022-05-06 12:00:23+02:00\" vs \"2023-10-23 09:31:50.168545+02:00\"), so a single\n",
+ "# strptime format raises ValueError on the first value that deviates from it.\n",
+ "# format=\"ISO8601\" accepts both layouts in one vectorised call; utc=True converts every\n",
+ "# offset to UTC so the column gets a single tz-aware datetime dtype; missing values become NaT.\n",
+ "df1_campaigns_full[\"created_at\"] = pd.to_datetime(df1_campaigns_full[\"created_at\"], format=\"ISO8601\", utc=True)\n",
+ "\n",
+ "# Remaining timestamp columns, still disabled as in the first draft of this cell.\n",
+ "# The same call handles both the microsecond-precision and the second-precision columns:\n",
+ "# for column in [\"updated_at\", \"campaign_created_at\", \"campaign_updated_at\",\n",
+ "#                \"opened_at\", \"sent_at\", \"delivered_at\", \"campaign_sent_at\"]:\n",
+ "#     df1_campaigns_full[column] = pd.to_datetime(df1_campaigns_full[column], format=\"ISO8601\", utc=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 346,
+ "id": "5a1fe408-ae4c-4957-a39b-50a4d5423319",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6214778 2023-10-23 09:31:50.168545+02:00\n",
+ "6214779 2023-10-23 09:31:28.570386+02:00\n",
+ "6214780 2023-10-23 09:02:26.494195+02:00\n",
+ "6214781 2023-10-23 09:32:34.454957+02:00\n",
+ "6214782 2023-10-23 09:31:29.139217+02:00\n",
+ "6214783 2023-10-23 09:32:06.223901+02:00\n",
+ "6214784 2023-10-23 09:31:52.702258+02:00\n",
+ "6214785 2023-10-23 09:31:45.051321+02:00\n",
+ "6214786 2023-10-23 09:32:55.350092+02:00\n",
+ "6214787 2023-10-23 09:33:14.007405+02:00\n",
+ "6214788 2023-10-23 09:32:44.645432+02:00\n",
+ "6214789 2023-10-23 09:02:27.578671+02:00\n",
+ "6214790 2023-10-23 09:34:24.879045+02:00\n",
+ "6214791 2023-10-23 09:34:02.075066+02:00\n",
+ "6214792 2023-10-23 09:33:20.349918+02:00\n",
+ "6214793 2023-10-23 09:34:25.631234+02:00\n",
+ "6214794 2023-10-23 09:34:27.581150+02:00\n",
+ "6214795 2023-10-23 09:31:45.192200+02:00\n",
+ "6214796 2023-10-23 09:32:52.018890+02:00\n",
+ "6214797 2023-10-23 09:02:01.558573+02:00\n",
+ "6214798 2023-10-23 09:34:48.543213+02:00\n",
+ "6214799 2023-10-23 09:32:15.109097+02:00\n",
+ "6214800 2023-10-23 09:34:26.590416+02:00\n",
+ "6214801 2023-10-23 09:32:02.729363+02:00\n",
+ "6214802 2023-10-23 09:31:41.055337+02:00\n",
+ "6214803 2023-10-23 09:32:36.564696+02:00\n",
+ "6214804 2023-10-23 09:32:50.829641+02:00\n",
+ "6214805 2023-10-23 09:33:31.102500+02:00\n",
+ "6214806 2023-10-23 09:31:55.768547+02:00\n",
+ "6214807 2023-10-23 09:33:57.477892+02:00\n",
+ "Name: created_at, dtype: object"
+ ]
+ },
+ "execution_count": 346,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Inspect the last 30 created_at values to check their timestamp layout\n",
+ "df1_campaigns_full[\"created_at\"].iloc[-30:]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 349,
+ "id": "feb3fc34-51f2-45d5-8f34-9940a14e9060",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "time data \"2023-10-23 09:31:50.168545+02:00\" doesn't match format \"%Y-%m-%d %H:%M:%S%z\", at position 1. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[349], line 9\u001b[0m\n\u001b[1;32m 4\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame({\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate_str\u001b[39m\u001b[38;5;124m'\u001b[39m: [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m2022-05-06 12:00:23+02:00\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m2023-10-23 09:31:50.168545+02:00\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 6\u001b[0m })\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# Convertir la colonne 'date_str' en datetime en conservant l'information sur le fuseau horaire (datetime64[ns])\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdate_str\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Afficher le DataFrame résultant\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28mprint\u001b[39m(df)\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:1112\u001b[0m, in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 1110\u001b[0m result \u001b[38;5;241m=\u001b[39m arg\u001b[38;5;241m.\u001b[39mmap(cache_array)\n\u001b[1;32m 1111\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1112\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_listlike\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_values\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1113\u001b[0m result \u001b[38;5;241m=\u001b[39m arg\u001b[38;5;241m.\u001b[39m_constructor(values, index\u001b[38;5;241m=\u001b[39marg\u001b[38;5;241m.\u001b[39mindex, name\u001b[38;5;241m=\u001b[39marg\u001b[38;5;241m.\u001b[39mname)\n\u001b[1;32m 1114\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arg, (ABCDataFrame, abc\u001b[38;5;241m.\u001b[39mMutableMapping)):\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:488\u001b[0m, in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;66;03m# `format` could be inferred, or user didn't ask for mixed-format parsing.\u001b[39;00m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmixed\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 488\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_array_strptime_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 490\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m objects_to_datetime64ns(\n\u001b[1;32m 491\u001b[0m arg,\n\u001b[1;32m 492\u001b[0m dayfirst\u001b[38;5;241m=\u001b[39mdayfirst,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 496\u001b[0m allow_object\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 497\u001b[0m )\n\u001b[1;32m 499\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 500\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;66;03m# is in 
UTC\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:519\u001b[0m, in \u001b[0;36m_array_strptime_with_fallback\u001b[0;34m(arg, name, utc, fmt, exact, errors)\u001b[0m\n\u001b[1;32m 508\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_array_strptime_with_fallback\u001b[39m(\n\u001b[1;32m 509\u001b[0m arg,\n\u001b[1;32m 510\u001b[0m name,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 514\u001b[0m errors: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 515\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Index:\n\u001b[1;32m 516\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 517\u001b[0m \u001b[38;5;124;03m Call array_strptime, with fallback behavior depending on 'errors'.\u001b[39;00m\n\u001b[1;32m 518\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 519\u001b[0m result, timezones \u001b[38;5;241m=\u001b[39m \u001b[43marray_strptime\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfmt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mutc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 520\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(tz \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m tz \u001b[38;5;129;01min\u001b[39;00m timezones):\n\u001b[1;32m 521\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _return_parsed_timezone_results(result, timezones, utc, name)\n",
+ "File \u001b[0;32mstrptime.pyx:534\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime.array_strptime\u001b[0;34m()\u001b[0m\n",
+ "File \u001b[0;32mstrptime.pyx:355\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime.array_strptime\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: time data \"2023-10-23 09:31:50.168545+02:00\" doesn't match format \"%Y-%m-%d %H:%M:%S%z\", at position 1. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this."
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Exemple de DataFrame avec une colonne 'date_str' contenant des dates en formats différents\n",
+ "df = pd.DataFrame({\n",
+ " 'date_str': ['2022-05-06 12:00:23+02:00', '2023-10-23 09:31:50.168545+02:00']\n",
+ "})\n",
+ "\n",
+ "# Convertir la colonne 'date_str' en datetime en conservant l'information sur le fuseau horaire (datetime64[ns])\n",
+ "df['date'] = pd.to_datetime(df['date_str'], utc=True)\n",
+ "\n",
+ "# Afficher le DataFrame résultant\n",
+ "print(df)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 350,
+ "id": "da01f2d8-3c1e-4d43-92ef-6236a24963d0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " date_str date\n",
+ "0 2022-05-06 12:00:23+02:00 2022-05-06 10:00:23+00:00\n",
+ "1 023-10-23 09:31:50.168545+02:00 023-10-23 09:31:50.168545+02:00\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "# Exemple de DataFrame avec une colonne 'date_str' contenant des dates en formats différents\n",
+ "df = pd.DataFrame({\n",
+ " 'date_str': ['2022-05-06 12:00:23+02:00', '023-10-23 09:31:50.168545+02:00']\n",
+ "})\n",
+ "\n",
+ "# Fonction lambda pour convertir la colonne 'date_str' en datetime avec précision\n",
+ "def convert_to_datetime_with_precision(x):\n",
+ " if pd.notna(x):\n",
+ " # Format avec nanosecondes\n",
+ " try:\n",
+ " return pd.to_datetime(x, utc=True)\n",
+ " except ValueError:\n",
+ " pass\n",
+ "\n",
+ " # Format sans nanosecondes\n",
+ " try:\n",
+ " return pd.to_datetime(x, utc=True, format=\"%Y-%m-%d %H:%M:%S%z\")\n",
+ " except ValueError:\n",
+ " pass\n",
+ "\n",
+ " return x\n",
+ "\n",
+ "# Appliquer la fonction lambda à la colonne 'date_str'\n",
+ "df['date'] = df['date_str'].apply(convert_to_datetime_with_precision)\n",
+ "\n",
+ "# Afficher le DataFrame résultant\n",
+ "print(df)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 351,
+ "id": "e6ca12c8-be66-4537-b759-036123b74b7b",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[351], line 7\u001b[0m\n\u001b[1;32m 3\u001b[0m columns_to_convert \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msent_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdelivered_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mupdated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_sent_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_created_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_updated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m column \u001b[38;5;129;01min\u001b[39;00m columns_to_convert :\n\u001b[0;32m----> 7\u001b[0m df1_campaigns_full[column] \u001b[38;5;241m=\u001b[39m \u001b[43mdf1_campaigns_full\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcolumn\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconvert_to_datetime_with_precision\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/series.py:4764\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 4630\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 4631\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4636\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 4637\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m 4638\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4639\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4640\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4755\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[1;32m 4756\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 4757\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4758\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4759\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4760\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4761\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4762\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4763\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 4764\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/apply.py:1209\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m 1208\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1209\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/apply.py:1289\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1288\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1289\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1290\u001b[0m \u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m 1291\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], 
ABCSeries):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1815\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n",
+ "File \u001b[0;32mlib.pyx:2926\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
+ "Cell \u001b[0;32mIn[350], line 11\u001b[0m, in \u001b[0;36mconvert_to_datetime_with_precision\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mnotna(x):\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Format avec nanosecondes\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m:\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:1146\u001b[0m, in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 1144\u001b[0m result \u001b[38;5;241m=\u001b[39m convert_listlike(argc, \u001b[38;5;28mformat\u001b[39m)\n\u001b[1;32m 1145\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1146\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_listlike\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43marg\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arg, \u001b[38;5;28mbool\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, np\u001b[38;5;241m.\u001b[39mbool_):\n\u001b[1;32m 1148\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mbool\u001b[39m(result) \u001b[38;5;66;03m# TODO: avoid this kludge.\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:488\u001b[0m, in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;66;03m# `format` could be inferred, or user didn't ask for mixed-format parsing.\u001b[39;00m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmixed\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 488\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_array_strptime_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 490\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m objects_to_datetime64ns(\n\u001b[1;32m 491\u001b[0m arg,\n\u001b[1;32m 492\u001b[0m dayfirst\u001b[38;5;241m=\u001b[39mdayfirst,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 496\u001b[0m allow_object\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 497\u001b[0m )\n\u001b[1;32m 499\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 500\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;66;03m# is in 
UTC\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:521\u001b[0m, in \u001b[0;36m_array_strptime_with_fallback\u001b[0;34m(arg, name, utc, fmt, exact, errors)\u001b[0m\n\u001b[1;32m 519\u001b[0m result, timezones \u001b[38;5;241m=\u001b[39m array_strptime(arg, fmt, exact\u001b[38;5;241m=\u001b[39mexact, errors\u001b[38;5;241m=\u001b[39merrors, utc\u001b[38;5;241m=\u001b[39mutc)\n\u001b[1;32m 520\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(tz \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m tz \u001b[38;5;129;01min\u001b[39;00m timezones):\n\u001b[0;32m--> 521\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_return_parsed_timezone_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimezones\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 523\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _box_as_indexlike(result, utc\u001b[38;5;241m=\u001b[39mutc, name\u001b[38;5;241m=\u001b[39mname)\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:344\u001b[0m, in \u001b[0;36m_return_parsed_timezone_results\u001b[0;34m(result, timezones, utc, name)\u001b[0m\n\u001b[1;32m 342\u001b[0m tz_results \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mempty(\u001b[38;5;28mlen\u001b[39m(result), dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mobject\u001b[39m)\n\u001b[1;32m 343\u001b[0m non_na_timezones \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n\u001b[0;32m--> 344\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m zone \u001b[38;5;129;01min\u001b[39;00m \u001b[43munique\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimezones\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 345\u001b[0m mask \u001b[38;5;241m=\u001b[39m timezones \u001b[38;5;241m==\u001b[39m zone\n\u001b[1;32m 346\u001b[0m dta \u001b[38;5;241m=\u001b[39m DatetimeArray(result[mask])\u001b[38;5;241m.\u001b[39mtz_localize(zone)\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/algorithms.py:401\u001b[0m, in \u001b[0;36munique\u001b[0;34m(values)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21munique\u001b[39m(values):\n\u001b[1;32m 308\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;124;03m Return unique values based on a hash table.\u001b[39;00m\n\u001b[1;32m 310\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[38;5;124;03m array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)\u001b[39;00m\n\u001b[1;32m 400\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 401\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43munique_with_mask\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/algorithms.py:440\u001b[0m, in \u001b[0;36munique_with_mask\u001b[0;34m(values, mask)\u001b[0m\n\u001b[1;32m 438\u001b[0m table \u001b[38;5;241m=\u001b[39m hashtable(\u001b[38;5;28mlen\u001b[39m(values))\n\u001b[1;32m 439\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 440\u001b[0m uniques \u001b[38;5;241m=\u001b[39m \u001b[43mtable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43munique\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 441\u001b[0m uniques \u001b[38;5;241m=\u001b[39m _reconstruct_data(uniques, original\u001b[38;5;241m.\u001b[39mdtype, original)\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m uniques\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "# Parse every timestamp column with one vectorized call per column.\n",
+ "# `Series.apply` invokes its function once per cell; the earlier cell-by-cell\n",
+ "# version of this loop had to be interrupted before it finished.\n",
+ "\n",
+ "columns_to_convert = [\"sent_at\", \"delivered_at\", \"created_at\", \"updated_at\",\n",
+ "                      \"campaign_sent_at\", \"campaign_created_at\", \"campaign_updated_at\"]\n",
+ "\n",
+ "for column in columns_to_convert:\n",
+ "    # format=\"ISO8601\" accepts values with or without fractional seconds;\n",
+ "    # utc=True folds the mixed +01:00 / +02:00 offsets into a single tz-aware dtype.\n",
+ "    # NOTE(review): `convert_to_datetime_with_precision` is no longer called here;\n",
+ "    # confirm it does nothing beyond parsing. Values are now expressed in UTC;\n",
+ "    # apply `.dt.tz_convert(...)` afterwards if local wall-clock times are needed.\n",
+ "    df1_campaigns_full[column] = pd.to_datetime(\n",
+ "        df1_campaigns_full[column], format=\"ISO8601\", utc=True\n",
+ "    )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 356,
+ "id": "61e1f604-23ce-4cb2-8ad3-523c62e80e68",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_created_at | \n",
+ " campaign_updated_at | \n",
+ " campaign_sent_at | \n",
+ " campaign_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 4081002 | \n",
+ " 23728 | \n",
+ " 58 | \n",
+ " 8268 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:57+02:00 | \n",
+ " 2021-03-28 18:43:38+02:00 | \n",
+ " 2021-03-28 18:43:42.928685+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 4076139 | \n",
+ " 4552 | \n",
+ " 58 | \n",
+ " 1472 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:57+02:00 | \n",
+ " 2021-03-28 18:03:26+02:00 | \n",
+ " 2021-03-28 18:03:28.229670+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 4081572 | \n",
+ " 14070 | \n",
+ " 58 | \n",
+ " 7978 | \n",
+ " 2021-03-29 08:38:06+02:00 | \n",
+ " 2021-03-28 18:00:57+02:00 | \n",
+ " 2021-03-28 18:20:45+02:00 | \n",
+ " 2021-03-28 18:20:49.431860+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 4094833 | \n",
+ " 6969 | \n",
+ " 58 | \n",
+ " 32211 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:57+02:00 | \n",
+ " 2021-03-28 18:09:18+02:00 | \n",
+ " 2021-03-28 18:09:20.571462+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 4094827 | \n",
+ " 9662 | \n",
+ " 58 | \n",
+ " 30980 | \n",
+ " 2021-04-04 17:54:51+02:00 | \n",
+ " 2021-03-28 18:00:57+02:00 | \n",
+ " 2021-03-28 18:03:29+02:00 | \n",
+ " 2021-03-28 18:13:33.153720+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 8961 | \n",
+ " 12417 | \n",
+ " 58 | \n",
+ " 33400 | \n",
+ " 2021-03-28 21:27:57+02:00 | \n",
+ " 2021-03-28 18:17:35+02:00 | \n",
+ " 2021-03-28 18:17:36+02:00 | \n",
+ " 2021-03-28 18:17:36.735495+02:00 | \n",
+ " 2021-03-28 19:27:57.503961+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 20380 | \n",
+ " 18205 | \n",
+ " 58 | \n",
+ " 106495 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:30:08+02:00 | \n",
+ " 2021-03-28 18:30:11+02:00 | \n",
+ " 2021-03-28 18:30:11.453742+02:00 | \n",
+ " 2021-03-28 18:30:11.474019+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 27298 | \n",
+ " 22107 | \n",
+ " 58 | \n",
+ " 104781 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:39:55+02:00 | \n",
+ " 2021-03-28 18:39:56+02:00 | \n",
+ " 2021-03-28 18:39:56.430679+02:00 | \n",
+ " 2021-03-28 18:39:56.435656+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 29107 | \n",
+ " 22389 | \n",
+ " 58 | \n",
+ " 111570 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:40:38+02:00 | \n",
+ " 2021-03-28 18:40:40+02:00 | \n",
+ " 2021-03-28 18:40:40.975334+02:00 | \n",
+ " 2021-03-28 18:40:40.979852+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 27962 | \n",
+ " 29258 | \n",
+ " 58 | \n",
+ " 119496 | \n",
+ " 2021-03-29 21:03:52+02:00 | \n",
+ " 2021-03-28 20:52:26+02:00 | \n",
+ " 2021-03-28 20:52:30+02:00 | \n",
+ " 2021-03-28 20:52:30.261271+02:00 | \n",
+ " 2021-03-29 19:03:52.527753+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
26464 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "4081002 23728 58 8268 NaN \n",
+ "4076139 4552 58 1472 NaN \n",
+ "4081572 14070 58 7978 2021-03-29 08:38:06+02:00 \n",
+ "4094833 6969 58 32211 NaN \n",
+ "4094827 9662 58 30980 2021-04-04 17:54:51+02:00 \n",
+ "... ... ... ... ... \n",
+ "8961 12417 58 33400 2021-03-28 21:27:57+02:00 \n",
+ "20380 18205 58 106495 NaN \n",
+ "27298 22107 58 104781 NaN \n",
+ "29107 22389 58 111570 NaN \n",
+ "27962 29258 58 119496 2021-03-29 21:03:52+02:00 \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "4081002 2021-03-28 18:00:57+02:00 2021-03-28 18:43:38+02:00 \n",
+ "4076139 2021-03-28 18:00:57+02:00 2021-03-28 18:03:26+02:00 \n",
+ "4081572 2021-03-28 18:00:57+02:00 2021-03-28 18:20:45+02:00 \n",
+ "4094833 2021-03-28 18:00:57+02:00 2021-03-28 18:09:18+02:00 \n",
+ "4094827 2021-03-28 18:00:57+02:00 2021-03-28 18:03:29+02:00 \n",
+ "... ... ... \n",
+ "8961 2021-03-28 18:17:35+02:00 2021-03-28 18:17:36+02:00 \n",
+ "20380 2021-03-28 18:30:08+02:00 2021-03-28 18:30:11+02:00 \n",
+ "27298 2021-03-28 18:39:55+02:00 2021-03-28 18:39:56+02:00 \n",
+ "29107 2021-03-28 18:40:38+02:00 2021-03-28 18:40:40+02:00 \n",
+ "27962 2021-03-28 20:52:26+02:00 2021-03-28 20:52:30+02:00 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "4081002 2021-03-28 18:43:42.928685+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4076139 2021-03-28 18:03:28.229670+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4081572 2021-03-28 18:20:49.431860+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4094833 2021-03-28 18:09:20.571462+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4094827 2021-03-28 18:13:33.153720+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "... ... ... \n",
+ "8961 2021-03-28 18:17:36.735495+02:00 2021-03-28 19:27:57.503961+02:00 \n",
+ "20380 2021-03-28 18:30:11.453742+02:00 2021-03-28 18:30:11.474019+02:00 \n",
+ "27298 2021-03-28 18:39:56.430679+02:00 2021-03-28 18:39:56.435656+02:00 \n",
+ "29107 2021-03-28 18:40:40.975334+02:00 2021-03-28 18:40:40.979852+02:00 \n",
+ "27962 2021-03-28 20:52:30.261271+02:00 2021-03-29 19:03:52.527753+02:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "4081002 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4076139 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4081572 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4094833 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4094827 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "... ... ... \n",
+ "8961 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "20380 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "27298 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "29107 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "27962 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "\n",
+ " campaign_created_at campaign_updated_at \\\n",
+ "4081002 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "4076139 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "4081572 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "4094833 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "4094827 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "... ... ... \n",
+ "8961 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "20380 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "27298 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "29107 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "27962 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "\n",
+ " campaign_sent_at campaign_identifier \n",
+ "4081002 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "4076139 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "4081572 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "4094833 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "4094827 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "... ... ... \n",
+ "8961 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "20380 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "27298 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "29107 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "27962 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "\n",
+ "[26464 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 356,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# sanity check: rows of campaign 58, ordered by `sent_at`\n",
+ "\n",
+ "df1_campaigns_full.loc[df1_campaigns_full[\"campaign_id\"].eq(58)].sort_values(by=\"sent_at\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 364,
+ "id": "0c07c533-0e24-4e53-96d5-c51db97425a6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_created_at | \n",
+ " campaign_updated_at | \n",
+ " campaign_sent_at | \n",
+ " campaign_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1681 | \n",
+ " 571478 | \n",
+ " 630525 | \n",
+ " 291045 | \n",
+ " 2021-06-18 14:23:57+02:00 | \n",
+ " 2021-06-17 00:01:05+02:00 | \n",
+ " 2021-06-17 18:15:02+02:00 | \n",
+ " 2021-06-17 19:11:05.780774+02:00 | \n",
+ " 2022-04-15 23:11:44.290919+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ " 868 | \n",
+ " 571425 | \n",
+ " 630525 | \n",
+ " 272258 | \n",
+ " NaN | \n",
+ " 2021-06-17 00:01:05+02:00 | \n",
+ " 2021-06-17 18:14:37+02:00 | \n",
+ " 2021-06-17 19:10:59.410221+02:00 | \n",
+ " 2022-04-15 23:11:44.290919+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ " 488660 | \n",
+ " 574486 | \n",
+ " 630525 | \n",
+ " 284414 | \n",
+ " NaN | \n",
+ " 2021-06-17 00:01:05+02:00 | \n",
+ " 2021-06-17 19:18:30+02:00 | \n",
+ " 2021-06-17 19:24:37.325550+02:00 | \n",
+ " 2022-04-15 23:11:44.290919+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ " 453503 | \n",
+ " 553818 | \n",
+ " 630525 | \n",
+ " 280714 | \n",
+ " NaN | \n",
+ " 2021-06-17 00:01:05+02:00 | \n",
+ " 2021-06-17 07:18:06+02:00 | \n",
+ " 2021-06-17 07:18:06.816543+02:00 | \n",
+ " 2022-04-15 23:11:44.290919+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ " 600459 | \n",
+ " 556431 | \n",
+ " 630525 | \n",
+ " 289484 | \n",
+ " NaN | \n",
+ " 2021-06-17 00:01:05+02:00 | \n",
+ " 2021-06-17 10:18:57+02:00 | \n",
+ " 2021-06-17 10:18:57.692035+02:00 | \n",
+ " 2022-04-15 23:11:44.290919+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 514091 | \n",
+ " 566709 | \n",
+ " 630525 | \n",
+ " 112554 | \n",
+ " NaN | \n",
+ " 2021-06-17 14:00:35+02:00 | \n",
+ " 2021-06-17 14:00:39+02:00 | \n",
+ " 2021-06-17 14:00:39.523170+02:00 | \n",
+ " 2021-06-17 14:00:39.551198+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ " 604901 | \n",
+ " 569106 | \n",
+ " 630525 | \n",
+ " 33100 | \n",
+ " NaN | \n",
+ " 2021-06-17 16:36:55+02:00 | \n",
+ " 2021-06-17 16:36:55+02:00 | \n",
+ " 2021-06-17 16:36:55.928814+02:00 | \n",
+ " 2021-06-17 16:36:55.933170+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ " 478955 | \n",
+ " 572372 | \n",
+ " 630525 | \n",
+ " 119502 | \n",
+ " NaN | \n",
+ " 2021-06-17 18:25:17+02:00 | \n",
+ " 2021-06-17 18:25:20+02:00 | \n",
+ " 2021-06-17 19:13:02.489176+02:00 | \n",
+ " 2021-06-17 19:13:02.520644+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ " 477252 | \n",
+ " 572282 | \n",
+ " 630525 | \n",
+ " 33826 | \n",
+ " NaN | \n",
+ " 2021-06-17 18:25:21+02:00 | \n",
+ " 2021-06-17 18:25:26+02:00 | \n",
+ " 2021-06-17 19:13:01.993836+02:00 | \n",
+ " 2021-06-17 19:13:02.006886+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ " 532445 | \n",
+ " 576271 | \n",
+ " 630525 | \n",
+ " 119496 | \n",
+ " NaN | \n",
+ " 2021-06-17 20:46:39+02:00 | \n",
+ " 2021-06-17 20:46:40+02:00 | \n",
+ " 2021-06-17 20:46:40.441720+02:00 | \n",
+ " 2021-06-17 20:46:40.449126+02:00 | \n",
+ " com_ddcp_campagne_de_qualification_contacts__n... | \n",
+ " 474 | \n",
+ " 2021-06-17 00:02:11.388346+02:00 | \n",
+ " 2021-09-24 11:56:08.931051+02:00 | \n",
+ " 2021-06-17 00:00:00+02:00 | \n",
+ " 25ddc0f8c9d3e22e03d3076f98d83cb2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
15829 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "1681 571478 630525 291045 2021-06-18 14:23:57+02:00 \n",
+ "868 571425 630525 272258 NaN \n",
+ "488660 574486 630525 284414 NaN \n",
+ "453503 553818 630525 280714 NaN \n",
+ "600459 556431 630525 289484 NaN \n",
+ "... ... ... ... ... \n",
+ "514091 566709 630525 112554 NaN \n",
+ "604901 569106 630525 33100 NaN \n",
+ "478955 572372 630525 119502 NaN \n",
+ "477252 572282 630525 33826 NaN \n",
+ "532445 576271 630525 119496 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "1681 2021-06-17 00:01:05+02:00 2021-06-17 18:15:02+02:00 \n",
+ "868 2021-06-17 00:01:05+02:00 2021-06-17 18:14:37+02:00 \n",
+ "488660 2021-06-17 00:01:05+02:00 2021-06-17 19:18:30+02:00 \n",
+ "453503 2021-06-17 00:01:05+02:00 2021-06-17 07:18:06+02:00 \n",
+ "600459 2021-06-17 00:01:05+02:00 2021-06-17 10:18:57+02:00 \n",
+ "... ... ... \n",
+ "514091 2021-06-17 14:00:35+02:00 2021-06-17 14:00:39+02:00 \n",
+ "604901 2021-06-17 16:36:55+02:00 2021-06-17 16:36:55+02:00 \n",
+ "478955 2021-06-17 18:25:17+02:00 2021-06-17 18:25:20+02:00 \n",
+ "477252 2021-06-17 18:25:21+02:00 2021-06-17 18:25:26+02:00 \n",
+ "532445 2021-06-17 20:46:39+02:00 2021-06-17 20:46:40+02:00 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "1681 2021-06-17 19:11:05.780774+02:00 2022-04-15 23:11:44.290919+02:00 \n",
+ "868 2021-06-17 19:10:59.410221+02:00 2022-04-15 23:11:44.290919+02:00 \n",
+ "488660 2021-06-17 19:24:37.325550+02:00 2022-04-15 23:11:44.290919+02:00 \n",
+ "453503 2021-06-17 07:18:06.816543+02:00 2022-04-15 23:11:44.290919+02:00 \n",
+ "600459 2021-06-17 10:18:57.692035+02:00 2022-04-15 23:11:44.290919+02:00 \n",
+ "... ... ... \n",
+ "514091 2021-06-17 14:00:39.523170+02:00 2021-06-17 14:00:39.551198+02:00 \n",
+ "604901 2021-06-17 16:36:55.928814+02:00 2021-06-17 16:36:55.933170+02:00 \n",
+ "478955 2021-06-17 19:13:02.489176+02:00 2021-06-17 19:13:02.520644+02:00 \n",
+ "477252 2021-06-17 19:13:01.993836+02:00 2021-06-17 19:13:02.006886+02:00 \n",
+ "532445 2021-06-17 20:46:40.441720+02:00 2021-06-17 20:46:40.449126+02:00 \n",
+ "\n",
+ " campaign_name \\\n",
+ "1681 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "868 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "488660 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "453503 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "600459 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "... ... \n",
+ "514091 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "604901 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "478955 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "477252 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "532445 com_ddcp_campagne_de_qualification_contacts__n... \n",
+ "\n",
+ " campaign_service_id campaign_created_at \\\n",
+ "1681 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "868 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "488660 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "453503 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "600459 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "... ... ... \n",
+ "514091 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "604901 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "478955 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "477252 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "532445 474 2021-06-17 00:02:11.388346+02:00 \n",
+ "\n",
+ " campaign_updated_at campaign_sent_at \\\n",
+ "1681 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "868 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "488660 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "453503 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "600459 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "... ... ... \n",
+ "514091 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "604901 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "478955 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "477252 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "532445 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n",
+ "\n",
+ " campaign_identifier \n",
+ "1681 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "868 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "488660 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "453503 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "600459 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "... ... \n",
+ "514091 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "604901 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "478955 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "477252 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "532445 25ddc0f8c9d3e22e03d3076f98d83cb2 \n",
+ "\n",
+ "[15829 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 364,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# sanity check: rows of campaign 630525, ordered by `sent_at`\n",
+ "\n",
+ "df1_campaigns_full.loc[df1_campaigns_full[\"campaign_id\"].eq(630525)].sort_values(by=\"sent_at\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2ee0c057-876d-4534-9267-f7235957c8ce",
+ "metadata": {},
+ "source": [
+ "## Link stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 238,
+ "id": "c744b5bc-111a-40c0-8acf-bae1bedd7a97",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " clicked_at | \n",
+ " link_id | \n",
+ " customer_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2021-03-26 16:30:36+01:00 | \n",
+ " 1 | \n",
+ " 284033 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2021-03-26 17:16:34+01:00 | \n",
+ " 2 | \n",
+ " 119768 | \n",
+ " 2021-03-26 16:16:34.950871+01:00 | \n",
+ " 2021-03-26 16:16:34.950871+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 272 | \n",
+ " 2021-03-28 20:03:32+02:00 | \n",
+ " 42 | \n",
+ " 113105 | \n",
+ " 2021-03-28 18:03:32.736394+02:00 | \n",
+ " 2021-03-28 18:03:32.736394+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 2021-03-26 17:43:19+01:00 | \n",
+ " 3 | \n",
+ " 272280 | \n",
+ " 2021-03-26 16:43:19.338321+01:00 | \n",
+ " 2021-03-26 16:43:19.338321+01:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 2021-03-26 17:46:00+01:00 | \n",
+ " 3 | \n",
+ " 105095 | \n",
+ " 2021-03-26 16:46:00.502945+01:00 | \n",
+ " 2021-03-26 16:46:00.502945+01:00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 151046 | \n",
+ " 243553 | \n",
+ " 2023-11-09 16:34:27+01:00 | \n",
+ " 14666 | \n",
+ " 998 | \n",
+ " 2023-11-09 15:34:29.425425+01:00 | \n",
+ " 2023-11-09 15:34:29.425425+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151047 | \n",
+ " 243554 | \n",
+ " 2023-11-09 16:34:35+01:00 | \n",
+ " 14670 | \n",
+ " 998 | \n",
+ " 2023-11-09 15:34:37.505505+01:00 | \n",
+ " 2023-11-09 15:34:37.505505+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151048 | \n",
+ " 243559 | \n",
+ " 2023-11-09 16:51:15+01:00 | \n",
+ " 14686 | \n",
+ " 82923 | \n",
+ " 2023-11-09 15:51:17.439518+01:00 | \n",
+ " 2023-11-09 15:51:17.439518+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151049 | \n",
+ " 243561 | \n",
+ " 2023-11-09 16:59:42+01:00 | \n",
+ " 14677 | \n",
+ " 82923 | \n",
+ " 2023-11-09 15:59:44.030922+01:00 | \n",
+ " 2023-11-09 15:59:44.030922+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151050 | \n",
+ " 243564 | \n",
+ " 2023-11-09 17:16:41+01:00 | \n",
+ " 14691 | \n",
+ " 1254355 | \n",
+ " 2023-11-09 16:16:43.012932+01:00 | \n",
+ " 2023-11-09 16:16:43.012932+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
151051 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id clicked_at link_id customer_id \\\n",
+ "0 1 2021-03-26 16:30:36+01:00 1 284033 \n",
+ "1 2 2021-03-26 17:16:34+01:00 2 119768 \n",
+ "2 272 2021-03-28 20:03:32+02:00 42 113105 \n",
+ "3 4 2021-03-26 17:43:19+01:00 3 272280 \n",
+ "4 5 2021-03-26 17:46:00+01:00 3 105095 \n",
+ "... ... ... ... ... \n",
+ "151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n",
+ "151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n",
+ "151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n",
+ "151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n",
+ "151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n",
+ "1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n",
+ "2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n",
+ "3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n",
+ "4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n",
+ "... ... ... \n",
+ "151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n",
+ "151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n",
+ "151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n",
+ "151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n",
+ "151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n",
+ "\n",
+ "[151051 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 238,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# preview the link stats table; `.head()` keeps the stored cell output small\n",
+ "# (use `df1_link_stats.shape` when the row count is needed)\n",
+ "\n",
+ "df1_link_stats.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 365,
+ "id": "e4e4b17c-3338-4b43-8d96-5af3cb304ff9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 0.0\n",
+ "clicked_at 0.0\n",
+ "link_id 0.0\n",
+ "customer_id 0.0\n",
+ "created_at 0.0\n",
+ "updated_at 0.0\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 365,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# share of NaN in each column (the mean of a boolean mask is the fraction of True)\n",
+ "\n",
+ "df1_link_stats.isna().mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 366,
+ "id": "846f24d8-8a34-4774-aab7-957a71f73a2c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "clicked_at object\n",
+ "link_id int64\n",
+ "customer_id int64\n",
+ "created_at object\n",
+ "updated_at object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 366,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# dtype of each column\n",
+ "\n",
+ "df1_link_stats.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 367,
+ "id": "6ee886ee-9ddf-4a78-aee8-002e57d63183",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 151051\n",
+ "clicked_at 137121\n",
+ "link_id 10788\n",
+ "customer_id 26075\n",
+ "created_at 96565\n",
+ "updated_at 96565\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 367,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# number of distinct non-missing values in each column\n",
+ "\n",
+ "df1_link_stats.nunique(axis=0, dropna=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 378,
+ "id": "79d02627-2c31-4843-a3da-4f5419b6fe9d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10788"
+ ]
+ },
+ "execution_count": 378,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# number of distinct link ids; dropna=False keeps the same count as len(unique()),\n",
+ "# without sorting and materializing a list just to measure its length\n",
+ "\n",
+ "df1_link_stats[\"link_id\"].nunique(dropna=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 379,
+ "id": "7651374c-3e69-4012-badf-c3d1bc6a477a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "949"
+ ]
+ },
+ "execution_count": 379,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# number of distinct campaign ids; dropna=False keeps the same count as len(unique()),\n",
+ "# without sorting and materializing a list just to measure its length\n",
+ "\n",
+ "df1_campaigns_full[\"campaign_id\"].nunique(dropna=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 380,
+ "id": "c3a11b25-65bc-44b6-b49c-6192f04b1d36",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_created_at | \n",
+ " campaign_updated_at | \n",
+ " campaign_sent_at | \n",
+ " campaign_identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " 2021-03-28 18:34:20.616136+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " 2021-03-28 18:21:04.297213+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " 2021-03-28 18:18:49.991042+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " 2021-03-28 18:09:50.915354+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " 2021-03-28 18:05:08.507398+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 18:01:45.448313+02:00 | \n",
+ " 2021-09-24 11:56:07.723413+02:00 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ " 4f4adcbf8c6f66dcfc8a3282ac2bf10a | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 6214803 | \n",
+ " 8302994 | \n",
+ " 1321483 | \n",
+ " 266155 | \n",
+ " 2023-10-23 11:43:25+02:00 | \n",
+ " 2023-10-23 11:32:33+02:00 | \n",
+ " 2023-10-23 11:32:34+02:00 | \n",
+ " 2023-10-23 09:32:36.564696+02:00 | \n",
+ " 2023-10-23 09:43:28.038259+02:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:19.927528+02:00 | \n",
+ " 2023-10-23 09:31:20.033243+02:00 | \n",
+ " 2023-10-23 11:31:17+02:00 | \n",
+ " 76cf99d3614e23eabab16fb27e944bf9 | \n",
+ "
\n",
+ " \n",
+ " 6214804 | \n",
+ " 8303307 | \n",
+ " 1321483 | \n",
+ " 21355 | \n",
+ " 2023-10-23 11:44:02+02:00 | \n",
+ " 2023-10-23 11:32:49+02:00 | \n",
+ " 2023-10-23 11:32:49+02:00 | \n",
+ " 2023-10-23 09:32:50.829641+02:00 | \n",
+ " 2023-10-23 09:44:04.119578+02:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:19.927528+02:00 | \n",
+ " 2023-10-23 09:31:20.033243+02:00 | \n",
+ " 2023-10-23 11:31:17+02:00 | \n",
+ " 76cf99d3614e23eabab16fb27e944bf9 | \n",
+ "
\n",
+ " \n",
+ " 6214805 | \n",
+ " 8304346 | \n",
+ " 1321483 | \n",
+ " 21849 | \n",
+ " 2023-10-23 11:45:52+02:00 | \n",
+ " 2023-10-23 11:33:28+02:00 | \n",
+ " 2023-10-23 11:33:29+02:00 | \n",
+ " 2023-10-23 09:33:31.102500+02:00 | \n",
+ " 2023-10-23 09:45:55.927652+02:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:19.927528+02:00 | \n",
+ " 2023-10-23 09:31:20.033243+02:00 | \n",
+ " 2023-10-23 11:31:17+02:00 | \n",
+ " 76cf99d3614e23eabab16fb27e944bf9 | \n",
+ "
\n",
+ " \n",
+ " 6214806 | \n",
+ " 8302037 | \n",
+ " 1321483 | \n",
+ " 667789 | \n",
+ " 2023-10-23 11:47:32+02:00 | \n",
+ " 2023-10-23 11:31:53+02:00 | \n",
+ " 2023-10-23 11:31:54+02:00 | \n",
+ " 2023-10-23 09:31:55.768547+02:00 | \n",
+ " 2023-10-23 09:47:33.915460+02:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:19.927528+02:00 | \n",
+ " 2023-10-23 09:31:20.033243+02:00 | \n",
+ " 2023-10-23 11:31:17+02:00 | \n",
+ " 76cf99d3614e23eabab16fb27e944bf9 | \n",
+ "
\n",
+ " \n",
+ " 6214807 | \n",
+ " 8304939 | \n",
+ " 1321483 | \n",
+ " 294154 | \n",
+ " NaN | \n",
+ " 2023-10-23 11:33:54+02:00 | \n",
+ " 2023-10-23 11:33:55+02:00 | \n",
+ " 2023-10-23 09:33:57.477892+02:00 | \n",
+ " 2023-10-23 09:33:57.842331+02:00 | \n",
+ " dre_nov_2023 | \n",
+ " 1318 | \n",
+ " 2023-10-23 09:31:19.927528+02:00 | \n",
+ " 2023-10-23 09:31:20.033243+02:00 | \n",
+ " 2023-10-23 11:31:17+02:00 | \n",
+ " 76cf99d3614e23eabab16fb27e944bf9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6214808 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "... ... ... ... ... \n",
+ "6214803 8302994 1321483 266155 2023-10-23 11:43:25+02:00 \n",
+ "6214804 8303307 1321483 21355 2023-10-23 11:44:02+02:00 \n",
+ "6214805 8304346 1321483 21849 2023-10-23 11:45:52+02:00 \n",
+ "6214806 8302037 1321483 667789 2023-10-23 11:47:32+02:00 \n",
+ "6214807 8304939 1321483 294154 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "... ... ... \n",
+ "6214803 2023-10-23 11:32:33+02:00 2023-10-23 11:32:34+02:00 \n",
+ "6214804 2023-10-23 11:32:49+02:00 2023-10-23 11:32:49+02:00 \n",
+ "6214805 2023-10-23 11:33:28+02:00 2023-10-23 11:33:29+02:00 \n",
+ "6214806 2023-10-23 11:31:53+02:00 2023-10-23 11:31:54+02:00 \n",
+ "6214807 2023-10-23 11:33:54+02:00 2023-10-23 11:33:55+02:00 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "... ... ... \n",
+ "6214803 2023-10-23 09:32:36.564696+02:00 2023-10-23 09:43:28.038259+02:00 \n",
+ "6214804 2023-10-23 09:32:50.829641+02:00 2023-10-23 09:44:04.119578+02:00 \n",
+ "6214805 2023-10-23 09:33:31.102500+02:00 2023-10-23 09:45:55.927652+02:00 \n",
+ "6214806 2023-10-23 09:31:55.768547+02:00 2023-10-23 09:47:33.915460+02:00 \n",
+ "6214807 2023-10-23 09:33:57.477892+02:00 2023-10-23 09:33:57.842331+02:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "0 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "1 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "2 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "3 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "... ... ... \n",
+ "6214803 dre_nov_2023 1318 \n",
+ "6214804 dre_nov_2023 1318 \n",
+ "6214805 dre_nov_2023 1318 \n",
+ "6214806 dre_nov_2023 1318 \n",
+ "6214807 dre_nov_2023 1318 \n",
+ "\n",
+ " campaign_created_at campaign_updated_at \\\n",
+ "0 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "1 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "2 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "3 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "4 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n",
+ "... ... ... \n",
+ "6214803 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n",
+ "6214804 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n",
+ "6214805 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n",
+ "6214806 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n",
+ "6214807 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n",
+ "\n",
+ " campaign_sent_at campaign_identifier \n",
+ "0 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "1 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "2 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "3 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "4 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n",
+ "... ... ... \n",
+ "6214803 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n",
+ "6214804 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n",
+ "6214805 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n",
+ "6214806 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n",
+ "6214807 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n",
+ "\n",
+ "[6214808 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 380,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_campaigns_full"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks_merge/TP_merge_target_campaigns_links.ipynb b/notebooks_merge/TP_merge_target_campaigns_links.ipynb
new file mode 100644
index 0000000..7aa0f0e
--- /dev/null
+++ b/notebooks_merge/TP_merge_target_campaigns_links.ipynb
@@ -0,0 +1,1768 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "5005d8b3-6295-4b22-bd3c-876109be5b3b",
+ "metadata": {},
+ "source": [
+ "# Merges and discovery : target, campaigns, links"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8c56d518-3634-4492-b249-0d8ef33dd527",
+ "metadata": {},
+ "source": [
+ "## First steps: import packages, set up the working environment and load the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "dede42d9-1262-45f7-bd7a-586ae800092a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os \n",
+ "import s3fs\n",
+ "import pandas as pd\n",
+ "import re\n",
+ "from datetime import datetime, timezone, timedelta\n",
+ "import math\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "6ce34b58-b5ba-4b54-ba4d-fc82ef01b09c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data/1',\n",
+ " 'bdc2324-data/10',\n",
+ " 'bdc2324-data/101',\n",
+ " 'bdc2324-data/11',\n",
+ " 'bdc2324-data/12',\n",
+ " 'bdc2324-data/13',\n",
+ " 'bdc2324-data/14',\n",
+ " 'bdc2324-data/2',\n",
+ " 'bdc2324-data/3',\n",
+ " 'bdc2324-data/4',\n",
+ " 'bdc2324-data/5',\n",
+ " 'bdc2324-data/6',\n",
+ " 'bdc2324-data/7',\n",
+ " 'bdc2324-data/8',\n",
+ " 'bdc2324-data/9']"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# connection to the S3 bucket that holds the data\n",
+ "\n",
+ "# the endpoint host is read from the environment variable AWS_S3_ENDPOINT\n",
+ "S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
+ "BUCKET = \"bdc2324-data\"\n",
+ "\n",
+ "fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))\n",
+ "\n",
+ "# list the top-level entries of the bucket\n",
+ "fs.ls(BUCKET)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8eb13dd3-53c7-4a70-94a4-846168473aa1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data/1/1campaign_stats.csv',\n",
+ " 'bdc2324-data/1/1campaigns.csv',\n",
+ " 'bdc2324-data/1/1categories.csv',\n",
+ " 'bdc2324-data/1/1countries.csv',\n",
+ " 'bdc2324-data/1/1currencies.csv',\n",
+ " 'bdc2324-data/1/1customer_target_mappings.csv',\n",
+ " 'bdc2324-data/1/1customersplus.csv',\n",
+ " 'bdc2324-data/1/1event_types.csv',\n",
+ " 'bdc2324-data/1/1events.csv',\n",
+ " 'bdc2324-data/1/1facilities.csv',\n",
+ " 'bdc2324-data/1/1link_stats.csv',\n",
+ " 'bdc2324-data/1/1pricing_formulas.csv',\n",
+ " 'bdc2324-data/1/1product_packs.csv',\n",
+ " 'bdc2324-data/1/1products.csv',\n",
+ " 'bdc2324-data/1/1products_groups.csv',\n",
+ " 'bdc2324-data/1/1purchases.csv',\n",
+ " 'bdc2324-data/1/1representation_category_capacities.csv',\n",
+ " 'bdc2324-data/1/1representations.csv',\n",
+ " 'bdc2324-data/1/1seasons.csv',\n",
+ " 'bdc2324-data/1/1structure_tag_mappings.csv',\n",
+ " 'bdc2324-data/1/1suppliers.csv',\n",
+ " 'bdc2324-data/1/1tags.csv',\n",
+ " 'bdc2324-data/1/1target_types.csv',\n",
+ " 'bdc2324-data/1/1targets.csv',\n",
+ " 'bdc2324-data/1/1tickets.csv',\n",
+ " 'bdc2324-data/1/1type_of_categories.csv',\n",
+ " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n",
+ " 'bdc2324-data/1/1type_ofs.csv']"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# focus on the company number 1: address its folder by name rather than by its position\n",
+ "# in the bucket listing, which would change if another folder sorted first\n",
+ "COMPANY_ID = \"1\"\n",
+ "FILE_PATH_S3 = f\"{BUCKET}/{COMPANY_ID}\"\n",
+ "\n",
+ "# every file stored under that company folder\n",
+ "files_path = fs.ls(FILE_PATH_S3)\n",
+ "files_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "1ea66c4e-1307-4f19-836e-3104fba2ff41",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_487/2894332003.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " df = pd.read_csv(file_in)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# loop to create dataframes related to company 1\n",
+ "\n",
+ "# paths look like \"<bucket>/<company>/<file>.csv\": the company number is the second component\n",
+ "client_number = files_path[0].split(\"/\")[1]\n",
+ "print(client_number)\n",
+ "df_prefix = \"df\" + str(client_number) + \"_\"\n",
+ "\n",
+ "for current_path in files_path:\n",
+ "    with fs.open(current_path, mode=\"rb\") as file_in:\n",
+ "        # low_memory=False is the remedy suggested by the DtypeWarning pandas raised on this\n",
+ "        # read (\"Columns (1) have mixed types\")\n",
+ "        df = pd.read_csv(file_in, low_memory=False)\n",
+ "    # the variable name is the prefix plus the table name captured from the file name,\n",
+ "    # e.g. \".../1/1targets.csv\" gives df1_targets\n",
+ "    nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n",
+ "    # publish the frame as a notebook-level variable so that later cells can use it by name\n",
+ "    globals()[nom_dataframe] = df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "13d70b2c-6580-4caf-b839-10f72b2e0b39",
+ "metadata": {},
+ "source": [
+ "## Target, target types and customer target mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "4dbc7fea-ac3b-4348-83fb-dfb1a460f936",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " is_import | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 69 | \n",
+ " False | \n",
+ " manual_dynamic_filter | \n",
+ " 2020-11-30 09:46:18.881030+01:00 | \n",
+ " 2020-11-30 09:46:18.881030+01:00 | \n",
+ " e0f4b8693184850fefd6d2a38f10584e | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 48 | \n",
+ " True | \n",
+ " manual_structure | \n",
+ " 2020-11-04 17:16:19.548275+01:00 | \n",
+ " 2020-11-04 17:16:19.548275+01:00 | \n",
+ " 382bca214204a2d3462f5ec2728d5d1e | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1 | \n",
+ " True | \n",
+ " manual_import | \n",
+ " 2020-10-14 18:37:40.521623+02:00 | \n",
+ " 2020-10-14 18:37:40.521623+02:00 | \n",
+ " 12213df2ce68a624e4c0070521437bac | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 56 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ " 2020-11-04 18:08:37.233486+01:00 | \n",
+ " 2020-11-04 18:08:37.233486+01:00 | \n",
+ " fb27e81baa4debc6a4e1a8639c20e808 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id is_import name created_at \\\n",
+ "0 69 False manual_dynamic_filter 2020-11-30 09:46:18.881030+01:00 \n",
+ "1 48 True manual_structure 2020-11-04 17:16:19.548275+01:00 \n",
+ "2 1 True manual_import 2020-10-14 18:37:40.521623+02:00 \n",
+ "3 56 False manual_static_filter 2020-11-04 18:08:37.233486+01:00 \n",
+ "\n",
+ " updated_at identifier \n",
+ "0 2020-11-30 09:46:18.881030+01:00 e0f4b8693184850fefd6d2a38f10584e \n",
+ "1 2020-11-04 17:16:19.548275+01:00 382bca214204a2d3462f5ec2728d5d1e \n",
+ "2 2020-10-14 18:37:40.521623+02:00 12213df2ce68a624e4c0070521437bac \n",
+ "3 2020-11-04 18:08:37.233486+01:00 fb27e81baa4debc6a4e1a8639c20e808 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1. target types\n",
+ "df1_target_types.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "0e9f5dcb-0dc3-4052-b866-e5c4cb954a1f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " target_type_id | \n",
+ " name | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " 56 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " 2021-01-04 15:00:05.401899+01:00 | \n",
+ " 2021-03-02 18:38:19.025969+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " 56 | \n",
+ " consentement optin scolaires | \n",
+ " 2021-12-21 16:03:59.840785+01:00 | \n",
+ " 2022-02-18 17:23:44.761388+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " 56 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " 2020-11-10 09:43:19.667471+01:00 | \n",
+ " 2021-03-02 18:38:19.052304+01:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " 56 | \n",
+ " consentement optout scolaires | \n",
+ " 2021-12-21 16:01:57.524946+01:00 | \n",
+ " 2022-02-18 17:23:44.807776+01:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " 56 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " 2022-04-14 10:58:17.142834+02:00 | \n",
+ " 2022-04-14 10:58:23.677264+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id target_type_id name \\\n",
+ "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 56 consentement optin scolaires \n",
+ "2 134 56 DDCP Newsletter jeune public \n",
+ "3 700 56 consentement optout scolaires \n",
+ "4 964 56 DDCP achat billet nbr dep 19052021 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n",
+ "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n",
+ "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n",
+ "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n",
+ "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 2. targets\n",
+ "df1_targets.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "c5c62302-370a-462f-bd79-eac31593f65c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " name | \n",
+ " extra_field | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " 2021-09-23 09:35:47.617275+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " 2021-09-23 09:35:47.668846+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " 2021-09-23 12:02:51.253269+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " 2021-09-23 12:20:47.394480+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " 2021-09-28 16:02:29.372608+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id created_at \\\n",
+ "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n",
+ "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n",
+ "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n",
+ "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n",
+ "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n",
+ "\n",
+ " updated_at name extra_field \n",
+ "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n",
+ "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n",
+ "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n",
+ "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n",
+ "4 2021-09-28 16:02:29.372608+02:00 NaN NaN "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3. customer target mapping\n",
+ "\n",
+ "df1_customer_target_mappings.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "1a87cebf-c1dd-408d-a523-26633419da1e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " target_type_id | \n",
+ " name | \n",
+ " target_type_is_import | \n",
+ " target_type_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " 56 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " 56 | \n",
+ " consentement optin scolaires | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " 56 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " 56 | \n",
+ " consentement optout scolaires | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " 56 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " False | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id target_type_id name \\\n",
+ "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 56 consentement optin scolaires \n",
+ "2 134 56 DDCP Newsletter jeune public \n",
+ "3 700 56 consentement optout scolaires \n",
+ "4 964 56 DDCP achat billet nbr dep 19052021 \n",
+ "\n",
+ " target_type_is_import target_type_name \n",
+ "0 False manual_static_filter \n",
+ "1 False manual_static_filter \n",
+ "2 False manual_static_filter \n",
+ "3 False manual_static_filter \n",
+ "4 False manual_static_filter "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 4.1. merge target with target type\n",
+ "\n",
+ "# the target-type columns get the prefix \"target_type_\", so its id becomes the join key\n",
+ "# target_type_id; the left join keeps every row of df1_targets\n",
+ "df1_targets_full = df1_targets[[\"id\", \"target_type_id\", \"name\"]].merge(\n",
+ "    df1_target_types[[\"id\", \"is_import\", \"name\"]].add_prefix(\"target_type_\"),\n",
+ "    on=\"target_type_id\",\n",
+ "    how=\"left\",\n",
+ ")\n",
+ "df1_targets_full.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "d48c1fff-73c2-4e75-8799-da2b80694be7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 4.2. prepare df1_targets_full for the merge with df1_customer_target_mappings\n",
+ "\n",
+ "# move the column target_type_id so that it sits immediately before target_type_name\n",
+ "column_to_move = 'target_type_id'\n",
+ "reference_column = 'target_type_name'\n",
+ "\n",
+ "# new column order, step 1: every column except the moved one, in its current order\n",
+ "ordered_columns = [name for name in df1_targets_full.columns if name != column_to_move]\n",
+ "\n",
+ "# step 2: look up the reference position AFTER removing the moved column, so the result is\n",
+ "# correct whether target_type_id currently sits before or after target_type_name\n",
+ "ordered_columns.insert(ordered_columns.index(reference_column), column_to_move)\n",
+ "\n",
+ "# select the columns in the new order; this builds a new frame instead of mutating in place\n",
+ "df1_targets_full = df1_targets_full[ordered_columns]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "a874514a-c7dc-42d4-a440-dedd3a270e24",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " target_id | \n",
+ " target_name | \n",
+ " target_type_is_import | \n",
+ " target_type_id | \n",
+ " target_type_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 217 | \n",
+ " DDCP PROMO Art contemporain - salle de chauffe... | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 701 | \n",
+ " consentement optin scolaires | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 134 | \n",
+ " DDCP Newsletter jeune public | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 700 | \n",
+ " consentement optout scolaires | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 964 | \n",
+ " DDCP achat billet nbr dep 19052021 | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " target_id target_name \\\n",
+ "0 217 DDCP PROMO Art contemporain - salle de chauffe... \n",
+ "1 701 consentement optin scolaires \n",
+ "2 134 DDCP Newsletter jeune public \n",
+ "3 700 consentement optout scolaires \n",
+ "4 964 DDCP achat billet nbr dep 19052021 \n",
+ "\n",
+ " target_type_is_import target_type_id target_type_name \n",
+ "0 False 56 manual_static_filter \n",
+ "1 False 56 manual_static_filter \n",
+ "2 False 56 manual_static_filter \n",
+ "3 False 56 manual_static_filter \n",
+ "4 False 56 manual_static_filter "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# prefix every column name with \"target_\" unless it already starts with that prefix\n",
+ "df1_targets_full = df1_targets_full.rename(\n",
+ "    columns={\n",
+ "        col: col if col.startswith('target_') else 'target_' + col\n",
+ "        for col in df1_targets_full.columns\n",
+ "    }\n",
+ ")\n",
+ "df1_targets_full.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "0db0172a-5119-4b7f-97f8-36fc5c985205",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " customer_id | \n",
+ " target_id | \n",
+ " target_name | \n",
+ " target_type_is_import | \n",
+ " target_type_id | \n",
+ " target_type_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1184824 | \n",
+ " 645400 | \n",
+ " 130 | \n",
+ " DDCP PROMO Réseau livres | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1184825 | \n",
+ " 645400 | \n",
+ " 345 | \n",
+ " Inscrits NL générale site web | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1184828 | \n",
+ " 645402 | \n",
+ " 126 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1184829 | \n",
+ " 645403 | \n",
+ " 126 | \n",
+ " DDCP PROMO Art contemporain | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1295770 | \n",
+ " 647301 | \n",
+ " 346 | \n",
+ " Votre première liste | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 768019 | \n",
+ " 2737545 | \n",
+ " 666983 | \n",
+ " 345 | \n",
+ " Inscrits NL générale site web | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 768020 | \n",
+ " 2737546 | \n",
+ " 666983 | \n",
+ " 346 | \n",
+ " Votre première liste | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 768021 | \n",
+ " 2737575 | \n",
+ " 666986 | \n",
+ " 346 | \n",
+ " Votre première liste | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 768022 | \n",
+ " 2737576 | \n",
+ " 666987 | \n",
+ " 345 | \n",
+ " Inscrits NL générale site web | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ " 768023 | \n",
+ " 2737577 | \n",
+ " 666987 | \n",
+ " 346 | \n",
+ " Votre première liste | \n",
+ " False | \n",
+ " 56 | \n",
+ " manual_static_filter | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
768024 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id customer_id target_id target_name \\\n",
+ "0 1184824 645400 130 DDCP PROMO Réseau livres \n",
+ "1 1184825 645400 345 Inscrits NL générale site web \n",
+ "2 1184828 645402 126 DDCP PROMO Art contemporain \n",
+ "3 1184829 645403 126 DDCP PROMO Art contemporain \n",
+ "4 1295770 647301 346 Votre première liste \n",
+ "... ... ... ... ... \n",
+ "768019 2737545 666983 345 Inscrits NL générale site web \n",
+ "768020 2737546 666983 346 Votre première liste \n",
+ "768021 2737575 666986 346 Votre première liste \n",
+ "768022 2737576 666987 345 Inscrits NL générale site web \n",
+ "768023 2737577 666987 346 Votre première liste \n",
+ "\n",
+ " target_type_is_import target_type_id target_type_name \n",
+ "0 False 56 manual_static_filter \n",
+ "1 False 56 manual_static_filter \n",
+ "2 False 56 manual_static_filter \n",
+ "3 False 56 manual_static_filter \n",
+ "4 False 56 manual_static_filter \n",
+ "... ... ... ... \n",
+ "768019 False 56 manual_static_filter \n",
+ "768020 False 56 manual_static_filter \n",
+ "768021 False 56 manual_static_filter \n",
+ "768022 False 56 manual_static_filter \n",
+ "768023 False 56 manual_static_filter \n",
+ "\n",
+ "[768024 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# finally, merge\n",
+ "\n",
+ "# from df1_customer_target_mappings only id, customer_id and target_id are kept: name, extra_field\n",
+ "# and updated_at (same value as created_at, per the original note) are left out\n",
+ "# note : by making a left join on df1_customer_target_mappings, we suppress 2 targets that have no customer associated\n",
+ "\n",
+ "df1_customer_targets = df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]].merge(\n",
+ "    df1_targets_full,\n",
+ "    on=\"target_id\",\n",
+ "    how=\"left\",\n",
+ ")\n",
+ "df1_customer_targets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "52326267-c5ba-4e21-b8ab-4b4c62de75d1",
+ "metadata": {},
+ "source": [
+ "## Campaign stats, campaigns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "06dca910-5c07-4ee1-bbf2-3b11b48ba1f2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " service_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " process_id | \n",
+ " report_url | \n",
+ " category | \n",
+ " to_be_synced | \n",
+ " identifier | \n",
+ " sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1319613 | \n",
+ " newsletter enseignants janvier 2022 | \n",
+ " 721 | \n",
+ " 2022-01-14 16:06:42.586321+01:00 | \n",
+ " 2022-02-03 14:17:27.112963+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " aba3b6fd5d186d28e06ff97135cade7f | \n",
+ " 2022-01-14 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1319586 | \n",
+ " lsf_janvier_2022 | \n",
+ " 717 | \n",
+ " 2022-01-07 11:30:35.315895+01:00 | \n",
+ " 2022-02-03 14:17:27.116171+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 788d986905533aba051261497ecffcbb | \n",
+ " 2022-01-07 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1319282 | \n",
+ " Invitation à déjeuner au Mucem | Vernissage « ... | \n",
+ " 591 | \n",
+ " 2021-09-28 12:50:24.448752+02:00 | \n",
+ " 2022-02-03 14:17:27.119582+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 3493894fa4ea036cfc6433c3e2ee63b0 | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1319283 | \n",
+ " Vacances de la Toussaint - centres des loisirs | \n",
+ " 590 | \n",
+ " 2021-09-28 18:01:04.692073+02:00 | \n",
+ " 2022-02-03 14:17:27.124408+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 08b255a5d42b89b0585260b6f2360bdd | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1319636 | \n",
+ " ddcp_promo_md_livemag | \n",
+ " 730 | \n",
+ " 2022-01-27 18:00:41.053069+01:00 | \n",
+ " 2022-02-03 14:17:27.127607+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " d5cfead94f5350c12c322b5b664544c1 | \n",
+ " 2022-01-27 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name service_id \\\n",
+ "0 1319613 newsletter enseignants janvier 2022 721 \n",
+ "1 1319586 lsf_janvier_2022 717 \n",
+ "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n",
+ "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n",
+ "4 1319636 ddcp_promo_md_livemag 730 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n",
+ "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n",
+ "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n",
+ "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n",
+ "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n",
+ "\n",
+ " process_id report_url category to_be_synced \\\n",
+ "0 NaN NaN 0.0 False \n",
+ "1 NaN NaN 0.0 False \n",
+ "2 NaN NaN 0.0 False \n",
+ "3 NaN NaN 0.0 False \n",
+ "4 NaN NaN 0.0 False \n",
+ "\n",
+ " identifier sent_at \n",
+ "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n",
+ "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n",
+ "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n",
+ "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n",
+ "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1. campaigns\n",
+ "df1_campaigns.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "83eaa447-9144-41ed-9e26-f0f23799a8fd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " 2021-03-28 18:34:20.616136+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " 2021-03-28 18:21:04.297213+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " 2021-03-28 18:18:49.991042+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " 2021-03-28 18:09:50.915354+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " 2021-03-28 18:05:08.507398+02:00 | \n",
+ " 2022-04-15 22:52:04.397693+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n",
+ "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 "
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 2. campaigns stats\n",
+ "df1_campaign_stats.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "7f25eb1b-e7c8-4715-bc30-7ac29a7181ac",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 19793 | \n",
+ " 58 | \n",
+ " 112597 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:24:18+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14211 | \n",
+ " 58 | \n",
+ " 113666 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:09+02:00 | \n",
+ " 2021-03-28 18:21:02+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13150 | \n",
+ " 58 | \n",
+ " 280561 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:08:45+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 7073 | \n",
+ " 58 | \n",
+ " 101007 | \n",
+ " 2021-03-28 20:11:06+02:00 | \n",
+ " 2021-03-28 18:00:59+02:00 | \n",
+ " 2021-03-28 18:09:47+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5175 | \n",
+ " 58 | \n",
+ " 103972 | \n",
+ " NaN | \n",
+ " 2021-03-28 18:01:06+02:00 | \n",
+ " 2021-03-28 18:05:03+02:00 | \n",
+ " Le Mucem chez vous, gardons le lien #22 | \n",
+ " 404 | \n",
+ " 2021-03-28 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "0 19793 58 112597 NaN \n",
+ "1 14211 58 113666 NaN \n",
+ "2 13150 58 280561 NaN \n",
+ "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n",
+ "4 5175 58 103972 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n",
+ "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n",
+ "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n",
+ "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n",
+ "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "0 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "1 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "2 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "3 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "4 Le Mucem chez vous, gardons le lien #22 404 \n",
+ "\n",
+ " campaign_sent_at \n",
+ "0 2021-03-28 00:00:00+01:00 \n",
+ "1 2021-03-28 00:00:00+01:00 \n",
+ "2 2021-03-28 00:00:00+01:00 \n",
+ "3 2021-03-28 00:00:00+01:00 \n",
+ "4 2021-03-28 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 3. Enrich each per-customer campaign stat with the attributes of its campaign\n",
+ "\n",
+ "# The campaign columns are prefixed with `campaign_`: `id` becomes the join key `campaign_id`\n",
+ "# and the campaign-level `sent_at` does not collide with the stat-level `sent_at`.\n",
+ "# Left join: every stat row is kept, even if its campaign is missing from df1_campaigns.\n",
+ "df1_campaigns_full = (\n",
+ "    df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n",
+ "    .merge(\n",
+ "        df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\"),\n",
+ "        on=\"campaign_id\",\n",
+ "        how=\"left\",\n",
+ "    )\n",
+ ")\n",
+ "df1_campaigns_full.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "87fc686a-4a80-40ab-9987-20d2774f3055",
+ "metadata": {},
+ "source": [
+ "## Link stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "2f9df2d0-8a23-496b-8e92-617285f64530",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " clicked_at | \n",
+ " link_id | \n",
+ " customer_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2021-03-26 16:30:36+01:00 | \n",
+ " 1 | \n",
+ " 284033 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2021-03-26 17:16:34+01:00 | \n",
+ " 2 | \n",
+ " 119768 | \n",
+ " 2021-03-26 16:16:34.950871+01:00 | \n",
+ " 2021-03-26 16:16:34.950871+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 272 | \n",
+ " 2021-03-28 20:03:32+02:00 | \n",
+ " 42 | \n",
+ " 113105 | \n",
+ " 2021-03-28 18:03:32.736394+02:00 | \n",
+ " 2021-03-28 18:03:32.736394+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 2021-03-26 17:43:19+01:00 | \n",
+ " 3 | \n",
+ " 272280 | \n",
+ " 2021-03-26 16:43:19.338321+01:00 | \n",
+ " 2021-03-26 16:43:19.338321+01:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 2021-03-26 17:46:00+01:00 | \n",
+ " 3 | \n",
+ " 105095 | \n",
+ " 2021-03-26 16:46:00.502945+01:00 | \n",
+ " 2021-03-26 16:46:00.502945+01:00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 151046 | \n",
+ " 243553 | \n",
+ " 2023-11-09 16:34:27+01:00 | \n",
+ " 14666 | \n",
+ " 998 | \n",
+ " 2023-11-09 15:34:29.425425+01:00 | \n",
+ " 2023-11-09 15:34:29.425425+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151047 | \n",
+ " 243554 | \n",
+ " 2023-11-09 16:34:35+01:00 | \n",
+ " 14670 | \n",
+ " 998 | \n",
+ " 2023-11-09 15:34:37.505505+01:00 | \n",
+ " 2023-11-09 15:34:37.505505+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151048 | \n",
+ " 243559 | \n",
+ " 2023-11-09 16:51:15+01:00 | \n",
+ " 14686 | \n",
+ " 82923 | \n",
+ " 2023-11-09 15:51:17.439518+01:00 | \n",
+ " 2023-11-09 15:51:17.439518+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151049 | \n",
+ " 243561 | \n",
+ " 2023-11-09 16:59:42+01:00 | \n",
+ " 14677 | \n",
+ " 82923 | \n",
+ " 2023-11-09 15:59:44.030922+01:00 | \n",
+ " 2023-11-09 15:59:44.030922+01:00 | \n",
+ "
\n",
+ " \n",
+ " 151050 | \n",
+ " 243564 | \n",
+ " 2023-11-09 17:16:41+01:00 | \n",
+ " 14691 | \n",
+ " 1254355 | \n",
+ " 2023-11-09 16:16:43.012932+01:00 | \n",
+ " 2023-11-09 16:16:43.012932+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
151051 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id clicked_at link_id customer_id \\\n",
+ "0 1 2021-03-26 16:30:36+01:00 1 284033 \n",
+ "1 2 2021-03-26 17:16:34+01:00 2 119768 \n",
+ "2 272 2021-03-28 20:03:32+02:00 42 113105 \n",
+ "3 4 2021-03-26 17:43:19+01:00 3 272280 \n",
+ "4 5 2021-03-26 17:46:00+01:00 3 105095 \n",
+ "... ... ... ... ... \n",
+ "151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n",
+ "151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n",
+ "151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n",
+ "151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n",
+ "151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n",
+ "1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n",
+ "2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n",
+ "3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n",
+ "4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n",
+ "... ... ... \n",
+ "151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n",
+ "151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n",
+ "151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n",
+ "151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n",
+ "151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n",
+ "\n",
+ "[151051 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_link_stats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aad6fb14-9694-4c1e-9885-1ebe0f38afe3",
+ "metadata": {},
+ "source": [
+ "## Bonus : peut-on lier link stats et campaign ? Non, les dates auxquelles le client clique sur le lien / ouvre la campagne ne permettent pas de faire coïncider link_id et campaign_id"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "8be7c974-72c9-4e31-a874-d7e5d2719fb3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " clicked_at | \n",
+ " link_id | \n",
+ " customer_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2021-03-26 16:30:36+01:00 | \n",
+ " 1 | \n",
+ " 284033 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ " 2021-03-26 15:30:37.050161+01:00 | \n",
+ "
\n",
+ " \n",
+ " 7526 | \n",
+ " 14018 | \n",
+ " 2021-05-10 18:07:59+02:00 | \n",
+ " 312 | \n",
+ " 284033 | \n",
+ " 2021-05-10 16:08:00.541322+02:00 | \n",
+ " 2021-05-10 16:08:00.541322+02:00 | \n",
+ "
\n",
+ " \n",
+ " 96848 | \n",
+ " 133449 | \n",
+ " 2021-03-25 08:42:22+01:00 | \n",
+ " 4 | \n",
+ " 284033 | \n",
+ " 2022-04-15 22:51:01.994343+02:00 | \n",
+ " 2022-04-15 22:51:01.994343+02:00 | \n",
+ "
\n",
+ " \n",
+ " 115728 | \n",
+ " 207544 | \n",
+ " 2022-08-23 10:33:04+02:00 | \n",
+ " 12365 | \n",
+ " 284033 | \n",
+ " 2022-08-23 08:33:06.498908+02:00 | \n",
+ " 2022-08-23 08:33:06.498908+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id clicked_at link_id customer_id \\\n",
+ "0 1 2021-03-26 16:30:36+01:00 1 284033 \n",
+ "7526 14018 2021-05-10 18:07:59+02:00 312 284033 \n",
+ "96848 133449 2021-03-25 08:42:22+01:00 4 284033 \n",
+ "115728 207544 2022-08-23 10:33:04+02:00 12365 284033 \n",
+ "\n",
+ " created_at updated_at \n",
+ "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n",
+ "7526 2021-05-10 16:08:00.541322+02:00 2021-05-10 16:08:00.541322+02:00 \n",
+ "96848 2022-04-15 22:51:01.994343+02:00 2022-04-15 22:51:01.994343+02:00 \n",
+ "115728 2022-08-23 08:33:06.498908+02:00 2022-08-23 08:33:06.498908+02:00 "
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_link_stats[df1_link_stats[\"customer_id\"] == 284033]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "902e9947-58e1-44f4-b634-1239b0e4df02",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " campaign_id | \n",
+ " customer_id | \n",
+ " opened_at | \n",
+ " sent_at | \n",
+ " delivered_at | \n",
+ " campaign_name | \n",
+ " campaign_service_id | \n",
+ " campaign_sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 4030643 | \n",
+ " 4036376 | \n",
+ " 4 | \n",
+ " 284033 | \n",
+ " NaN | \n",
+ " 2021-03-21 18:01:22+01:00 | \n",
+ " 2021-03-21 18:08:04+01:00 | \n",
+ " Le Mucem chez vous, gardons le lien #21 | \n",
+ " 398 | \n",
+ " 2021-03-21 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id campaign_id customer_id opened_at \\\n",
+ "4030643 4036376 4 284033 NaN \n",
+ "\n",
+ " sent_at delivered_at \\\n",
+ "4030643 2021-03-21 18:01:22+01:00 2021-03-21 18:08:04+01:00 \n",
+ "\n",
+ " campaign_name campaign_service_id \\\n",
+ "4030643 Le Mucem chez vous, gardons le lien #21 398 \n",
+ "\n",
+ " campaign_sent_at \n",
+ "4030643 2021-03-21 00:00:00+01:00 "
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1_campaigns_full[ (df1_campaigns_full[\"customer_id\"] == 284033) & (df1_campaigns_full[\"campaign_id\"] == 4)]"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/useless/TP_access_merge_data.ipynb b/useless/TP_access_merge_data.ipynb
new file mode 100644
index 0000000..f6ef912
--- /dev/null
+++ b/useless/TP_access_merge_data.ipynb
@@ -0,0 +1,1215 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "5ce2ffc5-66b6-4709-9e2c-7a50f49d1361",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test\n",
+ "\n",
+ "import os \n",
+ "import s3fs\n",
+ "import pandas as pd\n",
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "f579ff01-f009-4fb1-ba79-0cb3ce58ab7f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data/1',\n",
+ " 'bdc2324-data/10',\n",
+ " 'bdc2324-data/101',\n",
+ " 'bdc2324-data/11',\n",
+ " 'bdc2324-data/12',\n",
+ " 'bdc2324-data/13',\n",
+ " 'bdc2324-data/14',\n",
+ " 'bdc2324-data/2',\n",
+ " 'bdc2324-data/3',\n",
+ " 'bdc2324-data/4',\n",
+ " 'bdc2324-data/5',\n",
+ " 'bdc2324-data/6',\n",
+ " 'bdc2324-data/7',\n",
+ " 'bdc2324-data/8',\n",
+ " 'bdc2324-data/9']"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Connect to the S3 storage and list the client folders available in the bucket\n",
+ "BUCKET = \"bdc2324-data\"\n",
+ "\n",
+ "# The endpoint host is read from the AWS_S3_ENDPOINT environment variable\n",
+ "S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
+ "fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))\n",
+ "\n",
+ "fs.ls(BUCKET)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "c8b2c797-271f-43ee-8823-d0aee5b8782d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "FILE_PATH_S3 = fs.ls(BUCKET)[1] # +\".csv\"\n",
+ "files_path_2 = fs.ls(FILE_PATH_S3)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "18cee687-1462-4169-9bfe-f39786135cdd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# `files_path_1` is not defined by any earlier cell of this notebook, so build it here:\n",
+ "# files of the first bucket folder, listed the same way as `files_path_2`.\n",
+ "files_path_1 = fs.ls(fs.ls(BUCKET)[0])\n",
+ "\n",
+ "# NOTE(review): index 1 is assumed to be the campaigns file, as in the listing of folder 10 - confirm\n",
+ "with fs.open(files_path_1[1], mode=\"rb\") as file_in:\n",
+ "    df_campaigns = pd.read_csv(file_in)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "33e8d14c-c649-4b9c-8290-4a2aa635f999",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " service_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " process_id | \n",
+ " report_url | \n",
+ " category | \n",
+ " to_be_synced | \n",
+ " identifier | \n",
+ " sent_at | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1319613 | \n",
+ " newsletter enseignants janvier 2022 | \n",
+ " 721 | \n",
+ " 2022-01-14 16:06:42.586321+01:00 | \n",
+ " 2022-02-03 14:17:27.112963+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " aba3b6fd5d186d28e06ff97135cade7f | \n",
+ " 2022-01-14 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1319586 | \n",
+ " lsf_janvier_2022 | \n",
+ " 717 | \n",
+ " 2022-01-07 11:30:35.315895+01:00 | \n",
+ " 2022-02-03 14:17:27.116171+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 788d986905533aba051261497ecffcbb | \n",
+ " 2022-01-07 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1319282 | \n",
+ " Invitation à déjeuner au Mucem | Vernissage « ... | \n",
+ " 591 | \n",
+ " 2021-09-28 12:50:24.448752+02:00 | \n",
+ " 2022-02-03 14:17:27.119582+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 3493894fa4ea036cfc6433c3e2ee63b0 | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1319283 | \n",
+ " Vacances de la Toussaint - centres des loisirs | \n",
+ " 590 | \n",
+ " 2021-09-28 18:01:04.692073+02:00 | \n",
+ " 2022-02-03 14:17:27.124408+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 08b255a5d42b89b0585260b6f2360bdd | \n",
+ " 2021-09-28 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1319636 | \n",
+ " ddcp_promo_md_livemag | \n",
+ " 730 | \n",
+ " 2022-01-27 18:00:41.053069+01:00 | \n",
+ " 2022-02-03 14:17:27.127607+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " d5cfead94f5350c12c322b5b664544c1 | \n",
+ " 2022-01-27 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 952 | \n",
+ " 1320072 | \n",
+ " dre_gaza0106 | \n",
+ " 881 | \n",
+ " 2022-05-26 09:01:35.523639+02:00 | \n",
+ " 2022-12-02 17:51:22.614046+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 7504adad8bb96320eb3afdd4df6e1f60 | \n",
+ " 2022-05-26 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 953 | \n",
+ " 661398 | \n",
+ " DDCP Plan Bis 4 - Marketing direct - MJ5C | \n",
+ " 183 | \n",
+ " 2021-06-18 10:30:01.259578+02:00 | \n",
+ " 2021-09-24 11:56:09.082785+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " cedebb6e872f539bef8c3f919874e9d7 | \n",
+ " 2020-07-27 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 954 | \n",
+ " 1320487 | \n",
+ " Invitation portes ouvertes amitiés | \n",
+ " 988 | \n",
+ " 2022-09-29 18:01:33.834090+02:00 | \n",
+ " 2022-12-02 17:51:23.258324+01:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 9908279ebbf1f9b250ba689db6a0222b | \n",
+ " 2022-09-29 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ " 955 | \n",
+ " 906903 | \n",
+ " DDCP PROMO La méditerranée des philosophes #3 ... | \n",
+ " 310 | \n",
+ " 2021-07-19 14:07:16.177390+02:00 | \n",
+ " 2021-09-24 11:56:09.086101+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 06eb61b839a0cefee4967c67ccb099dc | \n",
+ " 2020-12-23 00:00:00+01:00 | \n",
+ "
\n",
+ " \n",
+ " 956 | \n",
+ " 579313 | \n",
+ " ddcp_promo_automation_manuel_pre_visit | \n",
+ " 481 | \n",
+ " 2021-06-08 17:38:54.041310+02:00 | \n",
+ " 2021-09-24 11:56:09.089394+02:00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " False | \n",
+ " 9461cce28ebe3e76fb4b931c35a169b0 | \n",
+ " 2021-06-08 00:00:00+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
957 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name service_id \\\n",
+ "0 1319613 newsletter enseignants janvier 2022 721 \n",
+ "1 1319586 lsf_janvier_2022 717 \n",
+ "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n",
+ "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n",
+ "4 1319636 ddcp_promo_md_livemag 730 \n",
+ ".. ... ... ... \n",
+ "952 1320072 dre_gaza0106 881 \n",
+ "953 661398 DDCP Plan Bis 4 - Marketing direct - MJ5C 183 \n",
+ "954 1320487 Invitation portes ouvertes amitiés 988 \n",
+ "955 906903 DDCP PROMO La méditerranée des philosophes #3 ... 310 \n",
+ "956 579313 ddcp_promo_automation_manuel_pre_visit 481 \n",
+ "\n",
+ " created_at updated_at \\\n",
+ "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n",
+ "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n",
+ "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n",
+ "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n",
+ "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n",
+ ".. ... ... \n",
+ "952 2022-05-26 09:01:35.523639+02:00 2022-12-02 17:51:22.614046+01:00 \n",
+ "953 2021-06-18 10:30:01.259578+02:00 2021-09-24 11:56:09.082785+02:00 \n",
+ "954 2022-09-29 18:01:33.834090+02:00 2022-12-02 17:51:23.258324+01:00 \n",
+ "955 2021-07-19 14:07:16.177390+02:00 2021-09-24 11:56:09.086101+02:00 \n",
+ "956 2021-06-08 17:38:54.041310+02:00 2021-09-24 11:56:09.089394+02:00 \n",
+ "\n",
+ " process_id report_url category to_be_synced \\\n",
+ "0 NaN NaN 0.0 False \n",
+ "1 NaN NaN 0.0 False \n",
+ "2 NaN NaN 0.0 False \n",
+ "3 NaN NaN 0.0 False \n",
+ "4 NaN NaN 0.0 False \n",
+ ".. ... ... ... ... \n",
+ "952 NaN NaN 0.0 False \n",
+ "953 NaN NaN 0.0 False \n",
+ "954 NaN NaN 0.0 False \n",
+ "955 NaN NaN 0.0 False \n",
+ "956 NaN NaN 0.0 False \n",
+ "\n",
+ " identifier sent_at \n",
+ "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n",
+ "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n",
+ "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n",
+ "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n",
+ "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 \n",
+ ".. ... ... \n",
+ "952 7504adad8bb96320eb3afdd4df6e1f60 2022-05-26 00:00:00+02:00 \n",
+ "953 cedebb6e872f539bef8c3f919874e9d7 2020-07-27 00:00:00+02:00 \n",
+ "954 9908279ebbf1f9b250ba689db6a0222b 2022-09-29 00:00:00+02:00 \n",
+ "955 06eb61b839a0cefee4967c67ccb099dc 2020-12-23 00:00:00+01:00 \n",
+ "956 9461cce28ebe3e76fb4b931c35a169b0 2021-06-08 00:00:00+02:00 \n",
+ "\n",
+ "[957 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_campaigns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "b04f39e7-7d53-4734-b125-4dc1843172d6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data', '10', '10campaign_stats.csv']"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "files_path_2[0].split(\"/\")[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "d9bd97df-67bf-48ef-812a-975deb890163",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_521/1596461036.py:11: DtypeWarning: Columns (19,20,33,34,35,39) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ " df = pd.read_csv(file_in)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load every CSV listed in files_path_2 into a global DataFrame named df<client>_<table>\n",
+ "\n",
+ "files_path = files_path_2\n",
+ "\n",
+ "# Paths look like \"<bucket>/<client>/<client><table>.csv\": the client number is the folder name\n",
+ "client_number = files_path[0].split(\"/\")[1]\n",
+ "df_prefix = \"df\" + str(client_number) + \"_\"\n",
+ "\n",
+ "for current_path in files_path:\n",
+ "    # Groups: (1) client folder, (2) numeric prefix of the file name, (3) table name\n",
+ "    name_match = re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path)\n",
+ "    if name_match is None:\n",
+ "        # Unexpected file name: report and skip it instead of failing on .group()\n",
+ "        print(f\"skipped (unexpected file name) : {current_path}\")\n",
+ "        continue\n",
+ "    with fs.open(current_path, mode=\"rb\") as file_in:\n",
+ "        # low_memory=False infers each column dtype from the whole file,\n",
+ "        # which avoids the mixed-types DtypeWarning raised by chunked parsing\n",
+ "        df = pd.read_csv(file_in, low_memory=False)\n",
+ "    # e.g. \"bdc2324-data/10/10campaigns.csv\" -> df10_campaigns\n",
+ "    nom_dataframe = df_prefix + name_match.group(3)\n",
+ "    globals()[nom_dataframe] = df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "id": "7f46e38e-413c-48cb-a171-eb6bc7219d9c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "client number :10\n",
+ "prefix used : df10_\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f\"client number :{client_number}\")\n",
+ "print(f\"prefix used : {df_prefix}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "id": "bdfd388c-7971-4f4d-99ef-c5b0435a4567",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['bdc2324-data/10/10campaign_stats.csv',\n",
+ " 'bdc2324-data/10/10campaigns.csv',\n",
+ " 'bdc2324-data/10/10categories.csv',\n",
+ " 'bdc2324-data/10/10countries.csv',\n",
+ " 'bdc2324-data/10/10currencies.csv',\n",
+ " 'bdc2324-data/10/10customer_target_mappings.csv',\n",
+ " 'bdc2324-data/10/10customersplus.csv',\n",
+ " 'bdc2324-data/10/10event_types.csv',\n",
+ " 'bdc2324-data/10/10events.csv',\n",
+ " 'bdc2324-data/10/10facilities.csv',\n",
+ " 'bdc2324-data/10/10link_stats.csv',\n",
+ " 'bdc2324-data/10/10pricing_formulas.csv',\n",
+ " 'bdc2324-data/10/10product_packs.csv',\n",
+ " 'bdc2324-data/10/10products.csv',\n",
+ " 'bdc2324-data/10/10products_groups.csv',\n",
+ " 'bdc2324-data/10/10purchases.csv',\n",
+ " 'bdc2324-data/10/10representation_category_capacities.csv',\n",
+ " 'bdc2324-data/10/10representation_types.csv',\n",
+ " 'bdc2324-data/10/10representations.csv',\n",
+ " 'bdc2324-data/10/10seasons.csv',\n",
+ " 'bdc2324-data/10/10suppliers.csv',\n",
+ " 'bdc2324-data/10/10tags.csv',\n",
+ " 'bdc2324-data/10/10target_types.csv',\n",
+ " 'bdc2324-data/10/10targets.csv',\n",
+ " 'bdc2324-data/10/10tickets.csv',\n",
+ " 'bdc2324-data/10/10type_of_pricing_formulas.csv',\n",
+ " 'bdc2324-data/10/10type_ofs.csv']"
+ ]
+ },
+ "execution_count": 79,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "files_path_2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "id": "e7bd02dc-1925-46ff-9d59-231d18f9f4f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " number | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " purchase_id | \n",
+ " product_id | \n",
+ " is_from_subscription | \n",
+ " type_of | \n",
+ " supplier_id | \n",
+ " barcode | \n",
+ " identifier | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1799177 | \n",
+ " 60_0_0_0_1_k-5 | \n",
+ " 2021-12-29 07:27:27.868513+01:00 | \n",
+ " 2021-12-29 07:27:27.868513+01:00 | \n",
+ " 409613 | \n",
+ " 321683 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " 56c3db5a02c87af7e525676092cb7c4a | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1799178 | \n",
+ " 71_0_0_0_1_k-5 | \n",
+ " 2021-12-29 07:27:27.976380+01:00 | \n",
+ " 2021-12-29 07:27:27.976380+01:00 | \n",
+ " 409613 | \n",
+ " 321684 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " 1ecad1dc6b42b4cdb75784dd9dcd9d5c | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1799179 | \n",
+ " 93_0_0_0_1_k-5 | \n",
+ " 2021-12-29 07:27:27.978719+01:00 | \n",
+ " 2021-12-29 07:27:27.978719+01:00 | \n",
+ " 409613 | \n",
+ " 321685 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " b3d207bdb47bcdb27a52f6bae0db7ec2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1799180 | \n",
+ " 103_0_0_0_1_k-5 | \n",
+ " 2021-12-29 07:27:27.984621+01:00 | \n",
+ " 2021-12-29 07:27:27.984621+01:00 | \n",
+ " 409613 | \n",
+ " 321686 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " 10df9591b617cc177516e9ddf91ddae3 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1799181 | \n",
+ " 107_0_3_2_1_h-1 | \n",
+ " 2021-12-29 07:27:27.988602+01:00 | \n",
+ " 2021-12-29 07:27:27.988602+01:00 | \n",
+ " 409613 | \n",
+ " 321687 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " 3a8c7d5882fe9f20f0f59c8d90c9873c | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 492309 | \n",
+ " 3252232 | \n",
+ " 336359 | \n",
+ " 2023-03-10 01:31:52.543375+01:00 | \n",
+ " 2023-03-10 01:31:52.543375+01:00 | \n",
+ " 710062 | \n",
+ " 572547 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " fc96f582931209501ed186d709664980 | \n",
+ "
\n",
+ " \n",
+ " 492310 | \n",
+ " 3252233 | \n",
+ " 336360 | \n",
+ " 2023-03-10 01:31:52.543869+01:00 | \n",
+ " 2023-03-10 01:31:52.543869+01:00 | \n",
+ " 710062 | \n",
+ " 572547 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " d4ccfb00a9b22b62654bbf98b4d9a5a5 | \n",
+ "
\n",
+ " \n",
+ " 492311 | \n",
+ " 3252234 | \n",
+ " 336361 | \n",
+ " 2023-03-10 01:31:52.545783+01:00 | \n",
+ " 2023-03-10 01:31:52.545783+01:00 | \n",
+ " 710062 | \n",
+ " 572547 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " d5f76662d6571b8eaceaf19c781fa514 | \n",
+ "
\n",
+ " \n",
+ " 492312 | \n",
+ " 3252235 | \n",
+ " 336362 | \n",
+ " 2023-03-10 01:31:52.547043+01:00 | \n",
+ " 2023-03-10 01:31:52.547043+01:00 | \n",
+ " 710062 | \n",
+ " 572547 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " 093225db5cd5e06cc8e06242b4cbba37 | \n",
+ "
\n",
+ " \n",
+ " 492313 | \n",
+ " 3252236 | \n",
+ " 336363 | \n",
+ " 2023-03-10 01:31:52.548311+01:00 | \n",
+ " 2023-03-10 01:31:52.548311+01:00 | \n",
+ " 710062 | \n",
+ " 572547 | \n",
+ " False | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ " 9bace0d0cd7a5ec559aca8ac8bf67700 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
492314 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id number created_at \\\n",
+ "0 1799177 60_0_0_0_1_k-5 2021-12-29 07:27:27.868513+01:00 \n",
+ "1 1799178 71_0_0_0_1_k-5 2021-12-29 07:27:27.976380+01:00 \n",
+ "2 1799179 93_0_0_0_1_k-5 2021-12-29 07:27:27.978719+01:00 \n",
+ "3 1799180 103_0_0_0_1_k-5 2021-12-29 07:27:27.984621+01:00 \n",
+ "4 1799181 107_0_3_2_1_h-1 2021-12-29 07:27:27.988602+01:00 \n",
+ "... ... ... ... \n",
+ "492309 3252232 336359 2023-03-10 01:31:52.543375+01:00 \n",
+ "492310 3252233 336360 2023-03-10 01:31:52.543869+01:00 \n",
+ "492311 3252234 336361 2023-03-10 01:31:52.545783+01:00 \n",
+ "492312 3252235 336362 2023-03-10 01:31:52.547043+01:00 \n",
+ "492313 3252236 336363 2023-03-10 01:31:52.548311+01:00 \n",
+ "\n",
+ " updated_at purchase_id product_id \\\n",
+ "0 2021-12-29 07:27:27.868513+01:00 409613 321683 \n",
+ "1 2021-12-29 07:27:27.976380+01:00 409613 321684 \n",
+ "2 2021-12-29 07:27:27.978719+01:00 409613 321685 \n",
+ "3 2021-12-29 07:27:27.984621+01:00 409613 321686 \n",
+ "4 2021-12-29 07:27:27.988602+01:00 409613 321687 \n",
+ "... ... ... ... \n",
+ "492309 2023-03-10 01:31:52.543375+01:00 710062 572547 \n",
+ "492310 2023-03-10 01:31:52.543869+01:00 710062 572547 \n",
+ "492311 2023-03-10 01:31:52.545783+01:00 710062 572547 \n",
+ "492312 2023-03-10 01:31:52.547043+01:00 710062 572547 \n",
+ "492313 2023-03-10 01:31:52.548311+01:00 710062 572547 \n",
+ "\n",
+ " is_from_subscription type_of supplier_id barcode \\\n",
+ "0 False 1 2 NaN \n",
+ "1 False 1 2 NaN \n",
+ "2 False 1 2 NaN \n",
+ "3 False 1 2 NaN \n",
+ "4 False 1 2 NaN \n",
+ "... ... ... ... ... \n",
+ "492309 False 1 2 NaN \n",
+ "492310 False 1 2 NaN \n",
+ "492311 False 1 2 NaN \n",
+ "492312 False 1 2 NaN \n",
+ "492313 False 1 2 NaN \n",
+ "\n",
+ " identifier \n",
+ "0 56c3db5a02c87af7e525676092cb7c4a \n",
+ "1 1ecad1dc6b42b4cdb75784dd9dcd9d5c \n",
+ "2 b3d207bdb47bcdb27a52f6bae0db7ec2 \n",
+ "3 10df9591b617cc177516e9ddf91ddae3 \n",
+ "4 3a8c7d5882fe9f20f0f59c8d90c9873c \n",
+ "... ... \n",
+ "492309 fc96f582931209501ed186d709664980 \n",
+ "492310 d4ccfb00a9b22b62654bbf98b4d9a5a5 \n",
+ "492311 d5f76662d6571b8eaceaf19c781fa514 \n",
+ "492312 093225db5cd5e06cc8e06242b4cbba37 \n",
+ "492313 9bace0d0cd7a5ec559aca8ac8bf67700 \n",
+ "\n",
+ "[492314 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Example: display the table\n",
+ "\n",
+ "df10_tickets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "48ae6de5-2353-4fa8-a2a8-20da3b77e2ff",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'\\nfor i in range(len(files_path_1)) :\\n current_path = files_path_1[i]\\n nom_dataframe = \"df\" + re.search(r\\'/([^/]+)\\\\.csv$\\', current_path).group(1)\\n df = globals()[nom_dataframe]\\n print(nom_dataframe)\\n print(df.head(20))\\n'"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Loop to preview the dataframes listed in files_path_1 (disabled: wrapped in a string literal)\n",
+ "\n",
+ "\"\"\"\n",
+ "for i in range(len(files_path_1)) :\n",
+ " current_path = files_path_1[i]\n",
+ " nom_dataframe = \"df\" + re.search(r'/([^/]+)\\.csv$', current_path).group(1)\n",
+ " df = globals()[nom_dataframe]\n",
+ " print(nom_dataframe)\n",
+ " print(df.head(20))\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d72166db-dcef-45bd-9f8c-7cb2ee6bcbde",
+ "metadata": {},
+ "source": [
+ "## Beginning of the exploratory analysis of dataframes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "17966ab2-9038-4dd6-a59c-7739ee05c964",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " lastname | \n",
+ " firstname | \n",
+ " birthdate | \n",
+ " email | \n",
+ " street_id | \n",
+ " created_at | \n",
+ " updated_at | \n",
+ " civility | \n",
+ " is_partner | \n",
+ " ... | \n",
+ " preferred_category | \n",
+ " preferred_supplier | \n",
+ " preferred_formula | \n",
+ " purchase_count | \n",
+ " first_buying_date | \n",
+ " last_visiting_date | \n",
+ " zipcode | \n",
+ " country | \n",
+ " age | \n",
+ " tenant_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 821538 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " email821538 | \n",
+ " 139 | \n",
+ " 2023-07-14 11:43:34.261637+02:00 | \n",
+ " 2023-07-14 11:43:34.261637+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 809126 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " email809126 | \n",
+ " 1063 | \n",
+ " 2023-05-04 17:17:24.456829+02:00 | \n",
+ " 2023-05-04 17:17:24.456829+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 11005 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1063 | \n",
+ " 2017-07-06 03:01:57.242998+02:00 | \n",
+ " 2018-11-12 18:01:18.283492+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " zone tarif 1 | \n",
+ " NaN | \n",
+ " invite rp | \n",
+ " 14 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 17663 | \n",
+ " lastname17663 | \n",
+ " firstname17663 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 12731 | \n",
+ " 2018-09-23 02:39:17.778100+02:00 | \n",
+ " 2018-09-23 02:39:17.778100+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " zone tarif 1 | \n",
+ " NaN | \n",
+ " detaxe | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 44220 | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 38100 | \n",
+ " lastname38100 | \n",
+ " firstname38100 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 12395 | \n",
+ " 2019-02-11 11:05:58.581121+01:00 | \n",
+ " 2022-12-06 23:15:33.485866+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 44100 | \n",
+ " fr | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 98789 | \n",
+ " 766266 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " email766266 | \n",
+ " 139 | \n",
+ " 2022-12-06 18:26:04.142337+01:00 | \n",
+ " 2023-05-03 18:01:01.799141+02:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ " 98790 | \n",
+ " 766336 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " email766336 | \n",
+ " 139 | \n",
+ " 2022-12-06 18:28:49.139502+01:00 | \n",
+ " 2022-12-06 23:15:33.485866+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ " 98791 | \n",
+ " 766348 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " email766348 | \n",
+ " 139 | \n",
+ " 2022-12-06 18:28:51.140745+01:00 | \n",
+ " 2022-12-06 23:15:33.485866+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ " 98792 | \n",
+ " 766363 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " email766363 | \n",
+ " 139 | \n",
+ " 2022-12-06 18:29:44.081056+01:00 | \n",
+ " 2022-12-06 23:15:33.485866+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ " 98793 | \n",
+ " 766366 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " email766366 | \n",
+ " 139 | \n",
+ " 2022-12-06 18:29:44.934174+01:00 | \n",
+ " 2022-12-06 23:15:33.485866+01:00 | \n",
+ " NaN | \n",
+ " False | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 875 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
98794 rows × 43 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id lastname firstname birthdate email \\\n",
+ "0 821538 NaN NaN NaN email821538 \n",
+ "1 809126 NaN NaN NaN email809126 \n",
+ "2 11005 NaN NaN NaN NaN \n",
+ "3 17663 lastname17663 firstname17663 NaN NaN \n",
+ "4 38100 lastname38100 firstname38100 NaN NaN \n",
+ "... ... ... ... ... ... \n",
+ "98789 766266 NaN NaN NaN email766266 \n",
+ "98790 766336 NaN NaN NaN email766336 \n",
+ "98791 766348 NaN NaN NaN email766348 \n",
+ "98792 766363 NaN NaN NaN email766363 \n",
+ "98793 766366 NaN NaN NaN email766366 \n",
+ "\n",
+ " street_id created_at \\\n",
+ "0 139 2023-07-14 11:43:34.261637+02:00 \n",
+ "1 1063 2023-05-04 17:17:24.456829+02:00 \n",
+ "2 1063 2017-07-06 03:01:57.242998+02:00 \n",
+ "3 12731 2018-09-23 02:39:17.778100+02:00 \n",
+ "4 12395 2019-02-11 11:05:58.581121+01:00 \n",
+ "... ... ... \n",
+ "98789 139 2022-12-06 18:26:04.142337+01:00 \n",
+ "98790 139 2022-12-06 18:28:49.139502+01:00 \n",
+ "98791 139 2022-12-06 18:28:51.140745+01:00 \n",
+ "98792 139 2022-12-06 18:29:44.081056+01:00 \n",
+ "98793 139 2022-12-06 18:29:44.934174+01:00 \n",
+ "\n",
+ " updated_at civility is_partner ... \\\n",
+ "0 2023-07-14 11:43:34.261637+02:00 NaN False ... \n",
+ "1 2023-05-04 17:17:24.456829+02:00 NaN False ... \n",
+ "2 2018-11-12 18:01:18.283492+01:00 NaN False ... \n",
+ "3 2018-09-23 02:39:17.778100+02:00 NaN False ... \n",
+ "4 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
+ "... ... ... ... ... \n",
+ "98789 2023-05-03 18:01:01.799141+02:00 NaN False ... \n",
+ "98790 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
+ "98791 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
+ "98792 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
+ "98793 2022-12-06 23:15:33.485866+01:00 NaN False ... \n",
+ "\n",
+ " preferred_category preferred_supplier preferred_formula \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 zone tarif 1 NaN invite rp \n",
+ "3 zone tarif 1 NaN detaxe \n",
+ "4 NaN NaN NaN \n",
+ "... ... ... ... \n",
+ "98789 NaN NaN NaN \n",
+ "98790 NaN NaN NaN \n",
+ "98791 NaN NaN NaN \n",
+ "98792 NaN NaN NaN \n",
+ "98793 NaN NaN NaN \n",
+ "\n",
+ " purchase_count first_buying_date last_visiting_date zipcode country \\\n",
+ "0 0 NaN NaN NaN NaN \n",
+ "1 0 NaN NaN NaN fr \n",
+ "2 14 NaN NaN NaN fr \n",
+ "3 1 NaN NaN 44220 fr \n",
+ "4 1 NaN NaN 44100 fr \n",
+ "... ... ... ... ... ... \n",
+ "98789 0 NaN NaN NaN NaN \n",
+ "98790 0 NaN NaN NaN NaN \n",
+ "98791 0 NaN NaN NaN NaN \n",
+ "98792 0 NaN NaN NaN NaN \n",
+ "98793 0 NaN NaN NaN NaN \n",
+ "\n",
+ " age tenant_id \n",
+ "0 NaN 875 \n",
+ "1 NaN 875 \n",
+ "2 NaN 875 \n",
+ "3 NaN 875 \n",
+ "4 NaN 875 \n",
+ "... ... ... \n",
+ "98789 NaN 875 \n",
+ "98790 NaN 875 \n",
+ "98791 NaN 875 \n",
+ "98792 NaN 875 \n",
+ "98793 NaN 875 \n",
+ "\n",
+ "[98794 rows x 43 columns]"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df10_0customersplus"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "932812b1-7a24-4f2d-ae48-7fe8e06b9f62",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# How many missing values are there?\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}