{ "cells": [ { "cell_type": "markdown", "id": "5005d8b3-6295-4b22-bd3c-876109be5b3b", "metadata": {}, "source": [ "# Merges and discovery : target, campaigns, links" ] }, { "cell_type": "markdown", "id": "8c56d518-3634-4492-b249-0d8ef33dd527", "metadata": {}, "source": [ "## First steps : package importations, set up working environment and import data" ] }, { "cell_type": "code", "execution_count": 1, "id": "dede42d9-1262-45f7-bd7a-586ae800092a", "metadata": {}, "outputs": [], "source": [ "# importations\n", "\n", "import os \n", "import s3fs\n", "import pandas as pd\n", "import re\n", "from datetime import datetime, timezone, timedelta\n", "import math\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "id": "6ce34b58-b5ba-4b54-ba4d-fc82ef01b09c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['bdc2324-data/1',\n", " 'bdc2324-data/10',\n", " 'bdc2324-data/101',\n", " 'bdc2324-data/11',\n", " 'bdc2324-data/12',\n", " 'bdc2324-data/13',\n", " 'bdc2324-data/14',\n", " 'bdc2324-data/2',\n", " 'bdc2324-data/3',\n", " 'bdc2324-data/4',\n", " 'bdc2324-data/5',\n", " 'bdc2324-data/6',\n", " 'bdc2324-data/7',\n", " 'bdc2324-data/8',\n", " 'bdc2324-data/9']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# bucket for accessing the data\n", "\n", "BUCKET = \"bdc2324-data\"\n", "\n", "# the endpoint host is read from the AWS_S3_ENDPOINT environment variable\n", "S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n", "fs = s3fs.S3FileSystem(client_kwargs={\"endpoint_url\": S3_ENDPOINT_URL})\n", "\n", "# one folder per company\n", "fs.ls(BUCKET)" ] }, { "cell_type": "code", "execution_count": 3, "id": "8eb13dd3-53c7-4a70-94a4-846168473aa1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['bdc2324-data/1/1campaign_stats.csv',\n", " 'bdc2324-data/1/1campaigns.csv',\n", " 'bdc2324-data/1/1categories.csv',\n", " 'bdc2324-data/1/1countries.csv',\n", " 'bdc2324-data/1/1currencies.csv',\n", " 'bdc2324-data/1/1customer_target_mappings.csv',\n", " 'bdc2324-data/1/1customersplus.csv',\n", " 
'bdc2324-data/1/1event_types.csv',\n", " 'bdc2324-data/1/1events.csv',\n", " 'bdc2324-data/1/1facilities.csv',\n", " 'bdc2324-data/1/1link_stats.csv',\n", " 'bdc2324-data/1/1pricing_formulas.csv',\n", " 'bdc2324-data/1/1product_packs.csv',\n", " 'bdc2324-data/1/1products.csv',\n", " 'bdc2324-data/1/1products_groups.csv',\n", " 'bdc2324-data/1/1purchases.csv',\n", " 'bdc2324-data/1/1representation_category_capacities.csv',\n", " 'bdc2324-data/1/1representations.csv',\n", " 'bdc2324-data/1/1seasons.csv',\n", " 'bdc2324-data/1/1structure_tag_mappings.csv',\n", " 'bdc2324-data/1/1suppliers.csv',\n", " 'bdc2324-data/1/1tags.csv',\n", " 'bdc2324-data/1/1target_types.csv',\n", " 'bdc2324-data/1/1targets.csv',\n", " 'bdc2324-data/1/1tickets.csv',\n", " 'bdc2324-data/1/1type_of_categories.csv',\n", " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n", " 'bdc2324-data/1/1type_ofs.csv']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# focus on the company number 1\n", "# the folder is addressed by its name rather than by its rank in fs.ls(BUCKET),\n", "# so the selected company does not depend on the order in which the bucket is listed\n", "COMPANY_NUMBER = \"1\"\n", "FILE_PATH_S3 = f\"{BUCKET}/{COMPANY_NUMBER}\"\n", "\n", "# every csv file stored for that company\n", "files_path = fs.ls(FILE_PATH_S3)\n", "files_path" ] }, { "cell_type": "code", "execution_count": 4, "id": "1ea66c4e-1307-4f19-836e-3104fba2ff41", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_487/2894332003.py:10: DtypeWarning: Columns (1) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(file_in)\n" ] } ], "source": [ "# loop to create dataframes related to company 1\n", "\n", "client_number = files_path[0].split(\"/\")[1]\n", "print(client_number)\n", "df_prefix = \"df\" + str(client_number) + \"_\"\n", "\n", "# object keys look like bdc2324-data/1/1targets.csv : the table name follows the company digits\n", "table_name_pattern = re.compile(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$')\n", "\n", "for current_path in files_path:\n", "    name_match = table_name_pattern.search(current_path)\n", "    if name_match is None:\n", "        # not a company csv file : skip it before opening it, instead of failing on .group()\n", "        continue\n", "    with fs.open(current_path, mode=\"rb\") as file_in:\n", "        # low_memory=False parses each column in one go, which avoids the mixed-type\n", "        # inference that the default chunked parsing reports as a DtypeWarning\n", "        df = pd.read_csv(file_in, low_memory=False)\n", "    # the pattern of the name is df1_xxx\n", "    nom_dataframe = df_prefix + name_match.group(3)\n", "    # later cells refer to the frames by name (df1_targets, df1_campaigns, ...)\n", "    globals()[nom_dataframe] = df" ] }, { "cell_type": "markdown", "id": "13d70b2c-6580-4caf-b839-10f72b2e0b39", "metadata": {}, "source": [ "## Target, target types and customer target mapping" ] }, { "cell_type": "code", "execution_count": 6, "id": "4dbc7fea-ac3b-4348-83fb-dfb1a460f936", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idis_importnamecreated_atupdated_atidentifier
069Falsemanual_dynamic_filter2020-11-30 09:46:18.881030+01:002020-11-30 09:46:18.881030+01:00e0f4b8693184850fefd6d2a38f10584e
148Truemanual_structure2020-11-04 17:16:19.548275+01:002020-11-04 17:16:19.548275+01:00382bca214204a2d3462f5ec2728d5d1e
21Truemanual_import2020-10-14 18:37:40.521623+02:002020-10-14 18:37:40.521623+02:0012213df2ce68a624e4c0070521437bac
356Falsemanual_static_filter2020-11-04 18:08:37.233486+01:002020-11-04 18:08:37.233486+01:00fb27e81baa4debc6a4e1a8639c20e808
\n", "
" ], "text/plain": [ " id is_import name created_at \\\n", "0 69 False manual_dynamic_filter 2020-11-30 09:46:18.881030+01:00 \n", "1 48 True manual_structure 2020-11-04 17:16:19.548275+01:00 \n", "2 1 True manual_import 2020-10-14 18:37:40.521623+02:00 \n", "3 56 False manual_static_filter 2020-11-04 18:08:37.233486+01:00 \n", "\n", " updated_at identifier \n", "0 2020-11-30 09:46:18.881030+01:00 e0f4b8693184850fefd6d2a38f10584e \n", "1 2020-11-04 17:16:19.548275+01:00 382bca214204a2d3462f5ec2728d5d1e \n", "2 2020-10-14 18:37:40.521623+02:00 12213df2ce68a624e4c0070521437bac \n", "3 2020-11-04 18:08:37.233486+01:00 fb27e81baa4debc6a4e1a8639c20e808 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 1. target types\n", "df1_target_types.head()" ] }, { "cell_type": "code", "execution_count": 7, "id": "0e9f5dcb-0dc3-4052-b866-e5c4cb954a1f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtarget_type_idnamecreated_atupdated_at
021756DDCP PROMO Art contemporain - salle de chauffe...2021-01-04 15:00:05.401899+01:002021-03-02 18:38:19.025969+01:00
170156consentement optin scolaires2021-12-21 16:03:59.840785+01:002022-02-18 17:23:44.761388+01:00
213456DDCP Newsletter jeune public2020-11-10 09:43:19.667471+01:002021-03-02 18:38:19.052304+01:00
370056consentement optout scolaires2021-12-21 16:01:57.524946+01:002022-02-18 17:23:44.807776+01:00
496456DDCP achat billet nbr dep 190520212022-04-14 10:58:17.142834+02:002022-04-14 10:58:23.677264+02:00
\n", "
" ], "text/plain": [ " id target_type_id name \\\n", "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n", "1 701 56 consentement optin scolaires \n", "2 134 56 DDCP Newsletter jeune public \n", "3 700 56 consentement optout scolaires \n", "4 964 56 DDCP achat billet nbr dep 19052021 \n", "\n", " created_at updated_at \n", "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n", "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n", "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n", "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n", "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 2. targets\n", "df1_targets.head()" ] }, { "cell_type": "code", "execution_count": 8, "id": "c5c62302-370a-462f-bd79-eac31593f65c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcustomer_idtarget_idcreated_atupdated_atnameextra_field
011848246454001302021-09-23 09:35:47.617275+02:002021-09-23 09:35:47.617275+02:00NaNNaN
111848256454003452021-09-23 09:35:47.668846+02:002021-09-23 09:35:47.668846+02:00NaNNaN
211848286454021262021-09-23 12:02:51.253269+02:002021-09-23 12:02:51.253269+02:00NaNNaN
311848296454031262021-09-23 12:20:47.394480+02:002021-09-23 12:20:47.394480+02:00NaNNaN
412957706473013462021-09-28 16:02:29.372608+02:002021-09-28 16:02:29.372608+02:00NaNNaN
\n", "
" ], "text/plain": [ " id customer_id target_id created_at \\\n", "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n", "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n", "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n", "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n", "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n", "\n", " updated_at name extra_field \n", "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n", "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n", "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n", "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n", "4 2021-09-28 16:02:29.372608+02:00 NaN NaN " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 3. customer target mapping\n", "\n", "df1_customer_target_mappings.head()" ] }, { "cell_type": "code", "execution_count": 10, "id": "1a87cebf-c1dd-408d-a523-26633419da1e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtarget_type_idnametarget_type_is_importtarget_type_name
021756DDCP PROMO Art contemporain - salle de chauffe...Falsemanual_static_filter
170156consentement optin scolairesFalsemanual_static_filter
213456DDCP Newsletter jeune publicFalsemanual_static_filter
370056consentement optout scolairesFalsemanual_static_filter
496456DDCP achat billet nbr dep 19052021Falsemanual_static_filter
\n", "
" ], "text/plain": [ " id target_type_id name \\\n", "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n", "1 701 56 consentement optin scolaires \n", "2 134 56 DDCP Newsletter jeune public \n", "3 700 56 consentement optout scolaires \n", "4 964 56 DDCP achat billet nbr dep 19052021 \n", "\n", " target_type_is_import target_type_name \n", "0 False manual_static_filter \n", "1 False manual_static_filter \n", "2 False manual_static_filter \n", "3 False manual_static_filter \n", "4 False manual_static_filter " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 4.1. merge target with target type\n", "\n", "# columns kept from the targets table\n", "targets_subset = df1_targets[[\"id\", \"target_type_id\", \"name\"]]\n", "\n", "# columns kept from the target types table ; once prefixed, its id column is named\n", "# target_type_id, which is the key shared with the targets table\n", "target_types_subset = df1_target_types[[\"id\", \"is_import\", \"name\"]].add_prefix(\"target_type_\")\n", "\n", "df1_targets_full = targets_subset.merge(target_types_subset, on=\"target_type_id\", how=\"left\")\n", "df1_targets_full.head()" ] }, { "cell_type": "code", "execution_count": 11, "id": "d48c1fff-73c2-4e75-8799-da2b80694be7", "metadata": {}, "outputs": [], "source": [ "# 4.2. 
prepare df1_targets_full before merging it with df1_customer_target_mappings\n", "\n", "# move target_type_id so that it sits right before target_type_name\n", "\n", "column_to_move = 'target_type_id'\n", "\n", "# position of the reference column while target_type_id is still in the frame\n", "reference_index = df1_targets_full.columns.get_loc(\"target_type_name\")\n", "\n", "# take the column out of the frame (a copy is kept to put it back afterwards)\n", "column_copy = df1_targets_full[column_to_move].copy()\n", "df1_targets_full = df1_targets_full.drop(columns=column_to_move)\n", "\n", "# target_type_id was located before target_type_name, so dropping it shifted the reference\n", "# column to reference_index - 1 : inserting there puts target_type_id right before it\n", "df1_targets_full.insert(loc=reference_index - 1, column=column_to_move, value=column_copy)" ] }, { "cell_type": "code", "execution_count": 12, "id": "a874514a-c7dc-42d4-a440-dedd3a270e24", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
target_idtarget_nametarget_type_is_importtarget_type_idtarget_type_name
0217DDCP PROMO Art contemporain - salle de chauffe...False56manual_static_filter
1701consentement optin scolairesFalse56manual_static_filter
2134DDCP Newsletter jeune publicFalse56manual_static_filter
3700consentement optout scolairesFalse56manual_static_filter
4964DDCP achat billet nbr dep 19052021False56manual_static_filter
\n", "
" ], "text/plain": [ " target_id target_name \\\n", "0 217 DDCP PROMO Art contemporain - salle de chauffe... \n", "1 701 consentement optin scolaires \n", "2 134 DDCP Newsletter jeune public \n", "3 700 consentement optout scolaires \n", "4 964 DDCP achat billet nbr dep 19052021 \n", "\n", " target_type_is_import target_type_id target_type_name \n", "0 False 56 manual_static_filter \n", "1 False 56 manual_static_filter \n", "2 False 56 manual_static_filter \n", "3 False 56 manual_static_filter \n", "4 False 56 manual_static_filter " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# prefix every column with target_ unless it already carries that prefix\n", "# (id and name become target_id and target_name, the target_type_ columns stay as they are)\n", "TARGET_PREFIX = 'target_'\n", "renamed_columns = {\n", "    column: column if column.startswith(TARGET_PREFIX) else TARGET_PREFIX + column\n", "    for column in df1_targets_full.columns\n", "}\n", "df1_targets_full = df1_targets_full.rename(columns=renamed_columns)\n", "df1_targets_full.head()" ] }, { "cell_type": "code", "execution_count": 18, "id": "0db0172a-5119-4b7f-97f8-36fc5c985205", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcustomer_idtarget_idtarget_nametarget_type_is_importtarget_type_idtarget_type_name
01184824645400130DDCP PROMO Réseau livresFalse56manual_static_filter
11184825645400345Inscrits NL générale site webFalse56manual_static_filter
21184828645402126DDCP PROMO Art contemporainFalse56manual_static_filter
31184829645403126DDCP PROMO Art contemporainFalse56manual_static_filter
41295770647301346Votre première listeFalse56manual_static_filter
........................
7680192737545666983345Inscrits NL générale site webFalse56manual_static_filter
7680202737546666983346Votre première listeFalse56manual_static_filter
7680212737575666986346Votre première listeFalse56manual_static_filter
7680222737576666987345Inscrits NL générale site webFalse56manual_static_filter
7680232737577666987346Votre première listeFalse56manual_static_filter
\n", "

768024 rows × 7 columns

\n", "
" ], "text/plain": [ " id customer_id target_id target_name \\\n", "0 1184824 645400 130 DDCP PROMO Réseau livres \n", "1 1184825 645400 345 Inscrits NL générale site web \n", "2 1184828 645402 126 DDCP PROMO Art contemporain \n", "3 1184829 645403 126 DDCP PROMO Art contemporain \n", "4 1295770 647301 346 Votre première liste \n", "... ... ... ... ... \n", "768019 2737545 666983 345 Inscrits NL générale site web \n", "768020 2737546 666983 346 Votre première liste \n", "768021 2737575 666986 346 Votre première liste \n", "768022 2737576 666987 345 Inscrits NL générale site web \n", "768023 2737577 666987 346 Votre première liste \n", "\n", " target_type_is_import target_type_id target_type_name \n", "0 False 56 manual_static_filter \n", "1 False 56 manual_static_filter \n", "2 False 56 manual_static_filter \n", "3 False 56 manual_static_filter \n", "4 False 56 manual_static_filter \n", "... ... ... ... \n", "768019 False 56 manual_static_filter \n", "768020 False 56 manual_static_filter \n", "768021 False 56 manual_static_filter \n", "768022 False 56 manual_static_filter \n", "768023 False 56 manual_static_filter \n", "\n", "[768024 rows x 7 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# finally, merge\n", "\n", "# only id, customer_id and target_id are kept from df1_customer_target_mappings :\n", "# name, extra_field and updated_at (same value as created_at) are left out\n", "# note : by making a left join on df1_customer_target_mappings, we suppress 2 targets that have no customer associated\n", "\n", "customer_target_links = df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]]\n", "df1_customer_targets = customer_target_links.merge(df1_targets_full, on=\"target_id\", how=\"left\")\n", "df1_customer_targets" ] }, { "cell_type": "markdown", "id": "52326267-c5ba-4e21-b8ab-4b4c62de75d1", "metadata": {}, "source": [ "## Campaign stats, campaigns" ] }, { "cell_type": "code", "execution_count": 30, "id": "06dca910-5c07-4ee1-bbf2-3b11b48ba1f2", 
"metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
01319613newsletter enseignants janvier 20227212022-01-14 16:06:42.586321+01:002022-02-03 14:17:27.112963+01:00NaNNaN0.0Falseaba3b6fd5d186d28e06ff97135cade7f2022-01-14 00:00:00+01:00
11319586lsf_janvier_20227172022-01-07 11:30:35.315895+01:002022-02-03 14:17:27.116171+01:00NaNNaN0.0False788d986905533aba051261497ecffcbb2022-01-07 00:00:00+01:00
21319282Invitation à déjeuner au Mucem | Vernissage « ...5912021-09-28 12:50:24.448752+02:002022-02-03 14:17:27.119582+01:00NaNNaN0.0False3493894fa4ea036cfc6433c3e2ee63b02021-09-28 00:00:00+02:00
31319283Vacances de la Toussaint - centres des loisirs5902021-09-28 18:01:04.692073+02:002022-02-03 14:17:27.124408+01:00NaNNaN0.0False08b255a5d42b89b0585260b6f2360bdd2021-09-28 00:00:00+02:00
41319636ddcp_promo_md_livemag7302022-01-27 18:00:41.053069+01:002022-02-03 14:17:27.127607+01:00NaNNaN0.0Falsed5cfead94f5350c12c322b5b664544c12022-01-27 00:00:00+01:00
\n", "
" ], "text/plain": [ " id name service_id \\\n", "0 1319613 newsletter enseignants janvier 2022 721 \n", "1 1319586 lsf_janvier_2022 717 \n", "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n", "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n", "4 1319636 ddcp_promo_md_livemag 730 \n", "\n", " created_at updated_at \\\n", "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n", "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n", "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n", "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n", "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n", "\n", " process_id report_url category to_be_synced \\\n", "0 NaN NaN 0.0 False \n", "1 NaN NaN 0.0 False \n", "2 NaN NaN 0.0 False \n", "3 NaN NaN 0.0 False \n", "4 NaN NaN 0.0 False \n", "\n", " identifier sent_at \n", "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n", "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n", "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n", "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n", "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 1. campaigns\n", "df1_campaigns.head()" ] }, { "cell_type": "code", "execution_count": 31, "id": "83eaa447-9144-41ed-9e26-f0f23799a8fd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_at
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:002021-03-28 18:34:20.616136+02:002022-04-15 22:52:04.397693+02:00
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:002021-03-28 18:21:04.297213+02:002022-04-15 22:52:04.397693+02:00
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:002021-03-28 18:18:49.991042+02:002022-04-15 22:52:04.397693+02:00
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:002021-03-28 18:09:50.915354+02:002022-04-15 22:52:04.397693+02:00
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:002021-03-28 18:05:08.507398+02:002022-04-15 22:52:04.397693+02:00
\n", "
" ], "text/plain": [ " id campaign_id customer_id opened_at \\\n", "0 19793 58 112597 NaN \n", "1 14211 58 113666 NaN \n", "2 13150 58 280561 NaN \n", "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", "4 5175 58 103972 NaN \n", "\n", " sent_at delivered_at \\\n", "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", "\n", " created_at updated_at \n", "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n", "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n", "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n", "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n", "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 2. campaigns stats\n", "df1_campaign_stats.head()" ] }, { "cell_type": "code", "execution_count": 34, "id": "7f25eb1b-e7c8-4715-bc30-7ac29a7181ac", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcampaign_namecampaign_service_idcampaign_sent_at
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
\n", "
" ], "text/plain": [ " id campaign_id customer_id opened_at \\\n", "0 19793 58 112597 NaN \n", "1 14211 58 113666 NaN \n", "2 13150 58 280561 NaN \n", "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", "4 5175 58 103972 NaN \n", "\n", " sent_at delivered_at \\\n", "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", "\n", " campaign_name campaign_service_id \\\n", "0 Le Mucem chez vous, gardons le lien #22 404 \n", "1 Le Mucem chez vous, gardons le lien #22 404 \n", "2 Le Mucem chez vous, gardons le lien #22 404 \n", "3 Le Mucem chez vous, gardons le lien #22 404 \n", "4 Le Mucem chez vous, gardons le lien #22 404 \n", "\n", " campaign_sent_at \n", "0 2021-03-28 00:00:00+01:00 \n", "1 2021-03-28 00:00:00+01:00 \n", "2 2021-03-28 00:00:00+01:00 \n", "3 2021-03-28 00:00:00+01:00 \n", "4 2021-03-28 00:00:00+01:00 " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 3. merge campaigns and campaigns stats\n", "\n", "# columns kept from the campaign stats table\n", "campaign_stats_subset = df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]]\n", "\n", "# columns kept from the campaigns table ; once prefixed, its id column is named campaign_id\n", "# (the join key) and its sent_at column no longer collides with the one of the stats\n", "campaigns_subset = df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\")\n", "\n", "df1_campaigns_full = campaign_stats_subset.merge(campaigns_subset, on=\"campaign_id\", how=\"left\")\n", "df1_campaigns_full.head()" ] }, { "cell_type": "markdown", "id": "87fc686a-4a80-40ab-9987-20d2774f3055", "metadata": {}, "source": [ "## Link stats" ] }, { "cell_type": "code", "execution_count": 35, "id": "2f9df2d0-8a23-496b-8e92-617285f64530", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idclicked_atlink_idcustomer_idcreated_atupdated_at
012021-03-26 16:30:36+01:0012840332021-03-26 15:30:37.050161+01:002021-03-26 15:30:37.050161+01:00
122021-03-26 17:16:34+01:0021197682021-03-26 16:16:34.950871+01:002021-03-26 16:16:34.950871+01:00
22722021-03-28 20:03:32+02:00421131052021-03-28 18:03:32.736394+02:002021-03-28 18:03:32.736394+02:00
342021-03-26 17:43:19+01:0032722802021-03-26 16:43:19.338321+01:002021-03-26 16:43:19.338321+01:00
452021-03-26 17:46:00+01:0031050952021-03-26 16:46:00.502945+01:002021-03-26 16:46:00.502945+01:00
.....................
1510462435532023-11-09 16:34:27+01:00146669982023-11-09 15:34:29.425425+01:002023-11-09 15:34:29.425425+01:00
1510472435542023-11-09 16:34:35+01:00146709982023-11-09 15:34:37.505505+01:002023-11-09 15:34:37.505505+01:00
1510482435592023-11-09 16:51:15+01:0014686829232023-11-09 15:51:17.439518+01:002023-11-09 15:51:17.439518+01:00
1510492435612023-11-09 16:59:42+01:0014677829232023-11-09 15:59:44.030922+01:002023-11-09 15:59:44.030922+01:00
1510502435642023-11-09 17:16:41+01:001469112543552023-11-09 16:16:43.012932+01:002023-11-09 16:16:43.012932+01:00
\n", "

151051 rows × 6 columns

\n", "
" ], "text/plain": [ " id clicked_at link_id customer_id \\\n", "0 1 2021-03-26 16:30:36+01:00 1 284033 \n", "1 2 2021-03-26 17:16:34+01:00 2 119768 \n", "2 272 2021-03-28 20:03:32+02:00 42 113105 \n", "3 4 2021-03-26 17:43:19+01:00 3 272280 \n", "4 5 2021-03-26 17:46:00+01:00 3 105095 \n", "... ... ... ... ... \n", "151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n", "151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n", "151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n", "151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n", "151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n", "\n", " created_at updated_at \n", "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n", "1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n", "2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n", "3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n", "4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n", "... ... ... \n", "151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n", "151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n", "151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n", "151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n", "151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n", "\n", "[151051 rows x 6 columns]" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1_link_stats" ] }, { "cell_type": "markdown", "id": "aad6fb14-9694-4c1e-9885-1ebe0f38afe3", "metadata": {}, "source": [ "## Bonus : peut-on lier link stats et campaign ? 
Non, les dates auxquelles le client clique sur le lien/ouvre la campagne ne permettent pas de faire coïncider link_id et campaign_id" ] }, { "cell_type": "code", "execution_count": 67, "id": "8be7c974-72c9-4e31-a874-d7e5d2719fb3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idclicked_atlink_idcustomer_idcreated_atupdated_at
012021-03-26 16:30:36+01:0012840332021-03-26 15:30:37.050161+01:002021-03-26 15:30:37.050161+01:00
7526140182021-05-10 18:07:59+02:003122840332021-05-10 16:08:00.541322+02:002021-05-10 16:08:00.541322+02:00
968481334492021-03-25 08:42:22+01:0042840332022-04-15 22:51:01.994343+02:002022-04-15 22:51:01.994343+02:00
1157282075442022-08-23 10:33:04+02:00123652840332022-08-23 08:33:06.498908+02:002022-08-23 08:33:06.498908+02:00
\n", "
" ], "text/plain": [ " id clicked_at link_id customer_id \\\n", "0 1 2021-03-26 16:30:36+01:00 1 284033 \n", "7526 14018 2021-05-10 18:07:59+02:00 312 284033 \n", "96848 133449 2021-03-25 08:42:22+01:00 4 284033 \n", "115728 207544 2022-08-23 10:33:04+02:00 12365 284033 \n", "\n", " created_at updated_at \n", "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n", "7526 2021-05-10 16:08:00.541322+02:00 2021-05-10 16:08:00.541322+02:00 \n", "96848 2022-04-15 22:51:01.994343+02:00 2022-04-15 22:51:01.994343+02:00 \n", "115728 2022-08-23 08:33:06.498908+02:00 2022-08-23 08:33:06.498908+02:00 " ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# every link click recorded for customer 284033\n", "clicks_of_customer = df1_link_stats[\"customer_id\"].eq(284033)\n", "df1_link_stats.loc[clicks_of_customer]" ] }, { "cell_type": "code", "execution_count": 82, "id": "902e9947-58e1-44f4-b634-1239b0e4df02", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcampaign_namecampaign_service_idcampaign_sent_at
403064340363764284033NaN2021-03-21 18:01:22+01:002021-03-21 18:08:04+01:00Le Mucem chez vous, gardons le lien #213982021-03-21 00:00:00+01:00
\n", "
" ], "text/plain": [ " id campaign_id customer_id opened_at \\\n", "4030643 4036376 4 284033 NaN \n", "\n", " sent_at delivered_at \\\n", "4030643 2021-03-21 18:01:22+01:00 2021-03-21 18:08:04+01:00 \n", "\n", " campaign_name campaign_service_id \\\n", "4030643 Le Mucem chez vous, gardons le lien #21 398 \n", "\n", " campaign_sent_at \n", "4030643 2021-03-21 00:00:00+01:00 " ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# rows of campaign 4 for customer 284033\n", "is_customer = df1_campaigns_full[\"customer_id\"] == 284033\n", "is_campaign = df1_campaigns_full[\"campaign_id\"] == 4\n", "df1_campaigns_full[is_customer & is_campaign]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }