From 027ba3671e77ab65f95f877e18f0b1c5e880cfa5 Mon Sep 17 00:00:00 2001 From: tpique-ensae Date: Mon, 26 Feb 2024 15:49:40 +0000 Subject: [PATCH] ajout dossiers - rangement des notebooks --- TP_access_merge_data.ipynb | 1215 ---- TP_exploratory_analysis-Copy1.ipynb | 7990 --------------------------- TP_merge_tables_clean.ipynb | 1760 ------ 3 files changed, 10965 deletions(-) delete mode 100644 TP_access_merge_data.ipynb delete mode 100644 TP_exploratory_analysis-Copy1.ipynb delete mode 100644 TP_merge_tables_clean.ipynb diff --git a/TP_access_merge_data.ipynb b/TP_access_merge_data.ipynb deleted file mode 100644 index c8adbd5..0000000 --- a/TP_access_merge_data.ipynb +++ /dev/null @@ -1,1215 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "5ce2ffc5-66b6-4709-9e2c-7a50f49d1361", - "metadata": {}, - "outputs": [], - "source": [ - "# test\n", - "\n", - "import os \n", - "import s3fs\n", - "import pandas as pd\n", - "import re" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "f579ff01-f009-4fb1-ba79-0cb3ce58ab7f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bdc2324-data/1',\n", - " 'bdc2324-data/10',\n", - " 'bdc2324-data/101',\n", - " 'bdc2324-data/11',\n", - " 'bdc2324-data/12',\n", - " 'bdc2324-data/13',\n", - " 'bdc2324-data/14',\n", - " 'bdc2324-data/2',\n", - " 'bdc2324-data/3',\n", - " 'bdc2324-data/4',\n", - " 'bdc2324-data/5',\n", - " 'bdc2324-data/6',\n", - " 'bdc2324-data/7',\n", - " 'bdc2324-data/8',\n", - " 'bdc2324-data/9']" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", - "\n", - "fs = s3fs.S3FileSystem(client_kwargs = {\"endpoint_url\" : S3_ENDPOINT_URL})\n", - "BUCKET = \"bdc2324-data\"\n", - "fs.ls(BUCKET)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "c8b2c797-271f-43ee-8823-d0aee5b8782d", - "metadata": {}, - "outputs": [], - "source": [ - "FILE_PATH_S3 = fs.ls(BUCKET)[1] # +\".csv\"\n", - "files_path_2 = fs.ls(FILE_PATH_S3)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "18cee687-1462-4169-9bfe-f39786135cdd", - "metadata": {}, - "outputs": [], - "source": [ - "with fs.open(files_path_1[1], mode=\"rb\") as file_in:\n", - " # print(file_in)\n", - " df_campaigns = pd.read_csv(file_in)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "33e8d14c-c649-4b9c-8290-4a2aa635f999", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
01319613newsletter enseignants janvier 20227212022-01-14 16:06:42.586321+01:002022-02-03 14:17:27.112963+01:00NaNNaN0.0Falseaba3b6fd5d186d28e06ff97135cade7f2022-01-14 00:00:00+01:00
11319586lsf_janvier_20227172022-01-07 11:30:35.315895+01:002022-02-03 14:17:27.116171+01:00NaNNaN0.0False788d986905533aba051261497ecffcbb2022-01-07 00:00:00+01:00
21319282Invitation à déjeuner au Mucem | Vernissage « ...5912021-09-28 12:50:24.448752+02:002022-02-03 14:17:27.119582+01:00NaNNaN0.0False3493894fa4ea036cfc6433c3e2ee63b02021-09-28 00:00:00+02:00
31319283Vacances de la Toussaint - centres des loisirs5902021-09-28 18:01:04.692073+02:002022-02-03 14:17:27.124408+01:00NaNNaN0.0False08b255a5d42b89b0585260b6f2360bdd2021-09-28 00:00:00+02:00
41319636ddcp_promo_md_livemag7302022-01-27 18:00:41.053069+01:002022-02-03 14:17:27.127607+01:00NaNNaN0.0Falsed5cfead94f5350c12c322b5b664544c12022-01-27 00:00:00+01:00
....................................
9521320072dre_gaza01068812022-05-26 09:01:35.523639+02:002022-12-02 17:51:22.614046+01:00NaNNaN0.0False7504adad8bb96320eb3afdd4df6e1f602022-05-26 00:00:00+02:00
953661398DDCP Plan Bis 4 - Marketing direct - MJ5C1832021-06-18 10:30:01.259578+02:002021-09-24 11:56:09.082785+02:00NaNNaN0.0Falsecedebb6e872f539bef8c3f919874e9d72020-07-27 00:00:00+02:00
9541320487Invitation portes ouvertes amitiés9882022-09-29 18:01:33.834090+02:002022-12-02 17:51:23.258324+01:00NaNNaN0.0False9908279ebbf1f9b250ba689db6a0222b2022-09-29 00:00:00+02:00
955906903DDCP PROMO La méditerranée des philosophes #3 ...3102021-07-19 14:07:16.177390+02:002021-09-24 11:56:09.086101+02:00NaNNaN0.0False06eb61b839a0cefee4967c67ccb099dc2020-12-23 00:00:00+01:00
956579313ddcp_promo_automation_manuel_pre_visit4812021-06-08 17:38:54.041310+02:002021-09-24 11:56:09.089394+02:00NaNNaN0.0False9461cce28ebe3e76fb4b931c35a169b02021-06-08 00:00:00+02:00
\n", - "

957 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "0 1319613 newsletter enseignants janvier 2022 721 \n", - "1 1319586 lsf_janvier_2022 717 \n", - "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n", - "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n", - "4 1319636 ddcp_promo_md_livemag 730 \n", - ".. ... ... ... \n", - "952 1320072 dre_gaza0106 881 \n", - "953 661398 DDCP Plan Bis 4 - Marketing direct - MJ5C 183 \n", - "954 1320487 Invitation portes ouvertes amitiés 988 \n", - "955 906903 DDCP PROMO La méditerranée des philosophes #3 ... 310 \n", - "956 579313 ddcp_promo_automation_manuel_pre_visit 481 \n", - "\n", - " created_at updated_at \\\n", - "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n", - "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n", - "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n", - "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n", - "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n", - ".. ... ... \n", - "952 2022-05-26 09:01:35.523639+02:00 2022-12-02 17:51:22.614046+01:00 \n", - "953 2021-06-18 10:30:01.259578+02:00 2021-09-24 11:56:09.082785+02:00 \n", - "954 2022-09-29 18:01:33.834090+02:00 2022-12-02 17:51:23.258324+01:00 \n", - "955 2021-07-19 14:07:16.177390+02:00 2021-09-24 11:56:09.086101+02:00 \n", - "956 2021-06-08 17:38:54.041310+02:00 2021-09-24 11:56:09.089394+02:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "0 NaN NaN 0.0 False \n", - "1 NaN NaN 0.0 False \n", - "2 NaN NaN 0.0 False \n", - "3 NaN NaN 0.0 False \n", - "4 NaN NaN 0.0 False \n", - ".. ... ... ... ... \n", - "952 NaN NaN 0.0 False \n", - "953 NaN NaN 0.0 False \n", - "954 NaN NaN 0.0 False \n", - "955 NaN NaN 0.0 False \n", - "956 NaN NaN 0.0 False \n", - "\n", - " identifier sent_at \n", - "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n", - "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n", - "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n", - "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n", - "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 \n", - ".. ... ... \n", - "952 7504adad8bb96320eb3afdd4df6e1f60 2022-05-26 00:00:00+02:00 \n", - "953 cedebb6e872f539bef8c3f919874e9d7 2020-07-27 00:00:00+02:00 \n", - "954 9908279ebbf1f9b250ba689db6a0222b 2022-09-29 00:00:00+02:00 \n", - "955 06eb61b839a0cefee4967c67ccb099dc 2020-12-23 00:00:00+01:00 \n", - "956 9461cce28ebe3e76fb4b931c35a169b0 2021-06-08 00:00:00+02:00 \n", - "\n", - "[957 rows x 11 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_campaigns" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "b04f39e7-7d53-4734-b125-4dc1843172d6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bdc2324-data', '10', '10campaign_stats.csv']" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "files_path_2[0].split(\"/\")[1]" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "id": "d9bd97df-67bf-48ef-812a-975deb890163", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_521/1596461036.py:11: DtypeWarning: Columns (19,20,33,34,35,39) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(file_in)\n" - ] - } - ], - "source": [ - "# loop to create dataframes from file 2\n", - "\n", - "files_path = files_path_2\n", - "\n", - "client_number = files_path[0].split(\"/\")[1]\n", - "df_prefix = \"df\" + str(client_number) + \"_\"\n", - "\n", - "for i in range(len(files_path)) :\n", - " current_path = files_path[i]\n", - " with fs.open(current_path, mode=\"rb\") as file_in:\n", - " df = pd.read_csv(file_in)\n", - " # the pattern of the name is df1xxx\n", - " nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n", - " globals()[nom_dataframe] = df" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "id": "7f46e38e-413c-48cb-a171-eb6bc7219d9c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "client number :10\n", - "prefix used : df10_\n" - ] - } - ], - "source": [ - "print(f\"client number :{client_number}\")\n", - "print(f\"prefix used : {df_prefix}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "bdfd388c-7971-4f4d-99ef-c5b0435a4567", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bdc2324-data/10/10campaign_stats.csv',\n", - " 'bdc2324-data/10/10campaigns.csv',\n", - " 'bdc2324-data/10/10categories.csv',\n", - " 'bdc2324-data/10/10countries.csv',\n", - " 'bdc2324-data/10/10currencies.csv',\n", - " 'bdc2324-data/10/10customer_target_mappings.csv',\n", - " 'bdc2324-data/10/10customersplus.csv',\n", - " 'bdc2324-data/10/10event_types.csv',\n", - " 'bdc2324-data/10/10events.csv',\n", - " 'bdc2324-data/10/10facilities.csv',\n", - " 'bdc2324-data/10/10link_stats.csv',\n", - " 'bdc2324-data/10/10pricing_formulas.csv',\n", - " 'bdc2324-data/10/10product_packs.csv',\n", - " 'bdc2324-data/10/10products.csv',\n", - " 'bdc2324-data/10/10products_groups.csv',\n", - " 'bdc2324-data/10/10purchases.csv',\n", - " 'bdc2324-data/10/10representation_category_capacities.csv',\n", - " 'bdc2324-data/10/10representation_types.csv',\n", - " 'bdc2324-data/10/10representations.csv',\n", - " 'bdc2324-data/10/10seasons.csv',\n", - " 'bdc2324-data/10/10suppliers.csv',\n", - " 'bdc2324-data/10/10tags.csv',\n", - " 'bdc2324-data/10/10target_types.csv',\n", - " 'bdc2324-data/10/10targets.csv',\n", - " 'bdc2324-data/10/10tickets.csv',\n", - " 'bdc2324-data/10/10type_of_pricing_formulas.csv',\n", - " 'bdc2324-data/10/10type_ofs.csv']" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "files_path_2" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "id": "e7bd02dc-1925-46ff-9d59-231d18f9f4f1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnumbercreated_atupdated_atpurchase_idproduct_idis_from_subscriptiontype_ofsupplier_idbarcodeidentifier
0179917760_0_0_0_1_k-52021-12-29 07:27:27.868513+01:002021-12-29 07:27:27.868513+01:00409613321683False12NaN56c3db5a02c87af7e525676092cb7c4a
1179917871_0_0_0_1_k-52021-12-29 07:27:27.976380+01:002021-12-29 07:27:27.976380+01:00409613321684False12NaN1ecad1dc6b42b4cdb75784dd9dcd9d5c
2179917993_0_0_0_1_k-52021-12-29 07:27:27.978719+01:002021-12-29 07:27:27.978719+01:00409613321685False12NaNb3d207bdb47bcdb27a52f6bae0db7ec2
31799180103_0_0_0_1_k-52021-12-29 07:27:27.984621+01:002021-12-29 07:27:27.984621+01:00409613321686False12NaN10df9591b617cc177516e9ddf91ddae3
41799181107_0_3_2_1_h-12021-12-29 07:27:27.988602+01:002021-12-29 07:27:27.988602+01:00409613321687False12NaN3a8c7d5882fe9f20f0f59c8d90c9873c
....................................
49230932522323363592023-03-10 01:31:52.543375+01:002023-03-10 01:31:52.543375+01:00710062572547False12NaNfc96f582931209501ed186d709664980
49231032522333363602023-03-10 01:31:52.543869+01:002023-03-10 01:31:52.543869+01:00710062572547False12NaNd4ccfb00a9b22b62654bbf98b4d9a5a5
49231132522343363612023-03-10 01:31:52.545783+01:002023-03-10 01:31:52.545783+01:00710062572547False12NaNd5f76662d6571b8eaceaf19c781fa514
49231232522353363622023-03-10 01:31:52.547043+01:002023-03-10 01:31:52.547043+01:00710062572547False12NaN093225db5cd5e06cc8e06242b4cbba37
49231332522363363632023-03-10 01:31:52.548311+01:002023-03-10 01:31:52.548311+01:00710062572547False12NaN9bace0d0cd7a5ec559aca8ac8bf67700
\n", - "

492314 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " id number created_at \\\n", - "0 1799177 60_0_0_0_1_k-5 2021-12-29 07:27:27.868513+01:00 \n", - "1 1799178 71_0_0_0_1_k-5 2021-12-29 07:27:27.976380+01:00 \n", - "2 1799179 93_0_0_0_1_k-5 2021-12-29 07:27:27.978719+01:00 \n", - "3 1799180 103_0_0_0_1_k-5 2021-12-29 07:27:27.984621+01:00 \n", - "4 1799181 107_0_3_2_1_h-1 2021-12-29 07:27:27.988602+01:00 \n", - "... ... ... ... \n", - "492309 3252232 336359 2023-03-10 01:31:52.543375+01:00 \n", - "492310 3252233 336360 2023-03-10 01:31:52.543869+01:00 \n", - "492311 3252234 336361 2023-03-10 01:31:52.545783+01:00 \n", - "492312 3252235 336362 2023-03-10 01:31:52.547043+01:00 \n", - "492313 3252236 336363 2023-03-10 01:31:52.548311+01:00 \n", - "\n", - " updated_at purchase_id product_id \\\n", - "0 2021-12-29 07:27:27.868513+01:00 409613 321683 \n", - "1 2021-12-29 07:27:27.976380+01:00 409613 321684 \n", - "2 2021-12-29 07:27:27.978719+01:00 409613 321685 \n", - "3 2021-12-29 07:27:27.984621+01:00 409613 321686 \n", - "4 2021-12-29 07:27:27.988602+01:00 409613 321687 \n", - "... ... ... ... \n", - "492309 2023-03-10 01:31:52.543375+01:00 710062 572547 \n", - "492310 2023-03-10 01:31:52.543869+01:00 710062 572547 \n", - "492311 2023-03-10 01:31:52.545783+01:00 710062 572547 \n", - "492312 2023-03-10 01:31:52.547043+01:00 710062 572547 \n", - "492313 2023-03-10 01:31:52.548311+01:00 710062 572547 \n", - "\n", - " is_from_subscription type_of supplier_id barcode \\\n", - "0 False 1 2 NaN \n", - "1 False 1 2 NaN \n", - "2 False 1 2 NaN \n", - "3 False 1 2 NaN \n", - "4 False 1 2 NaN \n", - "... ... ... ... ... \n", - "492309 False 1 2 NaN \n", - "492310 False 1 2 NaN \n", - "492311 False 1 2 NaN \n", - "492312 False 1 2 NaN \n", - "492313 False 1 2 NaN \n", - "\n", - " identifier \n", - "0 56c3db5a02c87af7e525676092cb7c4a \n", - "1 1ecad1dc6b42b4cdb75784dd9dcd9d5c \n", - "2 b3d207bdb47bcdb27a52f6bae0db7ec2 \n", - "3 10df9591b617cc177516e9ddf91ddae3 \n", - "4 3a8c7d5882fe9f20f0f59c8d90c9873c \n", - "... ... \n", - "492309 fc96f582931209501ed186d709664980 \n", - "492310 d4ccfb00a9b22b62654bbf98b4d9a5a5 \n", - "492311 d5f76662d6571b8eaceaf19c781fa514 \n", - "492312 093225db5cd5e06cc8e06242b4cbba37 \n", - "492313 9bace0d0cd7a5ec559aca8ac8bf67700 \n", - "\n", - "[492314 rows x 11 columns]" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# example : get the table \n", - "\n", - "df10_tickets" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "48ae6de5-2353-4fa8-a2a8-20da3b77e2ff", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\nfor i in range(len(files_path_1)) :\\n current_path = files_path_1[i]\\n nom_dataframe = \"df\" + re.search(r\\'/([^/]+)\\\\.csv$\\', current_path).group(1)\\n df = globals()[nom_dataframe]\\n print(nom_dataframe)\\n print(df.head(20))\\n'" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# loop to have a look at dataframes from file 1\n", - "\n", - "\"\"\"\n", - "for i in range(len(files_path_1)) :\n", - " current_path = files_path_1[i]\n", - " nom_dataframe = \"df\" + re.search(r'/([^/]+)\\.csv$', current_path).group(1)\n", - " df = globals()[nom_dataframe]\n", - " print(nom_dataframe)\n", - " print(df.head(20))\n", - "\"\"\"" - ] - }, - { - "cell_type": "markdown", - "id": "d72166db-dcef-45bd-9f8c-7cb2ee6bcbde", - "metadata": {}, - "source": [ - "## Beginning of the exploratory analysis of dataframes" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "id": "17966ab2-9038-4dd6-a59c-7739ee05c964", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idlastnamefirstnamebirthdateemailstreet_idcreated_atupdated_atcivilityis_partner...preferred_categorypreferred_supplierpreferred_formulapurchase_countfirst_buying_datelast_visiting_datezipcodecountryagetenant_id
0821538NaNNaNNaNemail8215381392023-07-14 11:43:34.261637+02:002023-07-14 11:43:34.261637+02:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN875
1809126NaNNaNNaNemail80912610632023-05-04 17:17:24.456829+02:002023-05-04 17:17:24.456829+02:00NaNFalse...NaNNaNNaN0NaNNaNNaNfrNaN875
211005NaNNaNNaNNaN10632017-07-06 03:01:57.242998+02:002018-11-12 18:01:18.283492+01:00NaNFalse...zone tarif 1NaNinvite rp14NaNNaNNaNfrNaN875
317663lastname17663firstname17663NaNNaN127312018-09-23 02:39:17.778100+02:002018-09-23 02:39:17.778100+02:00NaNFalse...zone tarif 1NaNdetaxe1NaNNaN44220frNaN875
438100lastname38100firstname38100NaNNaN123952019-02-11 11:05:58.581121+01:002022-12-06 23:15:33.485866+01:00NaNFalse...NaNNaNNaN1NaNNaN44100frNaN875
..................................................................
98789766266NaNNaNNaNemail7662661392022-12-06 18:26:04.142337+01:002023-05-03 18:01:01.799141+02:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN875
98790766336NaNNaNNaNemail7663361392022-12-06 18:28:49.139502+01:002022-12-06 23:15:33.485866+01:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN875
98791766348NaNNaNNaNemail7663481392022-12-06 18:28:51.140745+01:002022-12-06 23:15:33.485866+01:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN875
98792766363NaNNaNNaNemail7663631392022-12-06 18:29:44.081056+01:002022-12-06 23:15:33.485866+01:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN875
98793766366NaNNaNNaNemail7663661392022-12-06 18:29:44.934174+01:002022-12-06 23:15:33.485866+01:00NaNFalse...NaNNaNNaN0NaNNaNNaNNaNNaN875
\n", - "

98794 rows × 43 columns

\n", - "
" - ], - "text/plain": [ - " id lastname firstname birthdate email \\\n", - "0 821538 NaN NaN NaN email821538 \n", - "1 809126 NaN NaN NaN email809126 \n", - "2 11005 NaN NaN NaN NaN \n", - "3 17663 lastname17663 firstname17663 NaN NaN \n", - "4 38100 lastname38100 firstname38100 NaN NaN \n", - "... ... ... ... ... ... \n", - "98789 766266 NaN NaN NaN email766266 \n", - "98790 766336 NaN NaN NaN email766336 \n", - "98791 766348 NaN NaN NaN email766348 \n", - "98792 766363 NaN NaN NaN email766363 \n", - "98793 766366 NaN NaN NaN email766366 \n", - "\n", - " street_id created_at \\\n", - "0 139 2023-07-14 11:43:34.261637+02:00 \n", - "1 1063 2023-05-04 17:17:24.456829+02:00 \n", - "2 1063 2017-07-06 03:01:57.242998+02:00 \n", - "3 12731 2018-09-23 02:39:17.778100+02:00 \n", - "4 12395 2019-02-11 11:05:58.581121+01:00 \n", - "... ... ... \n", - "98789 139 2022-12-06 18:26:04.142337+01:00 \n", - "98790 139 2022-12-06 18:28:49.139502+01:00 \n", - "98791 139 2022-12-06 18:28:51.140745+01:00 \n", - "98792 139 2022-12-06 18:29:44.081056+01:00 \n", - "98793 139 2022-12-06 18:29:44.934174+01:00 \n", - "\n", - " updated_at civility is_partner ... \\\n", - "0 2023-07-14 11:43:34.261637+02:00 NaN False ... \n", - "1 2023-05-04 17:17:24.456829+02:00 NaN False ... \n", - "2 2018-11-12 18:01:18.283492+01:00 NaN False ... \n", - "3 2018-09-23 02:39:17.778100+02:00 NaN False ... \n", - "4 2022-12-06 23:15:33.485866+01:00 NaN False ... \n", - "... ... ... ... ... \n", - "98789 2023-05-03 18:01:01.799141+02:00 NaN False ... \n", - "98790 2022-12-06 23:15:33.485866+01:00 NaN False ... \n", - "98791 2022-12-06 23:15:33.485866+01:00 NaN False ... \n", - "98792 2022-12-06 23:15:33.485866+01:00 NaN False ... \n", - "98793 2022-12-06 23:15:33.485866+01:00 NaN False ... \n", - "\n", - " preferred_category preferred_supplier preferred_formula \\\n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 zone tarif 1 NaN invite rp \n", - "3 zone tarif 1 NaN detaxe \n", - "4 NaN NaN NaN \n", - "... ... ... ... \n", - "98789 NaN NaN NaN \n", - "98790 NaN NaN NaN \n", - "98791 NaN NaN NaN \n", - "98792 NaN NaN NaN \n", - "98793 NaN NaN NaN \n", - "\n", - " purchase_count first_buying_date last_visiting_date zipcode country \\\n", - "0 0 NaN NaN NaN NaN \n", - "1 0 NaN NaN NaN fr \n", - "2 14 NaN NaN NaN fr \n", - "3 1 NaN NaN 44220 fr \n", - "4 1 NaN NaN 44100 fr \n", - "... ... ... ... ... ... \n", - "98789 0 NaN NaN NaN NaN \n", - "98790 0 NaN NaN NaN NaN \n", - "98791 0 NaN NaN NaN NaN \n", - "98792 0 NaN NaN NaN NaN \n", - "98793 0 NaN NaN NaN NaN \n", - "\n", - " age tenant_id \n", - "0 NaN 875 \n", - "1 NaN 875 \n", - "2 NaN 875 \n", - "3 NaN 875 \n", - "4 NaN 875 \n", - "... ... ... \n", - "98789 NaN 875 \n", - "98790 NaN 875 \n", - "98791 NaN 875 \n", - "98792 NaN 875 \n", - "98793 NaN 875 \n", - "\n", - "[98794 rows x 43 columns]" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df10_0customersplus" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "932812b1-7a24-4f2d-ae48-7fe8e06b9f62", - "metadata": {}, - "outputs": [], - "source": [ - "# how many missing values ?\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/TP_exploratory_analysis-Copy1.ipynb b/TP_exploratory_analysis-Copy1.ipynb deleted file mode 100644 index 021b463..0000000 --- a/TP_exploratory_analysis-Copy1.ipynb +++ /dev/null @@ -1,7990 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "6c0589ab-924f-4706-bef7-65500f0c4dd5", - "metadata": {}, - "source": [ - "# Exploratory study of variables : targets, campaign and link stats" - ] - }, - { - "cell_type": "markdown", - "id": "83319f84-427f-43aa-af26-06797244e89c", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "## First steps : package importations, set up working environment and import data" - ] - }, - { - "cell_type": "code", - "execution_count": 253, - "id": "a26f3f09-3961-43fe-b4d9-1abe3b906a2c", - "metadata": {}, - "outputs": [], - "source": [ - "# importations\n", - "\n", - "import os \n", - "import s3fs\n", - "import pandas as pd\n", - "import re\n", - "from datetime import datetime, timezone, timedelta\n", - "import math\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 188, - "id": "78478dbf-bd91-45e0-9f2b-2d9e6b0f648c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bdc2324-data/1',\n", - " 'bdc2324-data/10',\n", - " 'bdc2324-data/101',\n", - " 'bdc2324-data/11',\n", - " 'bdc2324-data/12',\n", - " 'bdc2324-data/13',\n", - " 'bdc2324-data/14',\n", - " 'bdc2324-data/2',\n", - " 'bdc2324-data/3',\n", - " 'bdc2324-data/4',\n", - " 'bdc2324-data/5',\n", - " 'bdc2324-data/6',\n", - " 'bdc2324-data/7',\n", - " 'bdc2324-data/8',\n", - " 'bdc2324-data/9']" - ] - }, - "execution_count": 188, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# bucket for accessing the data\n", - "\n", - "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", - "\n", - "fs = s3fs.S3FileSystem(client_kwargs = {\"endpoint_url\" : S3_ENDPOINT_URL})\n", - "BUCKET = \"bdc2324-data\"\n", - "fs.ls(BUCKET)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "a7e1b277-4381-45c0-b1ec-4050af54a3b6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bdc2324-data/1/1campaign_stats.csv',\n", - " 'bdc2324-data/1/1campaigns.csv',\n", - " 'bdc2324-data/1/1categories.csv',\n", - " 'bdc2324-data/1/1countries.csv',\n", - " 'bdc2324-data/1/1currencies.csv',\n", - " 'bdc2324-data/1/1customer_target_mappings.csv',\n", - " 'bdc2324-data/1/1customersplus.csv',\n", - " 'bdc2324-data/1/1event_types.csv',\n", - " 'bdc2324-data/1/1events.csv',\n", - " 'bdc2324-data/1/1facilities.csv',\n", - " 'bdc2324-data/1/1link_stats.csv',\n", - " 'bdc2324-data/1/1pricing_formulas.csv',\n", - " 'bdc2324-data/1/1product_packs.csv',\n", - " 'bdc2324-data/1/1products.csv',\n", - " 'bdc2324-data/1/1products_groups.csv',\n", - " 'bdc2324-data/1/1purchases.csv',\n", - " 'bdc2324-data/1/1representation_category_capacities.csv',\n", - " 'bdc2324-data/1/1representations.csv',\n", - " 'bdc2324-data/1/1seasons.csv',\n", - " 'bdc2324-data/1/1structure_tag_mappings.csv',\n", - " 'bdc2324-data/1/1suppliers.csv',\n", - " 'bdc2324-data/1/1tags.csv',\n", - " 'bdc2324-data/1/1target_types.csv',\n", - " 'bdc2324-data/1/1targets.csv',\n", - " 'bdc2324-data/1/1tickets.csv',\n", - " 'bdc2324-data/1/1type_of_categories.csv',\n", - " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n", - " 'bdc2324-data/1/1type_ofs.csv']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "FILE_PATH_S3 = fs.ls(BUCKET)[0] # focus on the company number 1\n", - "files_path = fs.ls(FILE_PATH_S3)\n", - "files_path" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "b26f7d2b-391f-4326-a60b-5b379186b4e8", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_624/107044352.py:9: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(file_in)\n" - ] - } - ], - "source": [ - "# loop to create dataframes related to company 1\n", - "\n", - "client_number = files_path[0].split(\"/\")[1]\n", - "df_prefix = \"df\" + str(client_number) + \"_\"\n", - "\n", - "for i in range(len(files_path)) :\n", - " current_path = files_path[i]\n", - " with fs.open(current_path, mode=\"rb\") as file_in:\n", - " df = pd.read_csv(file_in)\n", - " # the pattern of the name is df1xxx\n", - " nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n", - " globals()[nom_dataframe] = df" - ] - }, - { - "cell_type": "markdown", - "id": "5cb3e9dc-ba6e-408c-b1a6-a2c5a2215f71", - "metadata": {}, - "source": [ - "## Target, target types and customer target mapping" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "c6dbd777-b6da-485f-a650-b0a12f3d90c4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "is_import bool\n", - "name object\n", - "created_at object\n", - "updated_at object\n", - "identifier object\n", - "dtype: object" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 1. target types\n", - "df1_target_types.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "04d625e8-b077-450f-a654-1a3b05fc1325", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(df1_target_types[\"created_at\"][0])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "607441b9-33a8-41a7-a089-120dfe266de0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idis_importnamecreated_atupdated_atidentifier
069Falsemanual_dynamic_filter2020-11-30 09:46:18.881030+01:002020-11-30 09:46:18.881030+01:00e0f4b8693184850fefd6d2a38f10584e
148Truemanual_structure2020-11-04 17:16:19.548275+01:002020-11-04 17:16:19.548275+01:00382bca214204a2d3462f5ec2728d5d1e
21Truemanual_import2020-10-14 18:37:40.521623+02:002020-10-14 18:37:40.521623+02:0012213df2ce68a624e4c0070521437bac
356Falsemanual_static_filter2020-11-04 18:08:37.233486+01:002020-11-04 18:08:37.233486+01:00fb27e81baa4debc6a4e1a8639c20e808
\n", - "
" - ], - "text/plain": [ - " id is_import name created_at \\\n", - "0 69 False manual_dynamic_filter 2020-11-30 09:46:18.881030+01:00 \n", - "1 48 True manual_structure 2020-11-04 17:16:19.548275+01:00 \n", - "2 1 True manual_import 2020-10-14 18:37:40.521623+02:00 \n", - "3 56 False manual_static_filter 2020-11-04 18:08:37.233486+01:00 \n", - "\n", - " updated_at identifier \n", - "0 2020-11-30 09:46:18.881030+01:00 e0f4b8693184850fefd6d2a38f10584e \n", - "1 2020-11-04 17:16:19.548275+01:00 382bca214204a2d3462f5ec2728d5d1e \n", - "2 2020-10-14 18:37:40.521623+02:00 12213df2ce68a624e4c0070521437bac \n", - "3 2020-11-04 18:08:37.233486+01:00 fb27e81baa4debc6a4e1a8639c20e808 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_target_types" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6c036742-3069-438d-82af-62acc89aa000", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtarget_type_idnamecreated_atupdated_at
021756DDCP PROMO Art contemporain - salle de chauffe...2021-01-04 15:00:05.401899+01:002021-03-02 18:38:19.025969+01:00
170156consentement optin scolaires2021-12-21 16:03:59.840785+01:002022-02-18 17:23:44.761388+01:00
213456DDCP Newsletter jeune public2020-11-10 09:43:19.667471+01:002021-03-02 18:38:19.052304+01:00
370056consentement optout scolaires2021-12-21 16:01:57.524946+01:002022-02-18 17:23:44.807776+01:00
496456DDCP achat billet nbr dep 190520212022-04-14 10:58:17.142834+02:002022-04-14 10:58:23.677264+02:00
\n", - "
" - ], - "text/plain": [ - " id target_type_id name \\\n", - "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n", - "1 701 56 consentement optin scolaires \n", - "2 134 56 DDCP Newsletter jeune public \n", - "3 700 56 consentement optout scolaires \n", - "4 964 56 DDCP achat billet nbr dep 19052021 \n", - "\n", - " created_at updated_at \n", - "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n", - "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n", - "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n", - "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n", - "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 2. targets\n", - "\n", - "df1_targets.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "6bcde543-3eea-4584-82a2-903a1007c4ee", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "target_type_id int64\n", - "name object\n", - "created_at object\n", - "updated_at object\n", - "dtype: object" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_targets.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "bc9acebd-a030-4a40-bd1f-2ff0ab3f59d2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(df1_targets[\"created_at\"][0])" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "9e1b38d3-220c-4a20-a60b-a8f87dfd5bff", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0\n", - "target_type_id 0\n", - "name 0\n", - "created_at 0\n", - "updated_at 0\n", - "dtype: int64" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# valeurs manquantes\n", - "\n", - "df1_targets.isna().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "bf660284-974f-40aa-a914-100d45fceafc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "287" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_targets[\"name\"].nunique()" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "6589e11c-9c7a-4bd8-8953-3c5a23fa0ba2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtarget_type_idnamecreated_atupdated_at
021756DDCP PROMO Art contemporain - salle de chauffe...2021-01-04 15:00:05.401899+01:002021-03-02 18:38:19.025969+01:00
170156consentement optin scolaires2021-12-21 16:03:59.840785+01:002022-02-18 17:23:44.761388+01:00
213456DDCP Newsletter jeune public2020-11-10 09:43:19.667471+01:002021-03-02 18:38:19.052304+01:00
370056consentement optout scolaires2021-12-21 16:01:57.524946+01:002022-02-18 17:23:44.807776+01:00
496456DDCP achat billet nbr dep 190520212022-04-14 10:58:17.142834+02:002022-04-14 10:58:23.677264+02:00
..................
28218111ddcp_promo_ribambelle_2022_mapado_naikko_opt in2022-11-30 15:57:05.681956+01:002022-11-30 16:00:32.649210+01:00
28320061cp 14 mars2023-03-03 18:07:00.223750+01:002023-03-03 18:15:01.390970+01:00
28421931ddcp fichier musique 22023-04-14 14:33:53.628142+02:002023-04-14 15:00:35.608210+02:00
28524291import_mucem2023-06-26 18:32:40.146757+02:002023-06-26 18:45:02.614668+02:00
28624851po_au salon_2e envoi2023-07-03 13:09:48.598072+02:002023-07-03 13:15:03.634600+02:00
\n", - "

287 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " id target_type_id name \\\n", - "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n", - "1 701 56 consentement optin scolaires \n", - "2 134 56 DDCP Newsletter jeune public \n", - "3 700 56 consentement optout scolaires \n", - "4 964 56 DDCP achat billet nbr dep 19052021 \n", - ".. ... ... ... \n", - "282 1811 1 ddcp_promo_ribambelle_2022_mapado_naikko_opt in \n", - "283 2006 1 cp 14 mars \n", - "284 2193 1 ddcp fichier musique 2 \n", - "285 2429 1 import_mucem \n", - "286 2485 1 po_au salon_2e envoi \n", - "\n", - " created_at updated_at \n", - "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n", - "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n", - "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n", - "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n", - "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 \n", - ".. ... ... \n", - "282 2022-11-30 15:57:05.681956+01:00 2022-11-30 16:00:32.649210+01:00 \n", - "283 2023-03-03 18:07:00.223750+01:00 2023-03-03 18:15:01.390970+01:00 \n", - "284 2023-04-14 14:33:53.628142+02:00 2023-04-14 15:00:35.608210+02:00 \n", - "285 2023-06-26 18:32:40.146757+02:00 2023-06-26 18:45:02.614668+02:00 \n", - "286 2023-07-03 13:09:48.598072+02:00 2023-07-03 13:15:03.634600+02:00 \n", - "\n", - "[287 rows x 5 columns]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_targets" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "ef56e8ec-0429-475e-9c28-07983654c37b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_atupdated_atnameextra_field
011848246454001302021-09-23 09:35:47.617275+02:002021-09-23 09:35:47.617275+02:00NaNNaN
111848256454003452021-09-23 09:35:47.668846+02:002021-09-23 09:35:47.668846+02:00NaNNaN
211848286454021262021-09-23 12:02:51.253269+02:002021-09-23 12:02:51.253269+02:00NaNNaN
311848296454031262021-09-23 12:20:47.394480+02:002021-09-23 12:20:47.394480+02:00NaNNaN
412957706473013462021-09-28 16:02:29.372608+02:002021-09-28 16:02:29.372608+02:00NaNNaN
\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n", - "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n", - "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n", - "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n", - "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n", - "\n", - " updated_at name extra_field \n", - "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n", - "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n", - "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n", - "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n", - "4 2021-09-28 16:02:29.372608+02:00 NaN NaN " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 3. customer target mapping\n", - "\n", - "df1_customer_target_mappings.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "5244543f-1948-4769-be1f-691ad13174a8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0.000000\n", - "customer_id 0.000000\n", - "target_id 0.000000\n", - "created_at 0.000022\n", - "updated_at 0.000022\n", - "name 1.000000\n", - "extra_field 1.000000\n", - "dtype: float64" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_customer_target_mappings.isna().sum()/df1_customer_target_mappings.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "1c59e2ae-ee24-4195-bfea-ae55b92368ec", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "768024" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_customer_target_mappings[\"id\"].nunique()\n", - "# df1_customer_target_mappings.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "4ed49f39-e6d3-4785-ba7d-bce918d423ee", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# les couples customer_id / target_id sont-ils uniques ?\n", - "df1_customer_target_mappings.duplicated(subset = [\"customer_id\", \"target_id\"]).sum() # aucun doublon" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "f8cb1740-2cb0-4b3a-bfb0-d35423dc2cc7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
target_type_idtarget_type_is_importtarget_type_nametarget_type_identifier
069Falsemanual_dynamic_filtere0f4b8693184850fefd6d2a38f10584e
148Truemanual_structure382bca214204a2d3462f5ec2728d5d1e
21Truemanual_import12213df2ce68a624e4c0070521437bac
356Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
\n", - "
" - ], - "text/plain": [ - " target_type_id target_type_is_import target_type_name \\\n", - "0 69 False manual_dynamic_filter \n", - "1 48 True manual_structure \n", - "2 1 True manual_import \n", - "3 56 False manual_static_filter \n", - "\n", - " target_type_identifier \n", - "0 e0f4b8693184850fefd6d2a38f10584e \n", - "1 382bca214204a2d3462f5ec2728d5d1e \n", - "2 12213df2ce68a624e4c0070521437bac \n", - "3 fb27e81baa4debc6a4e1a8639c20e808 " - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 4.1. merge target with target type\n", - "\n", - "df1_target_types[[\"id\",\"is_import\",\"name\",\"identifier\"]].add_prefix(\"target_type_\")" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "id": "ebabdebd-3d75-4048-b65d-4cbd69bee390", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtarget_type_idnamecreated_atupdated_attarget_type_is_importtarget_type_nametarget_type_identifier
021756DDCP PROMO Art contemporain - salle de chauffe...2021-01-04 15:00:05.401899+01:002021-03-02 18:38:19.025969+01:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
170156consentement optin scolaires2021-12-21 16:03:59.840785+01:002022-02-18 17:23:44.761388+01:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
213456DDCP Newsletter jeune public2020-11-10 09:43:19.667471+01:002021-03-02 18:38:19.052304+01:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
370056consentement optout scolaires2021-12-21 16:01:57.524946+01:002022-02-18 17:23:44.807776+01:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
496456DDCP achat billet nbr dep 190520212022-04-14 10:58:17.142834+02:002022-04-14 10:58:23.677264+02:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
...........................
28218111ddcp_promo_ribambelle_2022_mapado_naikko_opt in2022-11-30 15:57:05.681956+01:002022-11-30 16:00:32.649210+01:00Truemanual_import12213df2ce68a624e4c0070521437bac
28320061cp 14 mars2023-03-03 18:07:00.223750+01:002023-03-03 18:15:01.390970+01:00Truemanual_import12213df2ce68a624e4c0070521437bac
28421931ddcp fichier musique 22023-04-14 14:33:53.628142+02:002023-04-14 15:00:35.608210+02:00Truemanual_import12213df2ce68a624e4c0070521437bac
28524291import_mucem2023-06-26 18:32:40.146757+02:002023-06-26 18:45:02.614668+02:00Truemanual_import12213df2ce68a624e4c0070521437bac
28624851po_au salon_2e envoi2023-07-03 13:09:48.598072+02:002023-07-03 13:15:03.634600+02:00Truemanual_import12213df2ce68a624e4c0070521437bac
\n", - "

287 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " id target_type_id name \\\n", - "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n", - "1 701 56 consentement optin scolaires \n", - "2 134 56 DDCP Newsletter jeune public \n", - "3 700 56 consentement optout scolaires \n", - "4 964 56 DDCP achat billet nbr dep 19052021 \n", - ".. ... ... ... \n", - "282 1811 1 ddcp_promo_ribambelle_2022_mapado_naikko_opt in \n", - "283 2006 1 cp 14 mars \n", - "284 2193 1 ddcp fichier musique 2 \n", - "285 2429 1 import_mucem \n", - "286 2485 1 po_au salon_2e envoi \n", - "\n", - " created_at updated_at \\\n", - "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n", - "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n", - "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n", - "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n", - "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 \n", - ".. ... ... \n", - "282 2022-11-30 15:57:05.681956+01:00 2022-11-30 16:00:32.649210+01:00 \n", - "283 2023-03-03 18:07:00.223750+01:00 2023-03-03 18:15:01.390970+01:00 \n", - "284 2023-04-14 14:33:53.628142+02:00 2023-04-14 15:00:35.608210+02:00 \n", - "285 2023-06-26 18:32:40.146757+02:00 2023-06-26 18:45:02.614668+02:00 \n", - "286 2023-07-03 13:09:48.598072+02:00 2023-07-03 13:15:03.634600+02:00 \n", - "\n", - " target_type_is_import target_type_name \\\n", - "0 False manual_static_filter \n", - "1 False manual_static_filter \n", - "2 False manual_static_filter \n", - "3 False manual_static_filter \n", - "4 False manual_static_filter \n", - ".. ... ... \n", - "282 True manual_import \n", - "283 True manual_import \n", - "284 True manual_import \n", - "285 True manual_import \n", - "286 True manual_import \n", - "\n", - " target_type_identifier \n", - "0 fb27e81baa4debc6a4e1a8639c20e808 \n", - "1 fb27e81baa4debc6a4e1a8639c20e808 \n", - "2 fb27e81baa4debc6a4e1a8639c20e808 \n", - "3 fb27e81baa4debc6a4e1a8639c20e808 \n", - "4 fb27e81baa4debc6a4e1a8639c20e808 \n", - ".. ... \n", - "282 12213df2ce68a624e4c0070521437bac \n", - "283 12213df2ce68a624e4c0070521437bac \n", - "284 12213df2ce68a624e4c0070521437bac \n", - "285 12213df2ce68a624e4c0070521437bac \n", - "286 12213df2ce68a624e4c0070521437bac \n", - "\n", - "[287 rows x 8 columns]" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# merge\n", - "\n", - "df1_targets_full = pd.merge(df1_targets, df1_target_types[[\"id\",\"is_import\",\"name\",\"identifier\"]].add_prefix(\"target_type_\"), left_on='target_type_id', right_on='target_type_id', how='left')\n", - "df1_targets_full" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "f0b03a5d-b622-496a-bc71-ef92e91f9e51", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_atupdated_atnameextra_field
011848246454001302021-09-23 09:35:47.617275+02:002021-09-23 09:35:47.617275+02:00NaNNaN
111848256454003452021-09-23 09:35:47.668846+02:002021-09-23 09:35:47.668846+02:00NaNNaN
211848286454021262021-09-23 12:02:51.253269+02:002021-09-23 12:02:51.253269+02:00NaNNaN
311848296454031262021-09-23 12:20:47.394480+02:002021-09-23 12:20:47.394480+02:00NaNNaN
412957706473013462021-09-28 16:02:29.372608+02:002021-09-28 16:02:29.372608+02:00NaNNaN
\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n", - "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n", - "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n", - "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n", - "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n", - "\n", - " updated_at name extra_field \n", - "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n", - "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n", - "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n", - "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n", - "4 2021-09-28 16:02:29.372608+02:00 NaN NaN " - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 4.2. merge df1_customer_target_mappings with df1_targets_full\n", - "\n", - "df1_customer_target_mappings.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "906e01fd-23b3-4da7-bc5e-6618599fbb05", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "17" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Q : les dates de création et de mise à jour de la table customer target mapping sont elles égales ??\n", - "\n", - "# 17 observations for which creation date != update date, ms ce sont que des Nan, OK !\n", - "(df1_customer_target_mappings[\"created_at\"] != df1_customer_target_mappings[\"updated_at\"]).sum() " - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "c9265d2f-b636-415e-bc2d-99b932b89424", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_atupdated_atnameextra_field
6054841691570661701264NaNNaNNaNNaN
6545491832071651594264NaNNaNNaNNaN
6545501832072663061264NaNNaNNaNNaN
6545511832073663114264NaNNaNNaNNaN
6551621949466663865264NaNNaNNaNNaN
7540382154438664300264NaNNaNNaNNaN
7609292282079665557264NaNNaNNaNNaN
7609302282080665563264NaNNaNNaNNaN
7617872675293661492264NaNNaNNaNNaN
7617982721237665931264NaNNaNNaNNaN
7617992721238665932264NaNNaNNaNNaN
7618002721239665938264NaNNaNNaNNaN
7618012721240665956264NaNNaNNaNNaN
7679182736960666466264NaNNaNNaNNaN
7679192736961666468264NaNNaNNaNNaN
7679682737357666824264NaNNaNNaNNaN
7679842737489107743264NaNNaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at updated_at name \\\n", - "605484 1691570 661701 264 NaN NaN NaN \n", - "654549 1832071 651594 264 NaN NaN NaN \n", - "654550 1832072 663061 264 NaN NaN NaN \n", - "654551 1832073 663114 264 NaN NaN NaN \n", - "655162 1949466 663865 264 NaN NaN NaN \n", - "754038 2154438 664300 264 NaN NaN NaN \n", - "760929 2282079 665557 264 NaN NaN NaN \n", - "760930 2282080 665563 264 NaN NaN NaN \n", - "761787 2675293 661492 264 NaN NaN NaN \n", - "761798 2721237 665931 264 NaN NaN NaN \n", - "761799 2721238 665932 264 NaN NaN NaN \n", - "761800 2721239 665938 264 NaN NaN NaN \n", - "761801 2721240 665956 264 NaN NaN NaN \n", - "767918 2736960 666466 264 NaN NaN NaN \n", - "767919 2736961 666468 264 NaN NaN NaN \n", - "767968 2737357 666824 264 NaN NaN NaN \n", - "767984 2737489 107743 264 NaN NaN NaN \n", - "\n", - " extra_field \n", - "605484 NaN \n", - "654549 NaN \n", - "654550 NaN \n", - "654551 NaN \n", - "655162 NaN \n", - "754038 NaN \n", - "760929 NaN \n", - "760930 NaN \n", - "761787 NaN \n", - "761798 NaN \n", - "761799 NaN \n", - "761800 NaN \n", - "761801 NaN \n", - "767918 NaN \n", - "767919 NaN \n", - "767968 NaN \n", - "767984 NaN " - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_customer_target_mappings[df1_customer_target_mappings[\"created_at\"] != df1_customer_target_mappings[\"updated_at\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "63e4ce23-ce13-46fc-82c5-9065a774b4b5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_atupdated_atnameextra_field
140341626517512642022-01-28 20:00:16.448920+01:002022-01-28 20:00:16.448920+01:00NaNNaN
149341627422132642022-01-28 20:30:17.323634+01:002022-01-28 20:30:17.323634+01:00NaNNaN
1120429205411560592642022-09-29 07:00:43.003440+02:002022-09-29 07:00:43.003440+02:00NaNNaN
1121429205511560632642022-09-29 07:00:43.003440+02:002022-09-29 07:00:43.003440+02:00NaNNaN
40064428048349162642023-03-14 07:01:27.868349+01:002023-03-14 07:01:27.868349+01:00NaNNaN
........................
7618012721240665956264NaNNaNNaNNaN
7679182736960666466264NaNNaNNaNNaN
7679192736961666468264NaNNaNNaNNaN
7679682737357666824264NaNNaNNaNNaN
7679842737489107743264NaNNaNNaNNaN
\n", - "

1954 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "140 3416265 1751 264 2022-01-28 20:00:16.448920+01:00 \n", - "149 3416274 2213 264 2022-01-28 20:30:17.323634+01:00 \n", - "1120 4292054 1156059 264 2022-09-29 07:00:43.003440+02:00 \n", - "1121 4292055 1156063 264 2022-09-29 07:00:43.003440+02:00 \n", - "4006 4428048 34916 264 2023-03-14 07:01:27.868349+01:00 \n", - "... ... ... ... ... \n", - "761801 2721240 665956 264 NaN \n", - "767918 2736960 666466 264 NaN \n", - "767919 2736961 666468 264 NaN \n", - "767968 2737357 666824 264 NaN \n", - "767984 2737489 107743 264 NaN \n", - "\n", - " updated_at name extra_field \n", - "140 2022-01-28 20:00:16.448920+01:00 NaN NaN \n", - "149 2022-01-28 20:30:17.323634+01:00 NaN NaN \n", - "1120 2022-09-29 07:00:43.003440+02:00 NaN NaN \n", - "1121 2022-09-29 07:00:43.003440+02:00 NaN NaN \n", - "4006 2023-03-14 07:01:27.868349+01:00 NaN NaN \n", - "... ... ... ... \n", - "761801 NaN NaN NaN \n", - "767918 NaN NaN NaN \n", - "767919 NaN NaN NaN \n", - "767968 NaN NaN NaN \n", - "767984 NaN NaN NaN \n", - "\n", - "[1954 rows x 7 columns]" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# ces données manquantes concernent le target avec id 264, mais les autres valeurs pr ce même target sont bien renseignées\n", - "df1_customer_target_mappings[df1_customer_target_mappings[\"target_id\"]==264]" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "0681b3e6-71bb-4132-b11a-646382f78de6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'2021-10-28 11:30:42.717180+02:00'" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Q : les dates de creation / update sont elles-uniques selon le client ou selon la target ?\n", - "\n", - "df1_customer_target_mappings[df1_customer_target_mappings[\"target_id\"]==217][\"updated_at\"].max()" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "93e4a125-08dd-42ba-baa6-0dc5996a76af", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtarget_type_idnamecreated_atupdated_attarget_type_is_importtarget_type_nametarget_type_identifier
021756DDCP PROMO Art contemporain - salle de chauffe...2021-01-04 15:00:05.401899+01:002021-03-02 18:38:19.025969+01:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
\n", - "
" - ], - "text/plain": [ - " id target_type_id name \\\n", - "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n", - "\n", - " created_at updated_at \\\n", - "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n", - "\n", - " target_type_is_import target_type_name \\\n", - "0 False manual_static_filter \n", - "\n", - " target_type_identifier \n", - "0 fb27e81baa4debc6a4e1a8639c20e808 " - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_targets_full[df1_targets_full[\"id\"]==217]" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "88eac1a6-74b1-4ce1-91a1-c1c69e7a9264", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtarget_type_idnamecreated_atupdated_attarget_type_is_importtarget_type_nametarget_type_identifier
021756DDCP PROMO Art contemporain - salle de chauffe...2021-01-04 15:00:05.401899+01:002021-03-02 18:38:19.025969+01:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
170156consentement optin scolaires2021-12-21 16:03:59.840785+01:002022-02-18 17:23:44.761388+01:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
213456DDCP Newsletter jeune public2020-11-10 09:43:19.667471+01:002021-03-02 18:38:19.052304+01:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
370056consentement optout scolaires2021-12-21 16:01:57.524946+01:002022-02-18 17:23:44.807776+01:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
496456DDCP achat billet nbr dep 190520212022-04-14 10:58:17.142834+02:002022-04-14 10:58:23.677264+02:00Falsemanual_static_filterfb27e81baa4debc6a4e1a8639c20e808
\n", - "
" - ], - "text/plain": [ - " id target_type_id name \\\n", - "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n", - "1 701 56 consentement optin scolaires \n", - "2 134 56 DDCP Newsletter jeune public \n", - "3 700 56 consentement optout scolaires \n", - "4 964 56 DDCP achat billet nbr dep 19052021 \n", - "\n", - " created_at updated_at \\\n", - "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n", - "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n", - "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n", - "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n", - "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 \n", - "\n", - " target_type_is_import target_type_name \\\n", - "0 False manual_static_filter \n", - "1 False manual_static_filter \n", - "2 False manual_static_filter \n", - "3 False manual_static_filter \n", - "4 False manual_static_filter \n", - "\n", - " target_type_identifier \n", - "0 fb27e81baa4debc6a4e1a8639c20e808 \n", - "1 fb27e81baa4debc6a4e1a8639c20e808 \n", - "2 fb27e81baa4debc6a4e1a8639c20e808 \n", - "3 fb27e81baa4debc6a4e1a8639c20e808 \n", - "4 fb27e81baa4debc6a4e1a8639c20e808 " - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_targets_full.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "id": "9af4066e-97d8-4066-a7ef-094807e33ba3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_atupdated_atnameextra_field
011848246454001302021-09-23 09:35:47.617275+02:002021-09-23 09:35:47.617275+02:00NaNNaN
111848256454003452021-09-23 09:35:47.668846+02:002021-09-23 09:35:47.668846+02:00NaNNaN
211848286454021262021-09-23 12:02:51.253269+02:002021-09-23 12:02:51.253269+02:00NaNNaN
311848296454031262021-09-23 12:20:47.394480+02:002021-09-23 12:20:47.394480+02:00NaNNaN
412957706473013462021-09-28 16:02:29.372608+02:002021-09-28 16:02:29.372608+02:00NaNNaN
........................
76801927375456669833452021-12-14 14:48:05.456842+01:002021-12-14 14:48:05.456842+01:00NaNNaN
76802027375466669833462021-12-14 14:48:05.465830+01:002021-12-14 14:48:05.465830+01:00NaNNaN
76802127375756669863462021-12-14 23:15:42.757832+01:002021-12-14 23:15:42.757832+01:00NaNNaN
76802227375766669873452021-12-15 00:14:59.018215+01:002021-12-15 00:14:59.018215+01:00NaNNaN
76802327375776669873462021-12-15 00:14:59.029434+01:002021-12-15 00:14:59.029434+01:00NaNNaN
\n", - "

768024 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n", - "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n", - "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n", - "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n", - "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n", - "... ... ... ... ... \n", - "768019 2737545 666983 345 2021-12-14 14:48:05.456842+01:00 \n", - "768020 2737546 666983 346 2021-12-14 14:48:05.465830+01:00 \n", - "768021 2737575 666986 346 2021-12-14 23:15:42.757832+01:00 \n", - "768022 2737576 666987 345 2021-12-15 00:14:59.018215+01:00 \n", - "768023 2737577 666987 346 2021-12-15 00:14:59.029434+01:00 \n", - "\n", - " updated_at name extra_field \n", - "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n", - "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n", - "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n", - "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n", - "4 2021-09-28 16:02:29.372608+02:00 NaN NaN \n", - "... ... ... ... \n", - "768019 2021-12-14 14:48:05.456842+01:00 NaN NaN \n", - "768020 2021-12-14 14:48:05.465830+01:00 NaN NaN \n", - "768021 2021-12-14 23:15:42.757832+01:00 NaN NaN \n", - "768022 2021-12-15 00:14:59.018215+01:00 NaN NaN \n", - "768023 2021-12-15 00:14:59.029434+01:00 NaN NaN \n", - "\n", - "[768024 rows x 7 columns]" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_customer_target_mappings" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "id": "bcb53207-017c-4c62-ae05-56fbbfbeb3e9", - "metadata": {}, - "outputs": [], - "source": [ - "# change the position of the column target type id\n", - "\n", - "# Spécifiez le nom de la colonne à déplacer et la colonne après laquelle vous souhaitez la placer\n", - "column_to_move = 'target_type_id'\n", - "\n", - "# Récupérez l'index de la colonne de référence\n", - "reference_index = df1_targets_full.columns.get_loc(\"target_type_name\")\n", - "\n", - "# Créez une copie de la colonne que vous voulez déplacer\n", - "column_copy = df1_targets_full[column_to_move].copy()\n", - "\n", - "# Supprimez la colonne d'origine\n", - "df1_targets_full = df1_targets_full.drop(column_to_move, axis=1)\n", - "\n", - "# Utilisez la méthode insert pour déplacer la colonne à la nouvelle position\n", - "df1_targets_full.insert(reference_index - 1, column_to_move, column_copy)" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "id": "e3e2b729-c661-44dd-acf3-afdb85353bce", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
target_idtarget_nametarget_created_attarget_updated_attarget_type_is_importtarget_type_idtarget_type_nametarget_type_identifier
0217DDCP PROMO Art contemporain - salle de chauffe...2021-01-04 15:00:05.401899+01:002021-03-02 18:38:19.025969+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
1701consentement optin scolaires2021-12-21 16:03:59.840785+01:002022-02-18 17:23:44.761388+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
2134DDCP Newsletter jeune public2020-11-10 09:43:19.667471+01:002021-03-02 18:38:19.052304+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
3700consentement optout scolaires2021-12-21 16:01:57.524946+01:002022-02-18 17:23:44.807776+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
4964DDCP achat billet nbr dep 190520212022-04-14 10:58:17.142834+02:002022-04-14 10:58:23.677264+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
\n", - "
" - ], - "text/plain": [ - " target_id target_name \\\n", - "0 217 DDCP PROMO Art contemporain - salle de chauffe... \n", - "1 701 consentement optin scolaires \n", - "2 134 DDCP Newsletter jeune public \n", - "3 700 consentement optout scolaires \n", - "4 964 DDCP achat billet nbr dep 19052021 \n", - "\n", - " target_created_at target_updated_at \\\n", - "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n", - "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n", - "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n", - "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n", - "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 \n", - "\n", - " target_type_is_import target_type_id target_type_name \\\n", - "0 False 56 manual_static_filter \n", - "1 False 56 manual_static_filter \n", - "2 False 56 manual_static_filter \n", - "3 False 56 manual_static_filter \n", - "4 False 56 manual_static_filter \n", - "\n", - " target_type_identifier \n", - "0 fb27e81baa4debc6a4e1a8639c20e808 \n", - "1 fb27e81baa4debc6a4e1a8639c20e808 \n", - "2 fb27e81baa4debc6a4e1a8639c20e808 \n", - "3 fb27e81baa4debc6a4e1a8639c20e808 \n", - "4 fb27e81baa4debc6a4e1a8639c20e808 " - ] - }, - "execution_count": 109, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_targets_full = df1_targets_full.rename(columns=lambda x: 'target_' + x if not x.startswith('target_') else x)\n", - "df1_targets_full.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "id": "cda50294-e9f3-4c0e-9172-85fde93efa70", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_at
011848246454001302021-09-23 09:35:47.617275+02:00
111848256454003452021-09-23 09:35:47.668846+02:00
211848286454021262021-09-23 12:02:51.253269+02:00
311848296454031262021-09-23 12:20:47.394480+02:00
412957706473013462021-09-28 16:02:29.372608+02:00
\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at\n", - "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00\n", - "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00\n", - "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00\n", - "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00\n", - "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00" - ] - }, - "execution_count": 110, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\", \"created_at\"]].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "id": "1aaac887-5ea9-4651-8628-920c7d80f120", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_attarget_nametarget_created_attarget_updated_attarget_type_is_importtarget_type_idtarget_type_nametarget_type_identifier
011848246454001302021-09-23 09:35:47.617275+02:00DDCP PROMO Réseau livres2020-11-04 18:40:49.500866+01:002021-03-02 18:38:19.084287+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
111848256454003452021-09-23 09:35:47.668846+02:00Inscrits NL générale site web2021-04-16 17:17:26.069199+02:002021-04-16 17:17:26.069199+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
211848286454021262021-09-23 12:02:51.253269+02:00DDCP PROMO Art contemporain2020-11-04 18:38:53.016572+01:002021-04-16 17:17:25.850107+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
311848296454031262021-09-23 12:20:47.394480+02:00DDCP PROMO Art contemporain2020-11-04 18:38:53.016572+01:002021-04-16 17:17:25.850107+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
412957706473013462021-09-28 16:02:29.372608+02:00Votre première liste2021-04-16 17:17:26.080378+02:002021-04-16 17:17:26.080378+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
....................................
76801927375456669833452021-12-14 14:48:05.456842+01:00Inscrits NL générale site web2021-04-16 17:17:26.069199+02:002021-04-16 17:17:26.069199+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
76802027375466669833462021-12-14 14:48:05.465830+01:00Votre première liste2021-04-16 17:17:26.080378+02:002021-04-16 17:17:26.080378+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
76802127375756669863462021-12-14 23:15:42.757832+01:00Votre première liste2021-04-16 17:17:26.080378+02:002021-04-16 17:17:26.080378+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
76802227375766669873452021-12-15 00:14:59.018215+01:00Inscrits NL générale site web2021-04-16 17:17:26.069199+02:002021-04-16 17:17:26.069199+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
76802327375776669873462021-12-15 00:14:59.029434+01:00Votre première liste2021-04-16 17:17:26.080378+02:002021-04-16 17:17:26.080378+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
\n", - "

768024 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n", - "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n", - "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n", - "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n", - "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n", - "... ... ... ... ... \n", - "768019 2737545 666983 345 2021-12-14 14:48:05.456842+01:00 \n", - "768020 2737546 666983 346 2021-12-14 14:48:05.465830+01:00 \n", - "768021 2737575 666986 346 2021-12-14 23:15:42.757832+01:00 \n", - "768022 2737576 666987 345 2021-12-15 00:14:59.018215+01:00 \n", - "768023 2737577 666987 346 2021-12-15 00:14:59.029434+01:00 \n", - "\n", - " target_name target_created_at \\\n", - "0 DDCP PROMO Réseau livres 2020-11-04 18:40:49.500866+01:00 \n", - "1 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n", - "2 DDCP PROMO Art contemporain 2020-11-04 18:38:53.016572+01:00 \n", - "3 DDCP PROMO Art contemporain 2020-11-04 18:38:53.016572+01:00 \n", - "4 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n", - "... ... ... \n", - "768019 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n", - "768020 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n", - "768021 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n", - "768022 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n", - "768023 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n", - "\n", - " target_updated_at target_type_is_import \\\n", - "0 2021-03-02 18:38:19.084287+01:00 False \n", - "1 2021-04-16 17:17:26.069199+02:00 False \n", - "2 2021-04-16 17:17:25.850107+02:00 False \n", - "3 2021-04-16 17:17:25.850107+02:00 False \n", - "4 2021-04-16 17:17:26.080378+02:00 False \n", - "... ... ... \n", - "768019 2021-04-16 17:17:26.069199+02:00 False \n", - "768020 2021-04-16 17:17:26.080378+02:00 False \n", - "768021 2021-04-16 17:17:26.080378+02:00 False \n", - "768022 2021-04-16 17:17:26.069199+02:00 False \n", - "768023 2021-04-16 17:17:26.080378+02:00 False \n", - "\n", - " target_type_id target_type_name target_type_identifier \n", - "0 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "1 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "2 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "3 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "4 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "... ... ... ... \n", - "768019 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "768020 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "768021 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "768022 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "768023 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "\n", - "[768024 rows x 11 columns]" - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# finally, merge\n", - "\n", - "# pour df1_customer_target_mappings on enlève les colonnes name, extra_field, et updated_at (valeur égale à created_at)\n", - "# note : by making a left join on df1_customer_target_mappings, we suppress 2 targets that have no customer associated\n", - "\n", - "df1_customer_targets = pd.merge(df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\", \"created_at\"]], \n", - " df1_targets_full, left_on='target_id', right_on='target_id', how='left')\n", - "df1_customer_targets" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "id": "95657bda-d060-48ca-8217-3e3f119028c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_attarget_nametarget_created_attarget_updated_attarget_type_is_importtarget_type_idtarget_type_nametarget_type_identifier
011848246454001302021-09-23 09:35:47.617275+02:00DDCP PROMO Réseau livres2020-11-04 18:40:49.500866+01:002021-03-02 18:38:19.084287+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
111848256454003452021-09-23 09:35:47.668846+02:00Inscrits NL générale site web2021-04-16 17:17:26.069199+02:002021-04-16 17:17:26.069199+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
211848286454021262021-09-23 12:02:51.253269+02:00DDCP PROMO Art contemporain2020-11-04 18:38:53.016572+01:002021-04-16 17:17:25.850107+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
311848296454031262021-09-23 12:20:47.394480+02:00DDCP PROMO Art contemporain2020-11-04 18:38:53.016572+01:002021-04-16 17:17:25.850107+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
412957706473013462021-09-28 16:02:29.372608+02:00Votre première liste2021-04-16 17:17:26.080378+02:002021-04-16 17:17:26.080378+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
....................................
76801927375456669833452021-12-14 14:48:05.456842+01:00Inscrits NL générale site web2021-04-16 17:17:26.069199+02:002021-04-16 17:17:26.069199+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
76802027375466669833462021-12-14 14:48:05.465830+01:00Votre première liste2021-04-16 17:17:26.080378+02:002021-04-16 17:17:26.080378+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
76802127375756669863462021-12-14 23:15:42.757832+01:00Votre première liste2021-04-16 17:17:26.080378+02:002021-04-16 17:17:26.080378+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
76802227375766669873452021-12-15 00:14:59.018215+01:00Inscrits NL générale site web2021-04-16 17:17:26.069199+02:002021-04-16 17:17:26.069199+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
76802327375776669873462021-12-15 00:14:59.029434+01:00Votre première liste2021-04-16 17:17:26.080378+02:002021-04-16 17:17:26.080378+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808
\n", - "

768024 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n", - "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n", - "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n", - "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n", - "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n", - "... ... ... ... ... \n", - "768019 2737545 666983 345 2021-12-14 14:48:05.456842+01:00 \n", - "768020 2737546 666983 346 2021-12-14 14:48:05.465830+01:00 \n", - "768021 2737575 666986 346 2021-12-14 23:15:42.757832+01:00 \n", - "768022 2737576 666987 345 2021-12-15 00:14:59.018215+01:00 \n", - "768023 2737577 666987 346 2021-12-15 00:14:59.029434+01:00 \n", - "\n", - " target_name target_created_at \\\n", - "0 DDCP PROMO Réseau livres 2020-11-04 18:40:49.500866+01:00 \n", - "1 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n", - "2 DDCP PROMO Art contemporain 2020-11-04 18:38:53.016572+01:00 \n", - "3 DDCP PROMO Art contemporain 2020-11-04 18:38:53.016572+01:00 \n", - "4 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n", - "... ... ... \n", - "768019 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n", - "768020 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n", - "768021 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n", - "768022 Inscrits NL générale site web 2021-04-16 17:17:26.069199+02:00 \n", - "768023 Votre première liste 2021-04-16 17:17:26.080378+02:00 \n", - "\n", - " target_updated_at target_type_is_import \\\n", - "0 2021-03-02 18:38:19.084287+01:00 False \n", - "1 2021-04-16 17:17:26.069199+02:00 False \n", - "2 2021-04-16 17:17:25.850107+02:00 False \n", - "3 2021-04-16 17:17:25.850107+02:00 False \n", - "4 2021-04-16 17:17:26.080378+02:00 False \n", - "... ... ... \n", - "768019 2021-04-16 17:17:26.069199+02:00 False \n", - "768020 2021-04-16 17:17:26.080378+02:00 False \n", - "768021 2021-04-16 17:17:26.080378+02:00 False \n", - "768022 2021-04-16 17:17:26.069199+02:00 False \n", - "768023 2021-04-16 17:17:26.080378+02:00 False \n", - "\n", - " target_type_id target_type_name target_type_identifier \n", - "0 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "1 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "2 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "3 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "4 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "... ... ... ... \n", - "768019 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "768020 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "768021 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "768022 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "768023 56 manual_static_filter fb27e81baa4debc6a4e1a8639c20e808 \n", - "\n", - "[768024 rows x 11 columns]" - ] - }, - "execution_count": 138, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# rq : on dirait que la date de création des targets est à peine inférieure à la date minimum de création des targets des customers \n", - "# idée : les targets sont créées puis envoyées aux clients, d'où un léger délai \n", - "# mais question substiste : pourquoi les clients ne reçoivent-ils pas la target en même temps ? \n", - "\n", - "# vérifions que la date de création de la target est tjrs inférieure à la date de création minimum pour tous les clients ayant reçu la target\n", - "\n", - "# first step : convert strings into dates\n", - "\n", - "df1_customer_targets[\"created_at\"] = df1_customer_targets[\"created_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\n", - "df1_customer_targets[\"target_created_at\"] = df1_customer_targets[\"target_created_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\n", - "df1_customer_targets[\"target_updated_at\"] = df1_customer_targets[\"target_updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "id": "58b22fab-d13d-456a-8250-1da035572fe9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "target_id\n", - "116 0 days 00:00:00.949028\n", - "117 0 days 00:00:00.037337\n", - "119 0 days 00:00:00.024423\n", - "120 0 days 00:00:00.058732\n", - "122 0 days 00:00:00.027283\n", - " ... \n", - "2779 0 days 00:00:19.087958\n", - "2788 0 days 00:01:36.372927\n", - "2825 0 days 00:00:00.028771\n", - "2830 0 days 00:00:01.587058\n", - "2833 0 days 00:00:00.031071\n", - "Name: creation_delay, Length: 283, dtype: object" - ] - }, - "execution_count": 144, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# second step : compute delay and minimum by target\n", - "\n", - "df1_customer_targets[\"creation_delay\"] = df1_customer_targets[\"created_at\"] -df1_customer_targets[\"target_created_at\"]\n", - "\n", - "\n", - "df1_customer_targets.groupby(\"target_id\")[\"creation_delay\"].min()" - ] - }, - { - "cell_type": "code", - "execution_count": 148, - "id": "4b5c8f3e-9227-466c-a4c0-2280864a5036", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 days 00:00:00.009293\n", - "686 days 23:14:10.435866\n" - ] - } - ], - "source": [ - "print(df1_customer_targets.groupby(\"target_id\")[\"creation_delay\"].min().min())\n", - "print((df1_customer_targets.groupby(\"target_id\")[\"creation_delay\"].min()).max())" - ] - }, - { - "cell_type": "code", - "execution_count": 153, - "id": "41e4040c-45a0-41ac-be91-4c86ef5ab1a8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "target_id\n", - "335 285 days 22:56:30.356536\n", - "339 86 days 21:34:19.282253\n", - "469 7 days 07:24:03.446563\n", - "490 3 days 16:28:38.068677\n", - "502 7 days 20:15:19.326651\n", - "515 1 days 22:49:33.761856\n", - "517 76 days 00:41:25.366394\n", - "528 26 days 06:17:44.689111\n", - "529 6 days 02:41:29.617761\n", - "530 1 days 04:34:33.843116\n", - "642 219 days 16:50:10.816034\n", - "695 668 days 03:31:22.896313\n", - "697 58 days 20:26:26.744823\n", - "699 686 days 23:14:10.435866\n", - "786 625 days 14:47:48.797084\n", - "1747 14 days 04:08:24.295840\n", - "2094 239 days 15:13:18.681637\n", - "2321 167 days 21:19:37.490219\n", - "Name: creation_delay, dtype: object" - ] - }, - "execution_count": 153, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# glt, le délai création de la target - création pour le premier client est très court, envoi quasi instantanné\n", - "# mais parfois, le délai est très long, plus d'une année pour les cas extrêmes\n", - "\n", - "min_target_delay = df1_customer_targets.groupby(\"target_id\")[\"creation_delay\"].min()\n", - "min_target_delay[min_target_delay > timedelta(days=1)]" - ] - }, - { - "cell_type": "code", - "execution_count": 155, - "id": "ffb2d1be-b1cb-4285-9584-d96ffeee146e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "target_type_id\n", - "1 0 days 00:00:06.490151\n", - "56 0 days 00:00:00.009293\n", - "69 0 days 00:00:00.032269\n", - "Name: creation_delay, dtype: object" - ] - }, - "execution_count": 155, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_customer_targets.groupby(\"target_type_id\")[\"creation_delay\"].min() # les target de type 1 ont un plus grd délai" - ] - }, - { - "cell_type": "code", - "execution_count": 159, - "id": "44d5a1f5-0691-43de-bb9f-9915830bbb77", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[56 69 1]\n", - "[56 69 1]\n" - ] - } - ], - "source": [ - "print(df1_customer_targets[\"target_type_id\"].unique())\n", - "print(df1_targets[\"target_type_id\"].unique()) # rq : slt 3 types de target sur les 4 sont dans la table" - ] - }, - { - "cell_type": "code", - "execution_count": 165, - "id": "3a21df0d-0199-45d7-9019-e69dab67c9a8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_attarget_nametarget_created_attarget_updated_attarget_type_is_importtarget_type_idtarget_type_nametarget_type_identifiercreation_delay
011848246454001302021-09-23 09:35:47.617275+02:00DDCP PROMO Réseau livres2020-11-04 18:40:49.500866+01:002021-03-02 18:38:19.084287+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808322 days, 13:54:58.116409
111848256454003452021-09-23 09:35:47.668846+02:00Inscrits NL générale site web2021-04-16 17:17:26.069199+02:002021-04-16 17:17:26.069199+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808159 days, 16:18:21.599647
211848286454021262021-09-23 12:02:51.253269+02:00DDCP PROMO Art contemporain2020-11-04 18:38:53.016572+01:002021-04-16 17:17:25.850107+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808322 days, 16:23:58.236697
311848296454031262021-09-23 12:20:47.394480+02:00DDCP PROMO Art contemporain2020-11-04 18:38:53.016572+01:002021-04-16 17:17:25.850107+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808322 days, 16:41:54.377908
412957706473013462021-09-28 16:02:29.372608+02:00Votre première liste2021-04-16 17:17:26.080378+02:002021-04-16 17:17:26.080378+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808164 days, 22:45:03.292230
511848336456273982021-09-24 18:16:33.432760+02:00DDCP PROMO MD participants ateliers yoga2021-05-26 10:54:12.232999+02:002021-05-26 10:54:22.378253+02:00False69manual_dynamic_filtere0f4b8693184850fefd6d2a38f10584e121 days, 7:22:21.199761
6445281812087366312023-05-06 03:29:43.875970+02:00consentement optin b2b2021-11-30 10:03:37.430645+01:002022-02-18 17:21:30.653027+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808521 days, 16:26:06.445325
7429170211558455022022-09-28 12:55:36.843316+02:00Automation_parrainage_newsletter_générale2021-08-10 15:25:56.142538+02:002021-08-10 15:26:06.275964+02:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808413 days, 21:29:40.700778
8409640611216514692022-07-31 11:45:19.694236+02:00RI Newsletter Alexandrie (inscriptions formula...2021-07-08 11:31:10.246495+02:002022-01-26 12:14:17.941253+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808388 days, 0:14:09.447741
9445282412087426312023-05-06 03:29:43.901323+02:00consentement optin b2b2021-11-30 10:03:37.430645+01:002022-02-18 17:21:30.653027+01:00False56manual_static_filterfb27e81baa4debc6a4e1a8639c20e808521 days, 16:26:06.470678
\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n", - "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n", - "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n", - "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n", - "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n", - "5 1184833 645627 398 2021-09-24 18:16:33.432760+02:00 \n", - "6 4452818 1208736 631 2023-05-06 03:29:43.875970+02:00 \n", - "7 4291702 1155845 502 2022-09-28 12:55:36.843316+02:00 \n", - "8 4096406 1121651 469 2022-07-31 11:45:19.694236+02:00 \n", - "9 4452824 1208742 631 2023-05-06 03:29:43.901323+02:00 \n", - "\n", - " target_name \\\n", - "0 DDCP PROMO Réseau livres \n", - "1 Inscrits NL générale site web \n", - "2 DDCP PROMO Art contemporain \n", - "3 DDCP PROMO Art contemporain \n", - "4 Votre première liste \n", - "5 DDCP PROMO MD participants ateliers yoga \n", - "6 consentement optin b2b \n", - "7 Automation_parrainage_newsletter_générale \n", - "8 RI Newsletter Alexandrie (inscriptions formula... \n", - "9 consentement optin b2b \n", - "\n", - " target_created_at target_updated_at \\\n", - "0 2020-11-04 18:40:49.500866+01:00 2021-03-02 18:38:19.084287+01:00 \n", - "1 2021-04-16 17:17:26.069199+02:00 2021-04-16 17:17:26.069199+02:00 \n", - "2 2020-11-04 18:38:53.016572+01:00 2021-04-16 17:17:25.850107+02:00 \n", - "3 2020-11-04 18:38:53.016572+01:00 2021-04-16 17:17:25.850107+02:00 \n", - "4 2021-04-16 17:17:26.080378+02:00 2021-04-16 17:17:26.080378+02:00 \n", - "5 2021-05-26 10:54:12.232999+02:00 2021-05-26 10:54:22.378253+02:00 \n", - "6 2021-11-30 10:03:37.430645+01:00 2022-02-18 17:21:30.653027+01:00 \n", - "7 2021-08-10 15:25:56.142538+02:00 2021-08-10 15:26:06.275964+02:00 \n", - "8 2021-07-08 11:31:10.246495+02:00 2022-01-26 12:14:17.941253+01:00 \n", - "9 2021-11-30 10:03:37.430645+01:00 2022-02-18 17:21:30.653027+01:00 \n", - "\n", - " target_type_is_import target_type_id target_type_name \\\n", - "0 False 56 manual_static_filter \n", - "1 False 56 manual_static_filter \n", - "2 False 56 manual_static_filter \n", - "3 False 56 manual_static_filter \n", - "4 False 56 manual_static_filter \n", - "5 False 69 manual_dynamic_filter \n", - "6 False 56 manual_static_filter \n", - "7 False 56 manual_static_filter \n", - "8 False 56 manual_static_filter \n", - "9 False 56 manual_static_filter \n", - "\n", - " target_type_identifier creation_delay \n", - "0 fb27e81baa4debc6a4e1a8639c20e808 322 days, 13:54:58.116409 \n", - "1 fb27e81baa4debc6a4e1a8639c20e808 159 days, 16:18:21.599647 \n", - "2 fb27e81baa4debc6a4e1a8639c20e808 322 days, 16:23:58.236697 \n", - "3 fb27e81baa4debc6a4e1a8639c20e808 322 days, 16:41:54.377908 \n", - "4 fb27e81baa4debc6a4e1a8639c20e808 164 days, 22:45:03.292230 \n", - "5 e0f4b8693184850fefd6d2a38f10584e 121 days, 7:22:21.199761 \n", - "6 fb27e81baa4debc6a4e1a8639c20e808 521 days, 16:26:06.445325 \n", - "7 fb27e81baa4debc6a4e1a8639c20e808 413 days, 21:29:40.700778 \n", - "8 fb27e81baa4debc6a4e1a8639c20e808 388 days, 0:14:09.447741 \n", - "9 fb27e81baa4debc6a4e1a8639c20e808 521 days, 16:26:06.470678 " - ] - }, - "execution_count": 165, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# final visu : nice table for targets\n", - "\n", - "# pour la suite, on peut supprimer la colonne creation delay, \n", - "# était juste utile pour vérifier que la date de création était postérieure à la date de création de la target\n", - "\n", - "df1_customer_targets.head(10)" - ] - }, - { - "cell_type": "markdown", - "id": "d762394b-3aee-4284-a472-40a6b6f4308a", - "metadata": {}, - "source": [ - "## Campaign stats, campaigns" - ] - }, - { - "cell_type": "code", - "execution_count": 189, - "id": "9d338a1a-52a5-49c4-a277-37be3f190e81", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
01319613newsletter enseignants janvier 20227212022-01-14 16:06:42.586321+01:002022-02-03 14:17:27.112963+01:00NaNNaN0.0Falseaba3b6fd5d186d28e06ff97135cade7f2022-01-14 00:00:00+01:00
11319586lsf_janvier_20227172022-01-07 11:30:35.315895+01:002022-02-03 14:17:27.116171+01:00NaNNaN0.0False788d986905533aba051261497ecffcbb2022-01-07 00:00:00+01:00
21319282Invitation à déjeuner au Mucem | Vernissage « ...5912021-09-28 12:50:24.448752+02:002022-02-03 14:17:27.119582+01:00NaNNaN0.0False3493894fa4ea036cfc6433c3e2ee63b02021-09-28 00:00:00+02:00
31319283Vacances de la Toussaint - centres des loisirs5902021-09-28 18:01:04.692073+02:002022-02-03 14:17:27.124408+01:00NaNNaN0.0False08b255a5d42b89b0585260b6f2360bdd2021-09-28 00:00:00+02:00
41319636ddcp_promo_md_livemag7302022-01-27 18:00:41.053069+01:002022-02-03 14:17:27.127607+01:00NaNNaN0.0Falsed5cfead94f5350c12c322b5b664544c12022-01-27 00:00:00+01:00
\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "0 1319613 newsletter enseignants janvier 2022 721 \n", - "1 1319586 lsf_janvier_2022 717 \n", - "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n", - "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n", - "4 1319636 ddcp_promo_md_livemag 730 \n", - "\n", - " created_at updated_at \\\n", - "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n", - "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n", - "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n", - "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n", - "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "0 NaN NaN 0.0 False \n", - "1 NaN NaN 0.0 False \n", - "2 NaN NaN 0.0 False \n", - "3 NaN NaN 0.0 False \n", - "4 NaN NaN 0.0 False \n", - "\n", - " identifier sent_at \n", - "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n", - "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n", - "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n", - "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n", - "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 " - ] - }, - "execution_count": 189, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 1. campaigns\n", - "\n", - "df1_campaigns.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 171, - "id": "fad1a58c-cece-45f9-a44f-ca46884a9a81", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0.000000\n", - "name 0.000000\n", - "service_id 0.000000\n", - "created_at 0.000000\n", - "updated_at 0.000000\n", - "process_id 1.000000\n", - "report_url 1.000000\n", - "category 0.002090\n", - "to_be_synced 0.000000\n", - "identifier 0.000000\n", - "sent_at 0.003135\n", - "dtype: float64" - ] - }, - "execution_count": 171, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# part de Nan pour chaque variable\n", - "\n", - "df1_campaigns.isna().sum() / df1_campaigns.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 185, - "id": "cdeebf18-a3a4-4131-ad88-d45c39ec5786", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "name object\n", - "service_id int64\n", - "created_at object\n", - "updated_at object\n", - "process_id float64\n", - "report_url float64\n", - "category float64\n", - "to_be_synced bool\n", - "identifier object\n", - "sent_at object\n", - "dtype: object" - ] - }, - "execution_count": 185, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 186, - "id": "5c9b669a-477b-4f33-86df-b22ff2c21382", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str" - ] - }, - "execution_count": 186, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(df1_campaigns[\"identifier\"][0])" - ] - }, - { - "cell_type": "code", - "execution_count": 187, - "id": "b5b0af8d-b9a0-4224-a229-d74d90ac2686", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0., nan])" - ] - }, - "execution_count": 187, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# category\n", - "\n", - "df1_campaigns[\"category\"].isna()" - ] - }, - { - "cell_type": "code", - "execution_count": 191, - "id": "4cc618ae-063f-48fc-bce7-8b72d30ad4ca", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "957\n", - "957\n" - ] - } - ], - "source": [ - "# identifier\n", - "\n", - "print(df1_campaigns[\"identifier\"].nunique())\n", - "print(df1_campaigns.shape[0]) # identifier is unique" - ] - }, - { - "cell_type": "code", - "execution_count": 194, - "id": "d13c3f21-ebd7-4e9b-baca-1f3a10ac24a9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id 957\n", - "name 855\n", - "service_id 957\n", - "created_at 957\n", - "updated_at 957\n", - "process_id 0\n", - "report_url 0\n", - "category 1\n", - "to_be_synced 2\n", - "identifier 957\n", - "sent_at 737\n", - "dtype: int64\n" - ] - } - ], - "source": [ - "# service id\n", - "\n", - "print(df1_campaigns.nunique()) # on a un identifiant de service par campagne, mais pas un nom unique" - ] - }, - { - "cell_type": "code", - "execution_count": 211, - "id": "aea65b10-8a7f-472e-a7f5-455a90d3cfef", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
7771319239\"L'Orient sonore\" au Mucem à partir du 22 juillet1842021-09-24 11:56:09.277085+02:002021-09-24 11:56:09.277085+02:00NaNNaN0.0False6cdd60ea0045eb7a6ec44c54d29ed4022020-07-15 00:00:00+02:00
7781319240\"L'Orient sonore\" au Mucem à partir du 22 juillet1812021-09-24 11:56:09.284647+02:002021-09-24 11:56:09.284647+02:00NaNNaN0.0Falsefc221309746013ac554571fbd180e1c82020-07-09 00:00:00+02:00
2551320926Alexandrie NL211162023-01-31 11:08:55.915268+01:002023-01-31 11:08:56.286044+01:00NaNNaN0.0Falsedd77279f7d325eec933f05b1672f6a1f2023-01-31 12:08:54+01:00
1611320910Alexandrie NL210772023-01-24 09:01:00.250855+01:002023-01-24 09:01:00.271292+01:00NaNNaN0.0False062ddb6c727310e76b6200b7c71f63b52023-01-24 10:00:58+01:00
2411320574Alexandrie NL27312022-10-11 07:00:50.971513+02:002022-12-02 17:51:21.670983+01:00NaNNaN0.0False59c33016884a62116be975a9bb8257e32022-10-11 00:00:00+02:00
3171320972Centres_loisirs _vacances de février11242023-02-08 12:01:16.732961+01:002023-02-08 12:01:16.808008+01:00NaNNaN0.0Falsec7635bfd99248a2cdef8249ef7bfbef42023-02-08 13:01:15+01:00
1661320954Centres_loisirs _vacances de février11102023-02-01 09:30:41.267232+01:002023-02-01 09:30:41.354117+01:00NaNNaN0.0False2cbca44843a864533ec05b321ae1f9d12023-02-01 10:30:40+01:00
672148Champ social décembre 20202832021-04-03 18:24:42.186026+02:002021-09-24 11:56:08.182818+02:00NaNNaN0.0False0f49c89d1e7298bb9930789c8ed59d482020-12-03 00:00:00+01:00
56972Champ social décembre 20202842021-03-29 15:41:53.631952+02:002021-09-24 11:56:07.748770+02:00NaNNaN0.0False46ba9f2a6976570b0353203ec44742172020-12-04 00:00:00+01:00
1751319881Champ social mars 20228332022-04-25 10:00:26.029871+02:002022-12-02 17:51:22.319899+01:00NaNNaN0.0False013a006f03dbc5392effeb8f18fda7552022-04-25 00:00:00+02:00
3161319760Champ social mars 20227852022-03-11 13:00:28.333251+01:002022-12-02 17:51:21.991906+01:00NaNNaN0.0False4b04a686b0ad13dce35fa99fa4161c652022-03-11 00:00:00+01:00
3261319798DDCP Newsletter Destination Mucem Est 28042022-03-22 10:21:02.122363+01:002022-12-02 17:51:22.119041+01:00NaNNaN0.0Falsedc5689792e08eb2e219dce49e64c885b2022-03-22 00:00:00+01:00
1771319882DDCP Newsletter Destination Mucem Est 28432022-04-26 09:00:44.083713+02:002022-12-02 17:51:22.454684+01:00NaNNaN0.0False3d8e28caf901313a554cebc7d32e67e52022-04-26 00:00:00+02:00
3471319883DDCP Newsletter Destination Mucem Nord 28452022-04-26 09:00:46.020370+02:002022-12-02 17:51:22.463986+01:00NaNNaN0.0Falseb86e8d03fe992d1b0e19656875ee557c2022-04-26 00:00:00+02:00
3191319768DDCP Newsletter Destination Mucem Nord 27892022-03-17 10:20:51.757178+01:002022-12-02 17:51:22.064760+01:00NaNNaN0.0False68053af2923e00204c3ca7c6a3150cf72022-03-17 00:00:00+01:00
1761319885DDCP Newsletter Destination Mucem Nord Est 28422022-04-26 09:30:57.232149+02:002022-12-02 17:51:22.447304+01:00NaNNaN0.0Falsefc3cf452d3da8402bebb765225ce8c0e2022-04-26 00:00:00+02:00
3241319769DDCP Newsletter Destination Mucem Nord Est 28002022-03-17 10:22:58.736431+01:002022-12-02 17:51:22.107694+01:00NaNNaN0.0False7a53928fa4dd31e82c6ef826f341daec2022-03-17 00:00:00+01:00
2431319884DDCP Newsletter Destination Mucem Sud 28442022-04-26 09:00:46.894528+02:002022-12-02 17:51:22.459272+01:00NaNNaN0.0Falsee97ee2054defb209c35fe4dc945990612022-04-26 00:00:00+02:00
3271319799DDCP Newsletter Destination Mucem Sud 28052022-03-22 10:24:05.787335+01:002022-12-02 17:51:22.123726+01:00NaNNaN0.0False846c260d715e5b854ffad5f70a516c882022-03-22 00:00:00+01:00
6202681DDCP PROMO programmation Orient sonore Pass mu...2262021-04-08 21:10:40.634455+02:002021-09-24 11:56:07.922243+02:00NaNNaN0.0False9cfdf10e8fc047a44b08ed031e1f0ed12020-10-09 00:00:00+02:00
\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "777 1319239 \"L'Orient sonore\" au Mucem à partir du 22 juillet 184 \n", - "778 1319240 \"L'Orient sonore\" au Mucem à partir du 22 juillet 181 \n", - "255 1320926 Alexandrie NL2 1116 \n", - "161 1320910 Alexandrie NL2 1077 \n", - "241 1320574 Alexandrie NL2 731 \n", - "317 1320972 Centres_loisirs _vacances de février 1124 \n", - "166 1320954 Centres_loisirs _vacances de février 1110 \n", - "672 148 Champ social décembre 2020 283 \n", - "569 72 Champ social décembre 2020 284 \n", - "175 1319881 Champ social mars 2022 833 \n", - "316 1319760 Champ social mars 2022 785 \n", - "326 1319798 DDCP Newsletter Destination Mucem Est 2 804 \n", - "177 1319882 DDCP Newsletter Destination Mucem Est 2 843 \n", - "347 1319883 DDCP Newsletter Destination Mucem Nord 2 845 \n", - "319 1319768 DDCP Newsletter Destination Mucem Nord 2 789 \n", - "176 1319885 DDCP Newsletter Destination Mucem Nord Est 2 842 \n", - "324 1319769 DDCP Newsletter Destination Mucem Nord Est 2 800 \n", - "243 1319884 DDCP Newsletter Destination Mucem Sud 2 844 \n", - "327 1319799 DDCP Newsletter Destination Mucem Sud 2 805 \n", - "620 2681 DDCP PROMO programmation Orient sonore Pass mu... 226 \n", - "\n", - " created_at updated_at \\\n", - "777 2021-09-24 11:56:09.277085+02:00 2021-09-24 11:56:09.277085+02:00 \n", - "778 2021-09-24 11:56:09.284647+02:00 2021-09-24 11:56:09.284647+02:00 \n", - "255 2023-01-31 11:08:55.915268+01:00 2023-01-31 11:08:56.286044+01:00 \n", - "161 2023-01-24 09:01:00.250855+01:00 2023-01-24 09:01:00.271292+01:00 \n", - "241 2022-10-11 07:00:50.971513+02:00 2022-12-02 17:51:21.670983+01:00 \n", - "317 2023-02-08 12:01:16.732961+01:00 2023-02-08 12:01:16.808008+01:00 \n", - "166 2023-02-01 09:30:41.267232+01:00 2023-02-01 09:30:41.354117+01:00 \n", - "672 2021-04-03 18:24:42.186026+02:00 2021-09-24 11:56:08.182818+02:00 \n", - "569 2021-03-29 15:41:53.631952+02:00 2021-09-24 11:56:07.748770+02:00 \n", - "175 2022-04-25 10:00:26.029871+02:00 2022-12-02 17:51:22.319899+01:00 \n", - "316 2022-03-11 13:00:28.333251+01:00 2022-12-02 17:51:21.991906+01:00 \n", - "326 2022-03-22 10:21:02.122363+01:00 2022-12-02 17:51:22.119041+01:00 \n", - "177 2022-04-26 09:00:44.083713+02:00 2022-12-02 17:51:22.454684+01:00 \n", - "347 2022-04-26 09:00:46.020370+02:00 2022-12-02 17:51:22.463986+01:00 \n", - "319 2022-03-17 10:20:51.757178+01:00 2022-12-02 17:51:22.064760+01:00 \n", - "176 2022-04-26 09:30:57.232149+02:00 2022-12-02 17:51:22.447304+01:00 \n", - "324 2022-03-17 10:22:58.736431+01:00 2022-12-02 17:51:22.107694+01:00 \n", - "243 2022-04-26 09:00:46.894528+02:00 2022-12-02 17:51:22.459272+01:00 \n", - "327 2022-03-22 10:24:05.787335+01:00 2022-12-02 17:51:22.123726+01:00 \n", - "620 2021-04-08 21:10:40.634455+02:00 2021-09-24 11:56:07.922243+02:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "777 NaN NaN 0.0 False \n", - "778 NaN NaN 0.0 False \n", - "255 NaN NaN 0.0 False \n", - "161 NaN NaN 0.0 False \n", - "241 NaN NaN 0.0 False \n", - "317 NaN NaN 0.0 False \n", - "166 NaN NaN 0.0 False \n", - "672 NaN NaN 0.0 False \n", - "569 NaN NaN 0.0 False \n", - "175 NaN NaN 0.0 False \n", - "316 NaN NaN 0.0 False \n", - "326 NaN NaN 0.0 False \n", - "177 NaN NaN 0.0 False \n", - "347 NaN NaN 0.0 False \n", - "319 NaN NaN 0.0 False \n", - "176 NaN NaN 0.0 False \n", - "324 NaN NaN 0.0 False \n", - "243 NaN NaN 0.0 False \n", - "327 NaN NaN 0.0 False \n", - "620 NaN NaN 0.0 False \n", - "\n", - " identifier sent_at \n", - "777 6cdd60ea0045eb7a6ec44c54d29ed402 2020-07-15 00:00:00+02:00 \n", - "778 fc221309746013ac554571fbd180e1c8 2020-07-09 00:00:00+02:00 \n", - "255 dd77279f7d325eec933f05b1672f6a1f 2023-01-31 12:08:54+01:00 \n", - "161 062ddb6c727310e76b6200b7c71f63b5 2023-01-24 10:00:58+01:00 \n", - "241 59c33016884a62116be975a9bb8257e3 2022-10-11 00:00:00+02:00 \n", - "317 c7635bfd99248a2cdef8249ef7bfbef4 2023-02-08 13:01:15+01:00 \n", - "166 2cbca44843a864533ec05b321ae1f9d1 2023-02-01 10:30:40+01:00 \n", - "672 0f49c89d1e7298bb9930789c8ed59d48 2020-12-03 00:00:00+01:00 \n", - "569 46ba9f2a6976570b0353203ec4474217 2020-12-04 00:00:00+01:00 \n", - "175 013a006f03dbc5392effeb8f18fda755 2022-04-25 00:00:00+02:00 \n", - "316 4b04a686b0ad13dce35fa99fa4161c65 2022-03-11 00:00:00+01:00 \n", - "326 dc5689792e08eb2e219dce49e64c885b 2022-03-22 00:00:00+01:00 \n", - "177 3d8e28caf901313a554cebc7d32e67e5 2022-04-26 00:00:00+02:00 \n", - "347 b86e8d03fe992d1b0e19656875ee557c 2022-04-26 00:00:00+02:00 \n", - "319 68053af2923e00204c3ca7c6a3150cf7 2022-03-17 00:00:00+01:00 \n", - "176 fc3cf452d3da8402bebb765225ce8c0e 2022-04-26 00:00:00+02:00 \n", - "324 7a53928fa4dd31e82c6ef826f341daec 2022-03-17 00:00:00+01:00 \n", - "243 e97ee2054defb209c35fe4dc94599061 2022-04-26 00:00:00+02:00 \n", - "327 846c260d715e5b854ffad5f70a516c88 2022-03-22 00:00:00+01:00 \n", - "620 9cfdf10e8fc047a44b08ed031e1f0ed1 2020-10-09 00:00:00+02:00 " - ] - }, - "execution_count": 211, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# name\n", - "\n", - "df1_campaigns[df1_campaigns.duplicated(subset = [\"name\"], keep=False)].sort_values(\"name\").head(20)" - ] - }, - { - "cell_type": "code", - "execution_count": 207, - "id": "35ea834e-01a3-4841-a9a9-351c25c5af37", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "175 True\n", - "316 True\n", - "dtype: bool" - ] - }, - "execution_count": 207, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns[df1_campaigns[\"name\"]==\"Champ social mars 2022\"].duplicated(subset=\"name\", keep=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 226, - "id": "5e16bf37-c2e0-48c9-8a90-6713f7c6206c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Share of campaigns to synce : 0.52 % \n" - ] - } - ], - "source": [ - "# to be synced \n", - "\n", - "share_campaigns_to_be_synced = round(100 * df1_campaigns[\"to_be_synced\"].mean(),2)\n", - "print(f\"Share of campaigns to synce : {share_campaigns_to_be_synced} % \") # 0.5% of campaigns to synce" - ] - }, - { - "cell_type": "code", - "execution_count": 235, - "id": "88a6f9d4-ddd2-4288-9bba-7d9e76c66f51", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
431320752dre_alors_on_sort0712_tech&cult1212_lesreveill...10192022-11-28 09:30:31.189207+01:002022-12-02 17:51:23.474745+01:00NaNNaN0.0True03e0704b5690a2dee1861dc3ad3316c92022-11-28 00:00:00+01:00
791320755News hebdo du 5 au 4 décembre 202210602022-12-04 18:01:29.971417+01:002022-12-04 18:01:30.037656+01:00NaNNaN0.0True299a23a2291e2126b91d54f3601ec1622022-12-04 19:01:27+01:00
4641320749dre_le_sel_24112210542022-11-24 09:01:37.467710+01:002022-12-02 17:51:23.622812+01:00NaNNaN0.0Truedb576a7d2453575f29eab4bac787b9192022-11-24 00:00:00+01:00
4651320751News hebdo du 28 novembre au 4 décembre10572022-11-27 18:01:44.546081+01:002022-12-02 17:51:23.627178+01:00NaNNaN0.0Trued8700cbd38cc9f30cecb34f0c195b1372022-11-27 00:00:00+01:00
8881319474ddcp_promo_temps fort salammbo6702021-11-25 13:19:41.547780+01:002022-02-03 14:17:27.728648+01:00NaNNaN0.0True17c276c8e723eb46aef576537e9d56d02021-11-25 00:00:00+01:00
\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "43 1320752 dre_alors_on_sort0712_tech&cult1212_lesreveill... 1019 \n", - "79 1320755 News hebdo du 5 au 4 décembre 2022 1060 \n", - "464 1320749 dre_le_sel_241122 1054 \n", - "465 1320751 News hebdo du 28 novembre au 4 décembre 1057 \n", - "888 1319474 ddcp_promo_temps fort salammbo 670 \n", - "\n", - " created_at updated_at \\\n", - "43 2022-11-28 09:30:31.189207+01:00 2022-12-02 17:51:23.474745+01:00 \n", - "79 2022-12-04 18:01:29.971417+01:00 2022-12-04 18:01:30.037656+01:00 \n", - "464 2022-11-24 09:01:37.467710+01:00 2022-12-02 17:51:23.622812+01:00 \n", - "465 2022-11-27 18:01:44.546081+01:00 2022-12-02 17:51:23.627178+01:00 \n", - "888 2021-11-25 13:19:41.547780+01:00 2022-02-03 14:17:27.728648+01:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "43 NaN NaN 0.0 True \n", - "79 NaN NaN 0.0 True \n", - "464 NaN NaN 0.0 True \n", - "465 NaN NaN 0.0 True \n", - "888 NaN NaN 0.0 True \n", - "\n", - " identifier sent_at \n", - "43 03e0704b5690a2dee1861dc3ad3316c9 2022-11-28 00:00:00+01:00 \n", - "79 299a23a2291e2126b91d54f3601ec162 2022-12-04 19:01:27+01:00 \n", - "464 db576a7d2453575f29eab4bac787b919 2022-11-24 00:00:00+01:00 \n", - "465 d8700cbd38cc9f30cecb34f0c195b137 2022-11-27 00:00:00+01:00 \n", - "888 17c276c8e723eb46aef576537e9d56d0 2021-11-25 00:00:00+01:00 " - ] - }, - "execution_count": 235, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# focus : campaigns to synce - 5 cases\n", - "# la date d'envoie semble cohérente. Pas d'observation particulière sur ces cas ...\n", - "\n", - "df1_campaigns[df1_campaigns[\"to_be_synced\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": 234, - "id": "cf9dedd6-2554-4f9e-a09b-f1465718a18d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
431320752dre_alors_on_sort0712_tech&cult1212_lesreveill...10192022-11-28 09:30:31.189207+01:002022-12-02 17:51:23.474745+01:00NaNNaN0.0True03e0704b5690a2dee1861dc3ad3316c92022-11-28 00:00:00+01:00
791320755News hebdo du 5 au 4 décembre 202210602022-12-04 18:01:29.971417+01:002022-12-04 18:01:30.037656+01:00NaNNaN0.0True299a23a2291e2126b91d54f3601ec1622022-12-04 19:01:27+01:00
4641320749dre_le_sel_24112210542022-11-24 09:01:37.467710+01:002022-12-02 17:51:23.622812+01:00NaNNaN0.0Truedb576a7d2453575f29eab4bac787b9192022-11-24 00:00:00+01:00
4651320751News hebdo du 28 novembre au 4 décembre10572022-11-27 18:01:44.546081+01:002022-12-02 17:51:23.627178+01:00NaNNaN0.0Trued8700cbd38cc9f30cecb34f0c195b1372022-11-27 00:00:00+01:00
8881319474ddcp_promo_temps fort salammbo6702021-11-25 13:19:41.547780+01:002022-02-03 14:17:27.728648+01:00NaNNaN0.0True17c276c8e723eb46aef576537e9d56d02021-11-25 00:00:00+01:00
\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "43 1320752 dre_alors_on_sort0712_tech&cult1212_lesreveill... 1019 \n", - "79 1320755 News hebdo du 5 au 4 décembre 2022 1060 \n", - "464 1320749 dre_le_sel_241122 1054 \n", - "465 1320751 News hebdo du 28 novembre au 4 décembre 1057 \n", - "888 1319474 ddcp_promo_temps fort salammbo 670 \n", - "\n", - " created_at updated_at \\\n", - "43 2022-11-28 09:30:31.189207+01:00 2022-12-02 17:51:23.474745+01:00 \n", - "79 2022-12-04 18:01:29.971417+01:00 2022-12-04 18:01:30.037656+01:00 \n", - "464 2022-11-24 09:01:37.467710+01:00 2022-12-02 17:51:23.622812+01:00 \n", - "465 2022-11-27 18:01:44.546081+01:00 2022-12-02 17:51:23.627178+01:00 \n", - "888 2021-11-25 13:19:41.547780+01:00 2022-02-03 14:17:27.728648+01:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "43 NaN NaN 0.0 True \n", - "79 NaN NaN 0.0 True \n", - "464 NaN NaN 0.0 True \n", - "465 NaN NaN 0.0 True \n", - "888 NaN NaN 0.0 True \n", - "\n", - " identifier sent_at \n", - "43 03e0704b5690a2dee1861dc3ad3316c9 2022-11-28 00:00:00+01:00 \n", - "79 299a23a2291e2126b91d54f3601ec162 2022-12-04 19:01:27+01:00 \n", - "464 db576a7d2453575f29eab4bac787b919 2022-11-24 00:00:00+01:00 \n", - "465 d8700cbd38cc9f30cecb34f0c195b137 2022-11-27 00:00:00+01:00 \n", - "888 17c276c8e723eb46aef576537e9d56d0 2021-11-25 00:00:00+01:00 " - ] - }, - "execution_count": 234, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns[df1_campaigns[\"name\"].isin(df1_campaigns[df1_campaigns[\"to_be_synced\"]][\"name\"].unique()) ]" - ] - }, - { - "cell_type": "code", - "execution_count": 237, - "id": "ba2f188f-be49-4e19-9cb3-0ec54e58d0c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
01319613newsletter enseignants janvier 20227212022-01-14 16:06:42.586321+01:002022-02-03 14:17:27.112963+01:00NaNNaN0.0Falseaba3b6fd5d186d28e06ff97135cade7f2022-01-14 00:00:00+01:00
11319586lsf_janvier_20227172022-01-07 11:30:35.315895+01:002022-02-03 14:17:27.116171+01:00NaNNaN0.0False788d986905533aba051261497ecffcbb2022-01-07 00:00:00+01:00
21319282Invitation à déjeuner au Mucem | Vernissage « ...5912021-09-28 12:50:24.448752+02:002022-02-03 14:17:27.119582+01:00NaNNaN0.0False3493894fa4ea036cfc6433c3e2ee63b02021-09-28 00:00:00+02:00
31319283Vacances de la Toussaint - centres des loisirs5902021-09-28 18:01:04.692073+02:002022-02-03 14:17:27.124408+01:00NaNNaN0.0False08b255a5d42b89b0585260b6f2360bdd2021-09-28 00:00:00+02:00
41319636ddcp_promo_md_livemag7302022-01-27 18:00:41.053069+01:002022-02-03 14:17:27.127607+01:00NaNNaN0.0Falsed5cfead94f5350c12c322b5b664544c12022-01-27 00:00:00+01:00
51319614News hebdo du 17 janv au 23 janv 20227122022-01-16 18:01:28.974157+01:002022-02-03 14:17:27.130944+01:00NaNNaN0.0False19bc916108fc6938f52cb96f7e0879412022-01-16 00:00:00+01:00
61319263ddcp_promo_automne_littérature_relance_nn_ouverts5862021-09-24 15:00:04.174247+02:002021-09-24 16:13:10.505400+02:00NaNNaN0.0False605ff764c617d3cd28dbbdd72be8f9a22021-09-24 00:00:00+02:00
71319284Invitation au vernissage de l'exposition \"La C...5932021-09-30 14:47:18.135394+02:002022-02-03 14:17:27.134073+01:00NaNNaN0.0Falseacc3e0404646c57502b480dc052c4fe12021-09-30 00:00:00+02:00
81319625dre_mobilisations_artistiques_et_politiques7042022-01-27 10:01:16.716706+01:002022-02-03 14:17:27.172039+01:00NaNNaN0.0Falsef64eac11f2cd8f0efa196f8ad173178e2022-01-27 00:00:00+01:00
91319285ddcp_promo_soyinka_taubira_infos_pratiques5942021-10-01 12:16:57.031796+02:002022-02-03 14:17:27.137444+01:00NaNNaN0.0False076a0c97d09cf1a0ec3e19c7f2529f2b2021-10-01 00:00:00+02:00
\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "0 1319613 newsletter enseignants janvier 2022 721 \n", - "1 1319586 lsf_janvier_2022 717 \n", - "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n", - "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n", - "4 1319636 ddcp_promo_md_livemag 730 \n", - "5 1319614 News hebdo du 17 janv au 23 janv 2022 712 \n", - "6 1319263 ddcp_promo_automne_littérature_relance_nn_ouverts 586 \n", - "7 1319284 Invitation au vernissage de l'exposition \"La C... 593 \n", - "8 1319625 dre_mobilisations_artistiques_et_politiques 704 \n", - "9 1319285 ddcp_promo_soyinka_taubira_infos_pratiques 594 \n", - "\n", - " created_at updated_at \\\n", - "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n", - "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n", - "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n", - "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n", - "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n", - "5 2022-01-16 18:01:28.974157+01:00 2022-02-03 14:17:27.130944+01:00 \n", - "6 2021-09-24 15:00:04.174247+02:00 2021-09-24 16:13:10.505400+02:00 \n", - "7 2021-09-30 14:47:18.135394+02:00 2022-02-03 14:17:27.134073+01:00 \n", - "8 2022-01-27 10:01:16.716706+01:00 2022-02-03 14:17:27.172039+01:00 \n", - "9 2021-10-01 12:16:57.031796+02:00 2022-02-03 14:17:27.137444+01:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "0 NaN NaN 0.0 False \n", - "1 NaN NaN 0.0 False \n", - "2 NaN NaN 0.0 False \n", - "3 NaN NaN 0.0 False \n", - "4 NaN NaN 0.0 False \n", - "5 NaN NaN 0.0 False \n", - "6 NaN NaN 0.0 False \n", - "7 NaN NaN 0.0 False \n", - "8 NaN NaN 0.0 False \n", - "9 NaN NaN 0.0 False \n", - "\n", - " identifier sent_at \n", - "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n", - "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n", - "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n", - "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n", - "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 \n", - "5 19bc916108fc6938f52cb96f7e087941 2022-01-16 00:00:00+01:00 \n", - "6 605ff764c617d3cd28dbbdd72be8f9a2 2021-09-24 00:00:00+02:00 \n", - "7 acc3e0404646c57502b480dc052c4fe1 2021-09-30 00:00:00+02:00 \n", - "8 f64eac11f2cd8f0efa196f8ad173178e 2022-01-27 00:00:00+01:00 \n", - "9 076a0c97d09cf1a0ec3e19c7f2529f2b 2021-10-01 00:00:00+02:00 " - ] - }, - "execution_count": 237, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns[~df1_campaigns[\"to_be_synced\"]].head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 240, - "id": "4bf2cbdd-6236-43b8-9a13-74f2803a6ac5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_at
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:002021-03-28 18:34:20.616136+02:002022-04-15 22:52:04.397693+02:00
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:002021-03-28 18:21:04.297213+02:002022-04-15 22:52:04.397693+02:00
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:002021-03-28 18:18:49.991042+02:002022-04-15 22:52:04.397693+02:00
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:002021-03-28 18:09:50.915354+02:002022-04-15 22:52:04.397693+02:00
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:002021-03-28 18:05:08.507398+02:002022-04-15 22:52:04.397693+02:00
54809581045992021-03-28 18:12:12+02:002021-03-28 18:01:06+02:002021-03-28 18:04:18+02:002021-03-28 18:04:19.662496+02:002022-04-15 22:52:04.397693+02:00
611605582805792021-03-28 18:16:14+02:002021-03-28 18:00:59+02:002021-03-28 18:16:09+02:002021-03-28 18:16:10.974208+02:002022-04-15 22:52:04.397693+02:00
71871458341732021-03-29 05:31:37+02:002021-03-28 18:00:58+02:002021-03-28 18:31:02+02:002021-03-28 18:31:07.619032+02:002022-04-15 22:52:04.397693+02:00
8171195834992NaN2021-03-28 18:00:58+02:002021-03-28 18:28:00+02:002021-03-28 18:28:03.574600+02:002022-04-15 22:52:04.397693+02:00
9140015835343NaN2021-03-28 18:00:58+02:002021-03-28 18:20:48+02:002021-03-28 18:20:49.258826+02:002022-04-15 22:52:04.397693+02:00
\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "0 19793 58 112597 NaN \n", - "1 14211 58 113666 NaN \n", - "2 13150 58 280561 NaN \n", - "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", - "4 5175 58 103972 NaN \n", - "5 4809 58 104599 2021-03-28 18:12:12+02:00 \n", - "6 11605 58 280579 2021-03-28 18:16:14+02:00 \n", - "7 18714 58 34173 2021-03-29 05:31:37+02:00 \n", - "8 17119 58 34992 NaN \n", - "9 14001 58 35343 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", - "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", - "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", - "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", - "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", - "5 2021-03-28 18:01:06+02:00 2021-03-28 18:04:18+02:00 \n", - "6 2021-03-28 18:00:59+02:00 2021-03-28 18:16:09+02:00 \n", - "7 2021-03-28 18:00:58+02:00 2021-03-28 18:31:02+02:00 \n", - "8 2021-03-28 18:00:58+02:00 2021-03-28 18:28:00+02:00 \n", - "9 2021-03-28 18:00:58+02:00 2021-03-28 18:20:48+02:00 \n", - "\n", - " created_at updated_at \n", - "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "5 2021-03-28 18:04:19.662496+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "6 2021-03-28 18:16:10.974208+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "7 2021-03-28 18:31:07.619032+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "8 2021-03-28 18:28:03.574600+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "9 2021-03-28 18:20:49.258826+02:00 2022-04-15 22:52:04.397693+02:00 " - ] - }, - "execution_count": 240, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 2. campaigns stats\n", - "\n", - "df1_campaign_stats.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 242, - "id": "0bf6f3d8-40f3-4268-a89d-fc962acd6c4a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0.000000\n", - "campaign_id 0.000000\n", - "customer_id 0.000000\n", - "opened_at 0.807672\n", - "sent_at 0.000969\n", - "delivered_at 0.021495\n", - "created_at 0.000000\n", - "updated_at 0.000000\n", - "dtype: float64" - ] - }, - "execution_count": 242, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaign_stats.isna().sum() / df1_campaign_stats.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 243, - "id": "2d3140db-fa86-41dd-81c9-2c6ca1e9402e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "campaign_id int64\n", - "customer_id int64\n", - "opened_at object\n", - "sent_at object\n", - "delivered_at object\n", - "created_at object\n", - "updated_at object\n", - "dtype: object" - ] - }, - "execution_count": 243, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaign_stats.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 244, - "id": "e4cc1b7c-5956-41c3-ad59-2738c5f2778c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 6214808\n", - "campaign_id 949\n", - "customer_id 130472\n", - "opened_at 1102699\n", - "sent_at 152184\n", - "delivered_at 380248\n", - "created_at 4295988\n", - "updated_at 2176478\n", - "dtype: int64" - ] - }, - "execution_count": 244, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print(df1_campaign_stats.shape[0])\n", - "print(df1_campaign_stats.nunique())" - ] - }, - { - "cell_type": "code", - "execution_count": 262, - "id": "8735c5dd-1d02-4dae-804e-70ee1be08df8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_at
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:002021-03-28 18:34:20.616136+02:002022-04-15 22:52:04.397693+02:00
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:002021-03-28 18:21:04.297213+02:002022-04-15 22:52:04.397693+02:00
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:002021-03-28 18:18:49.991042+02:002022-04-15 22:52:04.397693+02:00
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:002021-03-28 18:09:50.915354+02:002022-04-15 22:52:04.397693+02:00
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:002021-03-28 18:05:08.507398+02:002022-04-15 22:52:04.397693+02:00
\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "0 19793 58 112597 NaN \n", - "1 14211 58 113666 NaN \n", - "2 13150 58 280561 NaN \n", - "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", - "4 5175 58 103972 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", - "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", - "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", - "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", - "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", - "\n", - " created_at updated_at \n", - "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 " - ] - }, - "execution_count": 262, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 3. merge campaigns and campaigns stats\n", - "\n", - "df1_campaign_stats.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 273, - "id": "1e88efca-96b1-4977-b633-25d13830633e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1319243, 1319245, 1319247, 1319248, 1319250, 1319259, 1319260,\n", - " 1319262])" - ] - }, - "execution_count": 273, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# étape préalable au merge : les identifiants de campagne des deux tabes sont - ils égaux ?\n", - "\n", - "id_campaigns = np.sort(df1_campaigns[\"id\"].unique())\n", - "id_campaigns_stats = np.sort(df1_campaign_stats[\"campaign_id\"].unique())\n", - "np.setdiff1d(id_campaigns, id_campaigns_stats)" - ] - }, - { - "cell_type": "code", - "execution_count": 275, - "id": "43440e38-b141-43f1-9e0c-fa8559218e76", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
7891319243DRE Exposer le récit 13 mars1112021-09-24 11:56:09.307905+02:002021-09-24 11:56:09.307905+02:00NaNNaN0.0False698d51a19d8a121ce581499d7b7016682020-03-03 00:00:00+01:00
7911319245SDR Relance invit petit dej voyage voyages1092021-09-24 11:56:09.323919+02:002021-09-24 11:56:09.323919+02:00NaNNaN0.0False2723d092b63885e0d7c260cc007e8b9d2020-02-24 00:00:00+01:00
7931319247Au Mucem en 2020972021-09-24 11:56:09.339127+02:002021-09-24 11:56:09.339127+02:00NaNNaN0.0Falsee2ef524fbf3d9fe611d5a8e90fefdc9c2020-01-31 00:00:00+01:00
7941319248DRE Giono922021-09-24 11:56:09.346887+02:002021-09-24 11:56:09.346887+02:00NaNNaN0.0False92cc227532d17e56e07902b254dfad102020-01-29 00:00:00+01:00
7961319250Portes ouvertes \"Voyage, voyages\" au Mucem | M...772021-09-24 11:56:09.362114+02:002021-09-24 11:56:09.362114+02:00NaNNaN0.0False28dd2c7955ce926456240b2ff0100bde2020-01-13 00:00:00+01:00
8051319259Save the date | Vernissage \"Voyage, voyages\" a...382021-09-24 11:56:09.432720+02:002021-09-24 11:56:09.432720+02:00NaNNaN0.0Falsea5771bce93e200c36f7cd9dfd0e5deaa2019-11-20 00:00:00+01:00
8061319260Portes ouvertes \"Massilia Toy\" au Mucem | Merc...372021-09-24 11:56:09.440465+02:002021-09-24 11:56:09.440465+02:00NaNNaN0.0Falsea5bfc9e07964f8dddeb95fc584cd965d2019-11-20 00:00:00+01:00
8081319262TENK S-1 Corse172021-09-24 11:56:09.456460+02:002021-09-24 11:56:09.456460+02:00NaNNaN0.0False70efdf2ec9b086079795c442636b55fb2019-11-07 00:00:00+01:00
\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "789 1319243 DRE Exposer le récit 13 mars 111 \n", - "791 1319245 SDR Relance invit petit dej voyage voyages 109 \n", - "793 1319247 Au Mucem en 2020 97 \n", - "794 1319248 DRE Giono 92 \n", - "796 1319250 Portes ouvertes \"Voyage, voyages\" au Mucem | M... 77 \n", - "805 1319259 Save the date | Vernissage \"Voyage, voyages\" a... 38 \n", - "806 1319260 Portes ouvertes \"Massilia Toy\" au Mucem | Merc... 37 \n", - "808 1319262 TENK S-1 Corse 17 \n", - "\n", - " created_at updated_at \\\n", - "789 2021-09-24 11:56:09.307905+02:00 2021-09-24 11:56:09.307905+02:00 \n", - "791 2021-09-24 11:56:09.323919+02:00 2021-09-24 11:56:09.323919+02:00 \n", - "793 2021-09-24 11:56:09.339127+02:00 2021-09-24 11:56:09.339127+02:00 \n", - "794 2021-09-24 11:56:09.346887+02:00 2021-09-24 11:56:09.346887+02:00 \n", - "796 2021-09-24 11:56:09.362114+02:00 2021-09-24 11:56:09.362114+02:00 \n", - "805 2021-09-24 11:56:09.432720+02:00 2021-09-24 11:56:09.432720+02:00 \n", - "806 2021-09-24 11:56:09.440465+02:00 2021-09-24 11:56:09.440465+02:00 \n", - "808 2021-09-24 11:56:09.456460+02:00 2021-09-24 11:56:09.456460+02:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "789 NaN NaN 0.0 False \n", - "791 NaN NaN 0.0 False \n", - "793 NaN NaN 0.0 False \n", - "794 NaN NaN 0.0 False \n", - "796 NaN NaN 0.0 False \n", - "805 NaN NaN 0.0 False \n", - "806 NaN NaN 0.0 False \n", - "808 NaN NaN 0.0 False \n", - "\n", - " identifier sent_at \n", - "789 698d51a19d8a121ce581499d7b701668 2020-03-03 00:00:00+01:00 \n", - "791 2723d092b63885e0d7c260cc007e8b9d 2020-02-24 00:00:00+01:00 \n", - "793 e2ef524fbf3d9fe611d5a8e90fefdc9c 2020-01-31 00:00:00+01:00 \n", - "794 92cc227532d17e56e07902b254dfad10 2020-01-29 00:00:00+01:00 \n", - "796 28dd2c7955ce926456240b2ff0100bde 2020-01-13 00:00:00+01:00 \n", - "805 a5771bce93e200c36f7cd9dfd0e5deaa 2019-11-20 00:00:00+01:00 \n", - "806 a5bfc9e07964f8dddeb95fc584cd965d 2019-11-20 00:00:00+01:00 \n", - "808 70efdf2ec9b086079795c442636b55fb 2019-11-07 00:00:00+01:00 " - ] - }, - "execution_count": 275, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# ci-dessous des campagnes sans customer associé dans la table\n", - "# elles seront retirées lors du merge car pas utiles à notre étude\n", - "# on fera un merge à gauche en se basant sur campaign_stats \n", - "\n", - "df1_campaigns[df1_campaigns[\"id\"].isin([1319243, 1319245, 1319247, 1319248, 1319250, 1319259, 1319260,\n", - " 1319262])]" - ] - }, - { - "cell_type": "code", - "execution_count": 338, - "id": "6cbcd261-a6ba-497c-929b-29a714e1812d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_atcampaign_namecampaign_service_idcampaign_created_atcampaign_updated_atcampaign_sent_atcampaign_identifier
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:002021-03-28 18:34:20.616136+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:002021-03-28 18:21:04.297213+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:002021-03-28 18:18:49.991042+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:002021-03-28 18:09:50.915354+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:002021-03-28 18:05:08.507398+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "0 19793 58 112597 NaN \n", - "1 14211 58 113666 NaN \n", - "2 13150 58 280561 NaN \n", - "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", - "4 5175 58 103972 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", - "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", - "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", - "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", - "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", - "\n", - " created_at updated_at \\\n", - "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "\n", - " campaign_name campaign_service_id \\\n", - "0 Le Mucem chez vous, gardons le lien #22 404 \n", - "1 Le Mucem chez vous, gardons le lien #22 404 \n", - "2 Le Mucem chez vous, gardons le lien #22 404 \n", - "3 Le Mucem chez vous, gardons le lien #22 404 \n", - "4 Le Mucem chez vous, gardons le lien #22 404 \n", - "\n", - " campaign_created_at campaign_updated_at \\\n", - "0 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "1 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "2 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "3 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "4 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "\n", - " campaign_sent_at campaign_identifier \n", - "0 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "1 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "2 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "3 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "4 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a " - ] - }, - "execution_count": 338, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# merge \n", - "\n", - "# de campaigns on supprile les var valant tjrs NaN et to_be_synced qui semble pas très informatif\n", - "\n", - "df1_campaigns_full = pd.merge(df1_campaign_stats, \n", - " df1_campaigns[[\"id\", \"name\", \"service_id\", \"created_at\", \"updated_at\", \"sent_at\", \"identifier\"]].add_prefix(\"campaign_\"),\n", - " on = \"campaign_id\", how = \"left\")\n", - "df1_campaigns_full.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 328, - "id": "81e549e9-d165-439a-a824-17f053a33983", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0\n", - "campaign_id 0\n", - "customer_id 0\n", - "opened_at 5019527\n", - "sent_at 6023\n", - "delivered_at 133590\n", - "created_at 0\n", - "updated_at 0\n", - "campaign_name 0\n", - "campaign_service_id 0\n", - "campaign_created_at 0\n", - "campaign_updated_at 0\n", - "campaign_sent_at 6\n", - "campaign_identifier 0\n", - "dtype: int64" - ] - }, - "execution_count": 328, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full.isna().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 297, - "id": "aa249cdc-e0ac-41ec-b6f8-b9459f31eca3", - "metadata": {}, - "outputs": [], - "source": [ - "# lien entre sent at et campaign sent at ? \n", - "# à quoi correspond la date de la campagne, est-ce le premier envoi à un client ?\n", - "\n", - "# first step : transform dates to have the good format\n", - "# VERY time-consuming bc the df has 6M lines !!!!\n", - "\n", - "from dateutil import parser\n", - "\n", - "def convert_to_datetime(column):\n", - " return column.apply(lambda x: parser.parse(str(x)) if pd.notna(x) else pd.NaT)\n", - "\n", - "# Liste des colonnes à convertir\n", - "columns_to_convert = [\"sent_at\", \"delivered_at\", \"created_at\", \"updated_at\", \n", - " \"campaign_sent_at\", \"campaign_created_at\", \"campaign_updated_at\"]\n", - "\n", - "# Appliquer la fonction à chaque colonne spécifiée\n", - "df1_campaigns_full[columns_to_convert] = df1_campaigns_full[columns_to_convert].apply(convert_to_datetime)\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 329, - "id": "f2b05227-e8d8-4ca8-8359-dc3471841763", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "UTC: 2021-03-28 16:01:09+00:00\n", - "Local: 2021-03-28 18:01:09+02:00\n" - ] - } - ], - "source": [ - "# Exemple d'élément\n", - "date_string = '2021-03-28 18:01:09+02:00'\n", - "\n", - "# Convertir en datetime en utilisant pd.to_datetime avec utc=True\n", - "datetime_object_utc = pd.to_datetime(date_string, utc=True)\n", - "print(\"UTC:\", datetime_object_utc)\n", - "\n", - "# Convertir en datetime en utilisant pd.to_datetime avec utc=False (ou sans spécifier utc)\n", - "datetime_object_local = pd.to_datetime(date_string, utc=False)\n", - "print(\"Local:\", datetime_object_local)" - ] - }, - { - "cell_type": "code", - "execution_count": 332, - "id": "63fa4af8-0c28-4b20-97e2-560da4d4b77e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "UTC: 2021-03-28 16:00:00+00:00\n", - "Différence en heures: 1.5\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "# Exemple d'élément\n", - "date_string = '2021-03-28 18:00:00+02:00'\n", - "\n", - "# Convertir en datetime en utilisant pd.to_datetime avec utc=True\n", - "datetime_object_utc = pd.to_datetime(date_string, utc=True)\n", - "\n", - "# Afficher l'objet datetime en UTC\n", - "print(\"UTC:\", datetime_object_utc)\n", - "\n", - "# Effectuer un calcul de différence entre deux dates en UTC\n", - "other_date_string = '2021-03-28 20:30:00+03:00'\n", - "other_datetime_object_utc = pd.to_datetime(other_date_string, utc=True)\n", - "\n", - "# Calculer la différence entre les dates\n", - "time_difference = other_datetime_object_utc - datetime_object_utc\n", - "\n", - "# Afficher la différence\n", - "print(\"Différence en heures:\", time_difference.total_seconds() / 3600)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 321, - "id": "9388c008-e2a5-463d-95d2-8f5fea0d6a5a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_atcampaign_namecampaign_service_idcampaign_created_atcampaign_updated_atcampaign_sent_atcampaign_identifier
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:002021-03-28 18:34:20.616136+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:002021-03-28 18:21:04.297213+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:002021-03-28 18:18:49.991042+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:002021-03-28 18:09:50.915354+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:002021-03-28 18:05:08.507398+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "0 19793 58 112597 NaN \n", - "1 14211 58 113666 NaN \n", - "2 13150 58 280561 NaN \n", - "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", - "4 5175 58 103972 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", - "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", - "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", - "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", - "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", - "\n", - " created_at updated_at \\\n", - "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "\n", - " campaign_name campaign_service_id \\\n", - "0 Le Mucem chez vous, gardons le lien #22 404 \n", - "1 Le Mucem chez vous, gardons le lien #22 404 \n", - "2 Le Mucem chez vous, gardons le lien #22 404 \n", - "3 Le Mucem chez vous, gardons le lien #22 404 \n", - "4 Le Mucem chez vous, gardons le lien #22 404 \n", - "\n", - " campaign_created_at campaign_updated_at \\\n", - "0 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "1 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "2 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "3 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "4 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "\n", - " campaign_sent_at campaign_identifier \n", - "0 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "1 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "2 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "3 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "4 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a " - ] - }, - "execution_count": 321, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# etape supp pour s'assurer que les dates non convertibles sont bien des Nan\n", - "\n", - "df1_campaigns_full[columns_to_convert] = df1_campaigns_full[columns_to_convert].apply(pd.to_datetime, errors='coerce')\n", - "df1_campaigns_full.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 333, - "id": "edb2f622-bf19-4c51-8213-1b8a3dacf72e", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_624/1309539541.py:3: FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`\n", - " df1_campaigns_full[\"sent_at\"] = pd.to_datetime(df1_campaigns_full[\"sent_at\"] , utc=False).astype('datetime64[ns]')\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True, at position 18", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[333], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# autre methode\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m df1_campaigns_full[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msent_at\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf1_campaigns_full\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msent_at\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdatetime64[ns]\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/generic.py:6534\u001b[0m, in \u001b[0;36mNDFrame.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 6530\u001b[0m results \u001b[38;5;241m=\u001b[39m [ser\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy) \u001b[38;5;28;01mfor\u001b[39;00m _, ser \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mitems()]\n\u001b[1;32m 6532\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6533\u001b[0m \u001b[38;5;66;03m# else, only a single dtype is given\u001b[39;00m\n\u001b[0;32m-> 6534\u001b[0m new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6535\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor_from_mgr(new_data, axes\u001b[38;5;241m=\u001b[39mnew_data\u001b[38;5;241m.\u001b[39maxes)\n\u001b[1;32m 6536\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mastype\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/internals/managers.py:414\u001b[0m, in \u001b[0;36mBaseBlockManager.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 412\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mastype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43musing_cow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43musing_copy_on_write\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/internals/managers.py:354\u001b[0m, in \u001b[0;36mBaseBlockManager.apply\u001b[0;34m(self, f, align_keys, **kwargs)\u001b[0m\n\u001b[1;32m 352\u001b[0m applied \u001b[38;5;241m=\u001b[39m b\u001b[38;5;241m.\u001b[39mapply(f, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 353\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 354\u001b[0m applied \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 355\u001b[0m result_blocks \u001b[38;5;241m=\u001b[39m extend_blocks(applied, result_blocks)\n\u001b[1;32m 357\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39mfrom_blocks(result_blocks, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes)\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/internals/blocks.py:616\u001b[0m, in \u001b[0;36mBlock.astype\u001b[0;34m(self, dtype, copy, errors, using_cow)\u001b[0m\n\u001b[1;32m 596\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;124;03mCoerce to the new dtype.\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 612\u001b[0m \u001b[38;5;124;03mBlock\u001b[39;00m\n\u001b[1;32m 613\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 614\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m--> 616\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array_safe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 618\u001b[0m new_values \u001b[38;5;241m=\u001b[39m maybe_coerce_values(new_values)\n\u001b[1;32m 620\u001b[0m refs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:238\u001b[0m, in \u001b[0;36mastype_array_safe\u001b[0;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[1;32m 235\u001b[0m dtype \u001b[38;5;241m=\u001b[39m dtype\u001b[38;5;241m.\u001b[39mnumpy_dtype\n\u001b[1;32m 237\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 238\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m 240\u001b[0m \u001b[38;5;66;03m# e.g. _astype_nansafe can fail on object-dtype of strings\u001b[39;00m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# trying to convert to float\u001b[39;00m\n\u001b[1;32m 242\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:183\u001b[0m, in \u001b[0;36mastype_array\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m 180\u001b[0m values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 183\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[43m_astype_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[38;5;66;03m# in pandas we don't store numpy str dtypes, so convert to object\u001b[39;00m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(dtype, np\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:110\u001b[0m, in \u001b[0;36m_astype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mis_np_dtype(dtype, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mM\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m to_datetime\n\u001b[0;32m--> 110\u001b[0m dti \u001b[38;5;241m=\u001b[39m \u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mravel\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 111\u001b[0m dta \u001b[38;5;241m=\u001b[39m dti\u001b[38;5;241m.\u001b[39m_data\u001b[38;5;241m.\u001b[39mreshape(arr\u001b[38;5;241m.\u001b[39mshape)\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dta\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\u001b[38;5;241m.\u001b[39m_ndarray\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:1131\u001b[0m, in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 1123\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1124\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_maybe_cache\" has incompatible type\u001b[39;00m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;66;03m# \"Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,\u001b[39;00m\n\u001b[1;32m 1126\u001b[0m \u001b[38;5;66;03m# ndarray[Any, Any], Series]\"; expected \"Union[List[Any], Tuple[Any, ...],\u001b[39;00m\n\u001b[1;32m 1127\u001b[0m \u001b[38;5;66;03m# Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]\"\u001b[39;00m\n\u001b[1;32m 1128\u001b[0m argc \u001b[38;5;241m=\u001b[39m cast(\n\u001b[1;32m 1129\u001b[0m Union[\u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m, ExtensionArray, np\u001b[38;5;241m.\u001b[39mndarray, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSeries\u001b[39m\u001b[38;5;124m\"\u001b[39m, Index], arg\n\u001b[1;32m 1130\u001b[0m )\n\u001b[0;32m-> 1131\u001b[0m cache_array \u001b[38;5;241m=\u001b[39m \u001b[43m_maybe_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43margc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcache\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert_listlike\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1132\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m OutOfBoundsDatetime:\n\u001b[1;32m 1133\u001b[0m \u001b[38;5;66;03m# caching attempts to create a DatetimeIndex, which may raise\u001b[39;00m\n\u001b[1;32m 1134\u001b[0m \u001b[38;5;66;03m# an OOB. If that's the desired behavior, then just reraise...\u001b[39;00m\n\u001b[1;32m 1135\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:254\u001b[0m, in \u001b[0;36m_maybe_cache\u001b[0;34m(arg, format, cache, convert_listlike)\u001b[0m\n\u001b[1;32m 252\u001b[0m unique_dates \u001b[38;5;241m=\u001b[39m unique(arg)\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(unique_dates) \u001b[38;5;241m<\u001b[39m \u001b[38;5;28mlen\u001b[39m(arg):\n\u001b[0;32m--> 254\u001b[0m cache_dates \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_listlike\u001b[49m\u001b[43m(\u001b[49m\u001b[43munique_dates\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;66;03m# GH#45319\u001b[39;00m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:490\u001b[0m, in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmixed\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 488\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _array_strptime_with_fallback(arg, name, utc, \u001b[38;5;28mformat\u001b[39m, exact, errors)\n\u001b[0;32m--> 490\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m \u001b[43mobjects_to_datetime64ns\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43mdayfirst\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdayfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43myearfirst\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43myearfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_object\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 499\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 500\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;66;03m# is in UTC\u001b[39;00m\n\u001b[1;32m 502\u001b[0m dta \u001b[38;5;241m=\u001b[39m DatetimeArray(result, dtype\u001b[38;5;241m=\u001b[39mtz_to_dtype(tz_parsed))\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/arrays/datetimes.py:2346\u001b[0m, in \u001b[0;36mobjects_to_datetime64ns\u001b[0;34m(data, dayfirst, yearfirst, utc, errors, allow_object)\u001b[0m\n\u001b[1;32m 2343\u001b[0m \u001b[38;5;66;03m# if str-dtype, convert\u001b[39;00m\n\u001b[1;32m 2344\u001b[0m data \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray(data, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mobject_)\n\u001b[0;32m-> 2346\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m \u001b[43mtslib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray_to_datetime\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2347\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2348\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2349\u001b[0m \u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2350\u001b[0m \u001b[43m \u001b[49m\u001b[43mdayfirst\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdayfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2351\u001b[0m \u001b[43m \u001b[49m\u001b[43myearfirst\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43myearfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2352\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2354\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 2355\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 2356\u001b[0m \u001b[38;5;66;03m# is in UTC\u001b[39;00m\n\u001b[1;32m 2357\u001b[0m \u001b[38;5;66;03m# Return i8 values to denote unix timestamps\u001b[39;00m\n\u001b[1;32m 2358\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\u001b[38;5;241m.\u001b[39mview(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mi8\u001b[39m\u001b[38;5;124m\"\u001b[39m), tz_parsed\n", - "File \u001b[0;32mtslib.pyx:403\u001b[0m, in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mtslib.pyx:552\u001b[0m, in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mtslib.pyx:480\u001b[0m, in \u001b[0;36mpandas._libs.tslib.array_to_datetime\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mconversion.pyx:716\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.conversion.convert_timezone\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True, at position 18" - ] - } - ], - "source": [ - "# autre methode\n", - "\n", - "df1_campaigns_full[\"sent_at\"] = pd.to_datetime(df1_campaigns_full[\"sent_at\"] , utc=False).astype('datetime64[ns]')" - ] - }, - { - "cell_type": "code", - "execution_count": 334, - "id": "92bbdf80-e34b-4146-864a-b0dd4e04c5e9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " sent_at\n", - "0 2022-01-01 10:34:56+00:00\n", - "1 2022-02-01 13:45:30+00:00\n", - "2 2022-03-01 16:30:00+00:00\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "# Exemple de DataFrame avec une colonne 'sent_at' contenant des dates en format string\n", - "df1_campaigns_full = pd.DataFrame({\n", - " 'sent_at': ['2022-01-01 12:34:56+02:00', '2022-02-01 15:45:30+02:00', '2022-03-01 18:30:00+02:00']\n", - "})\n", - "\n", - "# Convertir la colonne 'sent_at' en datetime en conservant l'information sur le fuseau horaire (datetime64[ns])\n", - "df1_campaigns_full['sent_at'] = pd.to_datetime(df1_campaigns_full['sent_at'], utc=True)\n", - "\n", - "# Afficher le DataFrame résultant\n", - "print(df1_campaigns_full)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 324, - "id": "a8ad41ed-433c-4f7e-9f67-888dcb54d24e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "campaign_id\n", - "1 2021-03-24 00:00:00+01:00\n", - "2 2021-03-14 00:00:00+01:00\n", - "3 2021-03-15 00:00:00+01:00\n", - "4 2021-03-21 00:00:00+01:00\n", - "5 2021-03-10 00:00:00+01:00\n", - " ... \n", - "1321501 2023-11-06 13:30:12+01:00\n", - "1321503 2023-11-07 17:31:16+01:00\n", - "1321505 2023-11-08 11:15:52+01:00\n", - "1321506 2023-11-08 19:00:25+01:00\n", - "1321507 2023-11-08 19:00:37+01:00\n", - "Name: campaign_sent_at, Length: 949, dtype: datetime64[ns, tzoffset(None, 3600)]\n" - ] - }, - { - "ename": "TypeError", - "evalue": "'bool' object is not callable", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[324], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# comparison \u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(df1_campaigns_full\u001b[38;5;241m.\u001b[39mgroupby(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_id\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_sent_at\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mfirst()) \u001b[38;5;66;03m# envoi des campagnes\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mdf1_campaigns_full\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcampaign_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msent_at\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mmin())\n", - "\u001b[0;31mTypeError\u001b[0m: 'bool' object is not callable" - ] - } - ], - "source": [ - "# comparison \n", - "\n", - "print(df1_campaigns_full.groupby(\"campaign_id\")[\"campaign_sent_at\"].first()) # envoi des campagnes\n", - "print(df1_campaigns_full.groupby(\"campaign_id\")[\"sent_at\"].dropna().min())" - ] - }, - { - "cell_type": "code", - "execution_count": 325, - "id": "1771adeb-bbc9-40ef-afb6-49a6b3ff2e79", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0\n", - "campaign_id 0\n", - "customer_id 0\n", - "opened_at 5019527\n", - "sent_at 2741358\n", - "delivered_at 2807002\n", - "created_at 1547090\n", - "updated_at 766803\n", - "campaign_name 0\n", - "campaign_service_id 0\n", - "campaign_created_at 2216183\n", - "campaign_updated_at 2561268\n", - "campaign_sent_at 3504140\n", - "campaign_identifier 0\n", - "dtype: int64" - ] - }, - "execution_count": 325, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full.isna().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 326, - "id": "1a5a1d98-a076-4988-aaf3-e753c117e518", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0\n", - "name 0\n", - "service_id 0\n", - "created_at 0\n", - "updated_at 0\n", - "process_id 957\n", - "report_url 957\n", - "category 2\n", - "to_be_synced 0\n", - "identifier 0\n", - "sent_at 3\n", - "dtype: int64" - ] - }, - "execution_count": 326, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns.isna().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 320, - "id": "749df9f0-8a18-49f0-a820-05cc674a5fce", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2020-06-02 10:24:08+02:00\n", - "2020-06-02 10:24:08+02:00\n" - ] - } - ], - "source": [ - "# df1_campaigns_full[\"sent_at\"] = \n", - "print(pd.to_datetime(df1_campaigns_full[\"sent_at\"], errors='coerce').min())\n", - "print(df1_campaigns_full[\"sent_at\"].dropna().min())" - ] - }, - { - "cell_type": "code", - "execution_count": 313, - "id": "f46000b8-4b7b-4121-b0af-8e8a388ce33c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "6214808" - ] - }, - "execution_count": 313, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full[\"sent_at\"].apply(lambda x : isinstance(x, datetime)).sum()\n", - "# df1_campaigns_full[\"sent_at\"].tail(30)" - ] - }, - { - "cell_type": "code", - "execution_count": 314, - "id": "0ae4aeca-6edc-44e8-bc72-74f19b62a8f3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "6214808" - ] - }, - "execution_count": 314, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 340, - "id": "4ef4d3d5-5f0a-4798-86d1-1b56641fcce4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "campaign_id int64\n", - "customer_id int64\n", - "opened_at object\n", - "sent_at object\n", - "delivered_at object\n", - "created_at object\n", - "updated_at object\n", - "campaign_name object\n", - "campaign_service_id int64\n", - "campaign_created_at object\n", - "campaign_updated_at object\n", - "campaign_sent_at object\n", - "campaign_identifier object\n", - "dtype: object" - ] - }, - "execution_count": 340, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 341, - "id": "8de270ac-c205-4686-8d53-6cd52d8239d0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_atcampaign_namecampaign_service_idcampaign_created_atcampaign_updated_atcampaign_sent_atcampaign_identifier
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:002021-03-28 18:34:20.616136+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:002021-03-28 18:21:04.297213+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:002021-03-28 18:18:49.991042+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:002021-03-28 18:09:50.915354+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:002021-03-28 18:05:08.507398+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "0 19793 58 112597 NaN \n", - "1 14211 58 113666 NaN \n", - "2 13150 58 280561 NaN \n", - "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", - "4 5175 58 103972 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", - "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", - "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", - "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", - "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", - "\n", - " created_at updated_at \\\n", - "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "\n", - " campaign_name campaign_service_id \\\n", - "0 Le Mucem chez vous, gardons le lien #22 404 \n", - "1 Le Mucem chez vous, gardons le lien #22 404 \n", - "2 Le Mucem chez vous, gardons le lien #22 404 \n", - "3 Le Mucem chez vous, gardons le lien #22 404 \n", - "4 Le Mucem chez vous, gardons le lien #22 404 \n", - "\n", - " campaign_created_at campaign_updated_at \\\n", - "0 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "1 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "2 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "3 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "4 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "\n", - " campaign_sent_at campaign_identifier \n", - "0 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "1 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "2 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "3 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "4 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a " - ] - }, - "execution_count": 341, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 342, - "id": "e2d81bd1-9fd6-40c7-96f9-998771a4fd77", - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "time data '2022-05-06 12:00:23+02:00' does not match format '%Y-%m-%d %H:%M:%S.%f%z'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[342], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# conversion colonne par colonne\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# precision a la Ns\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m df1_campaigns_full[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf1_campaigns_full\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcreated_at\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatetime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrptime\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mY-\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mm-\u001b[39;49m\u001b[38;5;132;43;01m%d\u001b[39;49;00m\u001b[38;5;124;43m \u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mH:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mM:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mS.\u001b[39;49m\u001b[38;5;132;43;01m%f\u001b[39;49;00m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mz\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnotna\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mNaT\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"updated_at\"] = df1_campaigns_full[\"updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_created_at\"] = df1_campaigns_full[\"campaign_created_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_updated_at\"] = df1_campaigns_full[\"campaign_updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"delivered_at\"] = df1_campaigns_full[\"delivered_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_sent_at\"] = df1_campaigns_full[\"campaign_sent_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/series.py:4764\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 4630\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 4631\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4636\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 4637\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m 4638\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4639\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4640\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4755\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[1;32m 4756\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 4757\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4758\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4759\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4760\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4761\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4762\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4763\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 4764\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/apply.py:1209\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m 1208\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1209\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/apply.py:1289\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1288\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1289\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1290\u001b[0m \u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m 1291\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1815\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n", - "File \u001b[0;32mlib.pyx:2926\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n", - "Cell \u001b[0;32mIn[342], line 4\u001b[0m, in \u001b[0;36m\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# conversion colonne par colonne\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# precision a la Ns\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m df1_campaigns_full[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m df1_campaigns_full[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x : \u001b[43mdatetime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrptime\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mY-\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mm-\u001b[39;49m\u001b[38;5;132;43;01m%d\u001b[39;49;00m\u001b[38;5;124;43m \u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mH:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mM:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mS.\u001b[39;49m\u001b[38;5;132;43;01m%f\u001b[39;49;00m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mz\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mnotna(x) \u001b[38;5;28;01melse\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mNaT)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"updated_at\"] = df1_campaigns_full[\"updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_created_at\"] = df1_campaigns_full[\"campaign_created_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_updated_at\"] = df1_campaigns_full[\"campaign_updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"delivered_at\"] = df1_campaigns_full[\"delivered_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# df1_campaigns_full[\"campaign_sent_at\"] = df1_campaigns_full[\"campaign_sent_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\u001b[39;00m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/_strptime.py:568\u001b[0m, in \u001b[0;36m_strptime_datetime\u001b[0;34m(cls, data_string, format)\u001b[0m\n\u001b[1;32m 565\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_strptime_datetime\u001b[39m(\u001b[38;5;28mcls\u001b[39m, data_string, \u001b[38;5;28mformat\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%a\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mb \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mH:\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mM:\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mS \u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mY\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 566\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Return a class cls instance based on the input string and the\u001b[39;00m\n\u001b[1;32m 567\u001b[0m \u001b[38;5;124;03m format string.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 568\u001b[0m tt, fraction, gmtoff_fraction \u001b[38;5;241m=\u001b[39m \u001b[43m_strptime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_string\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 569\u001b[0m tzname, gmtoff \u001b[38;5;241m=\u001b[39m tt[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m:]\n\u001b[1;32m 570\u001b[0m args \u001b[38;5;241m=\u001b[39m tt[:\u001b[38;5;241m6\u001b[39m] \u001b[38;5;241m+\u001b[39m (fraction,)\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/_strptime.py:349\u001b[0m, in \u001b[0;36m_strptime\u001b[0;34m(data_string, format)\u001b[0m\n\u001b[1;32m 347\u001b[0m found \u001b[38;5;241m=\u001b[39m format_regex\u001b[38;5;241m.\u001b[39mmatch(data_string)\n\u001b[1;32m 348\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m found:\n\u001b[0;32m--> 349\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtime data \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m does not match format \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 350\u001b[0m (data_string, \u001b[38;5;28mformat\u001b[39m))\n\u001b[1;32m 351\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data_string) \u001b[38;5;241m!=\u001b[39m found\u001b[38;5;241m.\u001b[39mend():\n\u001b[1;32m 352\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munconverted data remains: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 353\u001b[0m data_string[found\u001b[38;5;241m.\u001b[39mend():])\n", - "\u001b[0;31mValueError\u001b[0m: time data '2022-05-06 12:00:23+02:00' does not match format '%Y-%m-%d %H:%M:%S.%f%z'" - ] - } - ], - "source": [ - "# conversion colonne par colonne\n", - "\n", - "# precision a la Ns\n", - "df1_campaigns_full[\"created_at\"] = df1_campaigns_full[\"created_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\n", - "# df1_campaigns_full[\"updated_at\"] = df1_campaigns_full[\"updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\n", - "# df1_campaigns_full[\"campaign_created_at\"] = df1_campaigns_full[\"campaign_created_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\n", - "# df1_campaigns_full[\"campaign_updated_at\"] = df1_campaigns_full[\"campaign_updated_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\n", - "\n", - "# precision a la sec\n", - "# df1_campaigns_full[\"opened_at\"] = df1_campaigns_full[\"opened_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S%z\") if pd.notna(x) else pd.NaT)\n", - "# df1_campaigns_full[\"sent_at\"] = df1_campaigns_full[\"sent_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S%z\") if pd.notna(x) else pd.NaT)\n", - "# df1_campaigns_full[\"delivered_at\"] = df1_campaigns_full[\"delivered_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S%z\") if pd.notna(x) else pd.NaT)\n", - "# df1_campaigns_full[\"campaign_sent_at\"] = df1_campaigns_full[\"campaign_sent_at\"].apply(lambda x : datetime.strptime(str(x), \"%Y-%m-%d %H:%M:%S.%f%z\") if pd.notna(x) else pd.NaT)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 346, - "id": "5a1fe408-ae4c-4957-a39b-50a4d5423319", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "6214778 2023-10-23 09:31:50.168545+02:00\n", - "6214779 2023-10-23 09:31:28.570386+02:00\n", - "6214780 2023-10-23 09:02:26.494195+02:00\n", - "6214781 2023-10-23 09:32:34.454957+02:00\n", - "6214782 2023-10-23 09:31:29.139217+02:00\n", - "6214783 2023-10-23 09:32:06.223901+02:00\n", - "6214784 2023-10-23 09:31:52.702258+02:00\n", - "6214785 2023-10-23 09:31:45.051321+02:00\n", - "6214786 2023-10-23 09:32:55.350092+02:00\n", - "6214787 2023-10-23 09:33:14.007405+02:00\n", - "6214788 2023-10-23 09:32:44.645432+02:00\n", - "6214789 2023-10-23 09:02:27.578671+02:00\n", - "6214790 2023-10-23 09:34:24.879045+02:00\n", - "6214791 2023-10-23 09:34:02.075066+02:00\n", - "6214792 2023-10-23 09:33:20.349918+02:00\n", - "6214793 2023-10-23 09:34:25.631234+02:00\n", - "6214794 2023-10-23 09:34:27.581150+02:00\n", - "6214795 2023-10-23 09:31:45.192200+02:00\n", - "6214796 2023-10-23 09:32:52.018890+02:00\n", - "6214797 2023-10-23 09:02:01.558573+02:00\n", - "6214798 2023-10-23 09:34:48.543213+02:00\n", - "6214799 2023-10-23 09:32:15.109097+02:00\n", - "6214800 2023-10-23 09:34:26.590416+02:00\n", - "6214801 2023-10-23 09:32:02.729363+02:00\n", - "6214802 2023-10-23 09:31:41.055337+02:00\n", - "6214803 2023-10-23 09:32:36.564696+02:00\n", - "6214804 2023-10-23 09:32:50.829641+02:00\n", - "6214805 2023-10-23 09:33:31.102500+02:00\n", - "6214806 2023-10-23 09:31:55.768547+02:00\n", - "6214807 2023-10-23 09:33:57.477892+02:00\n", - "Name: created_at, dtype: object" - ] - }, - "execution_count": 346, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full[\"created_at\"].tail(30)" - ] - }, - { - "cell_type": "code", - "execution_count": 349, - "id": "feb3fc34-51f2-45d5-8f34-9940a14e9060", - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "time data \"2023-10-23 09:31:50.168545+02:00\" doesn't match format \"%Y-%m-%d %H:%M:%S%z\", at position 1. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[349], line 9\u001b[0m\n\u001b[1;32m 4\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame({\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate_str\u001b[39m\u001b[38;5;124m'\u001b[39m: [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m2022-05-06 12:00:23+02:00\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m2023-10-23 09:31:50.168545+02:00\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 6\u001b[0m })\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# Convertir la colonne 'date_str' en datetime en conservant l'information sur le fuseau horaire (datetime64[ns])\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdate_str\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Afficher le DataFrame résultant\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28mprint\u001b[39m(df)\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:1112\u001b[0m, in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 1110\u001b[0m result \u001b[38;5;241m=\u001b[39m arg\u001b[38;5;241m.\u001b[39mmap(cache_array)\n\u001b[1;32m 1111\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1112\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_listlike\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_values\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1113\u001b[0m result \u001b[38;5;241m=\u001b[39m arg\u001b[38;5;241m.\u001b[39m_constructor(values, index\u001b[38;5;241m=\u001b[39marg\u001b[38;5;241m.\u001b[39mindex, name\u001b[38;5;241m=\u001b[39marg\u001b[38;5;241m.\u001b[39mname)\n\u001b[1;32m 1114\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arg, (ABCDataFrame, abc\u001b[38;5;241m.\u001b[39mMutableMapping)):\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:488\u001b[0m, in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;66;03m# `format` could be inferred, or user didn't ask for mixed-format parsing.\u001b[39;00m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmixed\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 488\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_array_strptime_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 490\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m objects_to_datetime64ns(\n\u001b[1;32m 491\u001b[0m arg,\n\u001b[1;32m 492\u001b[0m dayfirst\u001b[38;5;241m=\u001b[39mdayfirst,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 496\u001b[0m allow_object\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 497\u001b[0m )\n\u001b[1;32m 499\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 500\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;66;03m# is in UTC\u001b[39;00m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:519\u001b[0m, in \u001b[0;36m_array_strptime_with_fallback\u001b[0;34m(arg, name, utc, fmt, exact, errors)\u001b[0m\n\u001b[1;32m 508\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_array_strptime_with_fallback\u001b[39m(\n\u001b[1;32m 509\u001b[0m arg,\n\u001b[1;32m 510\u001b[0m name,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 514\u001b[0m errors: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 515\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Index:\n\u001b[1;32m 516\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 517\u001b[0m \u001b[38;5;124;03m Call array_strptime, with fallback behavior depending on 'errors'.\u001b[39;00m\n\u001b[1;32m 518\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 519\u001b[0m result, timezones \u001b[38;5;241m=\u001b[39m \u001b[43marray_strptime\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfmt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mutc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 520\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(tz \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m tz \u001b[38;5;129;01min\u001b[39;00m timezones):\n\u001b[1;32m 521\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _return_parsed_timezone_results(result, timezones, utc, name)\n", - "File \u001b[0;32mstrptime.pyx:534\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime.array_strptime\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mstrptime.pyx:355\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime.array_strptime\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: time data \"2023-10-23 09:31:50.168545+02:00\" doesn't match format \"%Y-%m-%d %H:%M:%S%z\", at position 1. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this." - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "# Exemple de DataFrame avec une colonne 'date_str' contenant des dates en formats différents\n", - "df = pd.DataFrame({\n", - " 'date_str': ['2022-05-06 12:00:23+02:00', '2023-10-23 09:31:50.168545+02:00']\n", - "})\n", - "\n", - "# Convertir la colonne 'date_str' en datetime en conservant l'information sur le fuseau horaire (datetime64[ns])\n", - "df['date'] = pd.to_datetime(df['date_str'], utc=True)\n", - "\n", - "# Afficher le DataFrame résultant\n", - "print(df)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 350, - "id": "da01f2d8-3c1e-4d43-92ef-6236a24963d0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " date_str date\n", - "0 2022-05-06 12:00:23+02:00 2022-05-06 10:00:23+00:00\n", - "1 023-10-23 09:31:50.168545+02:00 023-10-23 09:31:50.168545+02:00\n" - ] - } - ], - "source": [ - "\n", - "# Exemple de DataFrame avec une colonne 'date_str' contenant des dates en formats différents\n", - "df = pd.DataFrame({\n", - " 'date_str': ['2022-05-06 12:00:23+02:00', '023-10-23 09:31:50.168545+02:00']\n", - "})\n", - "\n", - "# Fonction lambda pour convertir la colonne 'date_str' en datetime avec précision\n", - "def convert_to_datetime_with_precision(x):\n", - " if pd.notna(x):\n", - " # Format avec nanosecondes\n", - " try:\n", - " return pd.to_datetime(x, utc=True)\n", - " except ValueError:\n", - " pass\n", - "\n", - " # Format sans nanosecondes\n", - " try:\n", - " return pd.to_datetime(x, utc=True, format=\"%Y-%m-%d %H:%M:%S%z\")\n", - " except ValueError:\n", - " pass\n", - "\n", - " return x\n", - "\n", - "# Appliquer la fonction lambda à la colonne 'date_str'\n", - "df['date'] = df['date_str'].apply(convert_to_datetime_with_precision)\n", - "\n", - "# Afficher le DataFrame résultant\n", - "print(df)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 351, - "id": "e6ca12c8-be66-4537-b759-036123b74b7b", - "metadata": {}, - "outputs": [ - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[351], line 7\u001b[0m\n\u001b[1;32m 3\u001b[0m columns_to_convert \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msent_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdelivered_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mupdated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_sent_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_created_at\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcampaign_updated_at\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m column \u001b[38;5;129;01min\u001b[39;00m columns_to_convert :\n\u001b[0;32m----> 7\u001b[0m df1_campaigns_full[column] \u001b[38;5;241m=\u001b[39m \u001b[43mdf1_campaigns_full\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcolumn\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconvert_to_datetime_with_precision\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/series.py:4764\u001b[0m, in \u001b[0;36mSeries.apply\u001b[0;34m(self, func, convert_dtype, args, by_row, **kwargs)\u001b[0m\n\u001b[1;32m 4629\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 4630\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 4631\u001b[0m func: AggFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4636\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 4637\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m Series:\n\u001b[1;32m 4638\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4639\u001b[0m \u001b[38;5;124;03m Invoke function on values of Series.\u001b[39;00m\n\u001b[1;32m 4640\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4755\u001b[0m \u001b[38;5;124;03m dtype: float64\u001b[39;00m\n\u001b[1;32m 4756\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 4757\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSeriesApply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4758\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4759\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4760\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4761\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_row\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby_row\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4762\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4763\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 4764\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/apply.py:1209\u001b[0m, in \u001b[0;36mSeriesApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1206\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_compat()\n\u001b[1;32m 1208\u001b[0m \u001b[38;5;66;03m# self.func is Callable\u001b[39;00m\n\u001b[0;32m-> 1209\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/apply.py:1289\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[38;5;66;03m# row-wise access\u001b[39;00m\n\u001b[1;32m 1284\u001b[0m \u001b[38;5;66;03m# apply doesn't have a `na_action` keyword and for backward compat reasons\u001b[39;00m\n\u001b[1;32m 1285\u001b[0m \u001b[38;5;66;03m# we need to give `na_action=\"ignore\"` for categorical data.\u001b[39;00m\n\u001b[1;32m 1286\u001b[0m \u001b[38;5;66;03m# TODO: remove the `na_action=\"ignore\"` when that default has been changed in\u001b[39;00m\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;66;03m# Categorical (GH51645).\u001b[39;00m\n\u001b[1;32m 1288\u001b[0m action \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj\u001b[38;5;241m.\u001b[39mdtype, CategoricalDtype) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1289\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_map_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1290\u001b[0m \u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurried\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\n\u001b[1;32m 1291\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1296\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/base.py:921\u001b[0m, in \u001b[0;36mIndexOpsMixin._map_values\u001b[0;34m(self, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 918\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arr, ExtensionArray):\n\u001b[1;32m 919\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m arr\u001b[38;5;241m.\u001b[39mmap(mapper, na_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[0;32m--> 921\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_action\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/algorithms.py:1814\u001b[0m, in \u001b[0;36mmap_array\u001b[0;34m(arr, mapper, na_action, convert)\u001b[0m\n\u001b[1;32m 1812\u001b[0m values \u001b[38;5;241m=\u001b[39m arr\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 1813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_action \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1814\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1815\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1816\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lib\u001b[38;5;241m.\u001b[39mmap_infer_mask(\n\u001b[1;32m 1817\u001b[0m values, mapper, mask\u001b[38;5;241m=\u001b[39misna(values)\u001b[38;5;241m.\u001b[39mview(np\u001b[38;5;241m.\u001b[39muint8), convert\u001b[38;5;241m=\u001b[39mconvert\n\u001b[1;32m 1818\u001b[0m )\n", - "File \u001b[0;32mlib.pyx:2926\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n", - "Cell \u001b[0;32mIn[350], line 11\u001b[0m, in \u001b[0;36mconvert_to_datetime_with_precision\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mnotna(x):\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Format avec nanosecondes\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m:\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:1146\u001b[0m, in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 1144\u001b[0m result \u001b[38;5;241m=\u001b[39m convert_listlike(argc, \u001b[38;5;28mformat\u001b[39m)\n\u001b[1;32m 1145\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1146\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_listlike\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marray\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43marg\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arg, \u001b[38;5;28mbool\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, np\u001b[38;5;241m.\u001b[39mbool_):\n\u001b[1;32m 1148\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mbool\u001b[39m(result) \u001b[38;5;66;03m# TODO: avoid this kludge.\u001b[39;00m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:488\u001b[0m, in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;66;03m# `format` could be inferred, or user didn't ask for mixed-format parsing.\u001b[39;00m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmixed\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 488\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_array_strptime_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 490\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m objects_to_datetime64ns(\n\u001b[1;32m 491\u001b[0m arg,\n\u001b[1;32m 492\u001b[0m dayfirst\u001b[38;5;241m=\u001b[39mdayfirst,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 496\u001b[0m allow_object\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 497\u001b[0m )\n\u001b[1;32m 499\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 500\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;66;03m# is in UTC\u001b[39;00m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:521\u001b[0m, in \u001b[0;36m_array_strptime_with_fallback\u001b[0;34m(arg, name, utc, fmt, exact, errors)\u001b[0m\n\u001b[1;32m 519\u001b[0m result, timezones \u001b[38;5;241m=\u001b[39m array_strptime(arg, fmt, exact\u001b[38;5;241m=\u001b[39mexact, errors\u001b[38;5;241m=\u001b[39merrors, utc\u001b[38;5;241m=\u001b[39mutc)\n\u001b[1;32m 520\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(tz \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m tz \u001b[38;5;129;01min\u001b[39;00m timezones):\n\u001b[0;32m--> 521\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_return_parsed_timezone_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimezones\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 523\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _box_as_indexlike(result, utc\u001b[38;5;241m=\u001b[39mutc, name\u001b[38;5;241m=\u001b[39mname)\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/tools/datetimes.py:344\u001b[0m, in \u001b[0;36m_return_parsed_timezone_results\u001b[0;34m(result, timezones, utc, name)\u001b[0m\n\u001b[1;32m 342\u001b[0m tz_results \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mempty(\u001b[38;5;28mlen\u001b[39m(result), dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mobject\u001b[39m)\n\u001b[1;32m 343\u001b[0m non_na_timezones \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n\u001b[0;32m--> 344\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m zone \u001b[38;5;129;01min\u001b[39;00m \u001b[43munique\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimezones\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 345\u001b[0m mask \u001b[38;5;241m=\u001b[39m timezones \u001b[38;5;241m==\u001b[39m zone\n\u001b[1;32m 346\u001b[0m dta \u001b[38;5;241m=\u001b[39m DatetimeArray(result[mask])\u001b[38;5;241m.\u001b[39mtz_localize(zone)\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/algorithms.py:401\u001b[0m, in \u001b[0;36munique\u001b[0;34m(values)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21munique\u001b[39m(values):\n\u001b[1;32m 308\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;124;03m Return unique values based on a hash table.\u001b[39;00m\n\u001b[1;32m 310\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[38;5;124;03m array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)\u001b[39;00m\n\u001b[1;32m 400\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 401\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43munique_with_mask\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/opt/mamba/lib/python3.10/site-packages/pandas/core/algorithms.py:440\u001b[0m, in \u001b[0;36munique_with_mask\u001b[0;34m(values, mask)\u001b[0m\n\u001b[1;32m 438\u001b[0m table \u001b[38;5;241m=\u001b[39m hashtable(\u001b[38;5;28mlen\u001b[39m(values))\n\u001b[1;32m 439\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 440\u001b[0m uniques \u001b[38;5;241m=\u001b[39m \u001b[43mtable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43munique\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 441\u001b[0m uniques \u001b[38;5;241m=\u001b[39m _reconstruct_data(uniques, original\u001b[38;5;241m.\u001b[39mdtype, original)\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m uniques\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "# loop over all dates to convert \n", - "\n", - "columns_to_convert = [\"sent_at\", \"delivered_at\", \"created_at\", \"updated_at\", \n", - " \"campaign_sent_at\", \"campaign_created_at\", \"campaign_updated_at\"]\n", - "\n", - "for column in columns_to_convert :\n", - " df1_campaigns_full[column] = df1_campaigns_full[column].apply(convert_to_datetime_with_precision)" - ] - }, - { - "cell_type": "code", - "execution_count": 356, - "id": "61e1f604-23ce-4cb2-8ad3-523c62e80e68", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_atcampaign_namecampaign_service_idcampaign_created_atcampaign_updated_atcampaign_sent_atcampaign_identifier
408100223728588268NaN2021-03-28 18:00:57+02:002021-03-28 18:43:38+02:002021-03-28 18:43:42.928685+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
40761394552581472NaN2021-03-28 18:00:57+02:002021-03-28 18:03:26+02:002021-03-28 18:03:28.229670+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
4081572140705879782021-03-29 08:38:06+02:002021-03-28 18:00:57+02:002021-03-28 18:20:45+02:002021-03-28 18:20:49.431860+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
409483369695832211NaN2021-03-28 18:00:57+02:002021-03-28 18:09:18+02:002021-03-28 18:09:20.571462+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
4094827966258309802021-04-04 17:54:51+02:002021-03-28 18:00:57+02:002021-03-28 18:03:29+02:002021-03-28 18:13:33.153720+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
.............................................
89611241758334002021-03-28 21:27:57+02:002021-03-28 18:17:35+02:002021-03-28 18:17:36+02:002021-03-28 18:17:36.735495+02:002021-03-28 19:27:57.503961+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
203801820558106495NaN2021-03-28 18:30:08+02:002021-03-28 18:30:11+02:002021-03-28 18:30:11.453742+02:002021-03-28 18:30:11.474019+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
272982210758104781NaN2021-03-28 18:39:55+02:002021-03-28 18:39:56+02:002021-03-28 18:39:56.430679+02:002021-03-28 18:39:56.435656+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
291072238958111570NaN2021-03-28 18:40:38+02:002021-03-28 18:40:40+02:002021-03-28 18:40:40.975334+02:002021-03-28 18:40:40.979852+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
2796229258581194962021-03-29 21:03:52+02:002021-03-28 20:52:26+02:002021-03-28 20:52:30+02:002021-03-28 20:52:30.261271+02:002021-03-29 19:03:52.527753+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
\n", - "

26464 rows × 14 columns

\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "4081002 23728 58 8268 NaN \n", - "4076139 4552 58 1472 NaN \n", - "4081572 14070 58 7978 2021-03-29 08:38:06+02:00 \n", - "4094833 6969 58 32211 NaN \n", - "4094827 9662 58 30980 2021-04-04 17:54:51+02:00 \n", - "... ... ... ... ... \n", - "8961 12417 58 33400 2021-03-28 21:27:57+02:00 \n", - "20380 18205 58 106495 NaN \n", - "27298 22107 58 104781 NaN \n", - "29107 22389 58 111570 NaN \n", - "27962 29258 58 119496 2021-03-29 21:03:52+02:00 \n", - "\n", - " sent_at delivered_at \\\n", - "4081002 2021-03-28 18:00:57+02:00 2021-03-28 18:43:38+02:00 \n", - "4076139 2021-03-28 18:00:57+02:00 2021-03-28 18:03:26+02:00 \n", - "4081572 2021-03-28 18:00:57+02:00 2021-03-28 18:20:45+02:00 \n", - "4094833 2021-03-28 18:00:57+02:00 2021-03-28 18:09:18+02:00 \n", - "4094827 2021-03-28 18:00:57+02:00 2021-03-28 18:03:29+02:00 \n", - "... ... ... \n", - "8961 2021-03-28 18:17:35+02:00 2021-03-28 18:17:36+02:00 \n", - "20380 2021-03-28 18:30:08+02:00 2021-03-28 18:30:11+02:00 \n", - "27298 2021-03-28 18:39:55+02:00 2021-03-28 18:39:56+02:00 \n", - "29107 2021-03-28 18:40:38+02:00 2021-03-28 18:40:40+02:00 \n", - "27962 2021-03-28 20:52:26+02:00 2021-03-28 20:52:30+02:00 \n", - "\n", - " created_at updated_at \\\n", - "4081002 2021-03-28 18:43:42.928685+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4076139 2021-03-28 18:03:28.229670+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4081572 2021-03-28 18:20:49.431860+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4094833 2021-03-28 18:09:20.571462+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4094827 2021-03-28 18:13:33.153720+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "... ... ... \n", - "8961 2021-03-28 18:17:36.735495+02:00 2021-03-28 19:27:57.503961+02:00 \n", - "20380 2021-03-28 18:30:11.453742+02:00 2021-03-28 18:30:11.474019+02:00 \n", - "27298 2021-03-28 18:39:56.430679+02:00 2021-03-28 18:39:56.435656+02:00 \n", - "29107 2021-03-28 18:40:40.975334+02:00 2021-03-28 18:40:40.979852+02:00 \n", - "27962 2021-03-28 20:52:30.261271+02:00 2021-03-29 19:03:52.527753+02:00 \n", - "\n", - " campaign_name campaign_service_id \\\n", - "4081002 Le Mucem chez vous, gardons le lien #22 404 \n", - "4076139 Le Mucem chez vous, gardons le lien #22 404 \n", - "4081572 Le Mucem chez vous, gardons le lien #22 404 \n", - "4094833 Le Mucem chez vous, gardons le lien #22 404 \n", - "4094827 Le Mucem chez vous, gardons le lien #22 404 \n", - "... ... ... \n", - "8961 Le Mucem chez vous, gardons le lien #22 404 \n", - "20380 Le Mucem chez vous, gardons le lien #22 404 \n", - "27298 Le Mucem chez vous, gardons le lien #22 404 \n", - "29107 Le Mucem chez vous, gardons le lien #22 404 \n", - "27962 Le Mucem chez vous, gardons le lien #22 404 \n", - "\n", - " campaign_created_at campaign_updated_at \\\n", - "4081002 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "4076139 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "4081572 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "4094833 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "4094827 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "... ... ... \n", - "8961 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "20380 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "27298 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "29107 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "27962 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "\n", - " campaign_sent_at campaign_identifier \n", - "4081002 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "4076139 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "4081572 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "4094833 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "4094827 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "... ... ... \n", - "8961 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "20380 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "27298 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "29107 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "27962 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "\n", - "[26464 rows x 14 columns]" - ] - }, - "execution_count": 356, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# tests\n", - "\n", - "df1_campaigns_full[df1_campaigns_full[\"campaign_id\"]==58].sort_values(\"sent_at\")" - ] - }, - { - "cell_type": "code", - "execution_count": 364, - "id": "0c07c533-0e24-4e53-96d5-c51db97425a6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_atcampaign_namecampaign_service_idcampaign_created_atcampaign_updated_atcampaign_sent_atcampaign_identifier
16815714786305252910452021-06-18 14:23:57+02:002021-06-17 00:01:05+02:002021-06-17 18:15:02+02:002021-06-17 19:11:05.780774+02:002022-04-15 23:11:44.290919+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
868571425630525272258NaN2021-06-17 00:01:05+02:002021-06-17 18:14:37+02:002021-06-17 19:10:59.410221+02:002022-04-15 23:11:44.290919+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
488660574486630525284414NaN2021-06-17 00:01:05+02:002021-06-17 19:18:30+02:002021-06-17 19:24:37.325550+02:002022-04-15 23:11:44.290919+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
453503553818630525280714NaN2021-06-17 00:01:05+02:002021-06-17 07:18:06+02:002021-06-17 07:18:06.816543+02:002022-04-15 23:11:44.290919+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
600459556431630525289484NaN2021-06-17 00:01:05+02:002021-06-17 10:18:57+02:002021-06-17 10:18:57.692035+02:002022-04-15 23:11:44.290919+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
.............................................
514091566709630525112554NaN2021-06-17 14:00:35+02:002021-06-17 14:00:39+02:002021-06-17 14:00:39.523170+02:002021-06-17 14:00:39.551198+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
60490156910663052533100NaN2021-06-17 16:36:55+02:002021-06-17 16:36:55+02:002021-06-17 16:36:55.928814+02:002021-06-17 16:36:55.933170+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
478955572372630525119502NaN2021-06-17 18:25:17+02:002021-06-17 18:25:20+02:002021-06-17 19:13:02.489176+02:002021-06-17 19:13:02.520644+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
47725257228263052533826NaN2021-06-17 18:25:21+02:002021-06-17 18:25:26+02:002021-06-17 19:13:01.993836+02:002021-06-17 19:13:02.006886+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
532445576271630525119496NaN2021-06-17 20:46:39+02:002021-06-17 20:46:40+02:002021-06-17 20:46:40.441720+02:002021-06-17 20:46:40.449126+02:00com_ddcp_campagne_de_qualification_contacts__n...4742021-06-17 00:02:11.388346+02:002021-09-24 11:56:08.931051+02:002021-06-17 00:00:00+02:0025ddc0f8c9d3e22e03d3076f98d83cb2
\n", - "

15829 rows × 14 columns

\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "1681 571478 630525 291045 2021-06-18 14:23:57+02:00 \n", - "868 571425 630525 272258 NaN \n", - "488660 574486 630525 284414 NaN \n", - "453503 553818 630525 280714 NaN \n", - "600459 556431 630525 289484 NaN \n", - "... ... ... ... ... \n", - "514091 566709 630525 112554 NaN \n", - "604901 569106 630525 33100 NaN \n", - "478955 572372 630525 119502 NaN \n", - "477252 572282 630525 33826 NaN \n", - "532445 576271 630525 119496 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "1681 2021-06-17 00:01:05+02:00 2021-06-17 18:15:02+02:00 \n", - "868 2021-06-17 00:01:05+02:00 2021-06-17 18:14:37+02:00 \n", - "488660 2021-06-17 00:01:05+02:00 2021-06-17 19:18:30+02:00 \n", - "453503 2021-06-17 00:01:05+02:00 2021-06-17 07:18:06+02:00 \n", - "600459 2021-06-17 00:01:05+02:00 2021-06-17 10:18:57+02:00 \n", - "... ... ... \n", - "514091 2021-06-17 14:00:35+02:00 2021-06-17 14:00:39+02:00 \n", - "604901 2021-06-17 16:36:55+02:00 2021-06-17 16:36:55+02:00 \n", - "478955 2021-06-17 18:25:17+02:00 2021-06-17 18:25:20+02:00 \n", - "477252 2021-06-17 18:25:21+02:00 2021-06-17 18:25:26+02:00 \n", - "532445 2021-06-17 20:46:39+02:00 2021-06-17 20:46:40+02:00 \n", - "\n", - " created_at updated_at \\\n", - "1681 2021-06-17 19:11:05.780774+02:00 2022-04-15 23:11:44.290919+02:00 \n", - "868 2021-06-17 19:10:59.410221+02:00 2022-04-15 23:11:44.290919+02:00 \n", - "488660 2021-06-17 19:24:37.325550+02:00 2022-04-15 23:11:44.290919+02:00 \n", - "453503 2021-06-17 07:18:06.816543+02:00 2022-04-15 23:11:44.290919+02:00 \n", - "600459 2021-06-17 10:18:57.692035+02:00 2022-04-15 23:11:44.290919+02:00 \n", - "... ... ... \n", - "514091 2021-06-17 14:00:39.523170+02:00 2021-06-17 14:00:39.551198+02:00 \n", - "604901 2021-06-17 16:36:55.928814+02:00 2021-06-17 16:36:55.933170+02:00 \n", - "478955 2021-06-17 19:13:02.489176+02:00 2021-06-17 19:13:02.520644+02:00 \n", - "477252 2021-06-17 19:13:01.993836+02:00 2021-06-17 19:13:02.006886+02:00 \n", - "532445 2021-06-17 20:46:40.441720+02:00 2021-06-17 20:46:40.449126+02:00 \n", - "\n", - " campaign_name \\\n", - "1681 com_ddcp_campagne_de_qualification_contacts__n... \n", - "868 com_ddcp_campagne_de_qualification_contacts__n... \n", - "488660 com_ddcp_campagne_de_qualification_contacts__n... \n", - "453503 com_ddcp_campagne_de_qualification_contacts__n... \n", - "600459 com_ddcp_campagne_de_qualification_contacts__n... \n", - "... ... \n", - "514091 com_ddcp_campagne_de_qualification_contacts__n... \n", - "604901 com_ddcp_campagne_de_qualification_contacts__n... \n", - "478955 com_ddcp_campagne_de_qualification_contacts__n... \n", - "477252 com_ddcp_campagne_de_qualification_contacts__n... \n", - "532445 com_ddcp_campagne_de_qualification_contacts__n... \n", - "\n", - " campaign_service_id campaign_created_at \\\n", - "1681 474 2021-06-17 00:02:11.388346+02:00 \n", - "868 474 2021-06-17 00:02:11.388346+02:00 \n", - "488660 474 2021-06-17 00:02:11.388346+02:00 \n", - "453503 474 2021-06-17 00:02:11.388346+02:00 \n", - "600459 474 2021-06-17 00:02:11.388346+02:00 \n", - "... ... ... \n", - "514091 474 2021-06-17 00:02:11.388346+02:00 \n", - "604901 474 2021-06-17 00:02:11.388346+02:00 \n", - "478955 474 2021-06-17 00:02:11.388346+02:00 \n", - "477252 474 2021-06-17 00:02:11.388346+02:00 \n", - "532445 474 2021-06-17 00:02:11.388346+02:00 \n", - "\n", - " campaign_updated_at campaign_sent_at \\\n", - "1681 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "868 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "488660 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "453503 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "600459 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "... ... ... \n", - "514091 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "604901 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "478955 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "477252 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "532445 2021-09-24 11:56:08.931051+02:00 2021-06-17 00:00:00+02:00 \n", - "\n", - " campaign_identifier \n", - "1681 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "868 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "488660 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "453503 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "600459 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "... ... \n", - "514091 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "604901 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "478955 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "477252 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "532445 25ddc0f8c9d3e22e03d3076f98d83cb2 \n", - "\n", - "[15829 rows x 14 columns]" - ] - }, - "execution_count": 364, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full[df1_campaigns_full[\"campaign_id\"]==630525].sort_values(\"sent_at\")" - ] - }, - { - "cell_type": "markdown", - "id": "2ee0c057-876d-4534-9267-f7235957c8ce", - "metadata": {}, - "source": [ - "## Link stats" - ] - }, - { - "cell_type": "code", - "execution_count": 238, - "id": "c744b5bc-111a-40c0-8acf-bae1bedd7a97", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idclicked_atlink_idcustomer_idcreated_atupdated_at
012021-03-26 16:30:36+01:0012840332021-03-26 15:30:37.050161+01:002021-03-26 15:30:37.050161+01:00
122021-03-26 17:16:34+01:0021197682021-03-26 16:16:34.950871+01:002021-03-26 16:16:34.950871+01:00
22722021-03-28 20:03:32+02:00421131052021-03-28 18:03:32.736394+02:002021-03-28 18:03:32.736394+02:00
342021-03-26 17:43:19+01:0032722802021-03-26 16:43:19.338321+01:002021-03-26 16:43:19.338321+01:00
452021-03-26 17:46:00+01:0031050952021-03-26 16:46:00.502945+01:002021-03-26 16:46:00.502945+01:00
.....................
1510462435532023-11-09 16:34:27+01:00146669982023-11-09 15:34:29.425425+01:002023-11-09 15:34:29.425425+01:00
1510472435542023-11-09 16:34:35+01:00146709982023-11-09 15:34:37.505505+01:002023-11-09 15:34:37.505505+01:00
1510482435592023-11-09 16:51:15+01:0014686829232023-11-09 15:51:17.439518+01:002023-11-09 15:51:17.439518+01:00
1510492435612023-11-09 16:59:42+01:0014677829232023-11-09 15:59:44.030922+01:002023-11-09 15:59:44.030922+01:00
1510502435642023-11-09 17:16:41+01:001469112543552023-11-09 16:16:43.012932+01:002023-11-09 16:16:43.012932+01:00
\n", - "

151051 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " id clicked_at link_id customer_id \\\n", - "0 1 2021-03-26 16:30:36+01:00 1 284033 \n", - "1 2 2021-03-26 17:16:34+01:00 2 119768 \n", - "2 272 2021-03-28 20:03:32+02:00 42 113105 \n", - "3 4 2021-03-26 17:43:19+01:00 3 272280 \n", - "4 5 2021-03-26 17:46:00+01:00 3 105095 \n", - "... ... ... ... ... \n", - "151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n", - "151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n", - "151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n", - "151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n", - "151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n", - "\n", - " created_at updated_at \n", - "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n", - "1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n", - "2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n", - "3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n", - "4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n", - "... ... ... \n", - "151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n", - "151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n", - "151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n", - "151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n", - "151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n", - "\n", - "[151051 rows x 6 columns]" - ] - }, - "execution_count": 238, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_link_stats" - ] - }, - { - "cell_type": "code", - "execution_count": 365, - "id": "e4e4b17c-3338-4b43-8d96-5af3cb304ff9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 0.0\n", - "clicked_at 0.0\n", - "link_id 0.0\n", - "customer_id 0.0\n", - "created_at 0.0\n", - "updated_at 0.0\n", - "dtype: float64" - ] - }, - "execution_count": 365, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# share of Nan for every variable\n", - "\n", - "df1_link_stats.isna().sum() / df1_link_stats.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 366, - "id": "846f24d8-8a34-4774-aab7-957a71f73a2c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id int64\n", - "clicked_at object\n", - "link_id int64\n", - "customer_id int64\n", - "created_at object\n", - "updated_at object\n", - "dtype: object" - ] - }, - "execution_count": 366, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# types of the variables \n", - "\n", - "df1_link_stats.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 367, - "id": "6ee886ee-9ddf-4a78-aee8-002e57d63183", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "id 151051\n", - "clicked_at 137121\n", - "link_id 10788\n", - "customer_id 26075\n", - "created_at 96565\n", - "updated_at 96565\n", - "dtype: int64" - ] - }, - "execution_count": 367, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# number of unique modalities\n", - "\n", - "df1_link_stats.nunique()" - ] - }, - { - "cell_type": "code", - "execution_count": 378, - "id": "79d02627-2c31-4843-a3da-4f5419b6fe9d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "10788" - ] - }, - "execution_count": 378, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(sorted(df1_link_stats[\"link_id\"].unique()))" - ] - }, - { - "cell_type": "code", - "execution_count": 379, - "id": "7651374c-3e69-4012-badf-c3d1bc6a477a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "949" - ] - }, - "execution_count": 379, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(sorted(df1_campaigns_full[\"campaign_id\"].unique()))" - ] - }, - { - "cell_type": "code", - "execution_count": 380, - "id": "c3a11b25-65bc-44b6-b49c-6192f04b1d36", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_atcampaign_namecampaign_service_idcampaign_created_atcampaign_updated_atcampaign_sent_atcampaign_identifier
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:002021-03-28 18:34:20.616136+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:002021-03-28 18:21:04.297213+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:002021-03-28 18:18:49.991042+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:002021-03-28 18:09:50.915354+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:002021-03-28 18:05:08.507398+02:002022-04-15 22:52:04.397693+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 18:01:45.448313+02:002021-09-24 11:56:07.723413+02:002021-03-28 00:00:00+01:004f4adcbf8c6f66dcfc8a3282ac2bf10a
.............................................
6214803830299413214832661552023-10-23 11:43:25+02:002023-10-23 11:32:33+02:002023-10-23 11:32:34+02:002023-10-23 09:32:36.564696+02:002023-10-23 09:43:28.038259+02:00dre_nov_202313182023-10-23 09:31:19.927528+02:002023-10-23 09:31:20.033243+02:002023-10-23 11:31:17+02:0076cf99d3614e23eabab16fb27e944bf9
621480483033071321483213552023-10-23 11:44:02+02:002023-10-23 11:32:49+02:002023-10-23 11:32:49+02:002023-10-23 09:32:50.829641+02:002023-10-23 09:44:04.119578+02:00dre_nov_202313182023-10-23 09:31:19.927528+02:002023-10-23 09:31:20.033243+02:002023-10-23 11:31:17+02:0076cf99d3614e23eabab16fb27e944bf9
621480583043461321483218492023-10-23 11:45:52+02:002023-10-23 11:33:28+02:002023-10-23 11:33:29+02:002023-10-23 09:33:31.102500+02:002023-10-23 09:45:55.927652+02:00dre_nov_202313182023-10-23 09:31:19.927528+02:002023-10-23 09:31:20.033243+02:002023-10-23 11:31:17+02:0076cf99d3614e23eabab16fb27e944bf9
6214806830203713214836677892023-10-23 11:47:32+02:002023-10-23 11:31:53+02:002023-10-23 11:31:54+02:002023-10-23 09:31:55.768547+02:002023-10-23 09:47:33.915460+02:00dre_nov_202313182023-10-23 09:31:19.927528+02:002023-10-23 09:31:20.033243+02:002023-10-23 11:31:17+02:0076cf99d3614e23eabab16fb27e944bf9
621480783049391321483294154NaN2023-10-23 11:33:54+02:002023-10-23 11:33:55+02:002023-10-23 09:33:57.477892+02:002023-10-23 09:33:57.842331+02:00dre_nov_202313182023-10-23 09:31:19.927528+02:002023-10-23 09:31:20.033243+02:002023-10-23 11:31:17+02:0076cf99d3614e23eabab16fb27e944bf9
\n", - "

6214808 rows × 14 columns

\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "0 19793 58 112597 NaN \n", - "1 14211 58 113666 NaN \n", - "2 13150 58 280561 NaN \n", - "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", - "4 5175 58 103972 NaN \n", - "... ... ... ... ... \n", - "6214803 8302994 1321483 266155 2023-10-23 11:43:25+02:00 \n", - "6214804 8303307 1321483 21355 2023-10-23 11:44:02+02:00 \n", - "6214805 8304346 1321483 21849 2023-10-23 11:45:52+02:00 \n", - "6214806 8302037 1321483 667789 2023-10-23 11:47:32+02:00 \n", - "6214807 8304939 1321483 294154 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", - "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", - "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", - "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", - "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", - "... ... ... \n", - "6214803 2023-10-23 11:32:33+02:00 2023-10-23 11:32:34+02:00 \n", - "6214804 2023-10-23 11:32:49+02:00 2023-10-23 11:32:49+02:00 \n", - "6214805 2023-10-23 11:33:28+02:00 2023-10-23 11:33:29+02:00 \n", - "6214806 2023-10-23 11:31:53+02:00 2023-10-23 11:31:54+02:00 \n", - "6214807 2023-10-23 11:33:54+02:00 2023-10-23 11:33:55+02:00 \n", - "\n", - " created_at updated_at \\\n", - "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "... ... ... \n", - "6214803 2023-10-23 09:32:36.564696+02:00 2023-10-23 09:43:28.038259+02:00 \n", - "6214804 2023-10-23 09:32:50.829641+02:00 2023-10-23 09:44:04.119578+02:00 \n", - "6214805 2023-10-23 09:33:31.102500+02:00 2023-10-23 09:45:55.927652+02:00 \n", - "6214806 2023-10-23 09:31:55.768547+02:00 2023-10-23 09:47:33.915460+02:00 \n", - "6214807 2023-10-23 09:33:57.477892+02:00 2023-10-23 09:33:57.842331+02:00 \n", - "\n", - " campaign_name campaign_service_id \\\n", - "0 Le Mucem chez vous, gardons le lien #22 404 \n", - "1 Le Mucem chez vous, gardons le lien #22 404 \n", - "2 Le Mucem chez vous, gardons le lien #22 404 \n", - "3 Le Mucem chez vous, gardons le lien #22 404 \n", - "4 Le Mucem chez vous, gardons le lien #22 404 \n", - "... ... ... \n", - "6214803 dre_nov_2023 1318 \n", - "6214804 dre_nov_2023 1318 \n", - "6214805 dre_nov_2023 1318 \n", - "6214806 dre_nov_2023 1318 \n", - "6214807 dre_nov_2023 1318 \n", - "\n", - " campaign_created_at campaign_updated_at \\\n", - "0 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "1 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "2 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "3 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "4 2021-03-28 18:01:45.448313+02:00 2021-09-24 11:56:07.723413+02:00 \n", - "... ... ... \n", - "6214803 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n", - "6214804 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n", - "6214805 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n", - "6214806 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n", - "6214807 2023-10-23 09:31:19.927528+02:00 2023-10-23 09:31:20.033243+02:00 \n", - "\n", - " campaign_sent_at campaign_identifier \n", - "0 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "1 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "2 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "3 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "4 2021-03-28 00:00:00+01:00 4f4adcbf8c6f66dcfc8a3282ac2bf10a \n", - "... ... ... \n", - "6214803 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n", - "6214804 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n", - "6214805 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n", - "6214806 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n", - "6214807 2023-10-23 11:31:17+02:00 76cf99d3614e23eabab16fb27e944bf9 \n", - "\n", - "[6214808 rows x 14 columns]" - ] - }, - "execution_count": 380, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/TP_merge_tables_clean.ipynb b/TP_merge_tables_clean.ipynb deleted file mode 100644 index 66b5228..0000000 --- a/TP_merge_tables_clean.ipynb +++ /dev/null @@ -1,1760 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "8c56d518-3634-4492-b249-0d8ef33dd527", - "metadata": {}, - "source": [ - "## First steps : package importations, set up working environment and import data" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "dede42d9-1262-45f7-bd7a-586ae800092a", - "metadata": {}, - "outputs": [], - "source": [ - "# importations\n", - "\n", - "import os \n", - "import s3fs\n", - "import pandas as pd\n", - "import re\n", - "from datetime import datetime, timezone, timedelta\n", - "import math\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "6ce34b58-b5ba-4b54-ba4d-fc82ef01b09c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bdc2324-data/1',\n", - " 'bdc2324-data/10',\n", - " 'bdc2324-data/101',\n", - " 'bdc2324-data/11',\n", - " 'bdc2324-data/12',\n", - " 'bdc2324-data/13',\n", - " 'bdc2324-data/14',\n", - " 'bdc2324-data/2',\n", - " 'bdc2324-data/3',\n", - " 'bdc2324-data/4',\n", - " 'bdc2324-data/5',\n", - " 'bdc2324-data/6',\n", - " 'bdc2324-data/7',\n", - " 'bdc2324-data/8',\n", - " 'bdc2324-data/9']" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# bucket for accessing the data\n", - "\n", - "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", - "\n", - "fs = s3fs.S3FileSystem(client_kwargs = {\"endpoint_url\" : S3_ENDPOINT_URL})\n", - "BUCKET = \"bdc2324-data\"\n", - "fs.ls(BUCKET)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "8eb13dd3-53c7-4a70-94a4-846168473aa1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['bdc2324-data/1/1campaign_stats.csv',\n", - " 'bdc2324-data/1/1campaigns.csv',\n", - " 'bdc2324-data/1/1categories.csv',\n", - " 'bdc2324-data/1/1countries.csv',\n", - " 'bdc2324-data/1/1currencies.csv',\n", - " 'bdc2324-data/1/1customer_target_mappings.csv',\n", - " 'bdc2324-data/1/1customersplus.csv',\n", - " 'bdc2324-data/1/1event_types.csv',\n", - " 'bdc2324-data/1/1events.csv',\n", - " 'bdc2324-data/1/1facilities.csv',\n", - " 'bdc2324-data/1/1link_stats.csv',\n", - " 'bdc2324-data/1/1pricing_formulas.csv',\n", - " 'bdc2324-data/1/1product_packs.csv',\n", - " 'bdc2324-data/1/1products.csv',\n", - " 'bdc2324-data/1/1products_groups.csv',\n", - " 'bdc2324-data/1/1purchases.csv',\n", - " 'bdc2324-data/1/1representation_category_capacities.csv',\n", - " 'bdc2324-data/1/1representations.csv',\n", - " 'bdc2324-data/1/1seasons.csv',\n", - " 'bdc2324-data/1/1structure_tag_mappings.csv',\n", - " 'bdc2324-data/1/1suppliers.csv',\n", - " 'bdc2324-data/1/1tags.csv',\n", - " 'bdc2324-data/1/1target_types.csv',\n", - " 'bdc2324-data/1/1targets.csv',\n", - " 'bdc2324-data/1/1tickets.csv',\n", - " 'bdc2324-data/1/1type_of_categories.csv',\n", - " 'bdc2324-data/1/1type_of_pricing_formulas.csv',\n", - " 'bdc2324-data/1/1type_ofs.csv']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "FILE_PATH_S3 = fs.ls(BUCKET)[0] # focus on the company number 1\n", - "files_path = fs.ls(FILE_PATH_S3)\n", - "files_path" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "1ea66c4e-1307-4f19-836e-3104fba2ff41", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_487/2894332003.py:10: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(file_in)\n" - ] - } - ], - "source": [ - "# loop to create dataframes related to company 1\n", - "\n", - "client_number = files_path[0].split(\"/\")[1]\n", - "print(client_number)\n", - "df_prefix = \"df\" + str(client_number) + \"_\"\n", - "\n", - "for i in range(len(files_path)) :\n", - " current_path = files_path[i]\n", - " with fs.open(current_path, mode=\"rb\") as file_in:\n", - " df = pd.read_csv(file_in)\n", - " # the pattern of the name is df1xxx\n", - " nom_dataframe = df_prefix + re.search(r'\\/(\\d+)\\/(\\d+)([a-zA-Z_]+)\\.csv$', current_path).group(3)\n", - " globals()[nom_dataframe] = df" - ] - }, - { - "cell_type": "markdown", - "id": "13d70b2c-6580-4caf-b839-10f72b2e0b39", - "metadata": {}, - "source": [ - "## Target, target types and customer target mapping" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "4dbc7fea-ac3b-4348-83fb-dfb1a460f936", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idis_importnamecreated_atupdated_atidentifier
069Falsemanual_dynamic_filter2020-11-30 09:46:18.881030+01:002020-11-30 09:46:18.881030+01:00e0f4b8693184850fefd6d2a38f10584e
148Truemanual_structure2020-11-04 17:16:19.548275+01:002020-11-04 17:16:19.548275+01:00382bca214204a2d3462f5ec2728d5d1e
21Truemanual_import2020-10-14 18:37:40.521623+02:002020-10-14 18:37:40.521623+02:0012213df2ce68a624e4c0070521437bac
356Falsemanual_static_filter2020-11-04 18:08:37.233486+01:002020-11-04 18:08:37.233486+01:00fb27e81baa4debc6a4e1a8639c20e808
\n", - "
" - ], - "text/plain": [ - " id is_import name created_at \\\n", - "0 69 False manual_dynamic_filter 2020-11-30 09:46:18.881030+01:00 \n", - "1 48 True manual_structure 2020-11-04 17:16:19.548275+01:00 \n", - "2 1 True manual_import 2020-10-14 18:37:40.521623+02:00 \n", - "3 56 False manual_static_filter 2020-11-04 18:08:37.233486+01:00 \n", - "\n", - " updated_at identifier \n", - "0 2020-11-30 09:46:18.881030+01:00 e0f4b8693184850fefd6d2a38f10584e \n", - "1 2020-11-04 17:16:19.548275+01:00 382bca214204a2d3462f5ec2728d5d1e \n", - "2 2020-10-14 18:37:40.521623+02:00 12213df2ce68a624e4c0070521437bac \n", - "3 2020-11-04 18:08:37.233486+01:00 fb27e81baa4debc6a4e1a8639c20e808 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 1. target types\n", - "df1_target_types.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "0e9f5dcb-0dc3-4052-b866-e5c4cb954a1f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtarget_type_idnamecreated_atupdated_at
021756DDCP PROMO Art contemporain - salle de chauffe...2021-01-04 15:00:05.401899+01:002021-03-02 18:38:19.025969+01:00
170156consentement optin scolaires2021-12-21 16:03:59.840785+01:002022-02-18 17:23:44.761388+01:00
213456DDCP Newsletter jeune public2020-11-10 09:43:19.667471+01:002021-03-02 18:38:19.052304+01:00
370056consentement optout scolaires2021-12-21 16:01:57.524946+01:002022-02-18 17:23:44.807776+01:00
496456DDCP achat billet nbr dep 190520212022-04-14 10:58:17.142834+02:002022-04-14 10:58:23.677264+02:00
\n", - "
" - ], - "text/plain": [ - " id target_type_id name \\\n", - "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n", - "1 701 56 consentement optin scolaires \n", - "2 134 56 DDCP Newsletter jeune public \n", - "3 700 56 consentement optout scolaires \n", - "4 964 56 DDCP achat billet nbr dep 19052021 \n", - "\n", - " created_at updated_at \n", - "0 2021-01-04 15:00:05.401899+01:00 2021-03-02 18:38:19.025969+01:00 \n", - "1 2021-12-21 16:03:59.840785+01:00 2022-02-18 17:23:44.761388+01:00 \n", - "2 2020-11-10 09:43:19.667471+01:00 2021-03-02 18:38:19.052304+01:00 \n", - "3 2021-12-21 16:01:57.524946+01:00 2022-02-18 17:23:44.807776+01:00 \n", - "4 2022-04-14 10:58:17.142834+02:00 2022-04-14 10:58:23.677264+02:00 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 2. targets\n", - "df1_targets.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "c5c62302-370a-462f-bd79-eac31593f65c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idcreated_atupdated_atnameextra_field
011848246454001302021-09-23 09:35:47.617275+02:002021-09-23 09:35:47.617275+02:00NaNNaN
111848256454003452021-09-23 09:35:47.668846+02:002021-09-23 09:35:47.668846+02:00NaNNaN
211848286454021262021-09-23 12:02:51.253269+02:002021-09-23 12:02:51.253269+02:00NaNNaN
311848296454031262021-09-23 12:20:47.394480+02:002021-09-23 12:20:47.394480+02:00NaNNaN
412957706473013462021-09-28 16:02:29.372608+02:002021-09-28 16:02:29.372608+02:00NaNNaN
\n", - "
" - ], - "text/plain": [ - " id customer_id target_id created_at \\\n", - "0 1184824 645400 130 2021-09-23 09:35:47.617275+02:00 \n", - "1 1184825 645400 345 2021-09-23 09:35:47.668846+02:00 \n", - "2 1184828 645402 126 2021-09-23 12:02:51.253269+02:00 \n", - "3 1184829 645403 126 2021-09-23 12:20:47.394480+02:00 \n", - "4 1295770 647301 346 2021-09-28 16:02:29.372608+02:00 \n", - "\n", - " updated_at name extra_field \n", - "0 2021-09-23 09:35:47.617275+02:00 NaN NaN \n", - "1 2021-09-23 09:35:47.668846+02:00 NaN NaN \n", - "2 2021-09-23 12:02:51.253269+02:00 NaN NaN \n", - "3 2021-09-23 12:20:47.394480+02:00 NaN NaN \n", - "4 2021-09-28 16:02:29.372608+02:00 NaN NaN " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 3. customer target mapping\n", - "\n", - "df1_customer_target_mappings.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "1a87cebf-c1dd-408d-a523-26633419da1e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtarget_type_idnametarget_type_is_importtarget_type_name
021756DDCP PROMO Art contemporain - salle de chauffe...Falsemanual_static_filter
170156consentement optin scolairesFalsemanual_static_filter
213456DDCP Newsletter jeune publicFalsemanual_static_filter
370056consentement optout scolairesFalsemanual_static_filter
496456DDCP achat billet nbr dep 19052021Falsemanual_static_filter
\n", - "
" - ], - "text/plain": [ - " id target_type_id name \\\n", - "0 217 56 DDCP PROMO Art contemporain - salle de chauffe... \n", - "1 701 56 consentement optin scolaires \n", - "2 134 56 DDCP Newsletter jeune public \n", - "3 700 56 consentement optout scolaires \n", - "4 964 56 DDCP achat billet nbr dep 19052021 \n", - "\n", - " target_type_is_import target_type_name \n", - "0 False manual_static_filter \n", - "1 False manual_static_filter \n", - "2 False manual_static_filter \n", - "3 False manual_static_filter \n", - "4 False manual_static_filter " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 4.1. merge target with target type\n", - "\n", - "df1_targets_full = pd.merge(df1_targets[[\"id\", \"target_type_id\", \"name\"]], df1_target_types[[\"id\",\"is_import\",\"name\"]].add_prefix(\"target_type_\"), left_on='target_type_id', right_on='target_type_id', how='left')\n", - "df1_targets_full.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "d48c1fff-73c2-4e75-8799-da2b80694be7", - "metadata": {}, - "outputs": [], - "source": [ - "# 4.2. merge df1_customer_target_mappings with df1_targets_full\n", - "\n", - "# change the position of the column target type id\n", - "\n", - "# Spécifiez le nom de la colonne à déplacer et la colonne après laquelle vous souhaitez la placer\n", - "column_to_move = 'target_type_id'\n", - "\n", - "# Récupérez l'index de la colonne de référence\n", - "reference_index = df1_targets_full.columns.get_loc(\"target_type_name\")\n", - "\n", - "# Créez une copie de la colonne que vous voulez déplacer\n", - "column_copy = df1_targets_full[column_to_move].copy()\n", - "\n", - "# Supprimez la colonne d'origine\n", - "df1_targets_full = df1_targets_full.drop(column_to_move, axis=1)\n", - "\n", - "# Utilisez la méthode insert pour déplacer la colonne à la nouvelle position\n", - "df1_targets_full.insert(reference_index - 1, column_to_move, column_copy)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "a874514a-c7dc-42d4-a440-dedd3a270e24", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
target_idtarget_nametarget_type_is_importtarget_type_idtarget_type_name
0217DDCP PROMO Art contemporain - salle de chauffe...False56manual_static_filter
1701consentement optin scolairesFalse56manual_static_filter
2134DDCP Newsletter jeune publicFalse56manual_static_filter
3700consentement optout scolairesFalse56manual_static_filter
4964DDCP achat billet nbr dep 19052021False56manual_static_filter
\n", - "
" - ], - "text/plain": [ - " target_id target_name \\\n", - "0 217 DDCP PROMO Art contemporain - salle de chauffe... \n", - "1 701 consentement optin scolaires \n", - "2 134 DDCP Newsletter jeune public \n", - "3 700 consentement optout scolaires \n", - "4 964 DDCP achat billet nbr dep 19052021 \n", - "\n", - " target_type_is_import target_type_id target_type_name \n", - "0 False 56 manual_static_filter \n", - "1 False 56 manual_static_filter \n", - "2 False 56 manual_static_filter \n", - "3 False 56 manual_static_filter \n", - "4 False 56 manual_static_filter " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_targets_full = df1_targets_full.rename(columns=lambda x: 'target_' + x if not x.startswith('target_') else x)\n", - "df1_targets_full.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "0db0172a-5119-4b7f-97f8-36fc5c985205", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcustomer_idtarget_idtarget_nametarget_type_is_importtarget_type_idtarget_type_name
01184824645400130DDCP PROMO Réseau livresFalse56manual_static_filter
11184825645400345Inscrits NL générale site webFalse56manual_static_filter
21184828645402126DDCP PROMO Art contemporainFalse56manual_static_filter
31184829645403126DDCP PROMO Art contemporainFalse56manual_static_filter
41295770647301346Votre première listeFalse56manual_static_filter
........................
7680192737545666983345Inscrits NL générale site webFalse56manual_static_filter
7680202737546666983346Votre première listeFalse56manual_static_filter
7680212737575666986346Votre première listeFalse56manual_static_filter
7680222737576666987345Inscrits NL générale site webFalse56manual_static_filter
7680232737577666987346Votre première listeFalse56manual_static_filter
\n", - "

768024 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " id customer_id target_id target_name \\\n", - "0 1184824 645400 130 DDCP PROMO Réseau livres \n", - "1 1184825 645400 345 Inscrits NL générale site web \n", - "2 1184828 645402 126 DDCP PROMO Art contemporain \n", - "3 1184829 645403 126 DDCP PROMO Art contemporain \n", - "4 1295770 647301 346 Votre première liste \n", - "... ... ... ... ... \n", - "768019 2737545 666983 345 Inscrits NL générale site web \n", - "768020 2737546 666983 346 Votre première liste \n", - "768021 2737575 666986 346 Votre première liste \n", - "768022 2737576 666987 345 Inscrits NL générale site web \n", - "768023 2737577 666987 346 Votre première liste \n", - "\n", - " target_type_is_import target_type_id target_type_name \n", - "0 False 56 manual_static_filter \n", - "1 False 56 manual_static_filter \n", - "2 False 56 manual_static_filter \n", - "3 False 56 manual_static_filter \n", - "4 False 56 manual_static_filter \n", - "... ... ... ... \n", - "768019 False 56 manual_static_filter \n", - "768020 False 56 manual_static_filter \n", - "768021 False 56 manual_static_filter \n", - "768022 False 56 manual_static_filter \n", - "768023 False 56 manual_static_filter \n", - "\n", - "[768024 rows x 7 columns]" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# finally, merge\n", - "\n", - "# pour df1_customer_target_mappings on enlève les colonnes name, extra_field, et updated_at (valeur égale à created_at)\n", - "# note : by making a left join on df1_customer_target_mappings, we suppress 2 targets that have no customer associated\n", - "\n", - "df1_customer_targets = pd.merge(df1_customer_target_mappings[[\"id\", \"customer_id\", \"target_id\"]], \n", - " df1_targets_full, left_on='target_id', right_on='target_id', how='left')\n", - "df1_customer_targets" - ] - }, - { - "cell_type": "markdown", - "id": "52326267-c5ba-4e21-b8ab-4b4c62de75d1", - "metadata": {}, - "source": [ - "## Campaign stats, campaigns" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "06dca910-5c07-4ee1-bbf2-3b11b48ba1f2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnameservice_idcreated_atupdated_atprocess_idreport_urlcategoryto_be_syncedidentifiersent_at
01319613newsletter enseignants janvier 20227212022-01-14 16:06:42.586321+01:002022-02-03 14:17:27.112963+01:00NaNNaN0.0Falseaba3b6fd5d186d28e06ff97135cade7f2022-01-14 00:00:00+01:00
11319586lsf_janvier_20227172022-01-07 11:30:35.315895+01:002022-02-03 14:17:27.116171+01:00NaNNaN0.0False788d986905533aba051261497ecffcbb2022-01-07 00:00:00+01:00
21319282Invitation à déjeuner au Mucem | Vernissage « ...5912021-09-28 12:50:24.448752+02:002022-02-03 14:17:27.119582+01:00NaNNaN0.0False3493894fa4ea036cfc6433c3e2ee63b02021-09-28 00:00:00+02:00
31319283Vacances de la Toussaint - centres des loisirs5902021-09-28 18:01:04.692073+02:002022-02-03 14:17:27.124408+01:00NaNNaN0.0False08b255a5d42b89b0585260b6f2360bdd2021-09-28 00:00:00+02:00
41319636ddcp_promo_md_livemag7302022-01-27 18:00:41.053069+01:002022-02-03 14:17:27.127607+01:00NaNNaN0.0Falsed5cfead94f5350c12c322b5b664544c12022-01-27 00:00:00+01:00
\n", - "
" - ], - "text/plain": [ - " id name service_id \\\n", - "0 1319613 newsletter enseignants janvier 2022 721 \n", - "1 1319586 lsf_janvier_2022 717 \n", - "2 1319282 Invitation à déjeuner au Mucem | Vernissage « ... 591 \n", - "3 1319283 Vacances de la Toussaint - centres des loisirs 590 \n", - "4 1319636 ddcp_promo_md_livemag 730 \n", - "\n", - " created_at updated_at \\\n", - "0 2022-01-14 16:06:42.586321+01:00 2022-02-03 14:17:27.112963+01:00 \n", - "1 2022-01-07 11:30:35.315895+01:00 2022-02-03 14:17:27.116171+01:00 \n", - "2 2021-09-28 12:50:24.448752+02:00 2022-02-03 14:17:27.119582+01:00 \n", - "3 2021-09-28 18:01:04.692073+02:00 2022-02-03 14:17:27.124408+01:00 \n", - "4 2022-01-27 18:00:41.053069+01:00 2022-02-03 14:17:27.127607+01:00 \n", - "\n", - " process_id report_url category to_be_synced \\\n", - "0 NaN NaN 0.0 False \n", - "1 NaN NaN 0.0 False \n", - "2 NaN NaN 0.0 False \n", - "3 NaN NaN 0.0 False \n", - "4 NaN NaN 0.0 False \n", - "\n", - " identifier sent_at \n", - "0 aba3b6fd5d186d28e06ff97135cade7f 2022-01-14 00:00:00+01:00 \n", - "1 788d986905533aba051261497ecffcbb 2022-01-07 00:00:00+01:00 \n", - "2 3493894fa4ea036cfc6433c3e2ee63b0 2021-09-28 00:00:00+02:00 \n", - "3 08b255a5d42b89b0585260b6f2360bdd 2021-09-28 00:00:00+02:00 \n", - "4 d5cfead94f5350c12c322b5b664544c1 2022-01-27 00:00:00+01:00 " - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 1. campaigns\n", - "df1_campaigns.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "83eaa447-9144-41ed-9e26-f0f23799a8fd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcreated_atupdated_at
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:002021-03-28 18:34:20.616136+02:002022-04-15 22:52:04.397693+02:00
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:002021-03-28 18:21:04.297213+02:002022-04-15 22:52:04.397693+02:00
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:002021-03-28 18:18:49.991042+02:002022-04-15 22:52:04.397693+02:00
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:002021-03-28 18:09:50.915354+02:002022-04-15 22:52:04.397693+02:00
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:002021-03-28 18:05:08.507398+02:002022-04-15 22:52:04.397693+02:00
\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "0 19793 58 112597 NaN \n", - "1 14211 58 113666 NaN \n", - "2 13150 58 280561 NaN \n", - "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", - "4 5175 58 103972 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", - "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", - "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", - "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", - "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", - "\n", - " created_at updated_at \n", - "0 2021-03-28 18:34:20.616136+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "1 2021-03-28 18:21:04.297213+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "2 2021-03-28 18:18:49.991042+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "3 2021-03-28 18:09:50.915354+02:00 2022-04-15 22:52:04.397693+02:00 \n", - "4 2021-03-28 18:05:08.507398+02:00 2022-04-15 22:52:04.397693+02:00 " - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 2. campaigns stats\n", - "df1_campaign_stats.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "7f25eb1b-e7c8-4715-bc30-7ac29a7181ac", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcampaign_namecampaign_service_idcampaign_sent_at
01979358112597NaN2021-03-28 18:01:09+02:002021-03-28 18:24:18+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
11421158113666NaN2021-03-28 18:01:09+02:002021-03-28 18:21:02+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
21315058280561NaN2021-03-28 18:00:59+02:002021-03-28 18:08:45+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
37073581010072021-03-28 20:11:06+02:002021-03-28 18:00:59+02:002021-03-28 18:09:47+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
4517558103972NaN2021-03-28 18:01:06+02:002021-03-28 18:05:03+02:00Le Mucem chez vous, gardons le lien #224042021-03-28 00:00:00+01:00
\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "0 19793 58 112597 NaN \n", - "1 14211 58 113666 NaN \n", - "2 13150 58 280561 NaN \n", - "3 7073 58 101007 2021-03-28 20:11:06+02:00 \n", - "4 5175 58 103972 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "0 2021-03-28 18:01:09+02:00 2021-03-28 18:24:18+02:00 \n", - "1 2021-03-28 18:01:09+02:00 2021-03-28 18:21:02+02:00 \n", - "2 2021-03-28 18:00:59+02:00 2021-03-28 18:08:45+02:00 \n", - "3 2021-03-28 18:00:59+02:00 2021-03-28 18:09:47+02:00 \n", - "4 2021-03-28 18:01:06+02:00 2021-03-28 18:05:03+02:00 \n", - "\n", - " campaign_name campaign_service_id \\\n", - "0 Le Mucem chez vous, gardons le lien #22 404 \n", - "1 Le Mucem chez vous, gardons le lien #22 404 \n", - "2 Le Mucem chez vous, gardons le lien #22 404 \n", - "3 Le Mucem chez vous, gardons le lien #22 404 \n", - "4 Le Mucem chez vous, gardons le lien #22 404 \n", - "\n", - " campaign_sent_at \n", - "0 2021-03-28 00:00:00+01:00 \n", - "1 2021-03-28 00:00:00+01:00 \n", - "2 2021-03-28 00:00:00+01:00 \n", - "3 2021-03-28 00:00:00+01:00 \n", - "4 2021-03-28 00:00:00+01:00 " - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 3. merge campaigns and campaigns stats\n", - "\n", - "df1_campaigns_full = pd.merge(df1_campaign_stats[[\"id\", \"campaign_id\", \"customer_id\", \"opened_at\", \"sent_at\", \"delivered_at\"]], \n", - " df1_campaigns[[\"id\", \"name\", \"service_id\", \"sent_at\"]].add_prefix(\"campaign_\"),\n", - " on = \"campaign_id\", how = \"left\")\n", - "df1_campaigns_full.head()" - ] - }, - { - "cell_type": "markdown", - "id": "87fc686a-4a80-40ab-9987-20d2774f3055", - "metadata": {}, - "source": [ - "## Link stats" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "2f9df2d0-8a23-496b-8e92-617285f64530", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idclicked_atlink_idcustomer_idcreated_atupdated_at
012021-03-26 16:30:36+01:0012840332021-03-26 15:30:37.050161+01:002021-03-26 15:30:37.050161+01:00
122021-03-26 17:16:34+01:0021197682021-03-26 16:16:34.950871+01:002021-03-26 16:16:34.950871+01:00
22722021-03-28 20:03:32+02:00421131052021-03-28 18:03:32.736394+02:002021-03-28 18:03:32.736394+02:00
342021-03-26 17:43:19+01:0032722802021-03-26 16:43:19.338321+01:002021-03-26 16:43:19.338321+01:00
452021-03-26 17:46:00+01:0031050952021-03-26 16:46:00.502945+01:002021-03-26 16:46:00.502945+01:00
.....................
1510462435532023-11-09 16:34:27+01:00146669982023-11-09 15:34:29.425425+01:002023-11-09 15:34:29.425425+01:00
1510472435542023-11-09 16:34:35+01:00146709982023-11-09 15:34:37.505505+01:002023-11-09 15:34:37.505505+01:00
1510482435592023-11-09 16:51:15+01:0014686829232023-11-09 15:51:17.439518+01:002023-11-09 15:51:17.439518+01:00
1510492435612023-11-09 16:59:42+01:0014677829232023-11-09 15:59:44.030922+01:002023-11-09 15:59:44.030922+01:00
1510502435642023-11-09 17:16:41+01:001469112543552023-11-09 16:16:43.012932+01:002023-11-09 16:16:43.012932+01:00
\n", - "

151051 rows × 6 columns

\n", - "
" - ], - "text/plain": [ - " id clicked_at link_id customer_id \\\n", - "0 1 2021-03-26 16:30:36+01:00 1 284033 \n", - "1 2 2021-03-26 17:16:34+01:00 2 119768 \n", - "2 272 2021-03-28 20:03:32+02:00 42 113105 \n", - "3 4 2021-03-26 17:43:19+01:00 3 272280 \n", - "4 5 2021-03-26 17:46:00+01:00 3 105095 \n", - "... ... ... ... ... \n", - "151046 243553 2023-11-09 16:34:27+01:00 14666 998 \n", - "151047 243554 2023-11-09 16:34:35+01:00 14670 998 \n", - "151048 243559 2023-11-09 16:51:15+01:00 14686 82923 \n", - "151049 243561 2023-11-09 16:59:42+01:00 14677 82923 \n", - "151050 243564 2023-11-09 17:16:41+01:00 14691 1254355 \n", - "\n", - " created_at updated_at \n", - "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n", - "1 2021-03-26 16:16:34.950871+01:00 2021-03-26 16:16:34.950871+01:00 \n", - "2 2021-03-28 18:03:32.736394+02:00 2021-03-28 18:03:32.736394+02:00 \n", - "3 2021-03-26 16:43:19.338321+01:00 2021-03-26 16:43:19.338321+01:00 \n", - "4 2021-03-26 16:46:00.502945+01:00 2021-03-26 16:46:00.502945+01:00 \n", - "... ... ... \n", - "151046 2023-11-09 15:34:29.425425+01:00 2023-11-09 15:34:29.425425+01:00 \n", - "151047 2023-11-09 15:34:37.505505+01:00 2023-11-09 15:34:37.505505+01:00 \n", - "151048 2023-11-09 15:51:17.439518+01:00 2023-11-09 15:51:17.439518+01:00 \n", - "151049 2023-11-09 15:59:44.030922+01:00 2023-11-09 15:59:44.030922+01:00 \n", - "151050 2023-11-09 16:16:43.012932+01:00 2023-11-09 16:16:43.012932+01:00 \n", - "\n", - "[151051 rows x 6 columns]" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_link_stats" - ] - }, - { - "cell_type": "markdown", - "id": "aad6fb14-9694-4c1e-9885-1ebe0f38afe3", - "metadata": {}, - "source": [ - "## Bonus : peut-on lier link stats et campaign ? Non, les dates à laquelle le client clique sur le lie/ouvre la campagne ne permettent pas de faire coincider link_id et campaign_id" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "8be7c974-72c9-4e31-a874-d7e5d2719fb3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idclicked_atlink_idcustomer_idcreated_atupdated_at
012021-03-26 16:30:36+01:0012840332021-03-26 15:30:37.050161+01:002021-03-26 15:30:37.050161+01:00
7526140182021-05-10 18:07:59+02:003122840332021-05-10 16:08:00.541322+02:002021-05-10 16:08:00.541322+02:00
968481334492021-03-25 08:42:22+01:0042840332022-04-15 22:51:01.994343+02:002022-04-15 22:51:01.994343+02:00
1157282075442022-08-23 10:33:04+02:00123652840332022-08-23 08:33:06.498908+02:002022-08-23 08:33:06.498908+02:00
\n", - "
" - ], - "text/plain": [ - " id clicked_at link_id customer_id \\\n", - "0 1 2021-03-26 16:30:36+01:00 1 284033 \n", - "7526 14018 2021-05-10 18:07:59+02:00 312 284033 \n", - "96848 133449 2021-03-25 08:42:22+01:00 4 284033 \n", - "115728 207544 2022-08-23 10:33:04+02:00 12365 284033 \n", - "\n", - " created_at updated_at \n", - "0 2021-03-26 15:30:37.050161+01:00 2021-03-26 15:30:37.050161+01:00 \n", - "7526 2021-05-10 16:08:00.541322+02:00 2021-05-10 16:08:00.541322+02:00 \n", - "96848 2022-04-15 22:51:01.994343+02:00 2022-04-15 22:51:01.994343+02:00 \n", - "115728 2022-08-23 08:33:06.498908+02:00 2022-08-23 08:33:06.498908+02:00 " - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_link_stats[df1_link_stats[\"customer_id\"] == 284033]" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "id": "902e9947-58e1-44f4-b634-1239b0e4df02", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcampaign_idcustomer_idopened_atsent_atdelivered_atcampaign_namecampaign_service_idcampaign_sent_at
403064340363764284033NaN2021-03-21 18:01:22+01:002021-03-21 18:08:04+01:00Le Mucem chez vous, gardons le lien #213982021-03-21 00:00:00+01:00
\n", - "
" - ], - "text/plain": [ - " id campaign_id customer_id opened_at \\\n", - "4030643 4036376 4 284033 NaN \n", - "\n", - " sent_at delivered_at \\\n", - "4030643 2021-03-21 18:01:22+01:00 2021-03-21 18:08:04+01:00 \n", - "\n", - " campaign_name campaign_service_id \\\n", - "4030643 Le Mucem chez vous, gardons le lien #21 398 \n", - "\n", - " campaign_sent_at \n", - "4030643 2021-03-21 00:00:00+01:00 " - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1_campaigns_full[ (df1_campaigns_full[\"customer_id\"] == 284033) & (df1_campaigns_full[\"campaign_id\"] == 4)]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}