2024-03-02 11:37:44 +01:00
{
"cells": [
2024-03-03 09:32:45 +01:00
{
"cell_type": "markdown",
"id": "be628bfc-0bca-48b0-97c9-29063289127e",
"metadata": {},
"source": [
"# Statistiques descriptives : compagnies offrant des spectacles"
]
},
{
"cell_type": "markdown",
"id": "0bf5450b-f44d-430a-aed7-d875dc365048",
"metadata": {},
"source": [
"## Importations et chargement des données"
]
},
2024-03-02 11:37:44 +01:00
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 2,
2024-03-02 11:37:44 +01:00
"id": "aa915888-cede-4eb0-8a26-7df573d29a3e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import s3fs\n",
"import warnings\n",
"from datetime import date, timedelta, datetime\n",
2024-03-03 09:32:45 +01:00
"import numpy as np\n",
"import matplotlib.pyplot as plt"
2024-03-02 11:37:44 +01:00
]
},
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 3,
2024-03-02 11:37:44 +01:00
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
"metadata": {},
"outputs": [],
"source": [
"# Import KPI construction functions\n",
"#exec(open('0_KPI_functions.py').read())\n",
"exec(open('../0_KPI_functions.py').read())\n"
]
},
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 4,
2024-03-02 11:37:44 +01:00
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
2024-03-05 00:36:48 +01:00
"execution_count": 4,
2024-03-02 11:37:44 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 5,
2024-03-02 11:37:44 +01:00
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
"metadata": {},
"outputs": [],
"source": [
"dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n",
"for nom_base in dic_base:\n",
" FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n",
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 6,
2024-03-02 11:37:44 +01:00
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
"metadata": {},
"outputs": [],
"source": [
2024-03-03 09:32:45 +01:00
"# fonction permettant d'extraire une table à partir du numéro de la compagnie (directory_path)\n",
2024-03-02 11:37:44 +01:00
"\n",
"def display_databases(directory_path, file_name, datetime_col = None):\n",
" \"\"\"\n",
" This function returns the file from s3 storage \n",
" \"\"\"\n",
" file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + directory_path + \"/\" + file_name + \".csv\"\n",
" print(\"File path : \", file_path)\n",
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser) \n",
" return df \n"
]
},
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 10,
"id": "c56decc3-de19-4786-82a4-1386c72a6bfb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1165098</td>\n",
" <td>618562</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1165100</td>\n",
" <td>618559</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1165101</td>\n",
" <td>618561</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1165102</td>\n",
" <td>618560</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1165103</td>\n",
" <td>618558</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69253</th>\n",
" <td>1698158</td>\n",
" <td>18580</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69254</th>\n",
" <td>1698159</td>\n",
" <td>18569</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69255</th>\n",
" <td>1698160</td>\n",
" <td>2962</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69256</th>\n",
" <td>1698161</td>\n",
" <td>3825</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69257</th>\n",
" <td>1698162</td>\n",
" <td>5731</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>69258 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id target_name target_type_is_import \\\n",
"0 1165098 618562 Newsletter mensuelle False \n",
"1 1165100 618559 Newsletter mensuelle False \n",
"2 1165101 618561 Newsletter mensuelle False \n",
"3 1165102 618560 Newsletter mensuelle False \n",
"4 1165103 618558 Newsletter mensuelle False \n",
"... ... ... ... ... \n",
"69253 1698158 18580 Newsletter mensuelle False \n",
"69254 1698159 18569 Newsletter mensuelle False \n",
"69255 1698160 2962 Newsletter mensuelle False \n",
"69256 1698161 3825 Newsletter mensuelle False \n",
"69257 1698162 5731 Newsletter mensuelle False \n",
"\n",
" target_type_name \n",
"0 manual_static_filter \n",
"1 manual_static_filter \n",
"2 manual_static_filter \n",
"3 manual_static_filter \n",
"4 manual_static_filter \n",
"... ... \n",
"69253 manual_static_filter \n",
"69254 manual_static_filter \n",
"69255 manual_static_filter \n",
"69256 manual_static_filter \n",
"69257 manual_static_filter \n",
"\n",
"[69258 rows x 5 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"target_information"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c825d64b-356c-4b71-aa3c-90e0dd7ca092",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" <th>start_date_time</th>\n",
" <th>end_date_time</th>\n",
" <th>open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1799177</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>2</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>danse</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>aringa rossa</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-09-27 00:00:00+02:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1799178</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>5èmes hurlants</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-11-18 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1799179</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>dom juan</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-12-07 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1799180</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>vanishing point</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-04 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1799181</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>12.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>la cite des congres</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>a o lang pho</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-03 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492309</th>\n",
" <td>3252232</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492310</th>\n",
" <td>3252233</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492311</th>\n",
" <td>3252234</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492312</th>\n",
" <td>3252235</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492313</th>\n",
" <td>3252236</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>492314 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 1799177 36984 409613 2 guichet \n",
"1 1799178 36984 409613 3 guichet \n",
"2 1799179 36984 409613 1 guichet \n",
"3 1799180 36984 409613 1 guichet \n",
"4 1799181 36984 409613 3 guichet \n",
"... ... ... ... ... ... \n",
"492309 3252232 621716 710062 1 guichet \n",
"492310 3252233 621716 710062 1 guichet \n",
"492311 3252234 621716 710062 1 guichet \n",
"492312 3252235 621716 710062 1 guichet \n",
"492313 3252236 621716 710062 1 guichet \n",
"\n",
" purchase_date amount is_full_price name_event_types \\\n",
"0 2016-04-28 17:58:26+02:00 9.0 False danse \n",
"1 2016-04-28 17:58:26+02:00 9.0 False cirque \n",
"2 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"3 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"4 2016-04-28 17:58:26+02:00 12.0 False cirque \n",
"... ... ... ... ... \n",
"492309 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492310 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492311 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492312 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492313 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"\n",
" name_facilities name_categories \\\n",
"0 le grand t abo t gourmand jeune \n",
"1 le grand t abo t gourmand jeune \n",
"2 le grand t abo t gourmand jeune \n",
"3 le grand t abo t gourmand jeune \n",
"4 la cite des congres abo t gourmand jeune \n",
"... ... ... \n",
"492309 cap nort tarif sco co 1 seance scolaire \n",
"492310 cap nort tarif sco co 1 seance scolaire \n",
"492311 cap nort tarif sco co 1 seance scolaire \n",
"492312 cap nort tarif sco co 1 seance scolaire \n",
"492313 cap nort tarif sco co 1 seance scolaire \n",
"\n",
" name_events name_seasons start_date_time \\\n",
"0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n",
"1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n",
"2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n",
"3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n",
"4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n",
"... ... ... ... \n",
"492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"\n",
" end_date_time open \n",
"0 1901-01-01 00:09:21+00:09 True \n",
"1 1901-01-01 00:09:21+00:09 True \n",
"2 1901-01-01 00:09:21+00:09 True \n",
"3 1901-01-01 00:09:21+00:09 True \n",
"4 1901-01-01 00:09:21+00:09 True \n",
"... ... ... \n",
"492309 1901-01-01 00:09:21+00:09 True \n",
"492310 1901-01-01 00:09:21+00:09 True \n",
"492311 1901-01-01 00:09:21+00:09 True \n",
"492312 1901-01-01 00:09:21+00:09 True \n",
"492313 1901-01-01 00:09:21+00:09 True \n",
"\n",
"[492314 rows x 16 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced"
]
},
{
"cell_type": "code",
"execution_count": 7,
2024-03-02 13:05:51 +01:00
"id": "afd044b8-ac83-4a35-b959-700cae0b3b41",
2024-03-02 12:16:24 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 12:16:24 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 12:16:24 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:32:54 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n",
2024-03-02 13:05:51 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
2024-03-02 12:16:24 +01:00
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:32:54 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-03-02 13:32:54 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:32:54 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-03-02 13:32:54 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-05 00:36:48 +01:00
"/tmp/ipykernel_430/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-02 13:05:51 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
2024-03-02 12:16:24 +01:00
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
}
],
"source": [
2024-03-03 09:32:45 +01:00
"# création des bases contenant les KPI pour les 5 compagnies de spectacle\n",
2024-03-02 12:16:24 +01:00
"\n",
2024-03-03 09:32:45 +01:00
"# liste des compagnies de spectacle\n",
2024-03-02 13:05:51 +01:00
"nb_compagnie=['10','11','12','13','14']\n",
2024-03-03 09:32:45 +01:00
"\n",
"# début de la boucle permettant de générer des datasets agrégés pour les 5 compagnies de spectacle\n",
2024-03-02 13:05:51 +01:00
"for directory_path in nb_compagnie:\n",
" df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
" df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
" df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
2024-03-02 13:32:54 +01:00
" df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
2024-03-03 09:32:45 +01:00
" \n",
2024-03-02 13:05:51 +01:00
" df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n",
" df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
" df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
2024-03-02 13:32:54 +01:00
"\n",
" \n",
2024-03-03 09:32:45 +01:00
"# creation de la colonne Number compagnie, qui permettra d'agréger les résultats\n",
" df_tickets_kpi[\"number_compagny\"]=int(directory_path)\n",
" df_campaigns_kpi[\"number_compagny\"]=int(directory_path)\n",
" df_customerplus_clean[\"number_compagny\"]=int(directory_path)\n",
" df_target_information[\"number_compagny\"]=int(directory_path)\n",
2024-03-02 13:05:51 +01:00
"\n",
" if nb_compagnie.index(directory_path)>=1:\n",
" customerplus_clean_spectacle=pd.concat([customerplus_clean_spectacle,df_customerplus_clean],axis=0)\n",
" campaigns_information_spectacle=pd.concat([campaigns_information_spectacle,df_campaigns_kpi],axis=0)\n",
" products_purchased_reduced_spectacle=pd.concat([products_purchased_reduced_spectacle,df_tickets_kpi],axis=0)\n",
2024-03-02 13:32:54 +01:00
" target_information_spectacle=pd.concat([target_information_spectacle,df_target_information],axis=0)\n",
2024-03-02 13:05:51 +01:00
" else:\n",
" customerplus_clean_spectacle=df_customerplus_clean\n",
" campaigns_information_spectacle=df_campaigns_kpi\n",
2024-03-02 13:32:54 +01:00
" products_purchased_reduced_spectacle=df_tickets_kpi\n",
" target_information_spectacle=df_target_information"
2024-03-02 13:05:51 +01:00
]
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "code",
"execution_count": 37,
"id": "b5a4a031-9533-4a50-8569-5f4246691a7a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>2</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18031</th>\n",
" <td>2</td>\n",
" <td>319517</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1556</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>2</td>\n",
" <td>2020-01-01 14:06:52+00:00</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>291642</th>\n",
" <td>2</td>\n",
" <td>757541</td>\n",
" <td>303.0</td>\n",
" <td>5.0</td>\n",
" <td>1</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2016-09-08 14:50:00+00:00</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"17 2 139 NaN NaN 0 \n",
"18031 2 319517 NaN NaN 0 \n",
"291642 2 757541 303.0 5.0 1 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"17 875 False NaN 2 False ... \n",
"18031 1556 False NaN 0 True ... \n",
"291642 862 False NaN 1 True ... \n",
"\n",
" purchase_count first_buying_date country gender_label \\\n",
"17 3 NaN NaN other \n",
"18031 2 2020-01-01 14:06:52+00:00 fr female \n",
"291642 3 2016-09-08 14:50:00+00:00 fr male \n",
"\n",
" gender_female gender_male gender_other country_fr has_tags \\\n",
"17 0 0 1 NaN 0 \n",
"18031 1 0 0 1.0 0 \n",
"291642 0 1 0 1.0 1 \n",
"\n",
" number_compagny \n",
"17 10 \n",
"18031 11 \n",
"291642 14 \n",
"\n",
"[3 rows x 29 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==2]"
]
},
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 1,
2024-03-03 09:32:45 +01:00
"id": "b9b6ec1f-36fb-4ee9-a1ed-09ff41878005",
"metadata": {},
"outputs": [
{
2024-03-05 00:36:48 +01:00
"ename": "NameError",
"evalue": "name 'customerplus_clean_spectacle' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcustomerplus_clean_spectacle\u001b[49m[customerplus_clean_spectacle[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcustomer_id\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m==\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
"\u001b[0;31mNameError\u001b[0m: name 'customerplus_clean_spectacle' is not defined"
]
2024-03-03 09:32:45 +01:00
}
],
"source": [
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==1]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "a12c1b7d-6f6f-483e-b215-6336d7a51057",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'street_id', 'structure_id', 'mcp_contact_id',\n",
" 'fidelity', 'tenant_id', 'is_partner', 'deleted_at', 'gender',\n",
" 'is_email_true', 'opt_in', 'last_buying_date', 'max_price',\n",
" 'ticket_sum', 'average_price', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'gender_label',\n",
" 'gender_female', 'gender_male', 'gender_other', 'country_fr',\n",
" 'has_tags', 'number_compagny'],\n",
" dtype='object')"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f263e9c-0adf-4f25-8939-7416f3013c04",
"metadata": {},
"outputs": [],
"source": []
},
2024-03-02 13:05:51 +01:00
{
"cell_type": "code",
2024-03-02 14:05:48 +01:00
"execution_count": 38,
2024-03-02 13:32:54 +01:00
"id": "05b9a396-dcd7-4d3d-8b39-5ca48beba4b0",
2024-03-02 12:16:24 +01:00
"metadata": {},
2024-03-02 14:05:48 +01:00
"outputs": [],
2024-03-02 12:16:24 +01:00
"source": [
2024-03-02 14:05:48 +01:00
"#customerplus_clean_spectacle.isna().sum()\n",
"#campaigns_information_spectacle.isna().sum()\n",
"#products_purchased_reduced_spectacle.isna().sum()\n",
"#target_information_spectacle.isna().sum()"
2024-03-02 12:16:24 +01:00
]
2024-03-03 09:32:45 +01:00
},
{
"cell_type": "markdown",
"id": "fff306c2-1d41-4ef6-867b-ba9a7cf4ee68",
"metadata": {},
"source": [
"## Statistiques descriptives"
]
},
{
"cell_type": "markdown",
"id": "0549bdc4-edd7-4511-916e-26e94b5a30f5",
"metadata": {},
"source": [
"### 0. Détection du client anonyme (outlier) - utile pour la section 3"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "5b460061-f8b5-4a6b-ba59-539446d8487f",
"metadata": {},
"outputs": [],
"source": [
"def outlier_detection(directory_path = \"1\", coupure = 1):\n",
" df_tickets = display_databases(directory_path, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n",
" df_tickets_kpi = tickets_kpi_function(df_tickets)\n",
"\n",
" if directory_path == \"101\" :\n",
" df_tickets_1 = display_databases(directory_path, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])\n",
" df_tickets_kpi_1 = tickets_kpi_function(df_tickets_1)\n",
"\n",
" df_tickets_kpi = pd.concat([df_tickets_kpi, df_tickets_kpi_1])\n",
" # Part du CA par customer\n",
" total_amount_share = df_tickets_kpi.groupby('customer_id')['total_amount'].sum().reset_index()\n",
" total_amount_share['total_amount_entreprise'] = total_amount_share['total_amount'].sum()\n",
" total_amount_share['share_total_amount'] = total_amount_share['total_amount']/total_amount_share['total_amount_entreprise']\n",
" \n",
" total_amount_share_index = total_amount_share.set_index('customer_id')\n",
" df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)\n",
" \n",
" top = df_circulaire[:coupure]\n",
" rest = df_circulaire[coupure:]\n",
" \n",
" # Calculez la somme du reste\n",
" rest_sum = rest.sum()\n",
" \n",
" # Créez une nouvelle série avec les cinq plus grandes parts et 'Autre'\n",
" new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
" \n",
" # Créez le graphique circulaire\n",
" plt.figure(figsize=(3, 3))\n",
" plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
" plt.axis('equal') # Assurez-vous que le graphique est un cercle\n",
" plt.title('Répartition des montants totaux')\n",
" plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "b6417f09-a6c7-4319-95b3-98c95ec5a3b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUwAAAEQCAYAAADbIk3TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA5TUlEQVR4nO3dd3xT9f7H8VeStkkX3dDSySxbliggU9kgyLiKskVUcKIX0SvrOlB/XkXFLVdEkesGUUQ2IgKCWPYqs0DL6KB7Jd/fH4FAaEtTaHvS5vN8PPrQJCcn76Snb8745hydUkohhBCiVHqtAwghRFUhhSmEEA6SwhRCCAdJYQohhIOkMIUQwkFSmEII4SApTCGEcJAUphBCOEgKUwghHFRtCnPnzp14eXnxzjvvaB1FCFFNOVVhzp8/H51OZ/txc3MjLCyMe+65h0OHDpX4vIyMDIYOHcqjjz7Ko48+WomJi1q2bBkzZ84s9rGYmBjGjBlju3369GlmzpxJXFxckWlnzpyJTqermJDXSafTlfjeXMXevXuZOXMmx44dq5TXe/nll1m8eHG5za888l9rGa/2lBP59NNPFaA+/fRTtWnTJrV27Vr14osvKk9PT1WzZk2VkpJS7POGDRum7rvvPmWxWCo5cVGTJk1SJX2s27dvV/Hx8bbbW7dutb3fqyUkJKhNmzZVVMzrAqgZM2ZoHUNT33zzjQLU2rVrK+X1vL291ejRo8ttfuWR/1rLeHXnpmFXl6hZs2a0bdsWgK5du2I2m5kxYwaLFy9m7NixRab/+uuvKztiEdnZ2Xh5eV1zmlatWjk8v4iICCIiIm40lhCiPGnd2Fe6tIa5detWu/t//vlnBajZs2fb3b9161Y1YMAAFRAQoIxGo2rZsqX66quvip3nihUr1JgxY1RAQIDy8vJS/fv3V4cPH7abdsWKFerOO+9U4eHhymg0qnr16qkJEyaoc+fO2U03Y8YMBai//vpLDRkyRPn7+6vQ0FA1evRoBRT5OXr0qFJKqejoaNvawtq1a4ud9tIa3KXXuJLZbFavvvqqio2NVR4eHiokJESNHDlSJSQk2E3XpUsX1bRpU/Xnn3+q2267TXl6eqo6deqo2bNnK7PZXOrv4cKFC2r8+PEqMDBQeXt7q169eqkDBw4Uu4Z58OBBNXz4cBUSEqI8PDxUo0aN1Ny5c4vkfuGFF1TDhg2VyWRSfn5+qnnz5mrOnDnXzHHpM1q4cKGaMmWKCg0NVd7e3qp///4qKSlJpaenqwceeEAFBQWpoKAgNWbMGJWRkWE3j5ycHDV16lQVExOj3N3dVe3atdXEiRNVamqq3XTR0dGqX79+6pdfflGtWrVSJpNJxcbGqnnz5tmmubQsXf1zaQuhrMvP7t271T333KNq1KihatasqcaOHavS0tJs0xX3Wl26dFFKKZWVlaWeeuopFRMTo4xGowoICFBt2rRRX375ZYmfZ2n5lVJq3rx5qkWLFrZ5Dho0SO3du9f2eGnL+Ny5c1WnTp1USEiI8vLyUs2aNVOvvvqqys/PL/J5F7fm3KVLF9t7VEqpBx98UBmNRrVt2zbbfWazWXXv3l3VrFlTnT59usT3WxGqRGHOnTtXAeq7776z3bdmzRrl4eGhOnXqpL766iu1fPlyNWbMmCILwKV5RkZGqnHjxqlffvlFffTRR6pmzZoqMjLS7g/n/fffV7Nnz1Y//vijWr9+vfrss8/UTTfdpGJjY+1+4ZcW+OjoaPXMM8+olStXqsWLF6v4+Hg1dOhQBahNmzbZfnJzc5VS9gvJhQsXbNmef/5527SXyq+4wpwwYYIC1COPPKKWL1+uPvjgAxUSEqIiIyPt/ii7dOmigoKCVIMGDdQHH3ygVq5cqSZOnKgA9dlnn13zd2CxWFS3bt2U0WhUL730klqxYoWaMWOGqlu3bpHC3LNnj638FixYoFasWKGeeuoppdfr1cyZM23TzZ49WxkMBjVjxgy1evVqtXz5cjVnzhy7aYpzqTCjo6PVmDFjbO/Zx8dHdevWTfXo0UM9/fTTasWKFerVV19VBoNBPfroo3bvpVevXsrNzU1NmzZNrVixQr3++uvK29tbtWrVyvZ7ufS7iYiIUE2aNFELFixQv/76qxo2bJgC1Pr165VSSp09e1a9/PLLClDvvvuu7Xd29uzZ61p+YmNj1fTp09XKlSvVG2+8oYxGoxo7dqxtuk2bNilPT0/Vt29f22vt2bNHKWUtEi8vL/XGG2+otWvXqp9++km98sor6p133inx8ywt/6XHhg8frn7++We1YMECVbduXeXn56cOHjyolFKlLuNPPvmkev/999Xy5cvVmjVr1JtvvqmCg4Pt3telz9uRwszJyVEtW7ZUdevWtf2tTp8+Xen1erVixYoS32tFccrC3Lx5syooKFAZGRlq+fLlKjQ0VHXu3FkVFBTYpm3UqJFq1aqV3X1KKdW/f38VFhZmW5O6NM+77rrLbrqNGzcqQL344ovFZrFYLKqgoEAdP35cAWrJkiW2xy4t8NOnTy/yvGvt37l6IbnWPsyrC3Pfvn0KUBMnTrSbbsuWLQpQzz33nO2+Ll26KEBt2bLFbtomTZqoXr16FZvtkl9++UUB6q233rK7/6WXXipSmL169VIRERHqwoULdtM+8sgjymQy2fY59+/fX7Vs2fKar1ucS4U5YMAAu/ufeOIJBajHHnvM7v5BgwapwMBA2+3ly5crQL322mt203311VcKUB999JHtvujoaGUymdTx48dt9+Xk5KjAwED14IMP2u5zdB+gI8vP1bkmTpyoTCaT3b74kvZhNmvWTA0aNOiaGYpTUv7U1FRbOV/pxIkTymg0qnvvvdd2n6P7MM1msyooKFALFixQBoPB7hiEo4WplFKHDh1SNWrUUIMGDVKrVq1Ser1ePf/886W/2QrgVEfJL7n11ltxd3fH19eX3r17ExAQwJIlS3Bzs+5yjY+PZ//+/dx3330AFBYW2n769u1LYmIiBw4csJvnpWkv6dChA9HR0axdu9Z239mzZ3nooYeIjIzEzc0Nd3d3oqOjAdi3b1+RnEOGDCnX930tl3JeeZQdoF27djRu3JjVq1fb3R8aGkq7du3s7mvRogXHjx936HWu/rzuvfdeu9u5ubmsXr2au+66Cy8vryK/g9zcXDZv3mzLuGPHDiZOnMivv/5Kenq6Y2/6ov79+9vdbty4MQD9+vUrcn9KSgqZmZkArFmzBij6mQ0bNgxvb+8in1nLli2Jioqy3TaZTDRs2LDUz+ySsi4/d955p93tFi1akJuby9mzZ0t9rXbt2vHLL78wdepU1q1bR05OjkMZS7Jp0yZycnKKfFaRkZF07969yGdVkr///ps777yToKAgDAYD7u7ujBo1CrPZzMGDB68rW/369fn4449ZvHgx/fv3p1OnTpodpXfKwlywYAFbt25lzZo1PPjgg+zbt4/hw4fbHj9z5gwATz/9NO7u7nY/EydOBOD8+fN28wwNDS3yOqGhoSQnJwNgsVjo2bMn33//PVOmTGH16tX8+eeftj/64hbIsLCw8nnDDriUs7jXrF27tu3xS4KCgopMZzQaS/3DSk5Oxs3Nrcjzr/78kpOTKSws5J133inyO+jbty9w+Xfw7LPP8vrrr7N582b69OlDUFAQt99+O9u2bSvlXVsFBgba3fbw8Ljm/bm5uXbvJSQkxG46nU5n97u/5Ho/M7i+5efq1zMajSVOe7W3336bZ555hsWLF9OtWzcCAwMZNGjQNYffXUtZl6/inDhxgk6dOnHq1CneeustNmzYwNatW3n33XcBx95XSfr160etWrXIzc1l8uTJGAyG657XjXDKo+SNGze2HSXv1q0bZrOZTz75hG+//ZahQ4cSHBwMWP8QBw8eXOw8YmNj7W4nJSUVmSYpKYn69esDsHv3bnbs2MH8+fMZPXq0bZr4+PgSc1bmOMlLf1yJiYlFjp6fPn3a9pmUx+sUFhaSnJxs9wd99ecXEBCAwWBg5MiRTJo0qdh51alTBwA3NzcmT57M5MmTSUtLY9WqVTz33HP06tWLhISEUkcX3Oh
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"outlier_detection(directory_path=\"10\")"
]
},
{
"cell_type": "markdown",
"id": "42f8171c-e80d-4faa-b278-21fcbe3b242c",
"metadata": {},
"source": [
"### 1. customerplus_clean"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "47f98721-53dd-4f8f-85ac-88043ee8d967",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>18441</td>\n",
" <td>11139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9231</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9870</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity tenant_id \\\n",
"0 821538 139 NaN NaN 0 875 \n",
"1 809126 1063 NaN NaN 0 875 \n",
"2 11005 1063 NaN NaN 0 875 \n",
"3 17663 12731 NaN NaN 0 875 \n",
"4 38100 12395 NaN NaN 0 875 \n",
"5 307036 139 NaN NaN 0 875 \n",
"6 2946 1063 NaN NaN 0 875 \n",
"7 18441 11139 NaN NaN 0 875 \n",
"8 9231 139 NaN NaN 0 875 \n",
"9 9870 139 NaN NaN 0 875 \n",
"\n",
" is_partner deleted_at gender is_email_true ... purchase_count \\\n",
"0 False NaN 2 True ... 0 \n",
"1 False NaN 2 True ... 0 \n",
"2 False NaN 2 False ... 14 \n",
"3 False NaN 0 False ... 1 \n",
"4 False NaN 0 True ... 1 \n",
"5 False NaN 2 True ... 1 \n",
"6 False NaN 2 False ... 8 \n",
"7 False NaN 2 False ... 3 \n",
"8 False NaN 0 True ... 1 \n",
"9 False NaN 2 True ... 1 \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"0 NaN NaN other 0 0 \n",
"1 NaN fr other 0 0 \n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"5 NaN NaN other 0 0 \n",
"6 NaN fr other 0 0 \n",
"7 NaN fr other 0 0 \n",
"8 NaN NaN female 1 0 \n",
"9 NaN NaN other 0 0 \n",
"\n",
" gender_other country_fr has_tags number_compagny \n",
"0 1 NaN 0 10 \n",
"1 1 1.0 0 10 \n",
"2 1 1.0 0 10 \n",
"3 0 1.0 0 10 \n",
"4 0 1.0 0 10 \n",
"5 1 NaN 0 10 \n",
"6 1 1.0 0 10 \n",
"7 1 1.0 0 10 \n",
"8 0 NaN 0 10 \n",
"9 1 NaN 0 10 \n",
"\n",
"[10 rows x 29 columns]"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# visu de la table\n",
"customerplus_clean_spectacle.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "738e063b-f84e-4a00-b35d-6d1d657e3c09",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 1523688\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"street_id 0\n",
"structure_id 1460624\n",
"mcp_contact_id 729167\n",
"fidelity 0\n",
"tenant_id 0\n",
"is_partner 0\n",
"deleted_at 1523688\n",
"gender 0\n",
"is_email_true 0\n",
"opt_in 0\n",
"last_buying_date 762879\n",
"max_price 762879\n",
"ticket_sum 0\n",
"average_price 667328\n",
"average_purchase_delay 762915\n",
"average_price_basket 762915\n",
"average_ticket_basket 762915\n",
"total_price 95551\n",
"purchase_count 0\n",
"first_buying_date 762879\n",
"country 429486\n",
"gender_label 0\n",
"gender_female 0\n",
"gender_male 0\n",
"gender_other 0\n",
"country_fr 429486\n",
"has_tags 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de NaN\n",
"print(\"Nombre de lignes de la table : \",customerplus_clean_spectacle.shape[0])\n",
"customerplus_clean_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 148,
"id": "296e51c5-30ae-4ade-ba3d-4ba4981a8758",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>customer_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>45264</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>35313</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>216105</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>388731</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>101642</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny customer_id\n",
"0 10 45264\n",
"1 11 35313\n",
"2 12 216105\n",
"3 13 388731\n",
"4 14 101642"
]
},
"execution_count": 148,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de clients de la compagnie (pas les clients visés par une campagne mais ceux ayant acheté)\n",
"# on rq le nbre de clients est très variable : de 35k à 389k\n",
"company_nb_clients = customerplus_clean_spectacle[customerplus_clean_spectacle[\"purchase_count\"]>0].groupby(\"number_compagny\")[\"customer_id\"].count().reset_index()\n",
"company_nb_clients"
]
},
{
"cell_type": "code",
"execution_count": 151,
"id": "5845aedf-78ca-4d3d-ad61-3561d4fc1886",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHFCAYAAAAUpjivAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABREUlEQVR4nO3dd1RU1/428OdIGYqAAsqAIqBiC2LDqBhFFFRi14jGXEssMdZLwFhijJir2BL7VVOMNYrJVbyxiw1jsCBK7InmYoegiCCIQ9vvH76cnyOgDAwOHJ/PWrOWZ58953xnDwyP+5SRhBACRERERApVydAFEBEREZUlhh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGnTK2bt06SJIEMzMz3Lx5s8D6Dh06wMPDwwCVAcOGDUPlypUNsu9XkSQJoaGhr3WfHTp0QIcOHV57HXv27Hltr9UQ4+rq6oru3bu/1n1S+Xfjxg1IkoR169aV+b4M8XNvCPfu3UNoaCji4uLKdD+v873TF4ad10Sj0eDzzz83dBmkoxMnTmDkyJFluo89e/Zg1qxZZboPovLG0dERJ06cQLdu3QxdimLcu3cPs2bNKvOwUxEx7LwmXbt2xebNm/H7778buhS9EEIgMzPT0GWUudatW6NmzZqGLoNIcVQqFVq3bo1q1aoZuhR6AzDsvCaTJ0+GnZ0dpkyZ8sq+T58+xbRp0+Dm5gZTU1PUqFED48aNw6NHj7T65R8e2LVrF5o1awZzc3M0bNgQu3btAvDsEFrDhg1haWmJt99+G2fOnCl0f5cuXUKnTp1gaWmJatWqYfz48Xjy5IlWH0mSMH78eKxevRoNGzaESqXC+vXrAQDXrl3DoEGDUL16dahUKjRs2BD//ve/izUuaWlpGDVqFOzs7FC5cmV07doVf/75Z6F9S7OfvLw8LF++HE2bNoW5uTmqVKmC1q1b45dffnnp8wqb/k5MTMTo0aNRs2ZNmJqaws3NDbNmzUJOTo7cJ3+a96uvvsKiRYvg5uaGypUro02bNjh58qTcb9iwYfJrkCRJfty4cQMA8PPPP6NVq1awsbGBhYUFateujeHDh7/y9ZbHcd23bx+aN28Oc3NzNGjQAD/88IPW+vv372Ps2LFo1KgRKleujOrVq6Njx4749ddfC2zr3r17CAwMhJWVFWxsbDBgwACcPHmywNR6YYcmgWfj7urqqtWWlZWF2bNno0GDBlCpVKhWrRo+/PBD3L9/v1hjcerUKfTo0QN2dnYwMzNDnTp1EBQUpNXn+PHj6NSpE6ysrGBhYQFvb2/s3r1bq0/+oe/Dhw/L76G1tTWGDBmCjIwMJCYmIjAwEFWqVIGjoyMmTZqE7Oxs+fn5P3sLFizAnDlzUKtWLZiZmcHLywuHDh3S2tf169fx4Ycfwt3dHRYWFqhRowZ69OiBCxcuFHh9ly5dQufOnWFhYYFq1aph3Lhx2L17NyRJwtGjR7XG3MPDAzExMWjXrp38cztv3jzk5eUVqPPFQyEV4fPkVb+XR48ehSRJ2LRpE4KDg6FWq2Fubg4fHx+cO3euwPbOnDmDnj17wtbWFmZmZmjWrBl++umnAv3u3r2Ljz76CM7OzjA1NYWTkxPee+89/P333zh69ChatmwJAPjwww/lz5L8z68zZ85g4MCBcHV1hbm5OVxdXfH+++8XenrFy/bzMqUZ0zInqEytXbtWABAxMTFi6dKlAoA4dOiQvN7Hx0e89dZb8nJeXp7o0qWLMDY2FjNmzBAHDhwQX331lbC0tBTNmjUTT58+lfu6uLiImjVrCg8PD7FlyxaxZ88e0apVK2FiYiK++OIL0bZtW7F9+3YREREh6tWrJxwcHMSTJ0/k5w8dOlSYmpqKWrVqiTlz5ogDBw6I0NBQYWxsLLp37671OgCIGjVqCE9PT7F582Zx+PBhcfHiRXHp0iVhY2MjGjduLDZs2CAOHDggQkJCRKVKlURoaOhLxyYvL0/4+voKlUol73/mzJmidu3aAoCYOXOm3Lc0+xFCiMGDBwtJksTIkSPFf//7X7F3714xZ84csXTpUq33wsfHp8Drfr6OhIQE4ezsLFxcXMQ333wjDh48KP71r38JlUolhg0bJveLj48XAISrq6vo2rWr2LFjh9ixY4do3LixqFq1qnj06JEQQojr16+L9957TwAQJ06ckB9Pnz4V0dHRQpIkMXDgQLFnzx5x+PBhsXbtWjF48OAKNa75P6eNGjUSGzZsEPv37xf9+/cXAERUVJTc7+rVq2LMmDEiPDxcHD16VOzatUuMGDFCVKpUSRw5ckTu9+TJE9GwYUNhY2Mjli9fLvbv3y8mTpwoatWqJQCItWvXvvQ9FeLZz76Li4u8nJubK7p27SosLS3FrFmzRGRkpPj+++9FjRo1RKNGjbR+bwqzb98+YWJiIjw9PcW6devE4cOHxQ8//CAGDhwo9zl69KgwMTERLVq0EFu3bhU7duwQnTt3FpIkifDwcLlf/meGm5ubCAkJEQcOHBDz588XRkZG4v333xfNmzcXs2fPFpGRkWLKlCkCgPj666/l5+f/7Dk7O4t33nlHbNu2Tfz888+iZcuWwsTERERHR8t9o6KiREhIiPjPf/4joqKiREREhOjdu7cwNzcXV69elfvdu3dP2NnZiVq1aol169aJPXv2iMGDBwtXV1cBQOv98fHxEXZ2dsLd3V2sXr1aREZGirFjxwoAYv369QXqfP79qgifJ8X5vTxy5Ij8HvTq1Uvs3LlTbNq0SdStW1dYW1uLv/76S+57+PBhYWpqKtq1aye2bt0q9u3bJ4YNG1ZgbO7cuSMcHR2Fvb29WLRokTh48KDYunWrGD58uLhy5YpITU2Vf3Y+//xz+bPk9u3bQgghfv75Z/HFF1+IiIgIERUVJcLDw4WPj4+oVq2auH//frH3Uxbv3evAsFPGng87Go1G1K5dW3h5eYm8vDwhRMGws2/fPgFALFiwQGs7W7duFQDEt99+K7e5uLgIc3NzcefOHbktLi5OABCOjo4iIyNDbt+xY4cAIH755Re5bejQoQKA1h8mIYSYM2eOACCOHz8utwEQNjY24uHDh1p9u3TpImrWrClSU1O12sePHy/MzMwK9H/e3r17X7r/5z+cSrOfY8eOCQBi+vTpRfYRonhhZ/To0aJy5cri5s2bWv2++uorAUBcunRJCPF/HwaNGzcWOTk5cr/Tp08LAGLLli1y27hx40Rh/+/I32Z+MCqu8jauLi4uwszMTGvMMjMzha2trRg9enSRz8vJyRHZ2dmiU6dOok+fPnL7qlWrBADx3//+V6v/qFGjShx2tmzZIgCIbdu2afWLiYkRAMTKlStf+hrr1Kkj6tSpIzIzM4vs07p1a1G9enXx+PFjrdfo4eEhatasKX8m5H9mTJgwQev5vXv3FgDEokWLtNqbNm0qmjdvLi/n/+w5OTlp1ZOWliZsbW2Fn59fkTXm5OSIrKws4e7uLj755BO5/dNPPxWSJMk/3/m6dOlSaNgBIE6dOqXVt1GjRqJLly4F6nz+/aoInyfF+b3MDzvNmzeX31chhLhx44YwMTERI0eOlNsaNGggmjVrJrKzs7W20b17d+Ho6Chyc3OFEEIMHz5cmJiYiMuXLxe53/yf1+fHtCg5OTkiPT1dWFpaao1Zcfaj7/fudeBhrNfI1NQUs2fPxpkzZwqdogSAw4cPA3g2zf68/v37w9LSssA0dNOmTVGjRg15uWHDhgCeTSVbWFgUaC9syvKDDz7QWh40aBAA4MiRI1rtHTt2RNWqVeXlp0+f4tChQ+jTpw8sLCyQk5MjP9599108ffpU65DNi/K3X9T+9bWfvXv3AgDGjRtXZJ/i2rVrF3x9feHk5KRVR0BAAAAgKipKq3+3bt1gZGQkL3t6egIo/H14Uf6UdGBgIH766SfcvXu3WDWWx3Ft2rQpatWqJS+bmZmhXr16BcZh9erVaN68OczMzGBsbAwTExMcOnQIV65c0Xp9VlZW6Nmz50tfny527dqFKlWqoEePHlr
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_nb_clients[\"number_compagny\"], company_nb_clients[\"customer_id\"]/1000)\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Nombre de clients (milliers)\")\n",
"plt.title(\"Nombre de clients de chaque compagnie de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 152,
"id": "fd11c547-7128-4ef6-ad7b-4b7c2a30cd9e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>max_price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>13823.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>5000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>3180.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>456.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny max_price\n",
"0 10 13823.0\n",
"1 11 108.0\n",
"2 12 5000.0\n",
"3 13 3180.0\n",
"4 14 456.0"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# prix maximal payé par un client pour chaque compagnie - très variable : de 108 à 13823\n",
"\n",
"company_max_price = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"max_price\"].max().reset_index()\n",
"company_max_price"
]
},
{
"cell_type": "code",
"execution_count": 153,
"id": "b8f8f162-4153-4cfe-bfaa-d981d414510d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAAHGCAYAAAC7NbWGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABc0UlEQVR4nO3dd1gUV/828HulCQgrRcBVBCyxgT0iGINGURNKNBoLilhiLyFifRJjSWKPGOUxamKJJWJMwMdYUGxEI1hQYkMTE6yIGMFFLIDLef/wx7yuC8joIgven+vaS/fMmZnvDLvLzZmyCiGEABERERGVWKWyLoCIiIiovGGAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiekH/+c9/4OjoiL///rusSyGiV4wBSk/Wrl0LhUIhPYyNjVGzZk0MGjQIN27cKNEyBg4cCFdX19IttAwV7KPLly+/8nVfvnwZCoUCa9eufaH5Dx48CIVCgYMHD+q1rrKwc+dOzJgxo8zWX/A6OHHiRJnVoA+7du1CREQEtm/fjjp16pR1OVSKFArFK3nPuLq6YuDAgaW+nrL24MEDzJgx45V8npbmz44BSs/WrFmD+Ph4xMbGYujQodi0aRPatWuH+/fvP3feadOmITo6+hVUWTb8/PwQHx+P6tWrl3Upr7WdO3di5syZZV1GuXbt2jUMGjQImzdvxptvvlnW5VApi4+Px0cffVTWZVQYDx48wMyZM8v9H6TGZV1ARePu7o5WrVoBADp06ACNRoMvvvgCW7duRb9+/Qqd58GDB7CwsKjwf8VWq1YN1apVK+syiCQF7z25nJ2dkZaWVgoVvbwX3SYqWps2bcq6BDJAHIEqZQVvvCtXrgB4cpiuSpUqOHPmDDp37gwrKyt07NhRmvb0IbzIyEgoFApERERoLXP69OkwMjJCbGxsset2dXWFv78/tm/fjubNm8Pc3BwNGzbE9u3bATw5lNKwYUNYWlqidevWOodUTpw4gT59+sDV1RXm5uZwdXVF3759pW0BACEE3nvvPdjZ2eHq1atS+4MHD9C4cWM0bNhQGn0r7BBe+/bt4e7ujvj4eHh7e0vrWbNmDQBgx44daNGiBSwsLODh4YGYmBitGi9duoRBgwahXr16sLCwQI0aNRAQEIAzZ84Uu2+Kc+HCBXTt2hUWFhawt7fHiBEjcO/evUL77t27Fx07doS1tTUsLCzQtm1b7Nu3r9jl3759G6amppg2bVqh61YoFFiyZInUlpaWhuHDh6NmzZowNTWFm5sbZs6cicePH0t9Cg5RLly4EIsWLYKbmxuqVKkCLy8vJCQkSP0GDhyI//73vwCgdci54GcihMCyZcvQrFkzmJubw8bGBj179sQ///xTon13+PBhdOzYEVZWVrCwsIC3tzd27NhRaN/MzEwMGjQItra2sLS0REBAgM56Tp06BX9/fzg4OMDMzAwqlQp+fn64fv261KekNRe81n777Td4e3vDwsICgwcPRrdu3eDi4oL8/HydGj09PdGiRQvZ6yrMjBkzoFAocOrUKXzwwQewtraGUqlE//79cfv2ba2+mzdvRufOnVG9enXpfTtlyhSdkeziPk+KcuHCBfTt2xeOjo4wMzNDrVq1MGDAAOTk5Eh9zp49i/fffx82NjaoXLkymjVrhh9++EFrOQWHtX/88UdMnjwZ1atXR5UqVRAQEIBbt27h3r17GDZsGOzt7WFvb49BgwYhOztbaxkKhQJjxozBihUr8MYbb8DMzAyNGjVCZGSkVr/bt29j1KhRaNSoEapUqQIHBwe88847OHTokM72Xb9+HT179oSVlRWqVq2Kfv364fjx4zqH8Av23aVLl/Dee++hSpUqcHZ2RlhYmNa+KKjz2cNAJXlfFiUvLw+TJk2Ck5MTLCws8NZbb+HYsWOF9n2Z9ezfvx/t27eHnZ0dzM3NUatWLfTo0QMPHjwA8P8/N+bPn4+vvvoKtWrVQuXKldGqVatCP8f++usvBAUFSe/Hhg0bSp8nT7t79y7CwsJQu3ZtmJmZwcHBAe+99x4uXLiAy5cvS39Iz5w5U/oMKjh0Keczvbj1FOdl9qkWQXqxZs0aAUAcP35cq/2bb74RAMTKlSuFEEKEhIQIExMT4erqKubMmSP27dsndu/eLU1zcXHRmn/EiBHC1NRUWu6+fftEpUqVxGefffbcmlxcXETNmjWFu7u72LRpk9i5c6fw9PQUJiYm4vPPPxdt27YVUVFRIjo6WrzxxhvC0dFRPHjwQJp/y5Yt4vPPPxfR0dEiLi5OREZGCh8fH1GtWjVx+/Ztqd+///4ratasKTw9PUVubq60Lebm5uL06dM6+yglJUVq8/HxEXZ2dqJ+/fpi1apVYvfu3cLf318AEDNnzhQeHh5S7W3atBFmZmbixo0b0vxxcXEiLCxM/PzzzyIuLk5ER0eLbt26CXNzc3HhwgWpX0pKigAg1qxZU+w+S0tLEw4ODqJGjRpizZo1YufOnaJfv36iVq1aAoA4cOCA1Hf9+vVCoVCIbt26iaioKPHrr78Kf39/YWRkJPbu3Vvserp37y6cnZ2FRqPRap80aZIwNTUV//77rxBCiJs3bwpnZ2fh4uIiVqxYIfbu3Su++OILYWZmJgYOHKizfa6urqJr165i69atYuvWrcLDw0PY2NiIu3fvCiGEuHTpkujZs6cAIOLj46XHo0ePhBBCDB06VJiYmIiwsDARExMjfvzxR9GgQQPh6Ogo0tLSit2mgwcPChMTE9GyZUuxefNmsXXrVtG5c2ehUChEZGSk1K/gdeDs7CwGDx4sdu3aJVauXCkcHByEs7OzyMzMFEIIkZ2dLezs7ESrVq3ETz/9JOLi4sTmzZvFiBEjxPnz56XllbRmHx8fYWtrK5ydncXSpUvFgQMHRFxcnPjf//4nAIjY2Fit7UlOThYAxJIlS2SvqzDTp08XAISLi4uYOHGi2L17t1i0aJGwtLQUzZs3l947QgjxxRdfiPDwcLFjxw5x8OBBsXz5cuHm5iY6dOigtcziPk8Kk5SUJKpUqSJcXV3F8uXLxb59+8SGDRtEr169RFZWlhBCiAsXLggrKytRp04dsW7dOrFjxw7Rt29fAUDMmzdPWtaBAwek7Rk4cKCIiYkRy5cvF1WqVBEdOnQQvr6+YsKECWLPnj1i3rx5wsjISIwdO1arnoLXQaNGjcSmTZvEtm3bRNeuXQUAsWXLFqnfhQsXxMiRI0VkZKQ4ePCg2L59uxgyZIioVKmS1nsyOztb1K1bV9ja2or//ve/Yvfu3eKTTz4Rbm5uOu//kJAQYWpqKho2bCgWLlwo9u7dKz7//HOhUCjEzJkzdeqcPn269Lyk78uihISECIVCISZOnCj27NkjFi1aJGrUqCGsra1FSEiIXtaTkpIiKleuLHx9fcXWrVvFwYMHxcaNG0VwcLD0Hiv43HB2dhZvvfWW+OWXX8SWLVvEm2++KUxMTMSRI0ek5Z07d04olUrh4eEh1q1bJ/bs2SPCwsJEpUqVxIwZM6R+WVlZonHjxsLS0lLMmjVL7N69W/zyyy/i448/Fvv37xePHj0SMTExAoAYMmSI9Bl06dIlIUTJP9Oft57S+tk9jQFKTwp+KSQkJIi8vDxx7949sX37dlGtWjVhZWUlfbiGhIQIAGL16tU6yygsQD169Eg0b95cuLm5ifPnzwtHR0fh4+MjHj9+/NyaXFxchLm5ubh+/brUlpSUJACI6tWri/v370vtW7duFQDEtm3bilze48ePRXZ2trC0tBTffPON1rTDhw8LY2NjERoaKlavXi0AiO+//77QffRsgAIgTpw4IbXduXNHGBkZCXNzc62wVFD707/QCqsxNzdX1KtXT3zyySdSe0kD1OTJk4VCoRBJSUla7b6+vloB6v79+8LW1lYEBARo9dNoNKJp06aidevWxa5n27ZtAoDYs2ePVu0qlUr06NFDahs+fLioUqWKuHLlitb8CxcuFADEuXPntLbPw8ND67Vx7NgxAUBs2rRJahs9erQ
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_max_price[\"number_compagny\"], company_max_price[\"max_price\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Prix maximal d'un billet vendu\")\n",
"plt.title(\"Prix maximal de vente observé par compagnie de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "bff23e5d-d7ed-4092-ae3c-5df503e54a6d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 762879.000000\n",
"mean 0.079068\n",
"std 3.969729\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 3334.000000\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"first_buying_date\"].isna()][\"purchase_count\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "89466dbd-14d2-4ede-9ca0-b9c32b764e25",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 7.608090e+05\n",
"mean 3.863940e+00\n",
"std 1.685825e+03\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 1.469325e+06\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle[~customerplus_clean_spectacle[\"first_buying_date\"].isna()][\"purchase_count\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "5f9feae4-35f4-43b6-adeb-f75773900a2d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343121</th>\n",
" <td>4667645</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534181.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343122</th>\n",
" <td>4667649</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534177.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343123</th>\n",
" <td>4667660</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534165.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343124</th>\n",
" <td>4667679</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534132.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343125</th>\n",
" <td>4667686</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1567949.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1523688 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 821538 139 NaN NaN 0 \n",
"1 809126 1063 NaN NaN 0 \n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"343121 4667645 122 NaN 1534181.0 0 \n",
"343122 4667649 122 NaN 1534177.0 0 \n",
"343123 4667660 122 NaN 1534165.0 0 \n",
"343124 4667679 122 NaN 1534132.0 0 \n",
"343125 4667686 122 NaN 1567949.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 875 False NaN 2 True ... \n",
"1 875 False NaN 2 True ... \n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"... ... ... ... ... ... ... \n",
"343121 862 False NaN 2 True ... \n",
"343122 862 False NaN 2 True ... \n",
"343123 862 False NaN 0 True ... \n",
"343124 862 False NaN 2 True ... \n",
"343125 862 False NaN 0 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"0 NaN NaN other 0 0 \n",
"1 NaN fr other 0 0 \n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"... ... ... ... ... ... \n",
"343121 NaN NaN other 0 0 \n",
"343122 NaN NaN other 0 0 \n",
"343123 NaN NaN female 1 0 \n",
"343124 NaN NaN other 0 0 \n",
"343125 NaN NaN female 1 0 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"0 1 NaN 0 10 False \n",
"1 1 1.0 0 10 False \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"343121 1 NaN 0 14 False \n",
"343122 1 NaN 0 14 False \n",
"343123 0 NaN 0 14 False \n",
"343124 1 NaN 0 14 False \n",
"343125 0 NaN 0 14 False \n",
"\n",
"[1523688 rows x 30 columns]"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"first_buying_date\"].isna()==False\n",
"customerplus_clean_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "cec4f1eb-cec8-409d-8b2c-1e01f1bf81ff",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338933</th>\n",
" <td>3625705</td>\n",
" <td>648752</td>\n",
" <td>NaN</td>\n",
" <td>1253864.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338954</th>\n",
" <td>3627626</td>\n",
" <td>636890</td>\n",
" <td>NaN</td>\n",
" <td>1253887.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338959</th>\n",
" <td>3628124</td>\n",
" <td>653042</td>\n",
" <td>NaN</td>\n",
" <td>1253899.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338986</th>\n",
" <td>3631189</td>\n",
" <td>648423</td>\n",
" <td>NaN</td>\n",
" <td>1253928.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339039</th>\n",
" <td>3635380</td>\n",
" <td>659417</td>\n",
" <td>NaN</td>\n",
" <td>1253975.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26246 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"5 307036 139 NaN NaN 0 \n",
"6 2946 1063 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"338933 3625705 648752 NaN 1253864.0 0 \n",
"338954 3627626 636890 NaN 1253887.0 0 \n",
"338959 3628124 653042 NaN 1253899.0 0 \n",
"338986 3631189 648423 NaN 1253928.0 0 \n",
"339039 3635380 659417 NaN 1253975.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"5 875 False NaN 2 True ... \n",
"6 875 False NaN 2 False ... \n",
"... ... ... ... ... ... ... \n",
"338933 862 False NaN 0 True ... \n",
"338954 862 False NaN 0 True ... \n",
"338959 862 False NaN 0 True ... \n",
"338986 862 False NaN 0 True ... \n",
"339039 862 False NaN 1 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"5 NaN NaN other 0 0 \n",
"6 NaN fr other 0 0 \n",
"... ... ... ... ... ... \n",
"338933 NaN fr female 1 0 \n",
"338954 NaN fr female 1 0 \n",
"338959 NaN fr female 1 0 \n",
"338986 NaN fr female 1 0 \n",
"339039 NaN fr male 0 1 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"5 1 NaN 0 10 False \n",
"6 1 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"338933 0 1.0 0 14 False \n",
"338954 0 1.0 0 14 False \n",
"338959 0 1.0 0 14 False \n",
"338986 0 1.0 0 14 False \n",
"339039 0 1.0 0 14 False \n",
"\n",
"[26246 rows x 30 columns]"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# attention, on a des cas où le client a pas de première date d'achat alors qu'il compte plusieurs achats\n",
"# on peut donc avoir une date de première achat valant NaN non pas parce que l'individu n'a jamais acheté \n",
"# mais simplement car elle n'est pas renseignée\n",
"\n",
"customerplus_clean_spectacle[(customerplus_clean_spectacle[\"already_purchased\"]==False) &\n",
"(customerplus_clean_spectacle[\"purchase_count\"]>0)]"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "b5904039-a967-47d5-ba13-1b805bcd76ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [customer_id, street_id, structure_id, mcp_contact_id, fidelity, tenant_id, is_partner, deleted_at, gender, is_email_true, opt_in, last_buying_date, max_price, ticket_sum, average_price, average_purchase_delay, average_price_basket, average_ticket_basket, total_price, purchase_count, first_buying_date, country, gender_label, gender_female, gender_male, gender_other, country_fr, has_tags, number_compagny, already_purchased]\n",
"Index: []\n",
"\n",
"[0 rows x 30 columns]"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# cpdt, si un client a un nombre d'achats nul, il a bien une date de premier achat valant NaN, OK\n",
"customerplus_clean_spectacle[(customerplus_clean_spectacle[\"already_purchased\"]) &\n",
"(customerplus_clean_spectacle[\"purchase_count\"]==0)]"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "e940bfcf-29cc-4d4c-ae5e-e2a8cecf28af",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"number_compagny already_purchased\n",
"10 False 0.234840\n",
" True 0.236236\n",
"11 False 0.141746\n",
" True 0.002804\n",
"12 False 0.485950\n",
" True 0.244779\n",
"13 False 0.084057\n",
" True 0.177213\n",
"14 False 0.885553\n",
" True 0.308859\n",
"Name: opt_in, dtype: float64"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# différence de consentement aux campagnes de mails (opt in)\n",
"\n",
"# en se restreignant au personnes n'ayant pas acheté, on a quand même des individus acceptant d'être ciblés\n",
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"first_buying_date\"].isna()][\"opt_in\"].unique()\n",
"\n",
"# taux de consentement variés\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"] > 0\n",
"customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"])[\"opt_in\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "a5e79beb-9ba0-4c89-b084-e27ff0d65dcc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" <td>0.234840</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" <td>0.236236</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>False</td>\n",
" <td>0.141746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>True</td>\n",
" <td>0.002804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>False</td>\n",
" <td>0.485950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>True</td>\n",
" <td>0.244779</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>False</td>\n",
" <td>0.084057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>True</td>\n",
" <td>0.177213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" <td>0.885553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" <td>0.308859</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny already_purchased opt_in\n",
"0 10 False 0.234840\n",
"1 10 True 0.236236\n",
"2 11 False 0.141746\n",
"3 11 True 0.002804\n",
"4 12 False 0.485950\n",
"5 12 True 0.244779\n",
"6 13 False 0.084057\n",
"7 13 True 0.177213\n",
"8 14 False 0.885553\n",
"9 14 True 0.308859"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_graph = customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"])[\"opt_in\"].mean().reset_index()\n",
"df_graph"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "5be56c41-7697-481a-84ea-f77a2041484b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIhCAYAAABwnkrAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABeY0lEQVR4nO3deZyN9f//8ecx+2aYGWbRmBkZa9aU7ZN9J0lSKEsKSVJkSZmxjaXCRyp8ypCy9Uk+Eso62QmDkCj70giNYYwxM9fvj35zvp1rLHMyM2eMx/12O7eb876213XNe5an93W9j8UwDEMAAAAAAKtCji4AAAAAAPIbghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOCEpALZs+eLYvFYn05OzvrgQceUI8ePXT69OkcPVZMTIyWLFlyV/s4duyYLBaLZs+enSM13W8++uijAnntcqJv3cuio6NlsVhs2ho0aKAGDRrYtFksFkVHR+ddYbnEUeeR+fPy2LFjeX5s2Keg9HUguwhKQC6KjY3Vli1btGrVKr300kuaP3++HnvsMV29ejXHjnG//zGbHxCUCqYXX3xRW7ZsueN6W7Zs0YsvvpgHFQGORV/H/cbZ0QUABdlDDz2kGjVqSJIaNmyo9PR0jR49WkuWLFGXLl3uat/Xrl2Th4dHTpQJ4CYeeOABPfDAA3dcr1atWnlQDeB49HXcbxhRAvJQ5i+Z48ePS5JGjhypmjVrys/PT4ULF1b16tX16aefyjAMm+3Cw8PVpk0bLV68WNWqVZO7u7tGjhwpi8Wiq1evas6cOdbb/My3BZmdOXNGHTt2lI+Pj3x9ffXMM8/o3LlzN133xx9/VNu2beXn5yd3d3dVq1ZNixYtyta5Xr9+XaNGjVL58uXl7u4uf39/NWzYUJs3b7auk5KSomHDhikiIkKurq4qUaKEXnnlFf355583Pf+VK1eqevXq8vDwULly5TRr1iyb9ZKTkzVo0CBFRETI3d1dfn5+qlGjhubPn2/3eWXeDrRu3Tq9/PLLCggIkL+/v9q3b68zZ87Y1LZ//37FxcVZvwbh4eHW5ZcvX7bWlHmOAwYMyDKqaLFY1K9fP8XGxqps2bLy8PBQjRo1tHXrVhmGoXfffVcRERHy9vZWo0aNdOTIkSzXfPXq1WrcuLEKFy4sT09P1a1bV2vWrLFZJ/N2sv3796tTp07y9fVVYGCgXnjhBSUmJtrUY2/fym5/vtXtO+Hh4erevfttj5F5m+i7776rCRMmKDw8XB4eHmrQoIF++eUX3bhxQ0OHDlVISIh8fX315JNPKiEhwWYfCxcuVLNmzRQcHCwPDw+VL19eQ4cOzfI1udmtdzdjPp/s9h3pr++TgQMHKigoSJ6enqpXr5527tyZrWshSR9//LGqVKkib29v+fj4qFy5cnrrrbds1jl37px69+6tBx54QK6uroqIiNDIkSOVlpZ2x/3/9NNPeuKJJ1S0aFG5u7uratWqmjNnjs0669evl8Vi0fz58zV8+HCFhISocOHCatKkiQ4dOnTHY9xKdvrz+fPn1atXL4WGhsrNzU3FihVT3bp1tXr16jvu/+eff1anTp0UGBgoNzc3lSxZUl27dtX169f/0fnPmzdPQ4YMUXBwsLy9vfX444/r999/V1JSknr16qWAgAAFBASoR48eunLlis0+Mr//Z8yYoTJlysjNzU0VKlTQggULspxv3759VaFCBXl7e6t48eJq1KiRNmzYkOX8Tp06pQ4dOsjHx0dFihRRly5dtGPHjiy3WXfv3l3e3t46cuSIWrVqJW9vb4WGhmrgwIE21yKzTvP3bnb7V3b6KpDfMKIE5KHMP26LFSsm6a8/+nr37q2SJUtKkrZu3apXX31Vp0+f1ogRI2y23bVrlw4ePKi3335bERER8vLyUrt27dSoUSM1bNhQ77zzjiSpcOHCtzz+tWvX1KRJE505c0bjxo1TmTJl9O233+qZZ57Jsu66devUokUL1axZU9OnT5evr68WLFigZ555RsnJybf9Iy4tLU0tW7bUhg0bNGDAADVq1EhpaWnaunWrTpw4oTp16sgwDLVr105r1qzRsGHD9Nhjj2nv3r2KiorSli1btGXLFrm5uVn3uWfPHg0cOFBDhw5VYGCgPvnkE/Xs2VOlS5dWvXr1JElvvPGG5s6dqzFjxqhatWq6evWqfvrpJ124cOEfn9eLL76o1q1ba968eTp58qTefPNNPffcc1q7dq0k6euvv1aHDh3k6+urjz76SJKsdScnJ6t+/fo6deqU3nrrLVWuXFn79+/XiBEjtG/fPq1evdrmD/Fly5Zp9+7dGj9+vCwWi4YMGaLWrVurW7du+u233zRt2jQlJibqjTfe0FNPPaX4+Hjr9p9//rm6du2qJ554QnPmzJGLi4tmzJih5s2b67vvvlPjxo1tzuupp57SM888o549e2rfvn0aNmyYJFnD55YtW+zqW5J9/fluffjhh6pcubI+/PBD/fnnnxo4cKAef/xx1axZUy4uLpo1a5aOHz+uQYMG6cUXX9TSpUut2x4+fFitWrXSgAED5OXlpZ9//lkTJkzQ9u3brV/XnHCnviNJPXr00MKFCzV48GA1atRIBw4c0JNPPqnLly/fcf8LFixQ37599eqrr+q9995ToUKFdOTIER04cMC6zrlz5/Too4+qUKFCGjFihB588EFt2bJFY8aM0bFjxxQbG3vL/R86dEh16tRR8eLFNXXqVPn7++vzzz9X9+7d9fvvv2vw4ME267/11luqW7euPvnkE12+fFlDhgzR448/roMHD8rJycmua5fd/vz8889r165dGjt2rMqUKaM///xTu3btsvmev5k9e/boX//6lwICAjRq1ChFRkbq7NmzWrp0qVJTU+Xm5vaPzr9hw4aaPXu2jh07pkGDBqlTp05ydnZWlSpVNH/+fO3evVtvvfWWfHx8NHXqVJvtly5dqnXr1mnUqFHy8vLSRx99ZN2+Q4cOkqSLFy9KkqKiohQUFKQrV67o66+/VoMGDbRmzRrrf2ZcvXpVDRs21MWLFzVhwgSVLl1aK1euvOnPekm6ceOG2rZtq549e2rgwIH64YcfNHr0aPn6+t72eze7/Ss7fRXIlwwAOS42NtaQZGzdutW4ceOGkZSUZCxbtswoVqyY4ePjY5w7dy7LNunp6caNGzeMUaNGGf7+/kZGRoZ1WVhYmOHk5GQcOnQoy3ZeXl5Gt27dslXXxx9/bEgy/ve//9m0v/TSS4YkIzY21tpWrlw5o1q1asaNGzds1m3Tpo0RHBxspKen3/I4n332mSHJ+M9//nPLdVauXGlIMiZOnGjTvnDhQkOSMXPmTGtbWFiY4e7ubhw/ftzadu3aNcPPz8/o3bu3te2hhx4y2rVrd8tj2nNemV/Dvn372qw3ceJEQ5Jx9uxZa1vFihWN+vXrZznWuHHjjEKFChk7duywaf/vf/9rSDKWL19ubZNkBAUFGVeuXLG2LVmyxJBkVK1a1aY/TJkyxZBk7N271zAMw7h69arh5+dnPP744zbHSU9PN6pUqWI8+uij1raoqKibXve+ffsa7u7uNsexp2+Z3a4/SzKioqKybBMWFnbH4x09etSQZFSpUsWmD2Zek7Zt29qsP2DAAEOSkZiYeNP9ZWRkGDdu3DDi4uIMScaePXusyzKv1d/Vr18/y9fafD7Z7Tv79+83JBlDhgyxWW/+/PmGpDtei379+hlFihS57Tq9e/c2vL29bb53DMMw3nvvPUOSsX///luex7PPPmu4ubkZJ06csNm2ZcuWhqenp/Hnn38ahmEY69atMyQZrVq1sllv0aJFhiRjy5Ytt60x83odPXrUMAz7+rO3t7cxYMCA2+7/Zho1amQUKVLESEhIuOU69p6/ud7Mvte/f3+b9nbt2hl+fn42bZIMDw8Pm98NaWlpRrly5YzSpUvfssa0tDTjxo0bRuPGjY0nn3zS2v7hhx8akowVK1bYrN+7d+8sP+u7detmSDIWLVpks26rVq2MsmXLZqnz730
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot groupé\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"categories = df_graph[\"number_compagny\"].unique()\n",
"bar_width = 0.35\n",
"bar_positions = np.arange(len(categories))\n",
"\n",
"# Grouper les données par label et créer les barres groupées\n",
"for label in df_graph[\"already_purchased\"].unique():\n",
" label_data = df_graph[df_graph['already_purchased'] == label]\n",
" values = [label_data[label_data['number_compagny'] == category]['opt_in'].values[0]*100 for category in categories]\n",
"\n",
" label_printed = \"purchased\" if label else \"no purchase\"\n",
" ax.bar(bar_positions, values, bar_width, label=label_printed)\n",
"\n",
" # Mise à jour des positions des barres pour le prochain groupe\n",
" bar_positions = [pos + bar_width for pos in bar_positions]\n",
"\n",
"# Ajout des étiquettes, de la légende, etc.\n",
"ax.set_xlabel('Numero de compagnie')\n",
"ax.set_ylabel('Part de consentement (%)')\n",
"ax.set_title('Part de consentement au mailing selon les compagnies')\n",
"ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Affichage du plot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 124,
"id": "32960530-cb46-4eeb-a6d2-1dcf5fb640d8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>gender_male</th>\n",
" <th>gender_female</th>\n",
" <th>gender_other</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.181580</td>\n",
" <td>0.343837</td>\n",
" <td>0.474583</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.179520</td>\n",
" <td>0.314443</td>\n",
" <td>0.506037</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.346380</td>\n",
" <td>0.454036</td>\n",
" <td>0.199584</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.318108</td>\n",
" <td>0.503092</td>\n",
" <td>0.178800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.331954</td>\n",
" <td>0.316181</td>\n",
" <td>0.351865</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny gender_male gender_female gender_other\n",
"0 10 0.181580 0.343837 0.474583\n",
"1 11 0.179520 0.314443 0.506037\n",
"2 12 0.346380 0.454036 0.199584\n",
"3 13 0.318108 0.503092 0.178800\n",
"4 14 0.331954 0.316181 0.351865"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# genre \n",
"\n",
"company_genders = customerplus_clean_spectacle.groupby(\"number_compagny\")[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
"company_genders"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "1b4a49d7-7bfe-4e80-aa7e-c9c6d4bc46e2",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHFCAYAAAAOmtghAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYJ0lEQVR4nO3dd1RU1/428GdoQxNQkaJS7QUVQSMYNUQFS9TYO9Z7LTEWNCrXKFgxJlFMwZKIaEIMMZZEQ1RiISqWiGiM2BuoIIoKVpCZ/f7hy/wyDugcGBgcn89as5azZ59zvmdP4fFUmRBCgIiIiMhAGOm7ACIiIiJdYrghIiIig8JwQ0RERAaF4YaIiIgMCsMNERERGRSGGyIiIjIoDDdERERkUBhuiIiIyKAw3BAREZFBYbjRgyNHjqBnz55wdXWFXC6Ho6Mj/Pz8MHXqVH2XJplMJkN4eLi+y9DwYl379u2DTCbDvn37ynS5UVFRiImJKdNlAOW3Pv929epVyGQyfPbZZ+W2THo9xMTEQCaT4erVq2W6HH187vUlKSkJ4eHhuH//fpkup7zeu/LGcFPOfvvtN/j7+yM3NxdLlizBrl27sHz5crRu3RpxcXH6Ls9gNW/eHIcOHULz5s3LdDnlFW6IKpKuXbvi0KFDcHZ21ncpBiMpKQlz584t83BjqEz0XcCbZsmSJfDw8MDOnTthYvJ/wz9gwAAsWbJEj5UZNhsbG7Rq1UrfZRAZpGrVqqFatWr6LoNIhVtuyll2djbs7e3Vgk0hIyPNtyMuLg5+fn6wsrKCtbU1goKCkJKSonr9wIEDMDU1xbRp09SmK9zUuGbNGlXbhQsXMGjQIDg4OEAul6NBgwb4+uuvtao7NzcX//nPf1C1alVYW1ujU6dOOH/+fJF9tVmOUqnEggULUK9ePVhYWMDOzg5NmjTB8uXLX1nL/fv3MXXqVHh6ekIul8PBwQFdunTB2bNni52muM3Zx44dQ/fu3VGlShWYm5vD29sbP/30k1qfwrHcu3cvxo0bB3t7e1StWhW9evXCzZs3Vf3c3d1x+vRpJCYmQiaTQSaTwd3dvdTre/bsWXTq1AmWlpawt7fH2LFj8eDBgyL7/vHHH2jfvj1sbGxgaWmJ1q1bY/fu3a9cBiBtXJcuXQoPDw9YW1vDz88Phw8fVnv92LFjGDBgANzd3WFhYQF3d3cMHDgQ165d05jX4cOH0bp1a5ibm6N69eoIDQ3FN998o7GpvLhdoO7u7hg+fLhaW2ZmJsaMGYOaNWvCzMwMHh4emDt3LgoKCrQaix9++AF+fn6wtraGtbU1mjVrpvZdAoDo6Gg0bdoU5ubmqFKlCnr27IkzZ86o9Rk+fDisra1x9uxZBAUFwcrKCs7Ozli8eLFq3d9++21YWVmhbt26WLdundr0hZ+9hIQEjBgxAlWqVIGVlRW6deuGy5cvq/VNSEhAjx49ULNmTZibm6N27doYM2YM7ty5o7F+v/zyC5o0aQK5XA5PT08sX74c4eHhkMlkav1kMhkmTJiA7777Dg0aNIClpSWaNm2K7du3F1nni7s2SvN5LI/PvTbfy8JxSUlJQa9evWBjYwNbW1sMGTIEt2/f1pjnq36zCx05cgTdunVD1apVYW5ujlq1amHy5MmqZX700UcAAA8PD9XvSeHvV1xcHAIDA+Hs7AwLCws0aNAAM2fOxKNHjyQt52VK895VCILK1ejRowUA8eGHH4rDhw+L/Pz8YvsuXLhQyGQyMXLkSLF9+3axefNm4efnJ6ysrMTp06dV/RYvXiwAiF9++UUIIcQ///wjLC0txZAhQ1R9Tp8+LWxtbYWXl5dYv3692LVrl5g6daowMjIS4eHhL61ZqVSKgIAAIZfLxcKFC8WuXbtEWFiY8PT0FABEWFiY5OVEREQIY2NjERYWJnbv3i127NghIiMjX1lLbm6uaNSokbCyshLz5s0TO3fuFJs2bRKTJk0Se/bsUfV7sa69e/cKAGLv3r2qtj179ggzMzPRpk0bERcXJ3bs2CGGDx8uAIi1a9eq+q1du1YAEJ6enuLDDz8UO3fuFN9++62oXLmyCAgIUPU7fvy48PT0FN7e3uLQoUPi0KFD4vjx46Va38zMTOHg4CBq1Kgh1q5dK+Lj48XgwYOFq6urxvp89913QiaTiffff19s3rxZbNu2Tbz33nvC2NhY/PHHH6Ue1ytXrggAwt3dXXTq1Els3bpVbN26VXh5eYnKlSuL+/fvq+a3ceNGMWfOHLFlyxaRmJgofvzxR9GuXTtRrVo1cfv2bVW/06dPC0tLS9GwYUOxYcMG8csvv4igoCDV+l25cqXY97SQm5ubGDZsmOp5RkaGcHFxEW5ubmLVqlXijz/+EPPnzxdyuVwMHz78peMghBCzZ88WAESvXr3Exo0bxa5du8TSpUvF7NmzVX0WLVokAIiBAweK3377Taxfv154enoKW1tbcf78eVW/YcOGCTMzM9GgQQOxfPlykZCQIEaMGCEAiNDQUFG3bl2xZs0asXPnTvHee+8JAOLYsWOq6Qs/ey4uLmLkyJHi999/F6tXrxYODg7CxcVF3Lt3T9V3xYoVIiIiQvz6668iMTFRrFu3TjRt2lTUq1dP7Xfm999/F0ZGRuKdd94RW7ZsERs3bhRvvfWWcHd3Fy/+SSh8v1u2bCl++uknER8fL9555x1hYmIiLl26pFHnv9+v0nwey+tzr833MiwsTAAQbm5u4qOPPhI7d+4US5cuFVZWVsLb21ttbLX9zd6xY4cwNTUVTZo0ETExMWLPnj0iOjpaDBgwQAghRHp6uvjwww8FALF582bV70lOTo4QQoj58+eLZcuWid9++03s27dPrFy5Unh4eKj9HmmznLJ47yoKhptydufOHfH2228LAAKAMDU1Ff7+/iIiIkI8ePBA1S8tLU2YmJiIDz/8UG36Bw8eCCcnJ9GvXz9Vm1KpFF26dBF2dnbin3/+EQ0bNhT169cXDx8+VPUJCgoSNWvWVH05Ck2YMEGYm5uLu3fvFlvz77//LgCI5cuXq7UvXLhQ4w+Otst57733RLNmzV4xWprmzZsnAIiEhISX9tMm3NSvX194e3uLZ8+eqU373nvvCWdnZ6FQKIQQ//flHz9+vFq/JUuWCAAiIyND1daoUSPRrl07jXpKur4zZswQMplMnDhxQq29Y8eOauvz6NEjUaVKFdGtWze1fgqFQjRt2lS0bNnypcvRZlwLw42Xl5coKChQtR89elQAEBs2bCh22oKCAvHw4UNhZWWl9jnq37+/sLCwEJmZmWp969evX+JwM2bMGGFtbS2uXbum1u+zzz4TANT+yLzo8uXLwtjYWAwePLjYPvfu3RMWFhaiS5cuau1paWlCLpeLQYMGqdqGDRsmAIhNmzap2p49eyaqVasmAKjCrxBCZGdnC2NjYxESEqJqK/zs9ezZU21ZBw8eFADEggULiqxRqVSKZ8+eiWvXrqn9x0cIIVq0aCFcXFxEXl6equ3BgweiatWqRYYbR0dHkZubq2rLzMwURkZGIiIiQqPOwvertJ/H8vrca/O9LAw3U6ZMUWuPjY0VAMT3338vhJD2m12rVi1Rq1Yt8eTJk2KX++mnn2p8B4pS+F4nJiYKAOLkyZOSlqPr966i4G6pcla1alXs378ff/31FxYvXowePXrg/PnzCA0NhZeXl2oT8s6dO1FQUIDg4GAUFBSoHubm5mjXrp3a7hWZTIb169ejUqVK8PX1xZUrV/DTTz/BysoKAPD06VPs3r0bPXv2hKWlpdr8unTpgqdPn2rsVvi3vXv3AgAGDx6s1j5o0CC151KW07JlS5w8eRLjx4/Hzp07kZubq9X4/f7776hbty46dOigVf/iXLx4EWfPnlWt04u1ZmRk4Ny5c2rTdO/eXe15kyZNAKDIXS0vKun67t27F40aNULTpk3V2l8c+6SkJNy9exfDhg1TWxelUolOnTrhr7/+KnKTdSEp49q1a1cYGxurnhc1Dg8fPsSMGTNQu3ZtmJiYwMTEBNbW1nj06JHarpu9e/eiffv2cHR0VLUZGxujf//+r6yjONu3b0d
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_male\"], label = \"Homme\")\n",
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_female\"], \n",
" bottom = company_genders[\"gender_male\"], label = \"Femme\")\n",
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients de chaque sexe\")\n",
"plt.title(\"Sexe des clients de chaque compagnie de spectacle\")\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 144,
"id": "ed6374e5-f36c-4f8e-9dba-602715b726f1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.996136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.994838</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.002119</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.831795</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.993978</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny country_fr\n",
"0 10 0.996136\n",
"1 11 0.994838\n",
"2 12 0.002119\n",
"3 13 0.831795\n",
"4 14 0.993978"
]
},
"execution_count": 144,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pays d'origine (France VS reste du monde)\n",
"\n",
"company_country_fr = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"country_fr\"].mean().reset_index()\n",
"company_country_fr"
]
},
{
"cell_type": "code",
"execution_count": 147,
"id": "8d95cdd9-2ab3-4c9a-8442-bb9b98e0dd18",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHGCAYAAACIDqqPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABINElEQVR4nO3deVxU9f7H8fcAAoKAggliCph7LrmUe7jhkqm3zCXNLe1qWl63NDOXvC7pLTMrtXJBy7pmmql5UzIzS819yy1LwQUXRMUVFc7vDx/Mz3FAZ2Bw9PR6Ph7zeDjf8z3nfM53zgxvzzJjMQzDEAAAgEl4uLsAAAAAVyLcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcIFddvHhRZcqUUZs2bZSenu7ucgAAfwOEm1vExsbKYrHI19dX8fHxdtPr1aun8uXLZ2vZX3zxhSZPnpzpNIvFolGjRmVrua6WMQaHDx+2tnXt2lWRkZE2/caNG6fFixffdXkvvfSSQkND9fnnn8vDI/u7W2RkpLp27Zrt+XPL7XUdPnxYFotFsbGxubreO+1PrnSvtud2FotFr7zyyj1dJ+5/P/30kywWi3766adcXY+79nt32LNnj0aNGmXzmZ8b7tVrl4Fwk4nU1FS9+eabLl3mnf4YrV+/Xj169HDp+lxp+PDh+uabb2zaHAk3H330kXbu3Klvv/1WPj4+uVjh/aNw4cJav369mjdvnqvruVfhBrifVKlSRevXr1eVKlXcXYpp7NmzR2+99Vauh5t7jXCTiaZNm+qLL77Qjh077sn6atSooYcffvierCs7HnnkEVWuXNnp+fr06aPff/9d+fPnd31R9ykfHx/VqFFDDz30kLtLAUwnMDBQNWrUUGBgoLtLwX2OcJOJwYMHKyQkREOGDLlr348++khPPvmkChUqJH9/f1WoUEETJ07U9evXrX3q1aun7777TvHx8bJYLNZHhsxOS+3evVutWrVSgQIF5Ovrq8cee0xz5syx6ZNxmO/LL7/UsGHDFB4ersDAQDVq1Ej79++36RsXF6dWrVrp4Ycflq+vr0qUKKGePXsqKSnprtt4+2kpi8WiS5cuac6cOdZtqVevnnX6iRMn1LNnTz388MPy9vZWVFSU3nrrLd24ceOu67p+/boGDx6ssLAw+fn5qU6dOtq4cWOmfR1dz7Rp01SpUiXly5dPAQEBKlOmjN5444271pKamqrRo0erbNmy8vX1VUhIiOrXr69169ZlOU9Wh7P/+OMPdejQQYUKFZKPj4/Kli2rjz76yKaPo6/n3fan7G7v8ePH1bZtWwUEBCgoKEjt2rXTiRMnMu27efNmtWzZUsHBwfL19VXlypX11Vdf3XUdknPj+tlnn6ls2bLy8/NTpUqVtGzZMpvpBw8eVLdu3VSyZEn5+fmpSJEiatGihXbt2mW3rH379qlp06by8/NTwYIF1atXLy1dutTuUHlWp0Dr1atns59LUkpKigYNGqSoqCh5e3urSJEi6tevny5duuTQWHz//fdq2LChgoKC5Ofnp7Jly2r8+PE2fZYsWaKaNWvKz89PAQEBiomJ0fr16236jBo1ShaLRTt37lSbNm0UFBSk4OBgDRgwQDdu3ND+/fvVtGlTBQQEKDIyUhMnTrSZP2Pf+/zzzzVgwACFhYUpb968io6O1rZt22z6bt68We3bt1dkZKTy5s2ryMhIPf/885meyv/ll19Us2ZN+fr6qkiRIho+fLhmzJhhd+o7MjJSTz/9tL7//ntVqVJFefPmVZkyZTRr1qxM67z91EZO9sd7td/f7X2ZcUlAXFycunXrpuDgYPn7+6tFixb666+/7Jb3ww8/qGHDhgoMDJSfn59q166tVatW2fXbt2+fnn/+eYWGhsrHx0fFihVT586dlZqaqtjYWLVp00aSVL9+fetnScbnlzN/N+60njvJyZjeiVeOl2BCAQEBevPNN/Wvf/1LP/74oxo0aJBl3z///FMdOnSwfrjt2LFDY8eO1b59+6xvzKlTp+qf//yn/vzzT7vTO5nZv3+/atWqpUKFCmnKlCkKCQnR559/rq5du+rkyZMaPHiwTf833nhDtWvX1owZM5SSkqIhQ4aoRYsW2rt3rzw9Pa111qxZUz169FBQUJAOHz6sSZMmqU6dOtq1a5fy5Mnj8PisX79eDRo0UP369TV8+HBJsv5P6sSJE3riiSfk4eGhESNG6JFHHtH69es1ZswYHT58WLNnz77jsl966SXNnTtXgwYNUkxMjHbv3q1nn31WFy5csOnn6Hr++9//qnfv3nr11Vf1zjvvyMPDQwcPHtSePXvuWMeNGzfUrFkzrV27Vv369VODBg1048YNbdiwQQkJCapVq5bD47Vnzx7VqlVLxYoV07vvvquwsDCtWLFCffv2VVJSkkaOHGnT/26v5532p+xu75UrV9SoUSMdP35c48ePV6lSpfTdd9+pXbt2dn1Xr16tpk2bqnr16po+fbqCgoL03//+V+3atdPly5fveG2UM+P63XffadOmTRo9erTy5cuniRMn6plnntH+/ftVvHhxSTf/MIWEhOjtt9/WQw89pOTkZM2ZM0fVq1fXtm3bVLp0aUnSyZMnFR0drTx58mjq1KkKDQ3VvHnzcnRdz+XLlxUdHa2jR4/qjTfeUMWKFfX7779rxIgR2rVrl3744Qeb0Hm7mTNn6qWXXlJ0dLSmT5+uQoUK6cCBA9q9e7e1zxdffKGOHTuqcePG+vLLL5WamqqJEyeqXr16WrVqlerUqWOzzLZt2+qFF15Qz549FRcXZ/2P1g8//KDevXtr0KBB+uKLLzRkyBCVKFFCzz77rM38b7zxhqpUqaIZM2bo/PnzGjVqlOrVq6dt27ZZx/zw4cMqXbq02rdvr+DgYCUmJmratGl6/PHHtWfPHhUsWFCStHPnTsXExKhUqVKaM2eO/Pz8NH36dH3++eeZjseOHTs0cOBAvf766woNDdWMGTPUvXt3lShRQk8++WSW45iT/fFe7ffOvC+7d++umJgYffHFFzpy5IjefPNN1atXTzt37rQeBf/888/VuXNntWrVSnPmzFGePHn08ccfq0mTJlqxYoUaNmxoHdM6deqoYMGCGj16tEqWLKnExEQtWbJE165dU/PmzTVu3Di98cYb+uijj6yn+x555BFJjv/duNt6srosISdjelcGrGbPnm1IMjZt2mSkpqYaxYsXN6pVq2akp6cbhmEY0dHRxqOPPprl/Glpacb169eNuXPnGp6enkZycrJ1WvPmzY2IiIhM55NkjBw50vq8ffv2ho+Pj5GQkGDTr1mzZoafn59x7tw5wzAMY/Xq1YYk46mnnrLp99VXXxmSjPXr12e6vvT0dOP69etGfHy8Icn49ttv7cbg0KFD1rYuXbrY1e7v72906dLFbtk9e/Y08uXLZ8THx9u0v/POO4Yk4/fff8+0JsMwjL179xqSjP79+9u0z5s3z5Bksz5H1/PKK68Y+fPnz3KdWZk7d64hyfj000/v2C8iIsKmrkOHDhmSjNmzZ1vbmjRpYjz88MPG+fPnbeZ95ZVXDF9fX+t+4szrmdX+lN3tnTZtmt2+YBiG8dJLL9ltT5kyZYzKlSsb169ft+n79NNPG4ULFzbS0tKyXI+j4yrJCA0NNVJSUqxtJ06cMDw8PIzx48dnOd+NGzeMa9euGSVLlrTZj4YMGWJYLBZj+/btNv1jYmIMScbq1autbbe/phmio6ON6Oho6/Px48cbHh4exqZNm2z6ff3114YkY/ny5VnWeeHCBSMwMNCoU6eO9fPldmlpaUZ4eLhRoUIFmzG9cOGCUahQIaNWrVrWtpEjRxqSjHfffddmGY899pghyVi0aJG17fr168ZDDz1kPPvss9a2jH2vSpUqNvUcPnzYyJMnj9GjR48st+XGjRvGxYsXDX9/f+P999+3trdp08bw9/c3Tp8+bbNN5cqVs/uMiYiIMHx9fW3ez1euXDGCg4ONnj172tV56+uVk/3xXu33jrwvMz57n3nmGZv2X3/91ZBkjBkzxjA
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_country_fr[\"number_compagny\"], company_country_fr[\"country_fr\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients français\")\n",
"plt.title(\"Nationalité des clients de chaque compagnie de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "ecfd112e-270a-4223-b80f-7e95e57d199d",
"metadata": {},
"source": [
"### 2. campaigns_information"
]
},
{
"cell_type": "code",
"execution_count": 189,
"id": "b37e7ddf-321a-4ebe-9742-9e760a541d29",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 688953\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_campaigns 0\n",
"nb_campaigns_opened 0\n",
"time_to_open 301495\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",campaigns_information_spectacle.shape[0])\n",
"campaigns_information_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 192,
"id": "de1ecaac-25bb-4853-b8ab-3ef2ca6917ed",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>number_compagny</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>29</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>37</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>39</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:16:38</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>41</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:12:29</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>44</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254699</th>\n",
" <td>6837769</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 23:42:15</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254700</th>\n",
" <td>6875038</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254701</th>\n",
" <td>6875066</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254702</th>\n",
" <td>6875099</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254703</th>\n",
" <td>6875143</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:17:01</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>688953 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
"0 29 4 0.0 NaT \n",
"1 37 3 0.0 NaT \n",
"2 39 4 1.0 0 days 05:16:38 \n",
"3 41 4 1.0 0 days 01:12:29 \n",
"4 44 4 0.0 NaT \n",
"... ... ... ... ... \n",
"254699 6837769 1 1.0 0 days 23:42:15 \n",
"254700 6875038 1 0.0 NaT \n",
"254701 6875066 1 0.0 NaT \n",
"254702 6875099 1 0.0 NaT \n",
"254703 6875143 1 1.0 0 days 01:17:01 \n",
"\n",
" number_compagny no_campaign_opened \n",
"0 10 True \n",
"1 10 True \n",
"2 10 False \n",
"3 10 False \n",
"4 10 True \n",
"... ... ... \n",
"254699 14 False \n",
"254700 14 True \n",
"254701 14 True \n",
"254702 14 True \n",
"254703 14 False \n",
"\n",
"[688953 rows x 6 columns]"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# part de clients n'ouvrant jamais les mails par compagnie\n",
"\n",
"campaigns_information_spectacle[\"no_campaign_opened\"] = pd.isna(campaigns_information_spectacle[\"time_to_open\"])\n",
"campaigns_information_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 197,
"id": "b5a0060f-a9dd-435b-844f-b24674b8bc27",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.605656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.294001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.475719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.353820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.428148</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny no_campaign_opened\n",
"0 10 0.605656\n",
"1 11 0.294001\n",
"2 12 0.475719\n",
"3 13 0.353820\n",
"4 14 0.428148"
]
},
"execution_count": 197,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_lazy_customers = campaigns_information_spectacle.groupby(\"number_compagny\")[\"no_campaign_opened\"].mean().reset_index()\n",
"company_lazy_customers"
]
},
{
"cell_type": "code",
"execution_count": 198,
"id": "788c90e0-f13a-4804-ace7-e5159fddd7fd",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAo0AAAHFCAYAAACXTsPRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABcSElEQVR4nO3dd1gUV9sG8HvpHQWlKQJGQRSwR9EodoJEsEUNKoqaiMauMXYFNagxSkxiS1Q0lhBrjDEFu4ldQE00dgEVREHFCric7w8/9s2yy+6CyADev+vaS/bMzDnPzM7OPp6ZMyMTQggQEREREWmgJ3UARERERFT2MWkkIiIiIq2YNBIRERGRVkwaiYiIiEgrJo1EREREpBWTRiIiIiLSikkjEREREWnFpJGIiIiItGLSSERERERaFSlpjImJgUwmU7wMDAxQvXp1hIWF4datWyUa2GeffYYdO3a8Uh03btyATCZDTExMicSkK1dXVwwcOLDU49i4cSOio6NfaxuFmTVrFlxdXSVpu7iePn2KWbNm4cCBA1KHUip2796NWbNmSR3GG0kmkylt+wMHDkAmk1W4fa/gsY/KplmzZkEmk0kdBgBg4MCB5e63o7iWLl1aKvlImzZt0KZNm9dSd7F6GtesWYOjR48iLi4OH374ITZt2oRWrVrhyZMnJRZYSSSNZYWjoyOOHj2KwMDA19qOlEljefT06VNERERUuB/uwuzevRsRERFSh/FGOnr0KIYMGSJ1GEQAgCFDhuDo0aNSh/HGKa2k8XUyKM5CXl5eaNKkCQCgbdu2kMvlmD17Nnbs2IG+ffu+UkDPnj2DqanpK9VR1hgbG6N58+ZSh0FvqKdPn8LMzEzqMN5oFeX7L5fL8eLFCxgbG0sdCr2C6tWro3r16lKHQeVQiVzTmH9ATEpKAgBERESgWbNmsLGxgZWVFRo1aoRVq1ZBCKG0nKurK9577z1s27YNDRs2hImJCSIiIiCTyfDkyROsXbtWcSpcW1fr7du30atXL1haWsLa2hq9e/dGWlqa2nlPnTqFoKAg2NjYwMTEBA0bNsSPP/6o07pmZ2cjMjISnp6eMDExga2tLdq2bYsjR44Uukxhp6cvX76MkJAQ2NnZwdjYGJ6envjmm2+U5sk/jbVp0yZMnToVTk5OsLKyQocOHXDx4kXFfG3atMEvv/yCpKQkpUsI8i1btgz169eHhYUFLC0tUadOHUyZMkXjuubHvXDhQixatAhubm6wsLCAr68vjh07pnVbffPNN2jdujXs7Oxgbm4Ob29vLFiwALm5uYp5Zs+eDQMDA6SkpKgsP2jQINja2uL58+cAgNjYWHTq1AmOjo4wNTWFp6cnJk2apNLDPXDgQFhYWODKlSvo3LkzLCws4OzsjPHjxyM7O1uxblWrVgUAxT4nk8m0nlpLTk5Gv379lD6zL774Anl5eYp5Cjv1WHA/iI6Ohkwmw5UrV1Ta+fTTT2FkZIR79+4pyvbs2YP27dvDysoKZmZmaNmyJfbu3au0XP5pp/j4ePTs2ROVK1fGW2+9hYEDByr2rf/uHzdu3Ch0XePi4hAcHIzq1avDxMQEtWrVwtChQ5Viyt/e6k4vqTsFlpeXh6+++goNGjSAqakpKlWqhObNm2Pnzp2KeQqeys1X8NRn/uUy+/fvx7Bhw1ClShXY2tqie/fuuH37dqHr9d+4LSws8O+//8Lf3x/m5uZwdHTEvHnzAADHjh3DO++8A3Nzc7i7u2Pt2rVKy9+9exfDhw9H3bp1YWFhATs7O7Rr1w6HDx9WaauwddImfx3j4uIQFhYGGxsbmJubo0uXLrh27ZrK/KtXr0b9+vVhYmICGxsbdOvWDRcuXFCap7BTVwU/x/z9dcGCBZgzZw7c3NxgbGyM/fv3F2kdsrKyMGHCBLi5ucHIyAjVqlXDmDFjVL63mzdvRrNmzWBtbQ0zMzPUrFkTgwYN0lq/LvtUXl4eFixYgDp16sDY2Bh2dnYIDQ3FzZs3VbaNl5cXjh49ihYtWsDU1BSurq5Ys2YNAOCXX35Bo0aNYGZmBm9vb/z2229Ky+fv8wkJCejevTusrKxgbW2Nfv364e7du0rz6no8A4Bvv/0W7u7uMDY2Rt26dbFx48ZCPy9djteFnZ6OjY2Fr68vzM3NYWFhAX9/fyQkJCjNc+3aNfTp0wdOTk4wNjaGvb092rdvj8TExMI/pP8XExMDDw8PxbFz3bp1aufLycnBnDlzFJ9X1apVERYWprIN1dElvvzcY/v27fDx8YGJiQlq1qyJJUuWqNSn6/6rbT90dXXFP//8g4MHDyqOv/mf3/PnzzF+/Hg0aNAA1tbWsLGxga+vL3766SeVeHTZ30t6m/5XsXoaC8r/0cv/Eb5x4waGDh2KGjVqAHh58B05ciRu3bqFGTNmKC0bHx+PCxcuYNq0aXBzc4O5uTm6du2Kdu3aoW3btpg+fToAwMrKqtD2nz17hg4dOuD27duIioqCu7s7fvnlF/Tu3Vtl3v379+Pdd99Fs2bNsHz5clhbW+OHH35A79698fTpU41Jw4sXLxAQEIDDhw9jzJgxaNeuHV68eIFjx44hOTkZLVq00HmbnT9/Hi1atECNGjXwxRdfwMHBAb///jtGjRqFe/fuYebMmUrzT5kyBS1btsR3332HrKwsfPrpp+jSpQsuXLgAfX19LF26FB999BGuXr2K7du3Ky37ww8/YPjw4Rg5ciQWLlwIPT09XLlyBefPn9cp1m+++QZ16tRRnPqePn06OnfujOvXr8Pa2hrAy4NQwR/Fq1evIiQkRPFlO3PmDObOnYt///0Xq1evBgAMHToUc+fOxYoVKzBnzhzFspmZmfjhhx8wYsQImJiYAHiZZHfu3BljxoyBubk5/v33X8yfPx8nTpzAvn37lNrOzc1FUFAQBg8ejPHjx+PQoUOYPXs2rK2tMWPGDDg6OuK3337Du+++i8GDBytOHebvw+rcvXsXLVq0QE5ODmbPng1XV1fs2rULEyZMwNWrV7F06VKdtme+fv364dNPP0VMTIzSusvlcqxfvx5dunRBlSpVAADr169HaGgogoODsXbtWhgaGmLFihXw9/fH77//jvbt2yvV3b17d/Tp0wfh4eF48uQJvLy88OTJE2zZskXptJSjo2Oh8V29ehW+vr4YMmQIrK2tcePGDSxatAjvvPMOzp07B0NDwyKtL/AyMVm/fj0GDx6MyMhIGBkZIT4+XmPyqs2QIUMQGBiIjRs3IiUlBZ988gn69eunsk+ok5ubi+7duyM8PByffPIJNm7ciMmTJyMrKwtbt27Fp59+iurVq+Orr77CwIED4eXlhcaNGwN4uY8CwMyZM+Hg4IDHjx9j+/btaNOmDfbu3Vui1xQNHjwYHTt2VKzjtGnT0KZNG5w9exaVKlUCAERFRWHKlCn44IMPEBUVhYyMDMyaNQu+vr44efIkateuXay2lyxZAnd3dyxcuBBWVlZFqufp06fw8/PDzZs3MWXKFPj4+OCff/7BjBkzcO7cOezZswcymQxHjx5F79690bt3b8yaNQsmJiZISkrS6TPUZZ8aNmwYVq5ciREjRuC9997DjRs3MH36dBw4cADx8fGK7xkApKWlISwsDBMnTlR89oMGDUJKSgq2bNmCKVOmwNraGpGRkejatSuuXbsGJycnpZi6deuGXr16ITw8HP/88w+mT5+O8+fP4/jx44rvja7Hs5UrV2Lo0KHo0aMHFi9ejIcPHyIiIkLxH+CCdDleq/PZZ59h2rRpCAsLw7Rp05CTk4PPP/8crVq1wokTJ1C3bl0AQOfOnSGXy7FgwQLUqFED9+7dw5EjR/DgwQONn1NMTAzCwsIQHByML774Ag8fPsSsWbOQnZ0NPb3/9V/l5eUhODgYhw8fxsSJE9GiRQskJSVh5syZaNOmDU6dOqXxbKSu8SUmJmLMmDGYNWsWHBwcsGHDBowePRo5OTmYMGECAN33X0D7frh9+3b07NkT1tbWit+K/B777OxsZGZmYsKECahWrRpycnKwZ88edO/eHWvWrEFoaKgi7uIcQ191myoRRbBmzRoBQBw7dkz
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_lazy_customers[\"number_compagny\"], company_lazy_customers[\"no_campaign_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients n'ayant ouvert aucun mail\")\n",
"plt.title(\"Part de clients n'ayant ouvert aucun mail pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 203,
"id": "c48015c2-6451-4089-93b7-6d55d3b2e553",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>ratio_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>734772</td>\n",
" <td>126151.0</td>\n",
" <td>0.171687</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>342396</td>\n",
" <td>129833.0</td>\n",
" <td>0.379190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>3168123</td>\n",
" <td>810722.0</td>\n",
" <td>0.255900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>3218569</td>\n",
" <td>793581.0</td>\n",
" <td>0.246563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>2427043</td>\n",
" <td>723846.0</td>\n",
" <td>0.298242</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
"0 10 734772 126151.0 0.171687\n",
"1 11 342396 129833.0 0.379190\n",
"2 12 3168123 810722.0 0.255900\n",
"3 13 3218569 793581.0 0.246563\n",
"4 14 2427043 723846.0 0.298242"
]
},
"execution_count": 203,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# taux d'ouverture des campaigns\n",
"\n",
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
"company_campaigns_stats"
]
},
{
"cell_type": "code",
"execution_count": 213,
"id": "d06ab865-4832-4fe9-918b-e5ff72bebee4",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApYAAAHFCAYAAABIALnOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABO10lEQVR4nO3dd3gU5f7+8XuBZBMgCSSQhpCAAkoJiCACSgClSS+CoFL1SLHQDs2DBKT7FYEDgkq34UEBPSC9KdKCVAUVkBJKQFpCDZA8vz882V82jYADuyHv13XtdTHPzM585tmZ2TtTFpsxxggAAAD4m3K5ugAAAADcHwiWAAAAsATBEgAAAJYgWAIAAMASBEsAAABYgmAJAAAASxAsAQAAYAmCJQAAACxBsAQAAIAlshwsbTZbll7r1q27i+X+fYcPH5bNZtPs2bMdbVFRUQoPD3dZTVbbu3evoqKidPjwYVeXkmXr1q3LFtsPXKdWrVqqVauWq8u4bekdc2bPni2bzZat9lErsJ9nH+Hh4erUqZOry5D0V/6IiopydRl33YkTJxQVFaWdO3fe1eWkd0yyUp6sTrhp0yan4XfeeUdr167VmjVrnNrLlCljTWW4Y3v37tWwYcNUq1at+yowA9lRSEiINm3apAcffNDVpQBZtnDhQvn6+rq6jBzlxIkTGjZsmMLDw1WxYkVXl3PHshwsn3jiCafhwoULK1euXGna4To3btyQzWa76/PPkyfLmw2Q49nt9hx1nLxy5Yry5s3r6jLwNz366KOuLgHZlKX3WE6ZMkU1a9ZUYGCg8uXLp/Lly2vcuHG6ceOG03QZnWJPfamrW7du8vLy0k8//eRoS0pK0tNPP62goCCdPHky03pOnDihNm3ayMfHR35+fmrbtq1iY2OztC7Xrl3ToEGDVLx4cXl6eqpIkSLq2bOnLly44DRdRqfoU67jrl27ZLPZNGPGjDTTLV26VDabTd9++62jbf/+/Wrfvr0CAwNlt9v1yCOPaMqUKU7vS76k9Mknn6hv374qUqSI7Ha7pk+frueee06SVLt2bcctCsmnvLPa9xnN/8CBA5KkVatW6emnn5avr6/y5s2rGjVqaPXq1bfo1b/8+uuvatCggfLmzatChQqpW7duunjxYrrTZmU5f/75p/7xj3+oaNGistvtKly4sGrUqKFVq1ZlqZZ27dopKChIdrtdxYoVU4cOHZSQkOCYd48ePVSmTBnlz59fgYGBqlOnjn744Qen+SRfWnj33Xc1duxYhYeHy9vbW7Vq1dLvv/+uGzduaODAgQoNDZWfn59atGih06dPO80jPDxcjRs31sKFCxURESEvLy+VKFFCkyZNcpru2rVr6tu3rypWrCg/Pz/5+/urWrVq+uabb9Ks34ULF9S1a1f5+/srf/78atSokf744480221UVJRsNpt++eUXtWvXTn5+fgoKClKXLl0UFxfnNE9jjD744ANVrFhR3t7eKliwoFq3bq0//vjDabodO3aocePGju04NDRUjRo10rFjxzL9TIwxGjdunMLCwuTl5aVKlSpp6dKl6U4bHx+vfv36Oe2nvXr10uXLlzNdhvTXNl+uXDlt2rRJ1atXl7e3t8LDwzVr1ixJ0pIlS1SpUiXlzZtX5cuX17Jly5zef+DAAXXu3FklS5ZU3rx5VaRIETVp0kR79uxxmi6rl53utL+S1+OHH37QE088IW9vbxUpUkRDhgxRYmKi07Tnzp1Tjx49VKRIEXl6eqpEiRJ66623HNv7rerNaLvZvn27WrdurYIFC97Rmdlt27apadOm8vf3l5eXlx599FH95z//cZrmypUrjs/ay8tL/v7+qly5sr744otbzv/48eOOY4Snp6dCQ0PVunVrnTp1yjHN0aNH9eKLLzodd9977z0lJSWl6ZucsJ+n912R1f1t/vz5qlq1qvz8/JQ3b16VKFFCXbp0ueXnFB8fr1deeUUBAQHKnz+/GjRooN9//z3dabPyPZmRW9WX/P336aefqk+fPgoODpa3t7ciIyO1Y8eONPPLyvYrZb4drlu3TlWqVJEkde7c2fHdnfz5bdu2Tc8//7xjmwsPD1e7du105MiR21pOZv5On6Zk6amngwcPqn379o6NbteuXRo5cqR+/fVXzZw587bnN2HCBG3ZskVt2rTRTz/9pAIFCmjYsGFat26dli1bppCQkAzfe/XqVT3zzDM6ceKERo8erVKlSmnJkiVq27ZtmmmjoqKcdj5jjJo3b67Vq1dr0KBBeuqpp7R7924NHTpUmzZt0qZNm2S327O8HhUqVNCjjz6qWbNmqWvXrk7jZs+ercDAQD377LOS/rqMXb16dRUrVkzvvfeegoODtXz5cr3xxhs6c+aMhg4d6vT+QYMGqVq1apo2bZpy5cqlypUr6/z58xo8eLCmTJmiSpUqSdIdX4ZLPf/AwEB9+umn6tChg5o1a6Y5c+bIw8NDH374oerXr6/ly5fr6aefznB+p06dUmRkpDw8PPTBBx8oKChIn332mV577bU002Z1OS+99JK2b9+ukSNHqlSpUrpw4YK2b9+us2fPZrpuu3bt0pNPPqlChQpp+PDhKlmypE6ePKlvv/1W169fl91u17lz5yRJQ4cOVXBwsC5duqSFCxeqVq1aWr16dZp7/qZMmaKIiAhNmTJFFy5cUN++fdWkSRNVrVpVHh4emjlzpo4cOaJ+/frp5ZdfdvqDQpJ27typXr16KSoqSsHBwfrss8/05ptv6vr16+rXr58kKSEhQefOnVO/fv1UpEgRXb9+XatWrVLLli01a9YsdejQQdJff4Q1adJE27ZtU1RUlCpVqqRNmzapQYMGGfZJq1at1LZtW3Xt2lV79uzRoEGDJMlp/3311Vc1e/ZsvfHGGxo7dqzOnTun4cOHq3r16tq1a5eCgoJ0+fJl1a1bV8WLF9eUKVMUFBSk2NhYrV27NsM/IpINGzZMw4YNU9euXdW6dWvFxMTolVdeUWJiokqXLu2Y7sqVK4qMjNSxY8c0ePBgRURE6JdfftHbb7+tPXv2aNWqVbc8gx8bG6vOnTurf//+euCBB/Tvf/9bXbp0UUxMjL766isNHjxYfn5+Gj58uJo3b64//vhDoaGhkv76wzUgIEBjxoxR4cKFde7cOc2ZM0dVq1bVjh07nGq9lb/TX8nr8fzzz2vgwIEaPny4lixZohEjRuj8+fOaPHmypL+CSu3atXXw4EENGzZMERER+uGHHzR69Gjt3LlTS5YsyXK9qbVs2VLPP/+8unXrlqVQn9LatWvVoEEDVa1aVdOmTZOfn5/mzZuntm3b6sqVK45w06dPH33yyScaMWKEHn30UV2+fFk///zzLffz48ePq0qVKrpx44ZjOzl79qyWL1+u8+fPKygoSH/++aeqV6+u69ev65133lF4eLgWL16sfv366eDBg/rggw+c5pkT9vPUsrq/bdq0SW3btlXbtm0VFRUlLy8vHTlyJM1tc6klf+9u3LhRb7/9tqpUqaIff/xRDRs2TDPt7X5PpnQ79Q0ePFiVKlXS9OnTFRcXp6ioKNWqVUs7duxQiRIlJGV9+73VdlipUiXNmjVLnTt31r/+9S81atRIkvTAAw9I+uuPmtKlS+v555+Xv7+/Tp48qalTp6pKlSrau3evChUqlKXlBAUFpdsvf6dP0/sw70jHjh1Nvnz5MhyfmJhobty4YebOnWty585tzp075xgXFhZmOnbsmOY9kZGRJjIy0qlt//79xtfX1zRv3tysWrXK5MqVy/zrX/+6ZX1Tp041ksw333zj1P7KK68YSWbWrFkZvnfZsmVGkhk3bpxT+5dffmkkmY8++sjRJskMHTo0zTxSr+OkSZOMJPPbb7852s6dO2fsdrvp27evo61+/frmgQceMHFxcU7ze+2114yXl5ejH9euXWskmZo1a6ZZ9vz5840ks3bt2lvWlSx132c0/8uXLxt/f3/TpEkTp/bExERToUIF8/jjj6eZd0oDBgwwNpvN7Ny506m9bt26TjXfznLy589vevXqlely01OnTh1ToEABc/r06Sy/5+bNm+bGjRvm6ae
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Taux d'ouverture (%)\")\n",
"plt.title(\"Taux d'ouverture des campagnes de mails pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 219,
"id": "5c37e063-a717-4a8c-828e-b386b87e8409",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkoAAAHFCAYAAAANLdYJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABbXUlEQVR4nO3dd1gUV/828HulLB0EgQVdARWxYSUSNQloFHuPPSoajQY1sT2WqBEsoEYRyyMao4IFjcYSW1RsmAQL9l4fFI0iiQUUpJ/3D1/m5wJDE1jU+3Nde13OmTMz351dZm/PzOwqhBACRERERJRDOW0XQERERFRWMSgRERERyWBQIiIiIpLBoEREREQkg0GJiIiISAaDEhEREZEMBiUiIiIiGQxKRERERDIYlIiIiIhklKmgFBISAoVCgdOnT8v2uXv3LhQKBUJCQkqvsBIUFhaGoKCgUtteUlISfH19cfTo0VLbZnHz9vaGo6OjtssoUQ8fPoSvry/Onz+v7VIK7H3728zO09MTnp6eGm0KhQK+vr6lVsP78PdbkOO8tuT2Gpc0X19fKBSKUq/j6tWr8PX1xd27d0t0O0WVV325fQY8ffoUvXv3ho2NDRQKBbp06VJstegW25pKiZ2dHY4fP46qVatqu5RiERYWhsuXL2P06NGlsr2kpCT4+fkBQKkfEKjgHj58CD8/Pzg6OqJ+/fraLocALFu2TNsl8O/3A1Ea77WrV6/Cz88Pnp6eZfI/nnnVN23aNHz33XcabTNnzsT27duxevVqVK1aFZaWlsVWyzsXlJRKJT7++GNtl6EVGRkZSE9Ph1Kp1HYpVEKyXuOSXj/fQ4VXq1YtbZdAeB0WjYyMtF1GieJ7LW+5DZRcvnwZVatWRb9+/Yp9e2Xq1FtByA3v37p1C3379oWNjQ2USiVq1qyJ//73vxp9jh49CoVCgY0bN2LKlCmwt7eHmZkZWrZsiRs3bhRo+8W5HU9PT+zZswf37t2DQqGQHm8+z3nz5mHWrFlwcnKCUqnEkSNHAACnT59Gp06dYGlpCQMDAzRo0ACbN2/Od99ZW1sDAPz8/KTteXt7AwBu376NQYMGwdnZGUZGRqhYsSI6duyIS5cuaawna+g8+5Bo1vPOOi1w69YtmJmZoUePHhr9Dh8+DB0dHUybNi3f/R0SEgIXFxdpX69duzbfZbJkZmZi3rx5qFGjBpRKJWxsbDBgwAA8ePBAo5+jo6O0D9705vD3P//8A319/Vxrvn79OhQKBRYvXiy1xcbGYtiwYahUqRL09fXh5OQEPz8/jRCU12v80UcfAQAGDRokvU5Zp3jkhuWzD0eXxHsoy8OHD9GzZ0+YmprC3NwcvXr1QmxsbK59C7KdpKQkjB8/Hk5OTjAwMIClpSXc3NywcePGPOvIei8ePnwYQ4cOhZWVFczMzDBgwAAkJiYiNjYWPXv2hIWFBezs7DB+/HikpaVprMPPzw/u7u6wtLSEmZkZGjZsiFWrViH774UX5HRIUZ8HkP97Jr+/39wU9pi3evVq1KtXT6q9a9euuHbtmkYfb29vmJiY4Pr162jdujWMjY1hZ2eHOXPmAABOnDiBTz75BMbGxqhevTpCQ0Nzre3Zs2cYNGgQLC0tYWxsjI4dO+J///ufRh9PT0/UqVMHx44dQ9OmTWFkZITBgwcDABISEqR9ra+vj4oVK2L06NFITEzMd18LITBv3jw4ODjAwMAADRs2xO+//55r37fZDgDs27cPn3/+OczNzWFkZISaNWsiICAgz2Vye6+lpqZi1qxZ0vHM2toagwYNwj///KPRz9HRER06dMC+ffvQsGFDGBoaokaNGli9erXUJyQkRDouN2/eXHovZX2unjt3Dh06dJA+5+zt7dG+ffscx87swsPD0blzZ1SqVAkGBgaoVq0ahg0bhn///TdH3+vXr6NPnz6wtbWFUqlE5cqVMWDAAKSkpORb35vHuqzj3MGDB3Ht2jWpb9bnUHBwMOrVqwcTExOYmpqiRo0a+P777/N8HjmIMmTNmjUCgIiKipLtEx0dLQCINWvWSG1XrlwR5ubmwtXVVaxdu1YcOHBAjBs3TpQrV074+vpK/Y4cOSIACEdHR9GvXz+xZ88esXHjRlG5cmXh7Ows0tPT86yvuLdz5coV0axZM6FSqcTx48elx5vPs2LFiqJ58+bi119/FQcOHBDR0dHi8OHDQl9fX3z66afil19+Efv27RPe3t459kt2ycnJYt++fQKA+Oqrr6Tt3b59WwghREREhBg3bpz49ddfRUREhNi+fbvo0qWLMDQ0FNevX8/xOkVHR2usP+t5HzlyRGrbtGmTACAWLVokhBDi0aNHwtbWVnh4eOS7v7O207lzZ7Fr1y6xfv16Ua1aNaFWq4WDg0OeywohxNdffy0AiJEjR4p9+/aJ5cuXC2tra6FWq8U///wj9XNwcBADBw7MsbyHh4fw8PCQprt27SrUarXIyMjQ6DdhwgShr68v/v33X+k5ZtW4YsUKcfDgQTFz5kyhVCqFt7e3tJzca3zhwgXpuU+dOlV6ne7fv59rXVkGDhyosV9K4j0khBBJSUmiZs2awtzcXCxZskTs379ffPvtt6Jy5co5li/odoYNGyaMjIxEYGCgOHLkiNi9e7eYM2eOWLJkSZ61ZO0nJycnMW7cOHHgwAExd+5coaOjI/r06SMaNmwoZs2aJcLDw8XEiRMFALFgwQKNdXh7e4tVq1aJ8PBwER4eLmbOnCkMDQ2Fn5+fRr/c9jsAMX369Ld+HgV5z+T395ubwhzz/P39BQDRp08fsWfPHrF27VpRpUoVYW5uLm7evCn1GzhwoNDX1xc1a9YUixYtEuHh4WLQoEECgJg8ebKoXr26WLVqldi/f7/o0KGDACBOnz6d4zVTq9Vi8ODB4vfffxc//fSTsLGxEWq1Wjx79kxjn1taWgq1Wi2WLFkijhw5IiIiIkRiYqKoX7++qFChgggMDBQHDx4UixYtEubm5qJFixYiMzMzz/09ffp0aT9mbb9ixYpCpVJpvMZvu52ff/5ZKBQK4enpKcLCwsTBgwfFsmXLhI+PT45a3pT9vZaRkSHatGkjjI2NhZ+fnwgPDxc///yzqFixoqhVq5ZISkqS+jo4OIhKlSqJWrVqibVr14r9+/eLHj16CAAiIiJCCCFEXFyc9Hr/97//ld5LcXFx4uXLl8LKykq4ubmJzZs3i4iICPHLL7+I4cOHi6tXr+b5fIODg0VAQIDYuXOniIiIEKGhoaJevXrCxcVFpKamSv3Onz8vTExMhKOjo1i+fLk4dOiQWL9+vejZs6dISEjIsz4hNI91ycnJ4vjx46JBgwaiSpUqUt/4+HixceNGAUCMGjVKHDhwQBw8eFAsX75cfPvtt3k+j+zei6DUunVrUalSJREfH6/Rd+TIkcLAwEA8ffpUCPF/B4127dpp9Nu8ebMAIIUUOSWxnfbt2+f6oZ/1PKtWrarxBhNCiBo1aogGDRqItLQ0jfYOHToIOzu7HB/kb/rnn39yHNzlpKeni9TUVOHs7CzGjBkjtRcmKAkhxDfffCP09fXF8ePHRYsWLYSNjY14+PBhntvOyMgQ9vb2omHDhhoHo7t37wo9Pb18g9K1a9cEAI0DkhBCnDx5UgAQ33//vdRW0KC0c+dOAUAcOHBAaktPTxf29vaie/fuUtuwYcOEiYmJuHfvnsb65s+fLwCIK1euCCHyfo2joqJkQ0thg1Jxv4eCg4MFAPHbb79ptA8dOjRHzQXdTp06dUSXLl1ktykn6704atQojfYuXboIACIwMFCjvX79+qJhw4ay68vIyBBpaWlixowZwsrKSuO9V5CgVNTnUdD3TGH+foUo+LHo2bNnwtDQMEe/mJgYoVQqRd++faW2gQMHCgBi69atUltaWpqwtrYWAMTZs2el9idPnggdHR0xduxYqS3rNevatavGtv766y8BQMyaNUtq8/DwEADEoUOHNPoGBASIcuXK5fis+PXXXwUAsXfvXtl98uzZM2FgYCC7/Tdf47fZzosXL4SZmZn45JNP8gxUBQlKWR/4b+5zIf7vOLFs2TK
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# création d'un barplot permettant de visualiser les 2 indicateurs sur le même graphique\n",
"\n",
"# Création du premier barplot\n",
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"],\n",
" label = \"taux d'ouverture\", alpha = 0.7)\n",
"\n",
"# Création du deuxième barplot à côté du premier\n",
"bar_width = 0.4 # Largeur des barres\n",
"indices2 = company_campaigns_stats[\"number_compagny\"] + bar_width\n",
"plt.bar(indices2, 100 * (1 - company_lazy_customers[\"no_campaign_opened\"]), \n",
" label='Part de clients ouvrant des mails', alpha=0.7, width=bar_width)\n",
"\n",
"# Ajout des étiquettes et de la légende\n",
"plt.xlabel('Compagnie')\n",
"plt.ylabel('Taux (%)')\n",
"plt.title('Lien entre taux d ouverture des mails et nombre de clients actifs')\n",
"plt.legend()\n",
"\n",
"# Affichage du graphique\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "783f6fb2-5f26-42a9-a22d-f4ece44bfaf2",
"metadata": {},
"source": [
"### 3. products_purchased_reduced"
]
},
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 8,
2024-03-03 09:32:45 +01:00
"id": "74534ded-8121-43fb-8cf8-af353bed2c77",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 764880\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 0\n",
"purchase_date_max 0\n",
"time_between_purchase 0\n",
"nb_tickets_internet 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
2024-03-05 00:36:48 +01:00
"execution_count": 8,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",products_purchased_reduced_spectacle.shape[0])\n",
"products_purchased_reduced_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
2024-03-05 02:43:40 +01:00
"execution_count": 22,
"id": "d64979ba-fccf-45f2-8a15-40ef1b49c74f",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
2024-03-05 02:43:40 +01:00
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_430/3239820253.py:6: DtypeWarning: Columns (39) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" dataset_train = pd.read_csv(file_in, sep=\",\")\n"
]
2024-03-03 09:32:45 +01:00
}
],
"source": [
2024-03-05 02:43:40 +01:00
"#base d'entrainement\n",
"\n",
"#FILE_PATH_S3='projet-bdc2324-team1/Generalization/musique/Train_test/dataset_train14.csv'\n",
"\n",
"#with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" #dataset_train = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "635d60cd-2dbc-49da-b0f4-94e16667882f",
"metadata": {},
"outputs": [],
"source": [
"#Creation de la variable dependante fictive: 1 si l'individu a effectué un achat au cours de la periode de train et 0 sinon\n",
"\n",
"#dataset_train_modif=dataset_train\n",
"\n",
"#dataset_train_modif[\"y_purchase_fictive\"]=np.random.randint(2, size=dataset_train_modif.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "ea63e5d6-70f9-4685-8b08-673a47108954",
"metadata": {},
"outputs": [],
"source": [
"#dataset_train_modif[\"y_purchase_fictive\"].value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "98f7645f-ffe6-4d7b-8032-15e65f36af87",
"metadata": {},
"outputs": [],
"source": [
"\n",
"#dataset_train_modif[\"y_purchase_fictive\"]=dataset_train_modif[\"y_purchase_fictive\"].replace([0,1],[\"Purchase_train\",\"no_purchase_train\"])"
2024-03-03 09:32:45 +01:00
]
},
2024-03-05 00:36:48 +01:00
{
"cell_type": "code",
"execution_count": null,
2024-03-05 02:43:40 +01:00
"id": "eb6355e0-3f8c-47d9-a5ee-d349040dcf51",
2024-03-05 00:36:48 +01:00
"metadata": {},
"outputs": [],
"source": [
2024-03-05 02:43:40 +01:00
"#repartion Chiffre d'affaire selon le numero de la compagnie\n",
2024-03-05 00:36:48 +01:00
"\n",
2024-03-05 02:43:40 +01:00
"sns.boxplot(data=chiffre_affaire_categ, y=\"chiffre_affaire\",x=\"categ\",showfliers=False,showmeans=True)\n",
"plt.title(\"Boite à moustache du chiffre d'affaire selon les categories de livre\")"
2024-03-05 00:36:48 +01:00
]
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "markdown",
"id": "b9e84af4-a02b-4f83-81ae-b7a73475d060",
"metadata": {},
"source": [
"### 4. target_information"
]
},
{
"cell_type": "code",
"execution_count": 222,
"id": "2867eceb-1f72-406c-adc2-adfedcaf60e6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 6240166\n"
]
},
{
"data": {
"text/plain": [
"id 0\n",
"customer_id 0\n",
"target_name 0\n",
"target_type_is_import 0\n",
"target_type_name 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 222,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",target_information_spectacle.shape[0])\n",
"target_information_spectacle.isna().sum()"
]
2024-03-05 00:36:48 +01:00
},
{
"cell_type": "code",
"execution_count": 11,
"id": "985b6403-3c75-420e-a4a4-d3045213e9ef",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1165098</td>\n",
" <td>618562</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1165100</td>\n",
" <td>618559</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1165101</td>\n",
" <td>618561</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1165102</td>\n",
" <td>618560</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1165103</td>\n",
" <td>618558</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>779653</th>\n",
" <td>4207082</td>\n",
" <td>6764876</td>\n",
" <td>INSCRIPTION NL VOYAGES HUMA</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>779654</th>\n",
" <td>4207083</td>\n",
" <td>6764877</td>\n",
" <td>Inscriptions newsletters (depuis 2019)</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>779655</th>\n",
" <td>4207084</td>\n",
" <td>6801322</td>\n",
" <td>Inscriptions newsletters (depuis 2019)</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>779656</th>\n",
" <td>4207085</td>\n",
" <td>6837768</td>\n",
" <td>Inscriptions newsletters (depuis 2019)</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>779657</th>\n",
" <td>4207086</td>\n",
" <td>6837769</td>\n",
" <td>Inscriptions newsletters (depuis 2019)</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6240166 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id target_name \\\n",
"0 1165098 618562 Newsletter mensuelle \n",
"1 1165100 618559 Newsletter mensuelle \n",
"2 1165101 618561 Newsletter mensuelle \n",
"3 1165102 618560 Newsletter mensuelle \n",
"4 1165103 618558 Newsletter mensuelle \n",
"... ... ... ... \n",
"779653 4207082 6764876 INSCRIPTION NL VOYAGES HUMA \n",
"779654 4207083 6764877 Inscriptions newsletters (depuis 2019) \n",
"779655 4207084 6801322 Inscriptions newsletters (depuis 2019) \n",
"779656 4207085 6837768 Inscriptions newsletters (depuis 2019) \n",
"779657 4207086 6837769 Inscriptions newsletters (depuis 2019) \n",
"\n",
" target_type_is_import target_type_name number_compagny \n",
"0 False manual_static_filter 10 \n",
"1 False manual_static_filter 10 \n",
"2 False manual_static_filter 10 \n",
"3 False manual_static_filter 10 \n",
"4 False manual_static_filter 10 \n",
"... ... ... ... \n",
"779653 False manual_static_filter 14 \n",
"779654 False manual_static_filter 14 \n",
"779655 False manual_static_filter 14 \n",
"779656 False manual_static_filter 14 \n",
"779657 False manual_static_filter 14 \n",
"\n",
"[6240166 rows x 6 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"target_information_spectacle"
]
},
{
"cell_type": "code",
"execution_count": null,
2024-03-05 02:43:40 +01:00
"id": "93a8ae1f-6fbd-4210-a857-728ae472d1c5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "904cbf32-77b6-49dd-a96c-9e7e5a0175c3",
2024-03-05 00:36:48 +01:00
"metadata": {},
"outputs": [],
"source": []
2024-03-02 11:37:44 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}