BDC-team-1/Spectacle/Stat_desc.ipynb

8261 lines
1.1 MiB
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "be628bfc-0bca-48b0-97c9-29063289127e",
"metadata": {},
"source": [
"# Statistiques descriptives : compagnies offrant des spectacles"
]
},
{
"cell_type": "markdown",
"id": "0bf5450b-f44d-430a-aed7-d875dc365048",
"metadata": {},
"source": [
"## Importations et chargement des données"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "aa915888-cede-4eb0-8a26-7df573d29a3e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import s3fs\n",
"import warnings\n",
"from datetime import date, timedelta, datetime\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import re\n",
"import io"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
"metadata": {},
"outputs": [],
"source": [
"# Load the KPI construction helpers into the notebook namespace.\n",
"# The script is executed verbatim in this namespace, so only point this at trusted code.\n",
"# The path is relative to the notebook's working directory.\n",
"with open('../0_KPI_functions.py') as kpi_script:\n",
"    exec(kpi_script.read())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# S3 client pointed at the endpoint configured in the environment\n",
"S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
"s3_client_options = {'endpoint_url': S3_ENDPOINT_URL}\n",
"fs = s3fs.S3FileSystem(client_kwargs=s3_client_options)\n",
"\n",
"# Folder holding the input tables of company 10; list its content as a sanity check\n",
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
"metadata": {},
"outputs": [],
"source": [
"# Trial run on company 10: load its four raw tables\n",
"\n",
"dic_base = ['campaigns_information', 'customerplus_cleaned', 'products_purchased_reduced', 'target_information']\n",
"for nom_base in dic_base:\n",
"    # Reuse BUCKET (the company 10 folder) instead of repeating the path literal\n",
"    FILE_PATH_S3_fanta = BUCKET + '/' + nom_base + '.csv'\n",
"    with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
"        # Each table is bound to a notebook-level variable named after its file,\n",
"        # e.g. target_information and products_purchased_reduced displayed below.\n",
"        globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
"metadata": {},
"outputs": [],
"source": [
"# Helper reading one table of a company from its company number (directory_path)\n",
"\n",
"def display_databases(directory_path, file_name, datetime_col = None):\n",
"    \"\"\"\n",
"    Read one CSV table of a company from S3 storage and return it as a DataFrame.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str or int\n",
"        Company number, i.e. the suffix of the 'Company_<n>' folder.\n",
"    file_name : str\n",
"        Table name, without the '.csv' extension.\n",
"    datetime_col : list of str, optional\n",
"        Columns to parse as dates with custom_date_parser. When omitted,\n",
"        no date parsing is requested.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Content of the CSV file.\n",
"    \"\"\"\n",
"    file_path = f\"projet-bdc2324-team1/0_Input/Company_{directory_path}/{file_name}.csv\"\n",
"    print(\"File path : \", file_path)\n",
"\n",
"    read_options = {\"sep\": \",\"}\n",
"    if datetime_col is not None:\n",
"        # Only forward the parser when dates are actually requested: passing\n",
"        # date_parser unconditionally triggers a pandas FutureWarning on every read.\n",
"        # custom_date_parser is defined outside this cell (presumably by the KPI\n",
"        # functions script - TODO confirm). date_parser itself is deprecated;\n",
"        # migrating to date_format / to_datetime requires checking what\n",
"        # custom_date_parser does.\n",
"        read_options[\"parse_dates\"] = datetime_col\n",
"        read_options[\"date_parser\"] = custom_date_parser\n",
"\n",
"    # fs is the notebook-level S3 filesystem object\n",
"    with fs.open(file_path, mode=\"rb\") as file_in:\n",
"        df = pd.read_csv(file_in, **read_options)\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c56decc3-de19-4786-82a4-1386c72a6bfb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1165098</td>\n",
" <td>618562</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1165100</td>\n",
" <td>618559</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1165101</td>\n",
" <td>618561</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1165102</td>\n",
" <td>618560</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1165103</td>\n",
" <td>618558</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69253</th>\n",
" <td>1698158</td>\n",
" <td>18580</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69254</th>\n",
" <td>1698159</td>\n",
" <td>18569</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69255</th>\n",
" <td>1698160</td>\n",
" <td>2962</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69256</th>\n",
" <td>1698161</td>\n",
" <td>3825</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69257</th>\n",
" <td>1698162</td>\n",
" <td>5731</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>69258 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id target_name target_type_is_import \\\n",
"0 1165098 618562 Newsletter mensuelle False \n",
"1 1165100 618559 Newsletter mensuelle False \n",
"2 1165101 618561 Newsletter mensuelle False \n",
"3 1165102 618560 Newsletter mensuelle False \n",
"4 1165103 618558 Newsletter mensuelle False \n",
"... ... ... ... ... \n",
"69253 1698158 18580 Newsletter mensuelle False \n",
"69254 1698159 18569 Newsletter mensuelle False \n",
"69255 1698160 2962 Newsletter mensuelle False \n",
"69256 1698161 3825 Newsletter mensuelle False \n",
"69257 1698162 5731 Newsletter mensuelle False \n",
"\n",
" target_type_name \n",
"0 manual_static_filter \n",
"1 manual_static_filter \n",
"2 manual_static_filter \n",
"3 manual_static_filter \n",
"4 manual_static_filter \n",
"... ... \n",
"69253 manual_static_filter \n",
"69254 manual_static_filter \n",
"69255 manual_static_filter \n",
"69256 manual_static_filter \n",
"69257 manual_static_filter \n",
"\n",
"[69258 rows x 5 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"target_information"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c825d64b-356c-4b71-aa3c-90e0dd7ca092",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" <th>start_date_time</th>\n",
" <th>end_date_time</th>\n",
" <th>open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1799177</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>2</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>danse</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>aringa rossa</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-09-27 00:00:00+02:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1799178</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>5èmes hurlants</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-11-18 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1799179</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>dom juan</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-12-07 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1799180</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>vanishing point</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-04 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1799181</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>12.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>la cite des congres</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>a o lang pho</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-03 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492309</th>\n",
" <td>3252232</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492310</th>\n",
" <td>3252233</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492311</th>\n",
" <td>3252234</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492312</th>\n",
" <td>3252235</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492313</th>\n",
" <td>3252236</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>492314 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 1799177 36984 409613 2 guichet \n",
"1 1799178 36984 409613 3 guichet \n",
"2 1799179 36984 409613 1 guichet \n",
"3 1799180 36984 409613 1 guichet \n",
"4 1799181 36984 409613 3 guichet \n",
"... ... ... ... ... ... \n",
"492309 3252232 621716 710062 1 guichet \n",
"492310 3252233 621716 710062 1 guichet \n",
"492311 3252234 621716 710062 1 guichet \n",
"492312 3252235 621716 710062 1 guichet \n",
"492313 3252236 621716 710062 1 guichet \n",
"\n",
" purchase_date amount is_full_price name_event_types \\\n",
"0 2016-04-28 17:58:26+02:00 9.0 False danse \n",
"1 2016-04-28 17:58:26+02:00 9.0 False cirque \n",
"2 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"3 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"4 2016-04-28 17:58:26+02:00 12.0 False cirque \n",
"... ... ... ... ... \n",
"492309 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492310 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492311 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492312 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492313 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"\n",
" name_facilities name_categories \\\n",
"0 le grand t abo t gourmand jeune \n",
"1 le grand t abo t gourmand jeune \n",
"2 le grand t abo t gourmand jeune \n",
"3 le grand t abo t gourmand jeune \n",
"4 la cite des congres abo t gourmand jeune \n",
"... ... ... \n",
"492309 cap nort tarif sco co 1 seance scolaire \n",
"492310 cap nort tarif sco co 1 seance scolaire \n",
"492311 cap nort tarif sco co 1 seance scolaire \n",
"492312 cap nort tarif sco co 1 seance scolaire \n",
"492313 cap nort tarif sco co 1 seance scolaire \n",
"\n",
" name_events name_seasons start_date_time \\\n",
"0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n",
"1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n",
"2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n",
"3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n",
"4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n",
"... ... ... ... \n",
"492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"\n",
" end_date_time open \n",
"0 1901-01-01 00:09:21+00:09 True \n",
"1 1901-01-01 00:09:21+00:09 True \n",
"2 1901-01-01 00:09:21+00:09 True \n",
"3 1901-01-01 00:09:21+00:09 True \n",
"4 1901-01-01 00:09:21+00:09 True \n",
"... ... ... \n",
"492309 1901-01-01 00:09:21+00:09 True \n",
"492310 1901-01-01 00:09:21+00:09 True \n",
"492311 1901-01-01 00:09:21+00:09 True \n",
"492312 1901-01-01 00:09:21+00:09 True \n",
"492313 1901-01-01 00:09:21+00:09 True \n",
"\n",
"[492314 rows x 16 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "afd044b8-ac83-4a35-b959-700cae0b3b41",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 10\n",
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 11\n",
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"/tmp/ipykernel_436/3170175140.py:10: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 12\n",
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 13\n",
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"/tmp/ipykernel_436/3170175140.py:10: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 14\n"
]
}
],
"source": [
"# Build the KPI tables for the 5 performing-arts companies\n",
"\n",
"# Company numbers (suffix of the 'Company_<n>' folders)\n",
"nb_compagnie = ['10', '11', '12', '13', '14']\n",
"\n",
"# Per-company frames are collected here and concatenated once after the loop,\n",
"# instead of re-copying the growing tables at every iteration.\n",
"customerplus_frames = []\n",
"campaigns_frames = []\n",
"tickets_frames = []\n",
"target_frames = []\n",
"\n",
"for directory_path in nb_compagnie:\n",
"    # Raw tables of the current company\n",
"    df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
"    df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
"    df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
"    df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
"\n",
"    # KPI tables derived from the raw tables (functions defined outside this cell)\n",
"    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)\n",
"    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
"    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
"\n",
"    # Tag every table with the company number so results can be aggregated later\n",
"    company_number = int(directory_path)\n",
"    df_tickets_kpi[\"number_compagny\"] = company_number\n",
"    df_campaigns_kpi[\"number_compagny\"] = company_number\n",
"    df_customerplus_clean[\"number_compagny\"] = company_number\n",
"    df_target_information[\"number_compagny\"] = company_number\n",
"\n",
"    customerplus_frames.append(df_customerplus_clean)\n",
"    campaigns_frames.append(df_campaigns_kpi)\n",
"    tickets_frames.append(df_tickets_kpi)\n",
"    target_frames.append(df_target_information)\n",
"\n",
"    print(f\"Tables imported for tenant {directory_path}\")\n",
"\n",
"# Stack the per-company tables; original row indexes are kept (no ignore_index)\n",
"customerplus_clean_spectacle = pd.concat(customerplus_frames, axis=0)\n",
"campaigns_information_spectacle = pd.concat(campaigns_frames, axis=0)\n",
"products_purchased_reduced_spectacle = pd.concat(tickets_frames, axis=0)\n",
"target_information_spectacle = pd.concat(target_frames, axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "b5a4a031-9533-4a50-8569-5f4246691a7a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>2</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18031</th>\n",
" <td>2</td>\n",
" <td>319517</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1556</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>2</td>\n",
" <td>2020-01-01 14:06:52+00:00</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>291642</th>\n",
" <td>2</td>\n",
" <td>757541</td>\n",
" <td>303.0</td>\n",
" <td>5.0</td>\n",
" <td>1</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2016-09-08 14:50:00+00:00</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"17 2 139 NaN NaN 0 \n",
"18031 2 319517 NaN NaN 0 \n",
"291642 2 757541 303.0 5.0 1 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"17 875 False NaN 2 False ... \n",
"18031 1556 False NaN 0 True ... \n",
"291642 862 False NaN 1 True ... \n",
"\n",
" purchase_count first_buying_date country gender_label \\\n",
"17 3 NaN NaN other \n",
"18031 2 2020-01-01 14:06:52+00:00 fr female \n",
"291642 3 2016-09-08 14:50:00+00:00 fr male \n",
"\n",
" gender_female gender_male gender_other country_fr has_tags \\\n",
"17 0 0 1 NaN 0 \n",
"18031 1 0 0 1.0 0 \n",
"291642 0 1 0 1.0 1 \n",
"\n",
" number_compagny \n",
"17 10 \n",
"18031 11 \n",
"291642 14 \n",
"\n",
"[3 rows x 29 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# customer_id is not unique across companies: the same id can appear once per tenant\n",
"# (the saved output shows companies 10, 11 and 14), so read it together with number_compagny.\n",
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==2]"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b9b6ec1f-36fb-4ee9-a1ed-09ff41878005",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'customerplus_clean_spectacle' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcustomerplus_clean_spectacle\u001b[49m[customerplus_clean_spectacle[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcustomer_id\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m==\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
"\u001b[0;31mNameError\u001b[0m: name 'customerplus_clean_spectacle' is not defined"
]
}
],
"source": [
"# NOTE(review): needs the per-tenant import loop to have been run first; on a fresh kernel\n",
"# customerplus_clean_spectacle is undefined and this cell raises NameError (see saved output).\n",
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==1]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "a12c1b7d-6f6f-483e-b215-6336d7a51057",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'street_id', 'structure_id', 'mcp_contact_id',\n",
" 'fidelity', 'tenant_id', 'is_partner', 'deleted_at', 'gender',\n",
" 'is_email_true', 'opt_in', 'last_buying_date', 'max_price',\n",
" 'ticket_sum', 'average_price', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'gender_label',\n",
" 'gender_female', 'gender_male', 'gender_other', 'country_fr',\n",
" 'has_tags', 'number_compagny'],\n",
" dtype='object')"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List the columns of the concatenated customer table\n",
"customerplus_clean_spectacle.columns"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "05b9a396-dcd7-4d3d-8b39-5ca48beba4b0",
"metadata": {},
"outputs": [],
"source": [
"# Optional check: number of missing values per column in each concatenated table (uncomment to run)\n",
"#customerplus_clean_spectacle.isna().sum()\n",
"#campaigns_information_spectacle.isna().sum()\n",
"#products_purchased_reduced_spectacle.isna().sum()\n",
"#target_information_spectacle.isna().sum()"
]
},
{
"cell_type": "markdown",
"id": "81e15508-32ca-46f1-a03d-1febddbbf5b4",
"metadata": {},
"source": [
"### Ajout : importation de la table train_set pour en calculer les statistiques descriptives"
]
},
{
"cell_type": "code",
"execution_count": 119,
"id": "3a1fdd6b-ac43-4e90-9a31-4f522bcc44bb",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3450421856.py:9: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" train_set_spectacle = pd.read_csv(file_in, sep=\",\")\n"
]
}
],
"source": [
"# Load the train_set table of the show (or music) companies from S3.\n",
"\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"path_train_set_spectacle = \"projet-bdc2324-team1/Generalization/musique/Train_set.csv\"\n",
"\n",
"with fs.open(path_train_set_spectacle, mode=\"rb\") as file_in:\n",
"    # low_memory=False makes pandas infer each column's dtype over the whole file instead of\n",
"    # chunk by chunk, which removes the mixed-types DtypeWarning raised by this read.\n",
"    train_set_spectacle = pd.read_csv(file_in, sep=\",\", low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 120,
"id": "3a4c1ff4-2861-4e86-99df-26eea0370dc3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_492779</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>13.0</td>\n",
" <td>4.0</td>\n",
" <td>8 days 04:08:27</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_563424</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>0 days 01:39:58.555555555</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_44369</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>14.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_620271</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_687644</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_492779 0.0 0.0 0.0 0.0 \n",
"1 10_563424 0.0 0.0 0.0 0.0 \n",
"2 10_44369 0.0 0.0 0.0 0.0 \n",
"3 10_620271 0.0 0.0 0.0 0.0 \n",
"4 10_687644 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 550.0 550.0 \n",
"1 0.0 550.0 550.0 \n",
"2 0.0 550.0 550.0 \n",
"3 0.0 550.0 550.0 \n",
"4 0.0 550.0 550.0 \n",
"\n",
" time_between_purchase nb_tickets_internet ... country gender_label \\\n",
"0 -1.0 0.0 ... fr female \n",
"1 -1.0 0.0 ... fr other \n",
"2 -1.0 0.0 ... fr male \n",
"3 -1.0 0.0 ... NaN other \n",
"4 -1.0 0.0 ... NaN other \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 1 0 0 1.0 13.0 \n",
"1 0 0 1 1.0 10.0 \n",
"2 0 1 0 1.0 14.0 \n",
"3 0 0 1 NaN 9.0 \n",
"4 0 0 1 NaN 4.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \n",
"0 4.0 8 days 04:08:27 0.0 \n",
"1 9.0 0 days 01:39:58.555555555 0.0 \n",
"2 0.0 NaN 0.0 \n",
"3 0.0 NaN 0.0 \n",
"4 0.0 NaN 0.0 \n",
"\n",
"[5 rows x 40 columns]"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the train set\n",
"train_set_spectacle.head()"
]
},
{
"cell_type": "code",
"execution_count": 121,
"id": "4632384d-2a06-445d-9fdb-b0c91b37ebaf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0., 1.])"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Replace missing y_has_purchased values (NaN) with 0, then list the remaining distinct values\n",
"train_set_spectacle[\"y_has_purchased\"] = train_set_spectacle[\"y_has_purchased\"].fillna(0)\n",
"train_set_spectacle[\"y_has_purchased\"].unique()"
]
},
{
"cell_type": "code",
"execution_count": 122,
"id": "5fd56696-b479-46c7-8a59-fb8137db5fb5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([10, 11, 12, 13, 14])"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rebuild a company-number column from the prefix of customer_id (text before the first \"_\")\n",
"\n",
"train_set_spectacle[\"number_company\"] = (\n",
"    train_set_spectacle[\"customer_id\"]\n",
"    .astype(str)\n",
"    .str.split(\"_\")\n",
"    .str[0]\n",
"    .astype(int)\n",
")\n",
"train_set_spectacle[\"number_company\"].unique()"
]
},
{
"cell_type": "code",
"execution_count": 123,
"id": "91c6e047-43d2-456c-81f1-087026eef4f0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_492779</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>13.0</td>\n",
" <td>4.0</td>\n",
" <td>8 days 04:08:27</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_563424</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>0 days 01:39:58.555555555</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_44369</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>14.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_620271</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_687644</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_492779 0.0 0.0 0.0 0.0 \n",
"1 10_563424 0.0 0.0 0.0 0.0 \n",
"2 10_44369 0.0 0.0 0.0 0.0 \n",
"3 10_620271 0.0 0.0 0.0 0.0 \n",
"4 10_687644 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 550.0 550.0 \n",
"1 0.0 550.0 550.0 \n",
"2 0.0 550.0 550.0 \n",
"3 0.0 550.0 550.0 \n",
"4 0.0 550.0 550.0 \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
"0 -1.0 0.0 ... female \n",
"1 -1.0 0.0 ... other \n",
"2 -1.0 0.0 ... male \n",
"3 -1.0 0.0 ... other \n",
"4 -1.0 0.0 ... other \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 1 0 0 1.0 13.0 \n",
"1 0 0 1 1.0 10.0 \n",
"2 0 1 0 1.0 14.0 \n",
"3 0 0 1 NaN 9.0 \n",
"4 0 0 1 NaN 4.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
"0 4.0 8 days 04:08:27 0.0 \n",
"1 9.0 0 days 01:39:58.555555555 0.0 \n",
"2 0.0 NaN 0.0 \n",
"3 0.0 NaN 0.0 \n",
"4 0.0 NaN 0.0 \n",
"\n",
" number_company \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"\n",
"[5 rows x 41 columns]"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview again to check the new number_company column\n",
"train_set_spectacle.head()"
]
},
{
"cell_type": "markdown",
"id": "fff306c2-1d41-4ef6-867b-ba9a7cf4ee68",
"metadata": {},
"source": [
"## Statistiques descriptives"
]
},
{
"cell_type": "markdown",
"id": "0549bdc4-edd7-4511-916e-26e94b5a30f5",
"metadata": {},
"source": [
"### 0. Détection du client anonyme (outlier) - utile pour la section 3"
]
},
{
"cell_type": "code",
"execution_count": 132,
"id": "5b460061-f8b5-4a6b-ba59-539446d8487f",
"metadata": {},
"outputs": [],
"source": [
"def outlier_detection(directory_path = \"1\", coupure = 1):\n",
"    \"\"\"Plot the share of a company's total sales amount held by its top customers.\n",
"\n",
"    The company's purchase table is loaded and turned into KPIs, `total_amount` is\n",
"    summed per `customer_id`, and a pie chart shows the `coupure` largest customers\n",
"    individually plus a single 'Autre' slice for all the others. Per the section\n",
"    heading, the goal is to spot the anonymous (outlier) customer.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str\n",
"        Company identifier, forwarded to `display_databases`.\n",
"    coupure : int\n",
"        Number of top customers drawn as individual slices.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is displayed with `plt.show()`.\n",
"    \"\"\"\n",
"    df_tickets = display_databases(directory_path, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n",
"    df_tickets_kpi = tickets_kpi_function(df_tickets)\n",
"\n",
"    # Company 101 has a second purchases file (presumably the table is split in two - TODO confirm)\n",
"    if directory_path == \"101\" :\n",
"        df_tickets_1 = display_databases(directory_path, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])\n",
"        df_tickets_kpi_1 = tickets_kpi_function(df_tickets_1)\n",
"        df_tickets_kpi = pd.concat([df_tickets_kpi, df_tickets_kpi_1])\n",
"\n",
"    # Total amount per customer, largest first\n",
"    amount_by_customer = (\n",
"        df_tickets_kpi.groupby('customer_id')['total_amount']\n",
"        .sum()\n",
"        .sort_values(ascending = False)\n",
"    )\n",
"\n",
"    # .iloc makes the positional split explicit (customer_id labels are themselves integers)\n",
"    top = amount_by_customer.iloc[:coupure]\n",
"    rest_sum = amount_by_customer.iloc[coupure:].sum()\n",
"\n",
"    # The `coupure` largest customers followed by one aggregated 'Autre' slice\n",
"    new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
"\n",
"    plt.figure(figsize=(3, 3))\n",
"    plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
"    plt.axis('equal')  # keep the pie circular\n",
"    plt.title('Répartition des montants totaux')\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 208,
"id": "cccee90c-67d1-4e14-8410-1210a5ef97d9",
"metadata": {},
"outputs": [],
"source": [
"# Helper that draws a grouped bar chart: one group per x category, one bar per label value\n",
"\n",
"def multiple_barplot(data, x, y, var_labels, bar_width=0.35,\n",
"                     figsize=(10, 6), xlabel=None, ylabel=None, title=None, dico_labels = None) :\n",
"    \"\"\"Draw a grouped bar chart of `y` by `x`, with one bar per value of `var_labels`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : DataFrame\n",
"        Must contain the columns `x`, `y` and `var_labels`. When several rows share\n",
"        the same (x, var_labels) pair, only the first one is plotted.\n",
"    x : str\n",
"        Column whose unique values become the groups on the x axis.\n",
"    y : str\n",
"        Column holding the bar heights.\n",
"    var_labels : str\n",
"        Column whose unique values each get one bar inside every group.\n",
"    bar_width : float\n",
"        Width of a single bar.\n",
"    figsize : tuple\n",
"        Figure size passed to `plt.subplots`.\n",
"    xlabel, ylabel : str, optional\n",
"        Axis labels; default to the names `x` and `y`.\n",
"    title : str, optional\n",
"        Plot title.\n",
"    dico_labels : dict, optional\n",
"        Maps each value of `var_labels` to its legend text; when omitted the legend\n",
"        reads \"<var_labels>=<value>\".\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is neither shown nor returned; it stays the current matplotlib figure.\n",
"    \"\"\"\n",
"    # Without explicit axis labels, fall back to the column names\n",
"    xlabel = x if xlabel is None else xlabel\n",
"    ylabel = y if ylabel is None else ylabel\n",
"\n",
"    fig, ax = plt.subplots(figsize=figsize)\n",
"\n",
"    categories = data[x].unique()\n",
"    labels = data[var_labels].unique()\n",
"    base_positions = np.arange(len(categories))\n",
"\n",
"    # One series of bars per label, each shifted by one bar width from the previous one\n",
"    for rank, label in enumerate(labels):\n",
"        label_data = data[data[var_labels] == label]\n",
"\n",
"        values = []\n",
"        for category in categories:\n",
"            matches = label_data.loc[label_data[x] == category, y].values\n",
"            # A (label, category) pair absent from the data leaves a gap instead of raising IndexError\n",
"            values.append(matches[0] if len(matches) else np.nan)\n",
"\n",
"        label_printed = f\"{var_labels}={label}\" if dico_labels is None else dico_labels[label]\n",
"\n",
"        ax.bar(base_positions + rank * bar_width, values, bar_width, label=label_printed)\n",
"\n",
"    ax.set_xlabel(xlabel)\n",
"    ax.set_ylabel(ylabel)\n",
"    ax.set_title(title)\n",
"    # Centre each tick under its group whatever the number of bars (bar_width / 2 only fits two bars)\n",
"    ax.set_xticks(base_positions + bar_width * (len(labels) - 1) / 2)\n",
"    ax.set_xticklabels(categories)\n",
"    ax.legend()\n",
"\n",
"    # plt.show() is not called here, so the caller can still adjust the figure.\n",
"    # Analysis note kept from the original author (it concerns one specific call, not this helper):\n",
"    # the share of French customers is the same whether or not a purchase occurred in the period,\n",
"    # except for company 12 and perhaps 13."
]
},
{
"cell_type": "code",
"execution_count": 133,
"id": "b6417f09-a6c7-4319-95b3-98c95ec5a3b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Outlier to remove (depends on the descriptive statistics!)\n",
"outlier_detection(directory_path=\"10\") # use 2 (presumably coupure=2 - confirm) to also see the first non-anonymous customer"
]
},
{
"cell_type": "code",
"execution_count": 145,
"id": "f08c082e-f76f-41f3-9530-3e6700eb74d9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 10\n",
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 11\n",
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 12\n",
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"/tmp/ipykernel_436/3170175140.py:10: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 13\n",
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATEAAAEQCAYAAADYlUP7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4m0lEQVR4nO3dd3hT5d8G8Ds7TffegzIKFArIlF0EyigCArKnKArCDxERBxQURBQVBGUoMgR8EQcCMoqAKLL3HmUVaUtL926TPO8fsdHQlbRpn5zk+7muXNr05OROcnpzVs4jYowxEEKIQIl5ByCEkOqgEiOECBqVGCFE0KjECCGCRiVGCBE0KjFCiKBRiRFCBI1KjBAiaFRihBBB415iFy9ehEqlwvLly3lHIYQIkFlKbP369RCJRPqbVCqFr68vhg0bhlu3bpX7uOzsbAwePBhTp07F1KlTzRGlynbv3o158+aV+buQkBCMGzdO/3NCQgLmzZuH8+fPl5p23rx5EIlENROyikQiUbmvzVZcvXoV8+bNw71792rl+T744ANs377dbPMzR/6KlnFBY2awbt06BoCtW7eOHTt2jB06dIgtWLCA2dnZMS8vL5aWllbm44YMGcJGjhzJtFqtOWJUy5QpU1h5b8fZs2dZXFyc/udTp07pX++THjx4wI4dO1ZTMasEAIuJieEdg6tt27YxAOzQoUO18nz29vZs7NixZpufOfJXtIwLmdSchdikSRO0atUKANC1a1doNBrExMRg+/btGD9+fKnpv//+e3M+fZXk5eVBpVJVOE2LFi2Mnl9AQAACAgKqG4sQYixzNGHJmtipU6cM7v/1118ZALZo0SKD+0+dOsX69evHXF1dmUKhYM2bN2dbt24tc56xsbFs3LhxzNXVlalUKhYdHc1u375tMG1sbCx79tlnmb+/P1MoFKxu3brspZdeYikpKQbTxcTEMADszJkzbNCgQczFxYX5+PiwsWPHMgClbnfv3mWMMRYcHKz/V/XQoUNlTluyplPyHP+l0WjY4sWLWVhYGJPL5czT05ONHj2aPXjwwGC6Ll26sPDwcHby5EnWsWNHZmdnx+rUqcMWLVrENBpNpZ9DZmYmmzhxInNzc2P29vYsKiqK3bhxo8w1sZs3b7Lhw4czT09PJpfLWcOGDdmKFStK5X7//fdZgwYNmFKpZM7Ozqxp06Zs6dKlFeYoeY82b97MZs2axXx8fJi9vT2Ljo5mSUlJLCsri7344ovM3d2dubu7s3HjxrHs7GyDeeTn57PZs2ezkJAQJpPJmJ+fH5s8eTJLT083mC44OJj17duX7dmzh7Vo0YIplUoWFhbG1q5dq5+mZFl68layJm3q8nP58mU2bNgw5uTkxLy8vNj48eNZRkaGfrqynqtLly6MMcZyc3PZ66+/zkJCQphCoWCurq6sZcuWbMuWLeW+n5XlZ4yxtWvXsoiICP08BwwYwK5evar/fWXL+IoVK1inTp2Yp6cnU6lUrEmTJmzx4sWsqKio1Ptd1hpmly5d9K+RMcYmTZrEFAoFO336tP4+jUbDunXrxry8vFhCQkK5r9dUNVpiK1asYADYjz/+qL/v4MGDTC6Xs06dOrGtW7eyvXv3snHjxpX6UErmGRgYyCZMmMD27NnD1qxZw7y8vFhgYKDBwrxy5Uq2aNEitmPHDnb48GG2YcMG1qxZMxYWFmbwIZQshMHBwezNN99k+/fvZ9u3b2dxcXFs8ODBDAA7duyY/lZQUMAYM/zgMjMz9dneffdd/bQlhVRWib300ksMAHv11VfZ3r172apVq5inpycLDAw0+EPp0qULc3d3Z/Xr12erVq1i+/fvZ5MnT2YA2IYNGyr8DLRaLYuMjGQKhYItXLiQxcbGspiYGBYaGlqqxK5cuaIvpI0bN7LY2Fj2+uuvM7FYzObNm6efbtGiRUwikbCYmBh24MABtnfvXrZ06VKDacpSUmLBwcFs3Lhx+tfs4O
DAIiMjWY8ePdjMmTNZbGwsW7x4MZNIJGzq1KkGryUqKopJpVI2Z84cFhsby5YsWcLs7e1ZixYt9J9LyWcTEBDAGjduzDZu3Mj27dvHhgwZwgCww4cPM8YYS05OZh988AEDwL744gv9Z5acnFyl5ScsLIzNnTuX7d+/n3366adMoVCw8ePH66c7duwYs7OzY3369NE/15UrVxhjuj9ulUrFPv30U3bo0CG2a9cu9uGHH7Lly5eX+35Wlr/kd8OHD2e//vor27hxIwsNDWXOzs7s5s2bjDFW6TL+2muvsZUrV7K9e/eygwcPss8++4x5eHgYvK6S99uYEsvPz2fNmzdnoaGh+r/VuXPnMrFYzGJjY8t9rVVh1hI7fvw4Ky4uZtnZ2Wzv3r3Mx8eHde7cmRUXF+unbdiwIWvRooXBfYwxFh0dzXx9ffVrHCXzHDhwoMF0f/31FwPAFixYUGYWrVbLiouL2f379xkA9ssvv+h/V7IQzp07t9TjKtpf8OQHV9E+sSdL7Nq1awwAmzx5ssF0J06cYADY22+/rb+vS5cuDAA7ceKEwbSNGzdmUVFRZWYrsWfPHgaALVu2zOD+hQsXliqxqKgoFhAQwDIzMw2mffXVV5lSqdTvw4yOjmbNmzev8HnLUlJi/fr1M7h/+vTpDACbNm2awf0DBgxgbm5u+p/37t3LALCPPvrIYLqtW7cyAGzNmjX6+4KDg5lSqWT379/X35efn8/c3NzYpEmT9PcZu0/JmOXnyVyTJ09mSqXSYN9uefvEmjRpwgYMGFBhhrKUlz89PV1fmP8VHx/PFAoFGzFihP4+Y/eJaTQaVlxczDZu3MgkEonBPm1jS4wxxm7dusWcnJzYgAED2G+//cbEYjF79913K3+xJjLrKRbt2rWDTCaDo6MjevXqBVdXV/zyyy+QSnW73uLi4nD9+nWMHDkSAKBWq/W3Pn36IDExETdu3DCYZ8m0Jdq3b4/g4GAcOnRIf19ycjJefvllBAYGQiqVQiaTITg4GABw7dq1UjkHDRpkzpddoZKc/z26CQBt2rRBo0aNcODAAYP7fXx80KZNG4P7IiIicP/+faOe58n3a8SIEQY/FxQU4MCBAxg4cCBUKlWpz6CgoADHjx/XZ7xw4QImT56Mffv2ISsry7gX/Y/o6GiDnxs1agQA6Nu3b6n709LSkJOTAwA4ePAggNLv2ZAhQ2Bvb1/qPWvevDmCgoL0PyuVSjRo0KDS96yEqcvPs88+a/BzREQECgoKkJycXOlztWnTBnv27MHs2bPx+++/Iz8/36iM5Tl27Bjy8/NLvVeBgYHo1q1bqfeqPOfOncOzzz4Ld3d3SCQSyGQyjBkzBhqNBjdv3qxStnr16uGrr77C9u3bER0djU6dOtXI0VGzltjGjRtx6tQpHDx4EJMmTcK1a9cwfPhw/e8fPXoEAJg5cyZkMpnBbfLkyQCAx48fG8zTx8en1PP4+PggNTUVAKDVatGzZ0/89NNPmDVrFg4cOICTJ0/q/xDLWkh8fX3N84KNUJKzrOf08/PT/76Eu7t7qekUCkWlC3tqaiqkUmmpxz/5/qWmpkKtVmP58uWlPoM+ffoA+PczeOutt7BkyRIcP34cvXv3hru7O5555hmcPn26klet4+bmZvCzXC6v8P6CggKD1+Lp6WkwnUgkMvjsS1T1PQOqtvw8+XwKhaLcaZ/0+eef480338T27dsRGRkJNzc3DBgwoMJTkSpi6vJVlvj4eHTq1AkPHz7EsmXL8Oeff+LUqVP44osvABj3usrTt29feHt7o6CgADNmzIBEIqnyvMpj1qOTjRo10h+djIyMhEajwddff40ffvgBgwcPhoeHBwDdH8dzzz1X5jzCwsIMfk5KSio1TVJSEurVqwcAuHz5Mi5cuID169dj7Nix+mni4uLKzVmb53GVLPCJiYmljlomJCTo3xNzPI9arUZqaqrBH9mT75+rqyskEglGjx6NKVOmlDmvOnXqAACkUi
lmzJiBGTNmICMjA7/99hvefvttREVF4cGDB5Ue1a3ua0lJSTEoMsYYkpKS0Lp1a7M9V1WWn+qwt7fH/PnzMX/+fDx69Ei/VtavXz9cv37d5Pn9d/l6krHL1/bt25Gbm4uffvpJvwYKoMzzIJVKJQoLC0vd//jx4zKf6+WXX0Z2djbCw8Mxbdo0dOrUCa6urpVmMkWNnrH/0UcfwdXVFXPnzoVWq0VYWBjq16+PCxcuoFWrVmXeHB0dDeaxefNmg5+PHj2K+/fvo2vXrgD+LaSSfw1LrF692qSspvxrasq03bp1AwBs2rTJ4P5Tp07h2rVreOaZZ0zKWZ7IyEgApd+vLVu2GPysUqkQGRmJc+fOISIioszPoKw1GxcXFwwePBhTpkxBWlpajZ40WvKePPme/fjjj8jNza3Se1beZ2au5aes56ts+fD29sa4ceMwfPhw3LhxA3l5eRXODyid/+mnn4adnV2p9+rvv//GwYMHDd4rU94Dxhi++uqrUjlCQkJw8eJFg/tu3rxZajcQAHz99dfYtGkTVqxYgR07diAjI6PMU62qy6xrYk9ydXXFW2+9hVmzZmHLli0YNWoUVq9ejd69eyMqKgrjxo2Dv78/0tLScO3aNZw9exbbtm0zmMfp06cxceJEDBkyBA8ePMA777wDf39//eZnw4YNUbduXcyePRuMMbi5uWHnzp3Yv3+/SVmbNm0KAFi8eDF69+4NiUSCiIgI/abOf9WtWxd2dnbYvHkzGjVqBAcHB/j5+cHPz6/UtGFhYXjppZewfPlyiMVi9O7dG/fu3cOcOXMQGBiI1157zaSc5enZsyc6d+6MWbNmITc3F61atcJff/2Fb7/9ttS0y5YtQ8eOHdGpUye88sorCAkJQXZ2NuLi4rBz5079Pql+/frpz/3z9PTE/fv3sXTpUgQHB6N+/fpmyV2WHj16ICoqCm+++SaysrLQoUMHXLx4ETExMWjRogVGjx5t8jybNGkCAFizZg0cHR2hVCpRp04dsy0/T2ratCl+//137Ny5E76+vnB0dERYWBjatm2L6OhoREREwNXVFdeuXcO3336Lp59+usI12/Lyu7u7Y86cOXj77bcxZswYDB8+HKmpqZg/fz6USiViYmIMMgGll/EePXpALpdj+PDhmDVrFgoKCrBy5Uqkp6eXyjF69GiMGjUKkydPxqBBg3D//n189NFHpTb9L126hGnTpmHs2LH64lq7di0GDx6MpUuXYvr06dV6fw2Y4+hAeadYMKY7UhQUFMTq16/P1Go1Y4yxCxcusOeff555eXkxmUzGfHx8WLdu3diqVatKzTM2NpaNHj2aubi46I/C3Lp1y+A5rl69ynr06MEcHR2Zq6srGzJkCIuPjy91VK7k6NKT5/8wxlhhYSGbOHEi8/T0ZCKRqNzzxEp89913rGHDhkwmkxl9nliDBg2YTCZjHh4ebNSoUeWeJ/aksWPHsuDg4FL3PykjI4NNmDCBubi4MJVKxXr06MGuX79e5nlid+/eZRMmTGD+/v5MJpMxT09P1r59e4Ojvp988glr37498/DwYHK5nAUFBbEXXniB3bt3r8IcJUcnt23bZnB/ectJWZ9Lfn4+e/PNN1lwcDCTyWTM19eXvfLKK+WeJ/akso6WLV26lNWpU4dJJBKDo8vVXX5KXlfJ8sIYY+fPn2cdOnRgKpXK4Dyx2bNns1atWunPkQwNDWWvvfYae/z4cQXvaMX5GWPs66+/ZhEREUwulzNnZ2fWv39//WkdJSpaxnfu3MmaNWvGlEol8/f3Z2+88Yb+iPd/j4hqtVr20UcfsdDQUKZUKlmrVq3YwYMHDd7vnJwc1rBhQ9a4cWOWm5trkGHKlClMJpOVOgJfHSLGLHPItvXr12P8+PE4deqUfj8bIYQ8iftVLAghpDqoxAghgmaxm5OEEGIMWhMjhAgalRghRNCoxAghgkYlRggRNCoxQoigUYkRQgSNSowQImhUYoQQQaMSI4QIGp
UYIUTQqMQIIYJGJUYIETQqMUKIoNXo5amJDdGogaJsoCgXKMwBmAYQSQCxFBBL/rlJAbEMsHMBJDLeiYmVoBIjFctNBTLuA1kPgcyHQNbf//z3IZDzSFdYRTmAusC0+SqcADtXQOX+783BC3AJAlxDANc6gGswlR2pFF1PjOgUFwAp14BHV4BHV4HkK7r/z03hl0ks1RWae33Apwng9xTg/xTgWHosUmK7qMRsVW4qcP+vf2+Pruo2AYXA0U9XZn4tgIBWQGA7QKbknYpwQiVmKwqygLjfgHt/Avf+Ah7fBGAlH73UDgh+GqjbTXfzDuediNQiKjFrlpUI3PgVuP4rcO8IoCninah2OPoCoZFAwz5A/Z6AVFH5Y4hgUYlZm/R7wOWfgOu7gIdnYTVrW1WlcNaVWZNBumKT0LEsa0MlZg0Kc4ArPwMXvgPuH4XNF1d57NyAxs8CEcN0m5/EKlCJCdnDM8CZ9bo1r6Ic3mmExbsJ0GoCEDEUUDjwTkOqgUpMaLQa4OovwNHlQMJZ3mmET+EENBsGtJ4IeIbxTkOqgEpMKIrygPObgWMrdPu9iPmFRgKd3wBCOvBOQkxAJWbp8tKAE6uAk18B+Wm809iG4I5Al1lAaBfeSYgRqMQsVVEucOwL3WZjYRbvNLYp6Gndmlm9Z3gnIRWgErM0GjVwZh3wx8e67yYS/gLbAb0+APxb8k5CykAlZikY050mcXABkHabdxpSikh3AOCZGMDJl3cY8h9UYpYg+Rqw6zUg/hjvJKQyMnug42tA+6n0fU0LQSXGU3E+cHgxcHQFoC3mnYaYwjkI6P0h0LAv7yQ2j0qMl1v7gV9f112riwhXk0FA748Be3feSWwWlVhty00Fdr+u2/9FrIO9J9BnCRA+gHcSm0QlVpviDgDbJwM5SbyTkJrQuD/Q5xPAwZN3EptCJVYb1IXAb/OB41+Cvpxt5VTuQP8vgbBevJPYDCqxmpZ8HfhxIvDoEu8kpDa1mwJ0nwdI5byTWD0qsZp0diOwexagzuedhPDg9xTw/Abd4CekxlCJ1QSNGtg7Gzj1Fe8khDc7N2DQV0C97ryTWC0qMXPLTQW2jdVdy54QABCJgW5zgE4zeCexSlRi5pR0Cfi/EUBGPO8kxBI9NQbo+xldItvMqMTM5eoO4OdJQHEe7yTEkoV2BZ7fCCideSexGlRi5nD6G93Z90zLOwkRAs9GwMjvaYe/mVCJVdcfS4CD7/NOQYTGwRsY8T3g15x3EsGjEqsqxoDYd3WXiyakKpTOwKifgQC6Tll1UIlVhVYD7Jiqu+Y9IdWhcAJG/gAEteWdRLCoxEyl1QA/vkBf4CbmI3fQbVrSACVVIuYdQFAYA355lQqMmFdRDrB5MHDnMO8kgkQlZordM4ELW3inINaoOA/YMhS4d4R3EsGhEjNW7Bzg1Ne8UxBrps4HvhsBPLrCO4mgUIkZ4/fFwNHPeacgtqAwE9g0iL71YQIqscqcWQ/8/gHvFMSWZCcC3z6nGziZVIpKrCK3D+rOxCektqXeAjYPAYroa2yVoRIrx/3kNGi3Twa0at5RiK16eFp3QU06C6pCVGJlyMgrwpgNFzAFb0Ht6M87DrFlN37VjQZPykUnuz5BrdFi7LqT+CsuFQDQwD4fP7l9AYeUs5yTEZslEgPDtwINevJOYpFoTewJi/Zc1xcYANzMtUPbxNfwIIAGSSWcMC3w00Qg7Q7vJBaJSuw/Dlx7hLVH7pa6P1ctQae4kTgSOAkMIg7JiM0ryAT+bxRQlMs7icWhEvtHclYB3vjhYoXTjLrVBWt95oJJ7WopFSH/kXwF2Pk/3iksDpUYAK2WYfrW80jLLap02gX3wvC6/SJo7H1qIRkhT7i0Dbi4jXcKi0IlBmDl4ds4eju18gn/8dMjLwwoeh/5Hk1qMBUh5dj9OpD5kHcKi2HzJXYuPh2f7b9p8uMuZduj/aNZSPLvUQOpCKlAQSaw/RU6f+wfNl1i+UUa/O//zk
OtrdrCkF4sxdN3xuF00AQzJyOkEncPA8dX8k5hEWy6xJb+dhPxadX7WgdjIgy+2R2b/N4BkyjMlIwQIxyYDyRf552CO5stsWuJWWWeTlFV794JxztOH0Cr8jDbPAmpkLpAd5l0G9+stMkS02oZ3v75UpU3I8uzJdEXgzULUegWZtb5ElKuv08C5zbxTsGVTZbY5hP3cS4+o0bmfTbTEZ1S38Zjv641Mn9CSvktxqYv22NzJZacVYCP9t2o2ecolKHt3Ym4FDiyRp+HEABAXipw4D3eKbixuRJ7b9dVZBfU/OV1NEyMfrf64if/N8DEshp/PmLjzm4AHp7hnYILmyqx8w8ysOtiYq0+54zbLbDAdQG0StdafV5iY5hWdwFPM+zkP3r0KCQSCXr16mXyY+fNm4fmzZtXO4MpbKrEFu/hczh67cNAjBF/gCKXulyen9iIhHPAlZ+qPZtvvvkGU6dOxZEjRxAfXzPX+i8uLjbbvGymxP64mYJjd4z/apG5HUlzRmTGu8jwac8tA7EBhxbpBniuotzcXHz//fd45ZVXEB0djfXr1+t/t379eri4uBhMv337dohEIv3v58+fjwsXLkAkEkEkEukfLxKJsGrVKvTv3x/29vZYsGABAGDnzp1o2bIllEolQkNDMX/+fKjVpu3usYkSY4zh4xremW+MhwUKtI2fjJuBQ3hHIdYq9RZwvupjo27duhVhYWEICwvDqFGjsG7dOhh73dShQ4fi9ddfR3h4OBITE5GYmIihQ4fqfx8TE4P+/fvj0qVLmDBhAvbt24dRo0Zh2rRpuHr1KlavXo3169dj4cKFJmW2iRL79VIiLj3M5B0DAFCoFaPnrYHYEzAdTCThHYdYo8MfAerKr8hSlrVr12LUqFEAgF69eiEnJwcHDhww6rF2dnZwcHCAVCqFj48PfHx8YGf372WrRowYgQkTJiA0NBTBwcFYuHAhZs+ejbFjxyI0NBQ9evTA+++/j9WrV5uU2epLTK3R4tNY07/gXdNeiWuDTzzeB1M48o5CrE1mPHBmnckPu3HjBk6ePIlhw4YBAKRSKYYOHYpvvvnGLLFatWpl8POZM2fw3nvvwcHBQX978cUXkZiYiLw8478OKDVLOgu240IC7jy2zKthrngQgmvui7BK8TFkWfd5xyHW5I8lwFNjAJnxF/Bcu3Yt1Go1/P3/HRyHMQaZTIb09HSIxeJSm5am7KC3t7c3+Fmr1WL+/Pl47rnnSk2rVCqNnq/Vl9jXf5rv+5E14UCqG6JUMdjutRJOyad4xyHWIjcZOL8ZaD3RqMnVajU2btyITz75BD17Gg5IMmjQIGzevBl169ZFdnY2cnNz9YV0/vx5g2nlcjk0GuMOLDz11FO4ceMG6tWrZ9T05bHqEjsa9xhXE7N4x6jUnTwlnn74P+yq8wPq/L2ddxxiLY59AbScAIgr32u0a9cupKen44UXXoCzs7PB7wYPHoy1a9fiwIEDUKlUePvttzF16lScPHnS4OglAISEhODu3bs4f/48AgIC4OjoCIWi7Ku7zJ07F9HR0QgMDMSQIUMgFotx8eJFXLp0SX/00hhWvU/sqz+FMzpMrkaMyLjncTBwCpjIqj8WUlvS7gDXdxk16dq1a9G9e/dSBQbo1sTOnz+Pe/fuYdOmTdi9ezeaNm2K7777DvPmzSs1ba9evRAZGQlPT09899135T5nVFQUdu3ahf3796N169Zo164dPv30UwQHB5v0Mq123Mm45Bz0+OywIK9SMjM4DlPSFkNUbJn78oiABLUHJuzhnaJGWe0/+WuP3BVkgQHAkvv1MFW5iEYfJ9UXfxRIrHgUL6GzyhJLzy3Cz+f+5h2jWnaleCA6fz5yPZvzjkKE7qRp510JjVWW2M/nHqKgWMs7RrVdz1GhXeIMPPTvzTsKEbLLPwOFObxT1BirLTFrka2WosPt0TgW+BLvKESoinOBazt4p6gxVlditx5lW8xXjMxp+K2u+MZ3DpjU+J
MACdGrxvcpLZ3VldhPVrQW9qT37jbCGw6LoLH34h2FCM29I0DGA94paoRVlZhWy/CLFZcYAPyQ5I2BxQuQ7x7OOwoRFAZc/D/eIWqEVZXY8TupSMgs4B2jxl3MckDHlFl45NeddxQiJBe28k5QI6yqxKxph35lUotkaHd3PM4GjuMdhQhF6i2rPGfMakpMq2U4cD2Zd4xaxZgIz93qie/83gKTyHnHIUJwcx/vBGZnNSV2/u8MpOVW7UJwQvfWnaaY67wQWjt33lGIpbtpfV9BspoS+/1GCu8IXH2b4I9hbCEKXRvwjkIs2cOzQI51bbFYUYlZ1wdTFScznNAl7W2k+nbmHYVYLGZ1m5RWUWKPcwqt8gTXqkgqlKPd/Um4GjicdxRiqW7u5Z3ArKyixA7fSBHsFStqQrFWhD63+uEX/5lgYqu+7mUpD7O0GPVTPtw/yoZqYRaar8rBmYSyrzQ6aWc+RPOzsPR4YYXzXH++CKL5WaVuBep/F7rNF4sR+Fk23BZn4Y1Yw9N87mVo0WB5DrIKLWQhvfM7oDFtWDRLZhVL+O83bXt/WHn+d/spXAlYgNk5H0JckME7To1Lz2fo8E0uIutIsWekCl72ItxO08JFKSo17fbrxTjxUAM/x9K/K4uTArjxqoPBfUqp7rGP87SYuDMf6/vbIdRVjL5b8tA1RIK+DWQAgFd+zceH3RVwUhj3XDWuKAdIugD4t+SdxCysYk3s5F1+g+JaujV/B2Gc+AMUO4fyjlLjFv9ViEBnMdb1t0MbfwlCXMR4JlSKum6Gi/nDLC1e3V2Azc/ZQWbkX4AIgI+D2OBW4k46g7NChKFNZGjtL0FkHQmupuiuorLlUjHkEhGeayQz18s0j/gTvBOYjeBLLCmzAI+yKt4csHV/pLngmax3kendjneUGrXjhhqtfCUYsi0PXh9no8XqHHx1xvC0Gy1jGP1zPt5oL0e4l/HjfuYUAcFLsxHwaTait+ThXOK/m6j13cTIK2Y4l6hBWj7DqYcaRHhLkJbPMPdQAVb0tsAv7T84zjuB2Qi+xM4/SOcdQRDi85Vo9/cUxAUO4h2lxtxJ12Ll6SLUdxNj3ygVXm4px7S9Bdh44d8iW3ykCFIxMK2t8ScHN/QQY/0AJXYMU+G7QXZQSoEO3+TiVqquyFztRNgwwA5jtuejzVc5GNNMhqh6UsyMLcDUNnLczdCixeocNPkyBz9cNX6IsxplRWtigt8ndu5BBu8IgpGvkaD7rUFYXS8APR8uh4gJ/8KR/6VlQCs/CT54Rrfm08JXgispWqw8XYwxzeQ4k6DBshNFODvJHiKR8fun2gVI0S7g3587BEnw1OpcLD9ZjM9769bmBjaSYeB/Nhl/v6fGpWQNVvRRot7nOfhukB18HERo83UuOgdL4GXPef0hJwlIvwe4hvDNYQbCXxOLz+AdQXAmxbXFUs/3wOQOlU8sIL6OIjT2NFykG3mIEZ+pK+s/49VIzmUI+iwH0veyIH0vC/czGV6PLUTI0myjn0csEqG1nwS30so+6lmoZpj8awFWR9shLk0LtRboEiJFmIcEDdzFOPG3ceMy1jgrWRsT9JqYVstwmc4Pq5Jl8aG45vEhvlQuhjTLOq4z1SFQghuphmuXN1O1CHbWFdvoCBm6hxou8lGb8jA6QobxzY3f8c4Yw/lHGjQtZ5/a+38Uonc9KZ7yleBcogZq7b+nVhRrAI2FnGmBxPNAs6G8U1SboEvsZnI2coss5F81AYp97IYo1Tz84vUlHJLP8I5Tba+1U6D9N7n44M9CPB8uw8mHGqw5W4Q10XYAAHeVGO4qw8fIxICPgwhhHv8W0pif8+HvKMKi7rrN0vm/F6JdgAT13cXIKmT4/EQRzidp8UUfu1IZriRrsPWKGucn6UbIbughhlgkwtqzRfBxEOH6Yy1a+xl/QKFGpVznncAsBF1i1wQwurelu51nh7YJ07En5HsE/b2Td5xqae0vwc9D7fDWgUK8d7gQdV
zFWBqlxMgI005viM/UQvyfAYwzChhe2pWPpBzdqRQtfMX4Y5wKbfwNy4gxhpd2FeCzKAXs5bp9bnYyEdYPUGLK7gIUqoEVfZTwd7KQvTjJ1lFigh48d8m+G1hxKI53DKuxsf6f6PRgFUQQ7CJBTDX7AaB04p2iWizkn4SqufPYeoeh4mHMrU5Y7R0DJlNVPjGxDik3eCeoNmGXWEou7whW58P7DTDNbhE0Dr68o5DakHKNd4JqE3SJxafl8Y5glXYmeyK64H3keUTwjkJqGq2J8fM4pxB5dGSyxlzLUeHpRzOR4N+LdxRSk7ISeCeoNsGW2ANaC6txmcVSdLgzGicCJ/KOQmpKdhLvBNUm2BJLyLD+odksAWMiDL3VDRt936XRx61RdiLvBNUm2BJLy7PNQUF4mXu3MWY7fgCtypN3FGJOOY94J6g2wZZYJpVYrdua6IPBmoUocGvEOwoxl+I8ID+Dd4pqEWyJZeRZyCVNbMzZTAd0ejwbKX7deEch5iLw/WLCLbF8KjFeUopkaHd3Ai4EjeYdhZhD3mPeCapFuCVGa2JcaZgY/W/2xvf+b4KJLezSy8Q0xcI+SCbYEsvMp31ilmDW7WaY77IQWjs33lFIVRUL+3QlAZcYrYlZivUJARiJhShyrc87CqkKNa2JcVFsMVeWIwBwLN0ZXdLfQZpPR95RiKloTYwQncQCOdrGv4LrgcK/WqhNoX1ihPyrWCtCr1v9sdN/hs2NPi5Y6nzeCaqFSozUiKm3W2GLzyzeMYgNEGyJCfiCtDYjrsCZdwRiDInxY3BaIsGWGLF8jlI6giwIVGKElM1BTCUmCFRifCikFjLsFSmXg4hOSBYEqYJ3gmoRbIk529FXXSyditbEhIHWxPhwohKzeCpaExMGWhPjw0VFJWbpVKJC3hGIMexceSeoFuGWGK2JWTw70JqYIKjceSeoFuGWGK2JWTwFaE1MEKjE+KAd+5ZPSSVm+cRSKjFe3B2EvTPSFsgZbU5aPHsvQCTinaJaBFtiga4q3hFIJeRaYV8dwSY4+vBOUG2CLbFgDyoxSydjtDlp8Zz9eSeoNsGWmJNSBlfauW/RZLQmZvnchX81XsGWGAAEu9vzjkAqINVQiVk8jwa8E1SboK9aF+KuwvkHGbxjAACyz+1G9rndUGfqRlSWeQTBpf1w2NVtBQB4/OtnyL18wOAxct8w+I75pNx55lz6Dam7l5a6P+j1nyCS6r4qknPlEDIObwArLoBDRE+4Rk7QT6fOfIRHW+fAd+xSiBW1v/ktoRKzfB7CXxMTdIkFWdCamMTRHa5dxkLq6gcAyLl8AMk/LYDvuGWQewYDAJR1WsKjz/T/PKjyt18kV8H/xdWG9/1TYJq8TKTtXQ73PtMhdfFB8g/zoQhqClXd1gCA1H1fwrXLOC4FBgASjbCvGGoTqMT4qmNBO/dV9doa/OzaeQxyzu1GYcINfYmJpDJIHEz8iodIVO5j1BlJEClUsG/UGQCgDIpA8eN4oG5r5F79HSKJFKqw9qa/GDMRC/yyx1bP3gtQCv/ClYIusXA/y/wAmFaDvOtHoC0ugMK/of7+gvhLeLB8JMQKeygDm8Cl8xhI7F0qnldRPv5eOR7QaiH3DoVLp1GQe9cFAEjd/MGKC1H06DYkTl4oSrwJh6bdocnPRsafm+E9/IOafJmVElGJWTYr2B8GACIm4Os8a7UMEfNjkVOo5h0FAFCUcg9J384EUxdBJLeDZ7+ZsPtn0y732h8Qye0gdfKEOvMRMv7cBGg18B27DCJp2UdZCx9eR3FGIuSewdAW5iH79A7k3zkD3/GfQ+amOzSed/MoMv7cDKYugn14V7h0HInHu5dC7lUHcu+6SPttDaBVw7nDCNg3rN3h1O7ajYaIaWr1OYkJ2k0Gei3inaLaBF1iADBszTEcv5PGOwYAgGmKoc5KgbYgF3k3/0LOhVh4j/gQco+gUtOqc9LwcOUEeD47y+hNPsa0SFz/PygDm8Ct+6
QypymIv4j0Q+vgPWIREta8BI9+b0Bi74rEjTPg/9KaStf8zMVeosUV2ahaeS5SRYO/AZoM4p2i2gR9igUANAt04R1BTySRQebqB4Vvfbh2GQe5Vx1kn95R5rRSBzdInT1RnJ5g/PxFYih86qM4rezHMHUx0mJXwi1qCtTpiWBaDZRBTSFzD4DMzR+FiTeq9LqqwllGF0S0eAGteScwC+GXWIAL7wgVYGCasv+YNflZUGc9NmlHP2MMRcl3y31MxtH/gzK0JRQ+9QCmBbT/bsoxrRrQak2LXw0uUsvYxCflcPABXEpvIQiRoHfsA5azJpZ+eAPsQltC6uQJbVE+cq/9gYL4y/AaMh/aonxkHtkCVVh7SBzcdPvEDm+ExM4JqvpP6+fxeNcn/5yqMQ4AkHFkCxR+Ybod+IV5yDqzA0XJd+DW4+VSz1+Uch951/+A77jlAACpWwAgEiP7QiwkDq4oTv0bct/aO5zuIlMD1GOWK6AV7wRmI/gS83exg6ejAinZfL+np8nNwONdn0KTmwaxwh5yzxB4DZkPuzotoC0uRFHKPeRcOQhtQS4kDq5QBkXAo/+bBudwqbNSANG/K8fawlyk7lsBTW66bp5eofAZ8SEUfmEGz80YQ9q+FXDt9iLEciUAQCxTwL3PdKTtXwmmKYZbj5chdfSonTcDgDMN12bZAtvwTmA2gt+xDwDTvjuHHReM37dEat7zPkn4KGMG7xikPBNigaC2lU8nAILfJwYAXcM8eUcgT3CS0JqYxVI6W9XmpFWUWOcGnkK/rpvVcaQSs1x1uwFi6xm31SpKzMNBgab+lnn2vq1yENNVXS1WvR68E5iVVZQYAHRtQJuUlsSeBs61UCKgPpWYReoS5sU7AvkPlZiu6mqRfCMAB+v6W7GaEmsR6ELDuFkQlYjWxCxS/Z68E5id1ZSYWCxCz8bevGOQf9DAuRaq8QDeCczOakoMAAa2COAdgfyDxpy0QF7hgE8T3inMzqpKrF2oG/yclbxjEFCJWaSIIbwT1AirKjGRSIT+LYQ/BJU1UNDmpIURAU2pxAThOSoxi6BgNEiIRQnuADhb5+4Wqyux+t6OCPdz4h3D5sm1tDlpUax0UxKwwhIDgIG0NsYdDZxrQWT2QPhA3ilqjFWW2KCnAqCQWuVLEwwqMQvSbJhVjGpUHqv8S3e1l6N/cz/eMWwajf5tKURA27LHY7AWJpWYWq3Gu+++izp16sDOzg6hoaF47733oC3nsseTJk2CSCTC0qVL9felpaVh6tSpCAsLg0qlQlBQEKZNm4bMzEz9NPfu3cMLL7ygf566desiJiYGRUXGH/Ea176OKS+NmJmE9olZhtCugGdYpZMJmUlXdl28eDFWrVqFDRs2IDw8HKdPn8b48ePh7OyM//3vfwbTbt++HSdOnICfn+EaUUJCAhISErBkyRI0btwY9+/fx8svv4yEhAT88MMPAIDr169Dq9Vi9erVqFevHi5fvowXX3wRubm5WLJkiVFZG/s5oV2om8WMhGRraOBcC9G29KXMrY1JV3aNjo6Gt7c31q5dq79v0KBBUKlU+Pbbb/X3PXz4EG3btsW+ffvQt29fTJ8+HdOnTy93vtu2bcOoUaOQm5sLqbTsXv3444+xcuVK3Llzx9i4OHQ9GePXnzJ6emI+d1ynQZz/mHcM2+YWCkw9C2u/2J5Jm5MdO3bEgQMHcPPmTQDAhQsXcOTIEfTp00c/jVarxejRo/HGG28gPDzcqPlmZmbCycmp3AIrmcbNzc2UuOga5okG3g4mPYaYB43+bQHaTbb6AgNM3Jx88803kZmZiYYNG0IikUCj0WDhwoUYPny4fprFixdDKpVi2rRpRs0zNTUV77//PiZNKn/n4+3bt7F8+XJ88sknpsSFSCTCS53rYua2CyY9jpgBlRhfTgHAU2N4p6gVJpXY1q1bsWnTJmzZsgXh4eE4f/48pk+fDj8/P4wdOxZnzpzBsmXLcPbsWYiM+BcgKysLff
v2RePGjRETE1PmNAkJCejVqxeGDBmCiRMnmhIXgO6csS9/j8OdlFyTH0uqxkGqhojV3hiXpAydZgBSBe8UtcKkfWKBgYGYPXs2pkyZor9vwYIF2LRpE65fv46lS5dixowZEIv/3UrVaDQQi8UIDAzEvXv39PdnZ2cjKioKKpUKu3btglJZ+ovbCQkJiIyMRNu2bbF+/XqD+Zri14uJmLLlbJUeS0wXqCzAn5jAO4btcg4Cpp0FJLZxfT2T1sTy8vJKFYlEItGfYjF69Gh0797d4PdRUVEYPXo0xo8fr78vKysLUVFRUCgU2LFjR5kF9vDhQ0RGRqJly5ZYt25dlQsMAPo09UFTf2dcephZ+cSk2lxkaoCuichP55k2U2CAiSXWr18/LFy4EEFBQQgPD8e5c+fw6aefYsIE3b+67u7ucHd3N3iMTCaDj48PwsJ056pkZ2ejZ8+eyMvLw6ZNm5CVlYWsrCwAgKenJyQSCRISEtC1a1cEBQVhyZIlSElJ0c/Px8fH5BcpEonwRlQYxnxz0uTHEtNRiXHkGgI0H8k7Ra0yqcSWL1+OOXPmYPLkyUhOToafnx8mTZqEuXPnGj2PM2fO4MSJEwCAevXqGfzu7t27CAkJQWxsLOLi4hAXF4eAAMNv3ld1rN/ODTzRvq47jt5OrdLjifGcJGreEWxX5DuAxKQ/a8GzihHAjXUuPh0DvzzKO4bVG+6bgEXpM3nHsD2B7YAX9vFOUeus8ruT5WkR5Ip+zeg7lTXNkdbEap9IDPT5iHcKLmyqxABgTnQjOCpta3W7tjnQmJO1r+U4wLcZ7xRc2FyJeTkqMatXQ94xrJq9hC5NXavs3IBuc3in4MbmSgwARrYJQosgF94xrJYDjTlZu7q9C6hM+0qeNbHJEhOLRfhgYFNIxdb/vTIeVCJaE6s1AW2AluMrn86K2WSJAUAjXydM6EjXHKsJKjGVWK2QKoEBXwLVOBHcGtj0q3+tewMEu6t4x7A6NPp3Len6FuBRn3cK7my6xOzkEiwb1gIyCW1WmpMdDZxb8wLbAe2Nu1KMtbPpEgOA5oEueK1HA94xrAqN/l3D5A7AwFU2vxlZgt4FAK90qYsO9dwrn5AYRcGoxGpUrw8BN9qfW4JKDLoviH/2fHO42ct5R7EKctonVnOajwSeGs07hUWhEvuHl5MSHw+O4B3DKtCYkzXEuynQ17SrG9sCKrH/eKaRNybSaRfVJqcSMz+lMzD0W0BmxzuJxaESe8JbfRqha5gn7xiCJqUxJ81MBAxcTfvBykEl9gSJWITlw1ugvheNklRVNPq3mXWaAYT15p3CYlGJlcFRKcPasa3hqrKdS/yak0RDIx2ZTeP+QOS7vFNYNCqxcgS5q7BqVEs6EbYKxLQmZh5BTwMD19D5YJWgd6cCbUPdsXBAU94xBEdMY05Wn0cDYNgWQFZ6EB1iiEqsEs+3DsQMOqPfJKJiKrFqcfAGRv5g05fXMQWVmBGmPVMfL3epyzuGIIhEDFDT5mSVyR2AEd8DrsG8kwgGlZiRZvduiHHtQ3jHsHjOUjVEsJmxZ8xLZg+M3Ab4NeedRFCoxEwQ068xhrUO5B3DorlINbwjCJPMHhj1AxDcnncSwaESM4FIpLsi7IDmNGJSeZxlNNKRyeQOVGDVQCVmIrFYhE+eb05FVg4XKV1f3yRyB90mJBVYlVGJVYFELMJnQ5vjBfqeZSk05qQJFE5UYGZAJVZFIpEIc6IbY3bvhhDR+bB6TrQmZhwnf2DCXiowM6ASq6aXu9TFksHNaOSkfzhJqMQq5RUOvLAf8A7nncQqUImZwaCWAfhqTCvYySS8o3BHo39XIrSrbg3M2Z93EqtBJWYmkQ298H8vtYOvs21/TcSB1sTK12y47kx8pRPvJFaFSsyMmgW6YOfUjmgXartfF7EX0bXEShFJgB7v6Qb3kNCVUcyNSszMPBwU2DyxHV7sZJtHLu1p4FxDDt7A2B1Ah//xTmK1qMRqgEQswj
t9G2PFiBawl9vWfjKliDYn9YLaA5P+AEI68k5i1ajEalB0hB+2T+mAUE973lFqDY3+/Y+nXwXG7gQcfXgnsXpUYjWsvrcjdk3tiBFtg3hHqRU2P/q3gzcwfCsQtRCQSHmnsQlUYrVAJZfig4FNsW5ca3g6KnjHqVE2Pfp3k0HA5ONAWC/eSWwKlVgtimzohdjpndHfir93aZOjf6vcgSEbgMHf0IUMOaASq2Wu9nIsG9YCX41pBS8rXCuT21qJNYwGJp8AwgfwTmKzaKOdkx6NvdE21A2f/3YLG47dQ7HGOi4kaDMl5hYKRC2iTUcLIGKMWcdfj4DdTsnB+7uu4vcbKbyjVNv5kBVwSTrKO0bNkdkDnV/XHX2UWt+atBBRiVmQg9cf4f1d13D3cS7vKFV2OXAJHFLO8o5RM5oMBnq+DzhZ7z5NIaLNSQvSraE3OtbzxPqjd/HFodvIzBfeiaMSrRUOEhLUHugeAwS1452ElIHWxCxUTqEaG47ew9ojd5GWK5wTSG96vwN55l3eMczD7ymg2ztAve68k5AKUIlZuLwiNb49dh9f/XkHj3Msv8xue8yEJCeBd4zqCWgNdJ4FNOjJOwkxApWYQBQUa7D5RDzW/HEbj7Is9wjgHZcpEBek845RBSKgfk+g3StA3UjeYYgJqMQEplijxd7LSdh84j6O30njHaeUuw4TIBLS4LkKZ6DFSKD1RMCdBkgWIioxAYtLzsGWE/H48ezfFnEQQCRiuKsYyTuGcTzCgDYv6i5UqHDgnYZUA5WYFSgo1mDnhQRsPfUAZ+LTwesTdZMV46xkLJ8nN4a9JxD+HBAxFAhoyTsNMRMqMSvzKKsAey4lYvflJJy+lwZtLX66oap8HNS+UHtPaAy5g+6rQRFDgNBIQGxb13ezBVRiViw5uwD7Lidh96UknLyXBk0NN9pTztn4qXBSjT6HUZwCgPrdgXo9gLrdALmKdyJSg6jEbERmXjFO3E3F8TtpOHYnFdeTssy+2RnploZ1ea+ad6bGkMh1J6LW6wHU7wF4Nar9DIQbOmPfRjirZOgZ7oOe4borjabnFulL7cTdNMQlZ1f7S+hOsloa/dvRFwhopTufK6A14Nuc1rZsGJWYjXK1l6NXE1/0auILQHfqRlxyDq4lZuF6Urb+vynZxp+TZvaBc6V2utMe3OsBHvUB7ya60qIxG8l/UIkRAIBMIkYjXyc08jUcEzE1pxB/p+cjMbMASZn5SMwqQFLmP7esAqTlFiG3UA0tM3HMSakSsPcCHDx1Rw3tPQEHL8DBR1dcHvUB50BARCOrk4rRPjFiFvlFGhQX5cNJnQaoiwBNIcC0gEShG2tRItfdpHLdfTLbHmSYmA+VGCFE0Ojy1IQQQaMSI4QIGpUYIUTQqMQIIYJGJUYIETQqMUKIoFGJEUIEjUqMECJoVGKEEEGjEiOECBqVGCFE0KjECCGCRiVGCBE0KjFCiKBRiRFCBI1KjBAiaFRihBBBoxIjhAja/wM+CzoRKRNTkgAAAABJRU5ErkJggg==",
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 14\n",
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"/tmp/ipykernel_436/3170175140.py:10: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Run the outlier detection for each company (and spot the main non-anonymous customer)\n",
"\n",
"# nb_compagnie=['10','11','12','13','14']\n",
"for company_number in nb_compagnie:\n",
"    print(f\"outlier for tenant {company_number}\")\n",
"    outlier_detection(directory_path=company_number, coupure=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbe1af6a-79e9-45c7-a810-c6df3bf647f7",
"metadata": {},
"outputs": [],
"source": [
"# print(products_purchased_reduced_spectacle.loc[products_purchased_reduced_spectacle[\"number_compagny\"]==10][\"total_amount\"].describe())\n",
"\n",
"# Company 10: purchase rows of customer 19521 (the outlier candidate for this company)\n",
"products_purchased_reduced_spectacle[\n",
"    products_purchased_reduced_spectacle[\"number_compagny\"].eq(10)\n",
"    & products_purchased_reduced_spectacle[\"customer_id\"].eq(19521)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20e2b8a2-f31c-42a4-8ea5-7ad67ab66915",
"metadata": {},
"outputs": [],
"source": [
"# Company 11\n",
"# Odd profile: no online sales and a single supplier, yet more than 9k purchases\n",
"products_purchased_reduced_spectacle[\n",
"    products_purchased_reduced_spectacle[\"number_compagny\"].eq(11)\n",
"    & products_purchased_reduced_spectacle[\"customer_id\"].eq(36)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5dbce57c-d091-4ce2-92f9-1201deb2462e",
"metadata": {},
"outputs": [],
"source": [
"# Company 12: purchase rows of customer 1706757\n",
"products_purchased_reduced_spectacle[\n",
"    products_purchased_reduced_spectacle[\"number_compagny\"].eq(12)\n",
"    & products_purchased_reduced_spectacle[\"customer_id\"].eq(1706757)\n",
"]"
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a243b57-19da-4e29-a53d-bb8d03e2ab77",
"metadata": {},
"outputs": [],
"source": [
"# Company 13: purchase rows of customer 8422\n",
"products_purchased_reduced_spectacle[\n",
"    products_purchased_reduced_spectacle[\"number_compagny\"].eq(13)\n",
"    & products_purchased_reduced_spectacle[\"customer_id\"].eq(8422)\n",
"]"
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d9b01bc-9584-4882-bd06-7de8acb8a88f",
"metadata": {},
"outputs": [],
"source": [
"# Company 14\n",
"# Is this really an outlier? Bought almost 3k tickets over 96 purchases\n",
"products_purchased_reduced_spectacle[\n",
"    products_purchased_reduced_spectacle[\"number_compagny\"].eq(14)\n",
"    & products_purchased_reduced_spectacle[\"customer_id\"].eq(6354)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "033c1e00-52bd-4651-b893-57bda531760e",
"metadata": {},
"outputs": [],
"source": [
"# Cross-check in the customerplus table (the outlier is uncertain for companies 11 and 14)\n",
"\n",
"customerplus_clean_spectacle[\n",
"    customerplus_clean_spectacle[\"customer_id\"].eq(36)\n",
"    & customerplus_clean_spectacle[\"number_compagny\"].eq(11)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "28ac8cda-32fa-4fb7-a75b-e1cc24871c39",
"metadata": {},
"outputs": [],
"source": [
"# Company 14: customerplus row(s) of customer 6354\n",
"customerplus_clean_spectacle[\n",
"    customerplus_clean_spectacle[\"customer_id\"].eq(6354)\n",
"    & customerplus_clean_spectacle[\"number_compagny\"].eq(14)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3faea297-2cc5-4704-af85-77d95f600cc1",
"metadata": {},
"outputs": [],
"source": [
"# Company 13: customerplus row(s) of customer 8422\n",
"customerplus_clean_spectacle[\n",
"    customerplus_clean_spectacle[\"customer_id\"].eq(8422)\n",
"    & customerplus_clean_spectacle[\"number_compagny\"].eq(13)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b165ea79-347b-46fb-8217-635d9e888c65",
"metadata": {},
"outputs": [],
"source": [
"# Company 10: customerplus row(s) of customer 19521\n",
"customerplus_clean_spectacle[\n",
"    customerplus_clean_spectacle[\"customer_id\"].eq(19521)\n",
"    & customerplus_clean_spectacle[\"number_compagny\"].eq(10)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "282b0a96-5e78-48aa-9c2c-7d00d3907add",
"metadata": {},
"outputs": [],
"source": [
"customerplus_clean_spectacle.columns"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "ad47a812-a744-49c5-8079-0919b49ef24c",
"metadata": {},
"outputs": [],
"source": [
"# Outliers to remove from the tables, stored as {company number: customer_id}\n",
"# NOTE(review): company 14 (candidate customer 6354) is not listed; that outlier was flagged\n",
"# as uncertain in the checks above — confirm the omission is intended.\n",
"outliers_musique_dico = {\n",
"    10: 19521,\n",
"    11: 36,\n",
"    12: 1706757,\n",
"    13: 8422,\n",
"}\n",
"\n",
"# outlier_music_list = list(outliers_musique_dico.values())\n"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "9717dfd5-c39c-41eb-858d-5baf3ab71554",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 19521\n",
"11 36\n",
"12 1706757\n",
"13 8422\n"
]
}
],
"source": [
"# Drop every (company, customer_id) outlier pair from the four tables.\n",
"# A row is kept unless it matches BOTH the company number and the customer id.\n",
"for tenant_number, customer_id in outliers_musique_dico.items():\n",
"\n",
"    print(tenant_number, customer_id)\n",
"\n",
"    customerplus_clean_spectacle = customerplus_clean_spectacle[\n",
"        ~(customerplus_clean_spectacle[\"number_compagny\"].eq(tenant_number)\n",
"          & customerplus_clean_spectacle[\"customer_id\"].eq(customer_id))\n",
"    ]\n",
"\n",
"    campaigns_information_spectacle = campaigns_information_spectacle[\n",
"        ~(campaigns_information_spectacle[\"number_compagny\"].eq(tenant_number)\n",
"          & campaigns_information_spectacle[\"customer_id\"].eq(customer_id))\n",
"    ]\n",
"\n",
"    products_purchased_reduced_spectacle = products_purchased_reduced_spectacle[\n",
"        ~(products_purchased_reduced_spectacle[\"number_compagny\"].eq(tenant_number)\n",
"          & products_purchased_reduced_spectacle[\"customer_id\"].eq(customer_id))\n",
"    ]\n",
"\n",
"    target_information_spectacle = target_information_spectacle[\n",
"        ~(target_information_spectacle[\"number_compagny\"].eq(tenant_number)\n",
"          & target_information_spectacle[\"customer_id\"].eq(customer_id))\n",
"    ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb7f4c95-817b-4145-9319-11d2f62b24d9",
"metadata": {},
"outputs": [],
"source": [
"# check that the outliers are not in the train set"
]
},
{
"cell_type": "code",
"execution_count": 147,
"id": "b50e1de8-28fe-42bd-bd81-dde7e36b64fb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['10_19521', '11_36', '12_1706757', '13_8422']"
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Outlier identifiers rebuilt as \"<company number>_<customer_id>\" strings\n",
"# (presumably the customer_id format used in the train set — TODO confirm)\n",
"outliers_train_set_musique = [\n",
"    f\"{tenant_id}_{customer_id}\"\n",
"    for tenant_id, customer_id in outliers_musique_dico.items()\n",
"]\n",
"outliers_train_set_musique"
]
},
{
"cell_type": "code",
"execution_count": 161,
"id": "1753d45d-beac-48a4-9bc4-f84925320a89",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [customer_id, nb_tickets, nb_purchases, total_amount, nb_suppliers, vente_internet_max, purchase_date_min, purchase_date_max, time_between_purchase, nb_tickets_internet, street_id, structure_id, mcp_contact_id, fidelity, tenant_id, is_partner, deleted_at, gender, is_email_true, opt_in, last_buying_date, max_price, ticket_sum, average_price, average_purchase_delay, average_price_basket, average_ticket_basket, total_price, purchase_count, first_buying_date, country, gender_label, gender_female, gender_male, gender_other, country_fr, nb_campaigns, nb_campaigns_opened, time_to_open, y_has_purchased, number_company]\n",
"Index: []\n",
"\n",
"[0 rows x 41 columns]"
]
},
"execution_count": 161,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Expected result: an empty frame (no outlier left in the train set).\n",
"# NOTE(review): an empty result only proves absence if train_set_spectacle[\"customer_id\"] holds\n",
"# \"<company>_<customer_id>\" strings like the list built above — confirm the column format.\n",
"train_set_spectacle.loc[train_set_spectacle[\"customer_id\"].isin(outliers_train_set_musique)]  # OK"
]
},
{
"cell_type": "markdown",
"id": "42f8171c-e80d-4faa-b278-21fcbe3b242c",
"metadata": {},
"source": [
"### 1. customerplus_clean"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "47f98721-53dd-4f8f-85ac-88043ee8d967",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>total_price</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>8</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>18441</td>\n",
" <td>11139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9231</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9870</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity tenant_id \\\n",
"0 821538 139 NaN NaN 0 875 \n",
"1 809126 1063 NaN NaN 0 875 \n",
"2 11005 1063 NaN NaN 0 875 \n",
"3 17663 12731 NaN NaN 0 875 \n",
"4 38100 12395 NaN NaN 0 875 \n",
"5 307036 139 NaN NaN 0 875 \n",
"6 2946 1063 NaN NaN 0 875 \n",
"7 18441 11139 NaN NaN 0 875 \n",
"8 9231 139 NaN NaN 0 875 \n",
"9 9870 139 NaN NaN 0 875 \n",
"\n",
" is_partner deleted_at gender is_email_true ... total_price \\\n",
"0 False NaN 2 True ... 0.0 \n",
"1 False NaN 2 True ... 0.0 \n",
"2 False NaN 2 False ... NaN \n",
"3 False NaN 0 False ... NaN \n",
"4 False NaN 0 True ... NaN \n",
"5 False NaN 2 True ... NaN \n",
"6 False NaN 2 False ... NaN \n",
"7 False NaN 2 False ... NaN \n",
"8 False NaN 0 True ... NaN \n",
"9 False NaN 2 True ... NaN \n",
"\n",
" purchase_count first_buying_date country gender_label gender_female \\\n",
"0 0 NaN NaN other 0 \n",
"1 0 NaN fr other 0 \n",
"2 14 NaN fr other 0 \n",
"3 1 NaN fr female 1 \n",
"4 1 NaN fr female 1 \n",
"5 1 NaN NaN other 0 \n",
"6 8 NaN fr other 0 \n",
"7 3 NaN fr other 0 \n",
"8 1 NaN NaN female 1 \n",
"9 1 NaN NaN other 0 \n",
"\n",
" gender_male gender_other country_fr number_compagny \n",
"0 0 1 NaN 10 \n",
"1 0 1 1.0 10 \n",
"2 0 1 1.0 10 \n",
"3 0 0 1.0 10 \n",
"4 0 0 1.0 10 \n",
"5 0 1 NaN 10 \n",
"6 0 1 1.0 10 \n",
"7 0 1 1.0 10 \n",
"8 0 0 NaN 10 \n",
"9 0 1 NaN 10 \n",
"\n",
"[10 rows x 28 columns]"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview of the first rows of the table\n",
"customerplus_clean_spectacle.head(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "738e063b-f84e-4a00-b35d-6d1d657e3c09",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 1523684\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"street_id 0\n",
"structure_id 1460622\n",
"mcp_contact_id 729163\n",
"fidelity 0\n",
"tenant_id 0\n",
"is_partner 0\n",
"deleted_at 1523684\n",
"gender 0\n",
"is_email_true 0\n",
"opt_in 0\n",
"last_buying_date 762879\n",
"max_price 762879\n",
"ticket_sum 0\n",
"average_price 667328\n",
"average_purchase_delay 762915\n",
"average_price_basket 762915\n",
"average_ticket_basket 762915\n",
"total_price 95551\n",
"purchase_count 0\n",
"first_buying_date 762879\n",
"country 429485\n",
"gender_label 0\n",
"gender_female 0\n",
"gender_male 0\n",
"gender_other 0\n",
"country_fr 429485\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Table size, then number of missing values per column\n",
"print(\"Nombre de lignes de la table : \", len(customerplus_clean_spectacle))\n",
"customerplus_clean_spectacle.isnull().sum()"
]
},
{
"cell_type": "markdown",
"id": "b44054b3-d850-4bc9-bc73-feb9979908bc",
"metadata": {},
"source": [
"#### Nombre de clients de la compagnie"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "884a33d0-c275-4ab4-ab1f-8b53e563fb95",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" number_compagny already_purchased customer_id\n",
"0 10 True 45263\n",
"1 11 True 35312\n",
"2 12 True 216104\n",
"3 13 True 388730\n",
"4 14 True 101642\n",
" number_compagny already_purchased customer_id\n",
"0 10 False 53530\n",
"1 11 False 35994\n",
"2 12 False 26620\n",
"3 13 False 379005\n",
"4 14 False 241484\n"
]
}
],
"source": [
"# Input for the customer barplot: size of each company's customer base,\n",
"# split between customers who already purchased and customers who never did\n",
"\n",
"# Purchase flag. `assign` returns a new frame instead of writing the column in place:\n",
"# customerplus_clean_spectacle is the result of a boolean filter (outlier removal),\n",
"# and an in-place column write on such a frame can raise pandas' SettingWithCopyWarning.\n",
"customerplus_clean_spectacle = customerplus_clean_spectacle.assign(\n",
"    already_purchased=customerplus_clean_spectacle[\"purchase_count\"] > 0\n",
")\n",
"\n",
"# Number of customers per company, one table per value of the flag\n",
"nb_customers_purchasing_spectacle = (\n",
"    customerplus_clean_spectacle[customerplus_clean_spectacle[\"already_purchased\"]]\n",
"    .groupby([\"number_compagny\", \"already_purchased\"])[\"customer_id\"]\n",
"    .count()\n",
"    .reset_index()\n",
")\n",
"nb_customers_no_purchase_spectacle = (\n",
"    customerplus_clean_spectacle[~customerplus_clean_spectacle[\"already_purchased\"]]\n",
"    .groupby([\"number_compagny\", \"already_purchased\"])[\"customer_id\"]\n",
"    .count()\n",
"    .reset_index()\n",
")\n",
"\n",
"print(nb_customers_purchasing_spectacle)\n",
"print(nb_customers_no_purchase_spectacle)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "41c9fb5a-708b-4f85-9918-00337151f155",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Stacked barplot: purchasers at the bottom, non-purchasers on top (counts in thousands)\n",
"plt.bar(\n",
"    x=nb_customers_purchasing_spectacle[\"number_compagny\"],\n",
"    height=nb_customers_purchasing_spectacle[\"customer_id\"].div(1000),\n",
"    label=\"clients ayant acheté\",\n",
")\n",
"plt.bar(\n",
"    x=nb_customers_no_purchase_spectacle[\"number_compagny\"],\n",
"    height=nb_customers_no_purchase_spectacle[\"customer_id\"].div(1000),\n",
"    bottom=nb_customers_purchasing_spectacle[\"customer_id\"].div(1000),\n",
"    label=\"clients ciblés par un mail\",\n",
")\n",
"\n",
"# Axis labels, title and legend\n",
"plt.xlabel(\"Compagnie\")\n",
"plt.ylabel(\"Nombre de clients (en milliers)\")\n",
"plt.title(\"Nombre de clients identifiés pour les compagnies de spectacle\")\n",
"plt.legend()\n",
"\n",
"# Render the figure\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "a41dfb3e-12b6-4a7b-9282-698d9476b17b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Pattern to reuse for exporting a figure to S3\n",
"\n",
"# This chart describes the spectacle companies, so it is written to a spectacle-specific key.\n",
"# The previous key (graphics/music/number_customers_music.png) looks copied from the music\n",
"# notebook, and writing to it would overwrite the music chart.\n",
"# NOTE(review): confirm the folder / file naming against the project's S3 layout.\n",
"FILE_PATH = \"projet-bdc2324-team1/graphics/spectacle/\"\n",
"FILE_NAME = \"number_customers_spectacle.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"# Stacked barplot: purchasers at the bottom, non-purchasers on top (counts in thousands)\n",
"plt.bar(nb_customers_purchasing_spectacle[\"number_compagny\"], nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"clients ayant acheté\")\n",
"plt.bar(nb_customers_no_purchase_spectacle[\"number_compagny\"], nb_customers_no_purchase_spectacle[\"customer_id\"]/1000, \n",
"        bottom = nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"clients ciblés par un mail\")\n",
"\n",
"\n",
"# Axis labels, title and legend\n",
"plt.xlabel('Compagnie')\n",
"plt.ylabel(\"Nombre de clients (en milliers)\")\n",
"plt.title(\"Nombre de clients identifiés pour les compagnies de spectacle\")\n",
"plt.legend()\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
"    # savefig receives a file object, not a path, so the format is stated explicitly\n",
"    plt.savefig(file_out, format=\"png\")"
]
},
{
"cell_type": "markdown",
"id": "85b6c7a9-d970-4071-8633-45bc1f50e157",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"#### Prix maximal payé par un client (utile ??)"
]
},
{
"cell_type": "code",
"execution_count": 152,
"id": "fd11c547-7128-4ef6-ad7b-4b7c2a30cd9e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>max_price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>13823.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>5000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>3180.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>456.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny max_price\n",
"0 10 13823.0\n",
"1 11 108.0\n",
"2 12 5000.0\n",
"3 13 3180.0\n",
"4 14 456.0"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Highest price paid by a customer for each company - very wide spread: from 108 to 13823\n",
"\n",
"company_max_price = customerplus_clean_spectacle.groupby(\"number_compagny\", as_index=False)[\"max_price\"].max()\n",
"company_max_price"
]
},
{
"cell_type": "code",
"execution_count": 153,
"id": "b8f8f162-4153-4cfe-bfaa-d981d414510d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the highest observed price per company\n",
"fig, ax = plt.subplots()\n",
"ax.bar(company_max_price[\"number_compagny\"], company_max_price[\"max_price\"])\n",
"\n",
"# Axis labels and title\n",
"ax.set(\n",
"    xlabel=\"Company\",\n",
"    ylabel=\"Prix maximal d'un billet vendu\",\n",
"    title=\"Prix maximal de vente observé par compagnie de spectacle\",\n",
")\n",
"\n",
"# Render the figure\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "bff23e5d-d7ed-4092-ae3c-5df503e54a6d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 762879.000000\n",
"mean 0.079068\n",
"std 3.969729\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 3334.000000\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distribution of purchase_count among customers with no first purchase date\n",
"no_first_purchase_date = customerplus_clean_spectacle[\"first_buying_date\"].isna()\n",
"customerplus_clean_spectacle.loc[no_first_purchase_date, \"purchase_count\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "89466dbd-14d2-4ede-9ca0-b9c32b764e25",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 7.608090e+05\n",
"mean 3.863940e+00\n",
"std 1.685825e+03\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 1.469325e+06\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distribution of purchase_count among customers that do have a first purchase date\n",
"has_first_purchase_date = customerplus_clean_spectacle[\"first_buying_date\"].notna()\n",
"customerplus_clean_spectacle.loc[has_first_purchase_date, \"purchase_count\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "5f9feae4-35f4-43b6-adeb-f75773900a2d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343121</th>\n",
" <td>4667645</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534181.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343122</th>\n",
" <td>4667649</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534177.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343123</th>\n",
" <td>4667660</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534165.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343124</th>\n",
" <td>4667679</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534132.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343125</th>\n",
" <td>4667686</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1567949.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1523688 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 821538 139 NaN NaN 0 \n",
"1 809126 1063 NaN NaN 0 \n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"343121 4667645 122 NaN 1534181.0 0 \n",
"343122 4667649 122 NaN 1534177.0 0 \n",
"343123 4667660 122 NaN 1534165.0 0 \n",
"343124 4667679 122 NaN 1534132.0 0 \n",
"343125 4667686 122 NaN 1567949.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 875 False NaN 2 True ... \n",
"1 875 False NaN 2 True ... \n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"... ... ... ... ... ... ... \n",
"343121 862 False NaN 2 True ... \n",
"343122 862 False NaN 2 True ... \n",
"343123 862 False NaN 0 True ... \n",
"343124 862 False NaN 2 True ... \n",
"343125 862 False NaN 0 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"0 NaN NaN other 0 0 \n",
"1 NaN fr other 0 0 \n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"... ... ... ... ... ... \n",
"343121 NaN NaN other 0 0 \n",
"343122 NaN NaN other 0 0 \n",
"343123 NaN NaN female 1 0 \n",
"343124 NaN NaN other 0 0 \n",
"343125 NaN NaN female 1 0 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"0 1 NaN 0 10 False \n",
"1 1 1.0 0 10 False \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"343121 1 NaN 0 14 False \n",
"343122 1 NaN 0 14 False \n",
"343123 0 NaN 0 14 False \n",
"343124 1 NaN 0 14 False \n",
"343125 0 NaN 0 14 False \n",
"\n",
"[1523688 rows x 30 columns]"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Flag customers by the presence of a first purchase date.\n",
"# NOTE(review): other cells of this notebook define \"already_purchased\" as purchase_count > 0;\n",
"# this assignment overwrites that column with a different definition.\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"first_buying_date\"].notna()\n",
"customerplus_clean_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "cec4f1eb-cec8-409d-8b2c-1e01f1bf81ff",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338933</th>\n",
" <td>3625705</td>\n",
" <td>648752</td>\n",
" <td>NaN</td>\n",
" <td>1253864.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338954</th>\n",
" <td>3627626</td>\n",
" <td>636890</td>\n",
" <td>NaN</td>\n",
" <td>1253887.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338959</th>\n",
" <td>3628124</td>\n",
" <td>653042</td>\n",
" <td>NaN</td>\n",
" <td>1253899.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338986</th>\n",
" <td>3631189</td>\n",
" <td>648423</td>\n",
" <td>NaN</td>\n",
" <td>1253928.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339039</th>\n",
" <td>3635380</td>\n",
" <td>659417</td>\n",
" <td>NaN</td>\n",
" <td>1253975.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26246 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"5 307036 139 NaN NaN 0 \n",
"6 2946 1063 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"338933 3625705 648752 NaN 1253864.0 0 \n",
"338954 3627626 636890 NaN 1253887.0 0 \n",
"338959 3628124 653042 NaN 1253899.0 0 \n",
"338986 3631189 648423 NaN 1253928.0 0 \n",
"339039 3635380 659417 NaN 1253975.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"5 875 False NaN 2 True ... \n",
"6 875 False NaN 2 False ... \n",
"... ... ... ... ... ... ... \n",
"338933 862 False NaN 0 True ... \n",
"338954 862 False NaN 0 True ... \n",
"338959 862 False NaN 0 True ... \n",
"338986 862 False NaN 0 True ... \n",
"339039 862 False NaN 1 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"5 NaN NaN other 0 0 \n",
"6 NaN fr other 0 0 \n",
"... ... ... ... ... ... \n",
"338933 NaN fr female 1 0 \n",
"338954 NaN fr female 1 0 \n",
"338959 NaN fr female 1 0 \n",
"338986 NaN fr female 1 0 \n",
"339039 NaN fr male 0 1 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"5 1 NaN 0 10 False \n",
"6 1 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"338933 0 1.0 0 14 False \n",
"338954 0 1.0 0 14 False \n",
"338959 0 1.0 0 14 False \n",
"338986 0 1.0 0 14 False \n",
"339039 0 1.0 0 14 False \n",
"\n",
"[26246 rows x 30 columns]"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Caution: some customers have no first purchase date although several purchases are recorded.\n",
"# A NaN first purchase date therefore does not necessarily mean the customer never bought:\n",
"# the date may simply not be filled in.\n",
"\n",
"not_flagged_as_buyer = ~customerplus_clean_spectacle[\"already_purchased\"]\n",
"has_purchases = customerplus_clean_spectacle[\"purchase_count\"] > 0\n",
"customerplus_clean_spectacle[not_flagged_as_buyer & has_purchases]"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "b5904039-a967-47d5-ba13-1b805bcd76ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [customer_id, street_id, structure_id, mcp_contact_id, fidelity, tenant_id, is_partner, deleted_at, gender, is_email_true, opt_in, last_buying_date, max_price, ticket_sum, average_price, average_purchase_delay, average_price_basket, average_ticket_basket, total_price, purchase_count, first_buying_date, country, gender_label, gender_female, gender_male, gender_other, country_fr, has_tags, number_compagny, already_purchased]\n",
"Index: []\n",
"\n",
"[0 rows x 30 columns]"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# However, a customer with zero purchases always has a NaN first purchase date - OK (empty result)\n",
"flagged_as_buyer = customerplus_clean_spectacle[\"already_purchased\"]\n",
"zero_purchases = customerplus_clean_spectacle[\"purchase_count\"] == 0\n",
"customerplus_clean_spectacle[flagged_as_buyer & zero_purchases]"
]
},
{
"cell_type": "markdown",
"id": "703d9986-4497-404f-881a-45ca44b25beb",
"metadata": {},
"source": [
"#### Différence de consentement aux campagnes de mails (opt-in)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "e940bfcf-29cc-4d4c-ae5e-e2a8cecf28af",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"number_compagny already_purchased\n",
"10 False 0.234840\n",
" True 0.236242\n",
"11 False 0.141746\n",
" True 0.002804\n",
"12 False 0.485950\n",
" True 0.244780\n",
"13 False 0.084057\n",
" True 0.177213\n",
"14 False 0.885553\n",
" True 0.308859\n",
"Name: opt_in, dtype: float64"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Differences in consent to mailing campaigns (opt in)\n",
"\n",
"# Even among customers with no first purchase date, some accept to be targeted.\n",
"# The distinct opt_in values are printed explicitly: a bare expression that is not the\n",
"# last line of a cell is evaluated but never displayed.\n",
"print(customerplus_clean_spectacle.loc[customerplus_clean_spectacle[\"first_buying_date\"].isna(), \"opt_in\"].unique())\n",
"\n",
"# Consent rates vary across companies and purchase status.\n",
"# The flag is (re)defined here from purchase_count, which overwrites any earlier\n",
"# definition of the \"already_purchased\" column.\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"] > 0\n",
"customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"])[\"opt_in\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 209,
"id": "a5e79beb-9ba0-4c89-b084-e27ff0d65dcc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" <td>0.234840</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" <td>0.236242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>False</td>\n",
" <td>0.141746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>True</td>\n",
" <td>0.002804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>False</td>\n",
" <td>0.485950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>True</td>\n",
" <td>0.244780</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>False</td>\n",
" <td>0.084057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>True</td>\n",
" <td>0.177213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" <td>0.885553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" <td>0.308859</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny already_purchased opt_in\n",
"0 10 False 0.234840\n",
"1 10 True 0.236242\n",
"2 11 False 0.141746\n",
"3 11 True 0.002804\n",
"4 12 False 0.485950\n",
"5 12 True 0.244780\n",
"6 13 False 0.084057\n",
"7 13 True 0.177213\n",
"8 14 False 0.885553\n",
"9 14 True 0.308859"
]
},
"execution_count": 209,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean opt_in per (company, purchase status), as a flat table ready for plotting\n",
"df_graph = customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"], as_index=False)[\"opt_in\"].mean()\n",
"df_graph"
]
},
{
"cell_type": "code",
"execution_count": 210,
"id": "5be56c41-7697-481a-84ea-f77a2041484b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Grouped bar chart: for every company, one bar per purchase status, side by side\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"categories = df_graph[\"number_compagny\"].unique()\n",
"bar_width = 0.35\n",
"bar_positions = np.arange(len(categories))\n",
"\n",
"# One series of bars per purchase status; each new series is shifted right by one bar width\n",
"for rank, label in enumerate(df_graph[\"already_purchased\"].unique()):\n",
"    label_data = df_graph[df_graph[\"already_purchased\"] == label]\n",
"\n",
"    # consent rate (in %) of this group for each company, in the order of `categories`\n",
"    values = []\n",
"    for category in categories:\n",
"        company_rows = label_data[label_data[\"number_compagny\"] == category]\n",
"        values.append(company_rows[\"opt_in\"].values[0] * 100)\n",
"\n",
"    if label:\n",
"        label_printed = \"client ayant déjà acheté\"\n",
"    else:\n",
"        label_printed = \"client n'ayant jamais acheté\"\n",
"    ax.bar(bar_positions + rank * bar_width, values, bar_width, label=label_printed)\n",
"\n",
"# Labels, title, legend; ticks are centred between the two bars of each company\n",
"ax.set_xlabel(\"Compagnie\")\n",
"ax.set_ylabel(\"Part de consentement (%)\")\n",
"ax.set_title(\"Part de consentement au mailing selon les compagnies\")\n",
"ax.set_xticks(bar_positions + bar_width / 2)\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Save the figure to the MinIO storage (FILE_PATH is defined in an earlier cell)\n",
"\n",
"FILE_NAME = \"consent_customers_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, \"wb\") as file_out:\n",
"    fig.savefig(file_out)"
]
},
{
"cell_type": "code",
"execution_count": 211,
"id": "91b743c4-5473-41e1-b97e-cf06904f0fa8",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>55.896356</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>50.795672</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>4.856590</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>0.046125</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>37.098498</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.021608</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>32.457022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>19.461217</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>69.470107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>26.682793</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased opt_in\n",
"0 10 0.0 55.896356\n",
"1 10 1.0 50.795672\n",
"2 11 0.0 4.856590\n",
"3 11 1.0 0.046125\n",
"4 12 0.0 37.098498\n",
"5 12 1.0 0.021608\n",
"6 13 0.0 32.457022\n",
"7 13 1.0 19.461217\n",
"8 14 0.0 69.470107\n",
"9 14 1.0 26.682793"
]
},
"execution_count": 211,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same consent table, rebuilt on the train set (opt_in expressed in %)\n",
"\n",
"df_graph = (\n",
"    train_set_spectacle\n",
"    .groupby([\"number_company\", \"y_has_purchased\"], as_index=False)[\"opt_in\"]\n",
"    .mean()\n",
"    .assign(opt_in=lambda rates: 100 * rates[\"opt_in\"])\n",
")\n",
"df_graph"
]
},
{
"cell_type": "code",
"execution_count": 163,
"id": "728e0021-4f95-4601-bb01-032db2cf6571",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.43006504592722195\n",
"0.2889608343987336\n"
]
}
],
"source": [
"# Why is there such a gap on the opt in variable between the two tables?\n",
"# (train set first, then the full customer table)\n",
"for customers in (train_set_spectacle, customerplus_clean_spectacle):\n",
"    print(customers[\"opt_in\"].mean())"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "274b4bc5-277f-476a-8bc1-c1764b1df2de",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8473746548562269\n",
"0.7573747808905485\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 164,
"id": "e1d837e1-c445-424b-867a-48b1e790f703",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"genre = homme : \n",
"0.3754292890099192\n",
"0.3103924435775397\n",
"email vérifié : \n",
"0.9966249488521722\n",
"0.936015604285403\n",
"nationalité française : \n",
"0.7882316165225254\n",
"0.7573741156773128\n",
"nbre d'achats : \n",
"1.7069010765735895\n",
"0.9938799646120849\n"
]
}
],
"source": [
"# For the other variables the two tables seem to have similar distributions,\n",
"# except for the purchase count - to be checked\n",
"\n",
"# printed heading -> column whose mean is compared\n",
"compared_columns = {\n",
"    \"genre = homme : \": \"gender_male\",\n",
"    \"email vérifié : \": \"is_email_true\",\n",
"    \"nationalité française : \": \"country_fr\",\n",
"    \"nbre d'achats : \": \"purchase_count\",\n",
"}\n",
"\n",
"for heading, column in compared_columns.items():\n",
"    print(heading)\n",
"    # train set first, then the full customer table\n",
"    print(train_set_spectacle[column].mean())\n",
"    print(customerplus_clean_spectacle[column].mean())"
]
},
{
"cell_type": "code",
"execution_count": 214,
"id": "43deeeb5-8092-42fc-b80b-59d2c58093de",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Draw the opt-in chart with the generic multiple_barplot function.\n",
"multiple_barplot(\n",
"    df_graph,\n",
"    x=\"number_company\",\n",
"    y=\"opt_in\",\n",
"    var_labels=\"y_has_purchased\",\n",
"    dico_labels={0: \"aucun achat\", 1: \"achat durant la période\"},\n",
"    xlabel=\"Numéro de compagnie\",\n",
"    ylabel=\"Part de consentement (%)\",\n",
"    title=\"Part de consentement au mailing selon les compagnies (train set)\",\n",
")\n",
"\n",
"# Save the current figure to S3.\n",
"# NOTE(review): the file name ends in \"_music\" although this notebook covers the\n",
"# spectacle companies - confirm this is the intended name.\n",
"FILE_NAME = \"consent_customers_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = f\"{FILE_PATH}{FILE_NAME}\"\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, mode=\"wb\") as file_out:\n",
"    plt.savefig(file_out)"
]
},
{
"cell_type": "code",
"execution_count": 213,
"id": "360047fc-70a4-4876-b0f1-c0af5cc93e17",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": []
},
{
"cell_type": "markdown",
"id": "5fcff5cb-923b-44d7-b345-0bee89d30ea2",
"metadata": {},
"source": [
"#### Etude du genre"
]
},
{
"cell_type": "code",
"execution_count": 216,
"id": "32960530-cb46-4eeb-a6d2-1dcf5fb640d8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>gender_male</th>\n",
" <th>gender_female</th>\n",
" <th>gender_other</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.181582</td>\n",
" <td>0.343840</td>\n",
" <td>0.474578</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.179522</td>\n",
" <td>0.314448</td>\n",
" <td>0.506030</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.346381</td>\n",
" <td>0.454038</td>\n",
" <td>0.199581</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.318108</td>\n",
" <td>0.503093</td>\n",
" <td>0.178799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.331954</td>\n",
" <td>0.316181</td>\n",
" <td>0.351865</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny gender_male gender_female gender_other\n",
"0 10 0.181582 0.343840 0.474578\n",
"1 11 0.179522 0.314448 0.506030\n",
"2 12 0.346381 0.454038 0.199581\n",
"3 13 0.318108 0.503093 0.178799\n",
"4 14 0.331954 0.316181 0.351865"
]
},
"execution_count": 216,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Gender: mean of each gender column per company, from the customer table.\n",
"# NOTE(review): the key is spelled \"number_compagny\" in this table, whereas the train set\n",
"# is grouped on \"number_company\" - confirm whether the two should be harmonised.\n",
"company_genders = (\n",
"    customerplus_clean_spectacle\n",
"    .groupby(\"number_compagny\")[[\"gender_male\", \"gender_female\", \"gender_other\"]]\n",
"    .mean()\n",
"    .reset_index()\n",
")\n",
"company_genders"
]
},
{
"cell_type": "code",
"execution_count": 217,
"id": "1b4a49d7-7bfe-4e80-aa7e-c9c6d4bc46e2",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Stacked bar chart, one bar per company: gender_male at the bottom, gender_female\n",
"# stacked on top of it (gender_other is not drawn).\n",
"plt.bar(\n",
"    x=company_genders[\"number_compagny\"],\n",
"    height=company_genders[\"gender_male\"],\n",
"    label=\"Homme\",\n",
")\n",
"plt.bar(\n",
"    x=company_genders[\"number_compagny\"],\n",
"    height=company_genders[\"gender_female\"],\n",
"    bottom=company_genders[\"gender_male\"],\n",
"    label=\"Femme\",\n",
")\n",
"\n",
"# Title, axis labels and legend\n",
"plt.title(\"Sexe des clients de chaque compagnie de spectacle\")\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients de chaque sexe\")\n",
"plt.legend()\n",
"\n",
"# Display the chart\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 218,
"id": "c7348c95-e506-4002-90d9-d3b6768af985",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>gender_male</th>\n",
" <th>gender_female</th>\n",
" <th>gender_other</th>\n",
" <th>share_of_women</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>0.140862</td>\n",
" <td>0.288775</td>\n",
" <td>0.570363</td>\n",
" <td>67.213639</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>0.284532</td>\n",
" <td>0.714831</td>\n",
" <td>0.000637</td>\n",
" <td>71.528662</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>0.289900</td>\n",
" <td>0.512669</td>\n",
" <td>0.197431</td>\n",
" <td>63.878535</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>0.321033</td>\n",
" <td>0.609779</td>\n",
" <td>0.069188</td>\n",
" <td>65.510406</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.357546</td>\n",
" <td>0.470654</td>\n",
" <td>0.171799</td>\n",
" <td>56.828519</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.396824</td>\n",
" <td>0.494058</td>\n",
" <td>0.109118</td>\n",
" <td>55.457191</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>0.363198</td>\n",
" <td>0.492956</td>\n",
" <td>0.143846</td>\n",
" <td>57.577983</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>0.379703</td>\n",
" <td>0.516605</td>\n",
" <td>0.103693</td>\n",
" <td>57.637000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>0.447676</td>\n",
" <td>0.443646</td>\n",
" <td>0.108678</td>\n",
" <td>49.773906</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>0.487695</td>\n",
" <td>0.471498</td>\n",
" <td>0.040808</td>\n",
" <td>49.155702</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased gender_male gender_female gender_other \\\n",
"0 10 0.0 0.140862 0.288775 0.570363 \n",
"1 10 1.0 0.284532 0.714831 0.000637 \n",
"2 11 0.0 0.289900 0.512669 0.197431 \n",
"3 11 1.0 0.321033 0.609779 0.069188 \n",
"4 12 0.0 0.357546 0.470654 0.171799 \n",
"5 12 1.0 0.396824 0.494058 0.109118 \n",
"6 13 0.0 0.363198 0.492956 0.143846 \n",
"7 13 1.0 0.379703 0.516605 0.103693 \n",
"8 14 0.0 0.447676 0.443646 0.108678 \n",
"9 14 1.0 0.487695 0.471498 0.040808 \n",
"\n",
" share_of_women \n",
"0 67.213639 \n",
"1 71.528662 \n",
"2 63.878535 \n",
"3 65.510406 \n",
"4 56.828519 \n",
"5 55.457191 \n",
"6 57.577983 \n",
"7 57.637000 \n",
"8 49.773906 \n",
"9 49.155702 "
]
},
"execution_count": 218,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same gender breakdown on the train set, split by company and purchase outcome.\n",
"# share_of_women = 100 * gender_female / (1 - gender_other), computed on the group means.\n",
"# NOTE(review): this rebinds company_genders, now keyed on \"number_company\".\n",
"company_genders = (\n",
"    train_set_spectacle\n",
"    .groupby([\"number_company\", \"y_has_purchased\"])[[\"gender_male\", \"gender_female\", \"gender_other\"]]\n",
"    .mean()\n",
"    .reset_index()\n",
"    .assign(share_of_women=lambda g: 100 * (g[\"gender_female\"] / (1 - g[\"gender_other\"])))\n",
")\n",
"company_genders"
]
},
{
"cell_type": "code",
"execution_count": 219,
"id": "b36e5a8f-45dc-4b74-8137-80b7e916aa84",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIiCAYAAAD2CjhuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABkHUlEQVR4nO3dfXzO9f////thZ3bOxs40NhoRQ8lZsUnO6USlotC5UDlLeVdfE+aspKh0IpSkepO3EJaTlZyfDElCc1JZE3LO2J6/P/rt+DheG46DbcfM7Xq5HJeL1/N19ni9jueO7e71ej0PmzHGCAAAAABgV8rdBQAAAABAcUNQAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFgQlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVDCNWXKlCmy2Wz2l6enp6677jo9+uij+uOPPwp0X8nJyZo9e/YVbWP37t2y2WyaMmVKgdSUu8127dopJCRENptNffr0KbBtw1FhvH/OSkpKks1mK/L9wjXu7CNWy5Ytk81m07Jly9xdSqFbsWKFkpKS9M8//xTqfnJ/5+zevbtAt/vDDz/Ix8dHe/bssbe9++67hdqPbDabkpKSCm37V2L+/Pn51nb27FlVqVJF48aNK/KaUDIQlHBNmjx5slauXKmUlBQ9+eST+vzzz9WkSROdOHGiwPZREEGpMPTt21erV6/Wxx9/rJUrV6pv377uLgm4ZkVGRmrlypVq166du0u5pqxYsUJDhgwp9KBUGIwx6tOnj5588klVqlTJ3l7YQWnlypV64oknCm37V2L+/PkaMmRInnYvLy/9v//3//Taa6/p4MGDbqgMVzuCEq5JNWvWVMOGDdWsWTMNHjxYAwcOVHp6eoEEm1OnTl15gYXop59+Uv369XX33XerYcOGDr9oARQtHx8fNWzYUOXLl3d3KbhKLFiwQBs2bNCzzz572ds4e/aszp0759I6DRs21HXXXXfZ+3SXhx56SDabTe+//767S8FViKAE6N9fAJLstzEMGTJEDRo0UEhIiIKCgnTTTTdp0qRJMsY4rBcTE6P27dtr1qxZqlu3rkqXLq0hQ4bIZrPpxIkTmjp1qv02v8TExIvW8Oeff6pTp04KDAxUcHCwHnjgAWVkZOS77Lp163TnnXcqJCREpUuXVt26dfXll19edPu5t9Xs3LlT3377rb2u3FtCjh49qgEDBig2Nlbe3t6qUKGC+vTpk+cqm81mU+/evTV58mRVq1ZNvr6+qlevnlatWiVjjMaMGaPY2FgFBATo9ttv186dOx3WT0xMVM2aNbVy5Uo1btxYvr6+iomJ0eTJkyVJ8+bN00033SQ/Pz/VqlVLCxYsyHMsO3bsUOfOnRUWFiYfHx9Vr15d77zzjsMyOTk5GjZsmL3GMmXKKD4+Xm+99dZFz5Oz6zlTw4UsX75czZs3V2BgoPz8/NS4cWPNmzfPYZncW3aWLl2qZ555RuXKlVNoaKg6duyoP//806n95OeLL75Qo0aN5O/vr4CAALVq1UobN250WOa3337Tgw8+qKioKPn4+Cg8PFzNmzdXWlraJbe/evVqdejQQaGhoSpdurSqVKmS5/ZOV45/yZIlevLJJxUaGqqgoCB17dpVJ06cUEZGhjp16qQyZcooMjJSAwYM0NmzZ+3r597SNnr0aA0fPlwVK1ZU6dKlVa9ePS1evNhhXzt37tSjjz6quLg4+fn5qUKFCurQoYO2bNmS5/i2bt2qli1bys/PT+XLl1evXr00b968PLes5fbztWvXqkmTJvLz81PlypU1cuRI5eTk5KnTeiWgMPu4JP3yyy9q3bq1/Pz8VK5cOfXo0UPHjh3Ld9nvvvtOzZs3V1BQkPz8/HTrrbfmOYf5caa+3FtEN27cqI4dOyooKEjBwcF6+OGHdeDAgTzbdKb/Shfvh0lJSXrhhRckSbGxsfbPwtz374svvlDLli0VGRkpX1
9fVa9eXS+99FK+dxw4098L8pxK0nvvvadbbrlF1apVs7fFxMRo69atSk1NtR9PTEyMpP/77P/000/Vv39/VahQQT4+Ptq5c6cOHDignj17qkaNGgoICFBYWJhuv/12/fDDD3n2a7317ko/o5z9nLnUe969e3f7z8b5t9bn/m7z9vbWAw88oA8++CDP73DgkgxwDZk8ebKRZNauXevQ/tZbbxlJ5oMPPjDGGNO9e3czadIkk5KSYlJSUszQoUONr6+vGTJkiMN6lSpVMpGRkaZy5crm448/NkuXLjVr1qwxK1euNL6+vqZt27Zm5cqVZuXKlWbr1q0XrOvkyZOmevXqJjg42IwfP94sXLjQPPfcc6ZixYpGkpk8ebJ92SVLlhhvb2/TpEkT88UXX5gFCxaY7t2751nO6siRI2blypUmIiLC3Hrrrfa6Tp8+bU6cOGHq1KljypUrZ8aOHWu+++4789Zbb5ng4GBz++23m5ycHPt2JJlKlSqZxo0bm1mzZpmvv/7aVK1a1YSEhJi+ffuau+66y8ydO9d89tlnJjw83MTHxzusn5CQYEJDQ021atXMpEmTzMKFC0379u2NJDNkyBBTq1Yt8/nnn5v58+ebhg0bGh8fH/PHH3/Y19+6dasJDg42tWrVMp988olZtGiR6d+/vylVqpRJSkqyLzdixAjj4eFhBg8ebBYvXmwWLFhgxo0b57BMfpxZz9ka0tPT87wvy5YtM15eXubmm282X3zxhZk9e7Zp2bKlsdlsZsaMGfblcvtq5cqVzbPPPmsWLlxoPvroI1O2bFnTrFmzix6DMcYMHjzYWD/ihw8fbmw2m3nsscfM3LlzzaxZs0yjRo2Mv7+/Q/+sVq2auf76682nn35qUlNTzcyZM03//v3N0qVLL7rPBQsWGC8vLxMfH2+mTJlilixZYj7++GPz4IMPXvbxx8bGmv79+5tFixaZUaNGGQ8PD/PQQw+Zm266yQwbNsykpKSYF1980Ugyb7zxRp5zHx0dbW677TYzc+ZM89VXX5lbbrnFeHl5mRUrVtiXTU1NNf379zf//e9/TWpqqvn666/N3XffbXx9fc0vv/xiX+7PP/80oaGhpmLFimbKlClm/vz55pFHHjExMTFGksP5ye3ncXFxZuLEiSYlJcX07NnTSDJTp07NU+f5faSw+3hGRoYJCwszFSpUMJMnTzbz5883Xbp0sX/enH8cn376qbHZbObuu+82s2bNMt98841p37698fDwMN99991F9+NMfbn9tFKlSuaFF14wCxcuNGPHjjX+/v6mbt26Jisry76ss/33Uv1w37595tlnnzWSzKxZs+yfhUeOHDHGGDN06FDz5ptvmnnz5plly5aZiRMnmtjY2Dw/d87099x+nJ6eXiDn9MyZM8bX19cMHDjQoX3Dhg2mcuXKpm7duvbj2bBhgzHGmKVLlxpJpkKFCua+++4zc+bMMXPnzjUHDx40v/zyi3nmmWfMjBkzzLJly8zcuXPN448/bkqVKpXn512SGTx4cJ5ju9zPKGc+Z5x5z3fu3Gnuu+8+I8l+7Lm/23J98cUXRpLZvHnzJesCzkdQwjUl94N91apV5uzZs+bYsWNm7ty5pnz58iYwMNBkZGTkWSc7O9ucPXvWvPbaayY0NNThj/5KlSoZDw8Ps3379jzr+fv7m27dujlV13vvvWckmf/9738O7U8++WSeP6JuuOEGU7duXXP27FmHZdu3b28iIyNNdnb2RfdVqVIl065dO4e2ESNGmFKlSuUJkP/973+NJDN//nx7myQTERFhjh8/bm+bPXu2kWTq1KnjcH7GjRuX55dTQkKCkWTWrVtnbzt48KDx8PAwvr6+DqEoLS3NSDJvv/22va1Vq1bmuuuus/9Rk6t3796mdOnS5tChQ/bzUadOnYuei/w4s56zNeT3R3DDhg1NWFiYOXbsmL3t3LlzpmbNmua6666zn7/cvtqzZ0+HfYwePdpIMvv3779ojdagtH
fvXuPp6WmeffZZh+WOHTtmIiIiTKdOnYwxxvz9999Gkhk3btxFt5+fKlWqmCpVqphTp05dcBlXj99a7913320kmbFjxzq016lTx9x000326dxzHxUV5VDP0aNHTUhIiLnjjjsuWOO5c+dMVlaWiYuLM3379rW3v/DCC8Zms+X5T49WrVrlG5QkmdWrVzssW6NGDdOqVas8dZ7fRwq7j7/44ovGZrOZtLQ0h/YWLVo4HMeJEydMSEiI6dChg8Ny2dnZpnbt2qZ+/foX3Y8z9eX20/PPszHGfPbZZ0aSmTZtmjHG+f5rjHP9cMyYMXkCTH5ycnLM2bNnTWpqqpFkNm3a5NJ+rEHpSs/p6tWrjSSH/1TIdeONN5qEhIQ87blBqWnTphfdtjH/9v2zZ8+a5s2bm3vuucdh3oWC0uV8RjnzOePKe96rV688/zF0vh07dhhJ5r333rvgMkB+uPUO16SGDRvKy8tLgYGBat++vSIiIvTtt98qPDxckrRkyRLdcccdCg4OloeHh/2B0IMHDyozM9NhW/Hx8apateoV1bN06VIFBgbqzjvvdGjv3Lmzw/TOnTv1yy+/qEuXLpKkc+fO2V9t27bV/v37tX37dpf3P3fuXNWsWVN16tRx2GarVq3yHQWrWbNm8vf3t09Xr15dktSmTRuHkdZy288fmUn69wH2m2++2T4dEhKisLAw1alTR1FRURdc//Tp01q8eLHuuece+fn55Tn+06dPa9WqVZKk+vXra9OmTerZs6cWLlyoo0ePOnUuLrWeKzVYnThxQqtXr9Z9992ngIAAe7uHh4ceeeQR/f7773neP2ufiI+Pdzgnzlq4cKHOnTunrl27OtRcunRpJSQk2N/jkJAQValSRWPGjNHYsWO1ceNGh1vFLuTXX3/Vrl279Pjjj6t06dL5LnM5x9++fXuH6dw+YR38oHr16vmek44dOzrUExgYqA4dOuj7779Xdna2pH9/jpKTk1WjRg15e3vL09NT3t7e2rFjh7Zt22ZfNzU1VTVr1lSNGjUc9vHQQw/le7wRERGqX7++Q1t8fPxF37ui6ONLly7VjTfeqNq1azu0Wz9vVqxYoUOHDqlbt24OdeTk5Kh169Zau3btRQfAcaW+3M+0XJ06dZKnp6eWLl0qyfn+60w/vJTffvtNnTt3VkREhP3zPyEhQZLs/eFy93Ol5zT3lrawsDCXj+vee+/Nt33ixIm66aabVLp0aXl6esrLy0uLFy926PsXczmfUc58zjj7njsj93wV9Oi2KPk83V0A4A6ffPKJqlevLk9PT4WHhysyMtI+b82aNWrZsqUSExP14Ycf6rrrrpO3t7dmz56t4cOH5xms4fx1L9fBgwftIe18ERERDtN//fWXJGnAgAEaMGBAvtv6+++/Xd7/X3/9pZ07d8rLy8upbYaEhDhMe3t7X7T99OnTF10/d9lLrX/w4EGdO3dO48eP1/jx4y9a66BBg+Tv769p06Zp4sSJ8vDwUNOmTTVq1CjVq1cv33WdWc+VGqwOHz4sY0y+fSY3IFpHZgoNDXWY9vHxkeT6oCG5feeWW27Jd36pUv/+v5nNZtPixYv12muvafTo0erfv79CQkLUpUsXDR8+XIGBgfmun/s8ycUe9r6c43elr1n7mZT3Zyi3LSsrS8ePH1dwcLD69eund955Ry+++KISEhJUtmxZlSpVSk888YTDeT548KBiY2PzbC+/n10p73sn/fv+Xey9K4o+fqHjuNDnzX333XfBbR06dMjhP03O50p91n17enoqNDTU3h+c7b/O9MOLOX78uJo0aaLSpUtr2LBhqlq1qvz8/LRv3z517NjR/t5d7n6u9Jzm7v9yQmB+P3djx45V//791aNHDw0dOlTlypWTh4eHXn31VaeD0uV8RjnzOePse+6M3PNV3AdbQvFDUMI1qXr16hf8Q2LGjBny8vLS3LlzHX4ZXWhEvIL4rprQ0FCtWbMmT7t1MIdy5cpJ+vcPkI4dO+
a7rfMf8HVWuXLl5Ovrq48//viC84uDsmXL2q8+9OrVK99lcv8A9PT0VL9+/dSvXz/9888/+u677/Sf//xHrVq10r59++Tn55fv+pdaz5Ua8qu/VKlS2r9/f555uf9TXFjnOne7//3vfy850mGlSpU0adIkSf/+z/mXX36ppKQkZWVlaeLEifmukztq2++//37B7brj+PMbECUjI0Pe3t72q1rTpk1T165dlZyc7LDc33//rTJlytinQ0ND7X+8XWofl6so+nhoaOgFz8v5ct+L8ePH2we8sbpQSHS1voyMDFWoUME+fe7cOR08eND+R7iz/deZfngxS5Ys0Z9//qlly5bZryJJyjOM+OXu50rPae76hw4dcmm/Uv6/q6ZNm6bExES99957Du0XGtijIF3qc8aVz6xLyT1fxeV3Ga4eBCXAIveLaD08POxtp06d0qeffurSdi71P8fna9asmb788kvNmTPH4TaG6dOnOyxXrVo1xcXFadOmTXn+qLsS7du3V3JyskJDQy/4R35x4Ofnp2bNmmnjxo2Kj4+3X124lDJlyui+++7TH3/8oT59+mj37t15bp9yZb3LqUGS/P391aBBA82aNUuvv/66fH19Jf07Oti0adN03XXXXfFtnBfSqlUreXp6ateuXRe8BSc/VatW1SuvvKKZM2dqw4YNF12uSpUq+vjjj9WvXz/7/yqfzx3HP2vWLI0ZM8b+nx7Hjh3TN998oyZNmth/xm02W556582bpz/++EPXX3+9vS0hIUGvv/66fv75Z4f+M2PGjAKrtyj6eLNmzTR69Ght2rTJ4fY76+fNrbfeqjJlyujnn39W7969L/+gnKjvs88+c7gd98svv9S5c+fso4U623+d6YfSha965IYJ63rWoaWd3Y/VlZ7T3FtPd+3alWeeK79zcuXX9zdv3qyVK1cqOjra5fouV36fM658Zp3/fuZ+rpzvt99+kySnPveB8xGUAIt27dpp7Nix6ty5s5566ikdPHhQr7/+utO/CHPVqlVLy5Yt0zfffKPIyEgFBgZe8GpP165d9eabb6pr164aPny44uLiNH/+fC1cuDDPsu+//77atGmjVq1aqXv37qpQoYIOHTqkbdu2acOGDfrqq69cPuY+ffpo5syZatq0qfr27av4+Hjl5ORo7969WrRokfr3768GDRq4vN3C8NZbb+m2225TkyZN9MwzzygmJkbHjh3Tzp079c0332jJkiWSpA4dOqhmzZqqV6+eypcvrz179mjcuHGqVKmS4uLiLrh9Z9Zztob8jBgxQi1atFCzZs00YMAAeXt7691339VPP/2kzz//vECuUOYnJiZGr732ml5++WX99ttvat26tcqWLau//vpLa9askb+/v4YMGaLNmzerd+/euv/++xUXFydvb28tWbJEmzdv1ksvvXTRfbzzzjvq0KGDGjZsqL59+6pixYrau3evFi5cqM8++8wtx+/h4aEWLVqoX79+ysnJ0ahRo3T06FGHL6ds3769pkyZohtuuEHx8fFav369xowZk+e2qj59+ujjjz9WmzZt9Nprryk8PFzTp0/XL7/8Ism1W4EuprD7eO5xtGvXTsOGDVN4eLg+++wz+3HkCggI0Pjx49WtWzcdOnRI9913n8LCwnTgwAFt2rRJBw4cyHMl4nyu1Ddr1ix5enqqRYsW2rp1q1599VXVrl1bnTp1kuR8/5Wc64e1atWyn+tu3brJy8tL1apVU+PGjVW2bFn16NFDgwcPlpeXlz777DNt2rQpz/E5sx+rKz2n1113nSpXrqxVq1bpueeec5hXq1YtzZgxQ1988YUqV66s0qVL24/zQtq3b6+hQ4dq8ODBSkhI0Pbt2/Xaa68pNjbW5e9ZcoUznzOuvOe5xzlq1Ci1adNGHh4eDv/RsGrVKvutn4BL3D2aBFCULjQ8uNXHH39sqlWrZnx8fEzlypXNiBEjzKRJk/KMkpTfCHK50tLSzK233mr8/PyMpHxHIz
rf77//bu69914TEBBgAgMDzb333mtWrFiR77DfmzZtMp06dTJhYWHGy8vLREREmNtvv91MnDjxkufgQjUfP37cvPLKK6ZatWrG29vbPjxx3759HUYDlGR69erlsG7uyF1jxoxxaM8dbemrr76ytyUkJJgbb7zR6boutL/HHnvMVKhQwXh5eZny5cubxo0bm2HDhtmXeeONN0zjxo1NuXLljLe3t6lYsaJ5/PHHze7duy96fpxdz5ka8hvRzBhjfvjhB3P77bcbf39/4+vraxo2bGi++eYbh2Uu1Fdzz+mlhurOb3hwY/4dobBZs2YmKCjI+Pj4mEqVKpn77rvPPizxX3/9Zbp3725uuOEG4+/vbwICAkx8fLx58803zblz5y66T2OMWblypWnTpo0JDg42Pj4+pkqVKnlGNLuS4889rgMHDji0d+vWzfj7+9unc8/9qFGjzJAhQ8x1111nvL29Td26dc3ChQsd1j18+LB5/PHHTVhYmPHz8zO33Xab+eGHH0xCQkKen9uffvrJ3HHHHaZ06dImJCTEPP7442bq1Kl5RkS7UD/v1q2bqVSpUp46rX2kMPu4Mcb8/PPPpkWLFg7H8b///S/fvpWammratWtnQkJCjJeXl6lQoYJp166dw891fpypL/f9XL9+venQoYP98++hhx4yf/31V55tXqr/5nKmHw4aNMhERUWZUqVKORz3ihUrTKNGjYyfn58pX768eeKJJ8yGDRvyfZ8utZ/8hge/knNqjDGvvvqqKVu2rMPw18YYs3v3btOyZUsTGBhoH3LdmPw/h3OdOXPGDBgwwFSoUMGULl3a3HTTTWb27Nl5+qkxFx717nI+o1z5nHHmPT9z5ox54oknTPny5Y3NZstzzps0aZJnpEHAGTZj+PYtAEDJsnv3bsXGxmrMmDEXHPikoDz11FP6/PPPdfDgQZduxcS/X/46ZMgQHThwgOdHnPTnn38qNjZWn3zyiR544AF3l1Ps7dq1S3FxcVq4cKFatGjh7nJwleHWOwAAnPTaa68pKipKlStX1vHjxzV37lx99NFHeuWVVwhJKBJRUVHq06ePhg8frvvvv7/AbvksqYYNG6bmzZsTknBZCEoAADjJy8tLY8aM0e+//65z584pLi5OY8eO1fPPP+/u0nANeeWVV+Tn56c//vijSAdduNqcO3dOVapU0aBBg9xdCq5S3HoHAAAAABZcrwUAAAAAC4ISAAAAAFgQlAAAAADAosQP5pCTk6M///xTgYGBhfZFjgAAAACKP2OMjh07pqioqEuOGlnig9Kff/7JiDAAAAAA7Pbt26frrrvuosuU+KAUGBgo6d+TERQU5OZqAAAAALjL0aNHFR0dbc8IF1Pig1Lu7XZBQUEEJQAAAABOPZLDYA4AAAAAYEFQAgAAAAALghIAAAAAWJT4Z5QAAACuBtnZ2Tp79qy7ywCual5eXvLw8CiQbRGUAAAA3MgYo4yMDP3zzz/uLgUoEcqUKaOIiIgr/g5VghIAAIAb5YaksLAw+fn5XfEfd8C1yhijkydPKjMzU5IUGRl5RdsjKAEAALhJdna2PSSFhoa6uxzgqufr6ytJyszMVFhY2BXdhsdgDgAAAG6S+0ySn5+fmysBSo7cn6crfeaPoAQAAOBm3G4HFJyC+nkiKAEAAACABUEJAAAAcEH37t119913u7sMFDIGcwAAACiGYl6aV6T72z2yXZHu71q3bNkyNWvWTIcPH1aZMmXcXQ7ywRUlAAAAALAgKAEAAMBlCxYs0G233aYyZcooNDRU7du3165du+zzly1bJpvN5vBFumlpabLZbNq9e7e97ccff1RCQoL8/PxUtmxZtWrVSocPH5YkxcTEaNy4cQ77rVOnjpKSkuzTNptNH330ke655x75+fkpLi5Oc+bMuWjt06ZNU7169RQYGKiIiAh17tzZ/t07ubZu3ap27dopKChIgYGBatKkicPxSdLrr7+uyMhIhYaGqlevXg6jrF1sH7
t371azZs0kSWXLlpXNZlP37t0vWjOKHkEJAAAALjtx4oT69euntWvXavHixSpVqpTuuece5eTkOL2NtLQ0NW/eXDfeeKNWrlyp5cuXq0OHDsrOznapliFDhqhTp07avHmz2rZtqy5duujQoUMXXD4rK0tDhw7Vpk2bNHv2bKWnpzsElT/++ENNmzZV6dKltWTJEq1fv16PPfaYzp07Z19m6dKl2rVrl5YuXaqpU6dqypQpmjJlilP7iI6O1syZMyVJ27dv1/79+/XWW2+5dMwofDyjBAAAAJfde++9DtOTJk1SWFiYfv75Z9WsWdOpbYwePVr16tXTu+++a2+78cYbXa6le/fueuihhyRJycnJGj9+vNasWaPWrVvnu/xjjz1m/3flypX19ttvq379+jp+/LgCAgL0zjvvKDg4WDNmzJCXl5ckqWrVqg7bKFu2rCZMmCAPDw/dcMMNateunRYvXqwnn3zSqX2EhIRIksLCwnhGqZjiihIAAABctmvXLnXu3FmVK1dWUFCQYmNjJUl79+51ehu5V5SuVHx8vP3f/v7+CgwMzHMr3fk2btyou+66S5UqVVJgYKASExMl/V/taWlpatKkiT0k5efGG2+Uh4eHfToyMtJhn5faB4o/ghIAAABc1qFDBx08eFAffvihVq9erdWrV0v695YzSSpV6t8/M40x9nXOf4ZHknx9fS+6j1KlSjmsn982JOUJNDab7YK3AJ44cUItW7ZUQECApk2bprVr1+rrr792qP1SdV1qn87sA8UfQQkAAAAuOXjwoLZt26ZXXnlFzZs3V/Xq1e0DMOQqX768JGn//v32trS0NIdl4uPjtXjx4gvup3z58g7rHz16VOnp6VdU+y+//KK///5bI0eOVJMmTXTDDTfkufoUHx+vH374Id9QVlD78Pb2liSXn8dC0eEZJVy5pGB3V1Cwko64uwIAAIq1smXLKjQ0VB988IEiIyO1d+9evfTSSw7LXH/99YqOjlZSUpKGDRumHTt26I033nBYZtCgQapVq5Z69uypHj16yNvbW0uXLtX999+vcuXK6fbbb9eUKVPUoUMHlS1bVq+++qrD7W6Xo2LFivL29tb48ePVo0cP/fTTTxo6dKjDMr1799b48eP14IMPatCgQQoODtaqVatUv359VatWrUD2UalSJdlsNs2dO1dt27aVr6+vAgICrujYULC4ogQAAACXlCpVSjNmzND69etVs2ZN9e3bV2PGjHFYxsvLS59//rl++eUX1a5dW6NGjdKwYcMclqlataoWLVqkTZs2qX79+mrUqJH+97//ydPz3//LHzRokJo2bar27durbdu2uvvuu1WlSpUrqr18+fKaMmWKvvrqK9WoUUMjR47U66+/7rBMaGiolixZouPHjyshIUE333yzPvzww4s+s+TqPipUqKAhQ4bopZdeUnh4uHr37n1Fx4WCZzPWGz9LmKNHjyo4OFhHjhxRUFCQu8spmbiiBADAZTl9+rTS09MVGxur0qVLu7scoES42M+VK9mAK0oAAAAAYEFQAgAAAAALtwalmJgY2Wy2PK9evXpJ+nc4yaSkJEVFRcnX11eJiYnaunWrO0sGAAAAcA1wa1Bau3at9u/fb3+lpKRIku6//35J/35b89ixYzVhwgStXbtWERERatGihY4dO+bOsgEAAACUcG4NSuXLl1dERIT9NXfuXFWpUkUJCQkyxmjcuHF6+eWX1bFjR9WsWVNTp07VyZMnNX36dHeWDQAAAKCEKzbPKGVlZWnatGl67LHHZLPZlJ6eroyMDLVs2dK+jI+PjxISErRixQo3VgoAAACgpCs2Xzg7e/Zs/fPPP+revbskKSMjQ5IUHh7usFx4eLj27Nlzwe2cOXNGZ86csU8fPXq04IsFAAAAUKIVmytKkyZNUps2bRQVFeXQbrPZHKaNMXnazjdixAgFBwfbX9HR0YVSLwAAAICSq1gEpT179ui7777TE088YW+LiIiQ9H9Xln
JlZmbmucp0vkGDBunIkSP21759+wqnaAAAAOAakJWVpeTkZG3bts3dpRSpYhGUJk+erLCwMLVr187eFhsbq4iICPtIeNK/b1JqaqoaN258wW35+PgoKCjI4QUAAIDiJSkpSXXq1HHb/pctWyabzaZ//vnHbTVcju7du+vuu+8u0n0OGDBAW7Zs0Q033HDJZQuivuLy3rj9GaWcnBxNnjxZ3bp1k6fn/5Vjs9nUp08fJScnKy4uTnFxcUpOTpafn586d+7sxooBAACKQFJwEe/vSNHurwDs3r1bsbGx2rhxo1tDV0FISkrS7NmzlZaW5u5SHMycOVM//fSTFixYcNHHX3K99dZbMsYUQWWFz+1B6bvvvtPevXv12GOP5Zk3cOBAnTp1Sj179tThw4fVoEEDLVq0SIGBgW6oFAAAAPg/WVlZ8vb2dncZheree+/Vvffee8nlsrOzZbPZFBxcxAG/ELn91ruWLVvKGKOqVavmmWez2ZSUlKT9+/fr9OnTSk1NVc2aNd1QJQAAAM63YMEC3XbbbSpTpoxCQ0PVvn177dq1y2GZ33//XQ8++KBCQkLk7++vevXqafXq1Q7LfPrpp4qJiVFwcLAefPBBHTt2zOl9xMbGSpLq1q0rm82mxMTEC9Y7f/58Va1aVb6+vmrWrJl2797tMD+/WwHHjRunmJgY+3TubWUjRoxQVFSU/e/XadOmqV69egoMDFRERIQ6d+6szMxM+3q5t5ItXrxY9erVk5+fnxo3bqzt27dLkqZMmaIhQ4Zo06ZNstlsstlsmjJlygWP5XzOvA9WiYmJ6t27t3r37m1f75VXXnG4EpSVlaWBAweqQoUK8vf3V4MGDbRs2TL7/ClTpqhMmTKaO3euatSoIR8fH+3ZsyfPrXdnzpzRc889p7CwMJUuXVq33Xab1q5d61DPpd4bSVqxYoWaNm0qX19fRUdH67nnntOJEyecOkeXy+1BCQAAAFefEydOqF+/flq7dq0WL16sUqVK6Z577lFOTo4k6fjx40pISNCff/6pOXPmaNOmTRo4cKB9viTt2rVLs2fP1ty5czV37lylpqZq5MiRTu9jzZo1kv69Q2n//v2aNWtWvrXu27dPHTt2VNu2bZWWlqYnnnhCL7300mUd9+LFi7Vt2zalpKRo7ty5kv4NFUOHDtWmTZs0e/Zspaen27/y5nwvv/yy3njjDa1bt06enp72O6oeeOAB9e/fXzfeeKP279+v/fv364EHHnCqnkudowuZOnWqPD09tXr1ar399tt688039dFHH9nnP/roo/rxxx81Y8YMbd68Wffff79at26tHTt22Jc5efKkRowYoY8++khbt25VWFhYnv0MHDhQM2fO1NSpU7VhwwZdf/31atWqlQ4dOiTJufdmy5YtatWqlTp27KjNmzfriy++0PLly9W7d2+nztHlcvutdwAAALj6WG/HmjRpksLCwvTzzz+rZs2amj59ug4cOKC1a9cqJCREknT99dc7rJOTk6MpU6bYH6t45JFHtHjxYg0fPtypfZQvX16SFBoaah8xOT/vvfeeKleurDfffFM2m03VqlXTli1bNGrUKJeP29/fXx999JHDLXfnP0JSuXJlvf3226pfv76OHz+ugIAA+7zhw4crISFBkvTSSy+pXbt2On36tHx9fRUQECBPT8+LHkd+LnWOLiQ6OjrP+XjzzTf15JNPateuXfr888/1+++/27+6Z8CAAVqwYIEmT56s5ORkSdLZs2f17rvvqnbt2vnu48SJE3rvvfc0ZcoUtWnTRpL04YcfKiUlRZMmTdILL7zg1HszZswYde7cWX369JEkxcXF6e2331ZCQoLee+89lS5d2qVz5iyuKAEAAMBlu3btUufOnVW5cmUFBQXZb4Pbu3evJCktLU1169a1h6T8xMTEODx7HhkZ6XDL2qX24axt27apYcOGDoMRNGrUyKVt5KpVq1ae55I2btyou+66S5UqVVJgYKD9FkBrnfHx8f
Z/R0ZGSpLD8V6Oyz1H+Z2PHTt2KDs7Wxs2bLA/GhMQEGB/paamOtzW5+3t7XBM+dV29uxZ3XrrrfY2Ly8v1a9f3z7UuDPvzfr16zVlyhSHWlq1aqWcnBylp6c7cZYuD1eUAAAA4LIOHTooOjpaH374oaKiopSTk6OaNWsqKytLkuTr63vJbXh5eTlM22w2h1vGLrUPZzkzClupUqXyLHf27Nk8y/n7+ztMnzhxQi1btlTLli01bdo0lS9fXnv37lWrVq3y1Hn+8eYGg0vdIncpBXWOzpeTkyMPDw+tX79eHh4eDvPOv0Lm6+t70ZHwcs+ndRljjL3NmfcmJydHTz/9tJ577rk88ypWrHjJ9S8XQQkAAAAuOXjwoLZt26b3339fTZo0kSQtX77cYZn4+Hh99NFHOnTo0EWvKl3JPnKv7GRnZ190WzVq1NDs2bMd2latWuUwXb58eWVkZDj8Ee/MUN2//PKL/v77b40cOVLR0dGSpHXr1l1yPStvb+9LHoeVM+foQqzHv2rVKsXFxcnDw0N169ZVdna2MjMz7du9HNdff728vb21fPly+9f7nD17VuvWrbPfRufMe3PTTTdp69ateW7dLGzcegcAAACXlC1bVqGhofrggw+0c+dOLVmyRP369XNY5qGHHlJERITuvvtu/fjjj/rtt980c+ZMrVy5ssD2ERYWJl9fXy1YsEB//fWXjhzJ/7ugevTooV27dqlfv37avn27pk+fnmdUucTERB04cECjR4/Wrl279M477+jbb7+9ZJ0VK1aUt7e3xo8fr99++01z5szR0KFDnTrG88XExCg9PV1paWn6+++/debMmUuu48w5upB9+/bZz8fnn3+u8ePH6/nnn5ckVa1aVV26dFHXrl01a9Yspaena+3atRo1apTmz5/v9DH5+/vrmWee0QsvvKAFCxbo559/1pNPPqmTJ0/q8ccfl+Tce/Piiy9q5cqV6tWrl9LS0rRjxw7NmTNHzz77rNO1XA6CEgAAAFxSqlQpzZgxQ+vXr1fNmjXVt29fjRkzxmEZb29vLVq0SGFhYWrbtq1q1aqlkSNH5rmV60r24enpqbffflvvv/++oqKidNddd+W7rYoVK2rmzJn65ptvVLt2bU2cONE+IEGu6tWr691339U777yj2rVra82aNRowYMAl6yxfvrymTJmir776SjVq1NDIkSP1+uuvO3WM57v33nvVunVrNWvWTOXLl9fnn39+yXWcOUcX0rVrV506dUr169dXr1699Oyzz+qpp56yz588ebK6du2q/v37q1q1arrzzju1evVq+1UzZ40cOVL33nuvHnnkEd10003auXOnFi5cqLJly0py7r2Jj49XamqqduzYoSZNmqhu3bp69dVX7c95FRabKSlfnXsBR48eVXBwsI4cOaKgoCB3l1MyFfU3hxe2q/CbyQEAV6fTp08rPT1dsbGxhTZyF2CVmJioOnXqaNy4ce4upVBc7OfKlWzAFSUAAAAAsCAoAQAAAIAFo94BAAAA15Bly5a5u4SrAkGpiMW8NM/dJRS43dxSDQAAgBKGW+8AAAAAwIKgBAAA4GYlfBBioEgV1M8TQQkAAMBNvLy8JEknT550cyVAyZH785T783W5eEYJAADATTw8PFSmTBllZmZKkvz8/GSz2dxcFXB1Msbo5MmTyszMVJkyZZz+cuMLISgBAAC4UUREhCTZwxKAK1OmTBn7z9WVICgBAAC4kc1mU2RkpMLCwnT27Fl3lwNc1by8vK74SlIughIAAEAx4OHhUWB/4AG4cgzmAAAAAAAWBCUAAAAAsODWOwDFU1KwuysoWElH3F0BAABwAVeUAAAAAMCCoAQAAAAAFgQlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVACAAAAAAuCEgAAAABYEJQAAAAAwMLT3QUAuHIxL81zdwkFbndpd1cAAACuZVxRAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFg
QlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVACAAAAAAuCEgAAAABYEJQAAAAAwIKgBAAAAAAWBCUAAAAAsCAoAQAAAIAFQQkAAAAALAhKAAAAAGBBUAIAAAAAC4ISAAAAAFgQlAAAAADAwu1B6Y8//tDDDz+s0NBQ+fn5qU6dOlq/fr19vjFGSUlJioqKkq+vrxITE7V161Y3VgwAAACgpHNrUDp8+LBuvfVWeXl56dtvv9XPP/+sN954Q2XKlLEvM3r0aI0dO1YTJkzQ2rVrFRERoRYtWujYsWPuKxwAAABAiebpzp2PGjVK0dHRmjx5sr0tJibG/m9jjMaNG6eXX35ZHTt2lCRNnTpV4eHhmj59up5++umiLhkAAADANcCtV5TmzJmjevXq6f7771dYWJjq1q2rDz/80D4/PT1dGRkZatmypb3Nx8dHCQkJWrFiRb7bPHPmjI4ePerwAgAAAABXuDUo/fbbb3rvvfcUFxenhQsXqkePHnruuef0ySefSJIyMjIkSeHh4Q7rhYeH2+dZjRgxQsHBwfZXdHR04R4EAAAAgBLHrUEpJydHN910k5KTk1W3bl09/fTTevLJJ/Xee+85LGez2RymjTF52nINGjRIR44csb/27dtXaPUDAAAAKJncGpQiIyNVo0YNh7bq1atr7969kqSIiAhJynP1KDMzM89Vplw+Pj4KCgpyeAEAAACAK9walG699VZt377doe3XX39VpUqVJEmxsbGKiIhQSkqKfX5WVpZSU1PVuHHjIq0VAAAAwLXDraPe9e3bV40bN1ZycrI6deqkNWvW6IMPPtAHH3wg6d9b7vr06aPk5GTFxcUpLi5OycnJ8vPzU+fOnd1ZOgAAAIASzK1B6ZZbbtHXX3+tQYMG6bXXXlNsbKzGjRunLl262JcZOHCgTp06pZ49e+rw4cNq0KCBFi1apMDAQDdWDgAAAKAkc2tQkqT27durffv2F5xvs9mUlJSkpKSkoisKAAAAwDXNrc8oAQAAAEBxRFACAAAAAAuCEgAAAABYEJQAAAAAwIKgBAAAAAAWBCUAAAAAsCAoAQAAAIAFQQkAAAAALNz+hbMAAODaEfPSPHeXUKB2l+7s7hIKXtIRd1cAFAtcUQIAAAAAC4ISAAAAAFgQlAAAAADAgqAEAAAAABYM5gAAyFdJe+heknaPbOfuEgAAVwmuKAEAAACABUEJAAAAACwISgAAAABgQVACAAAAAAuCEgAAAABYEJQAAAAAwIKgBAAAAAAWfI8SAODakRTs7goKVtIRd1cAACUWV5QAAAAAwIKgBAAAAAAWBCUAAAAAsCAoAQAAAIAFQQkAAAAALAhKAAAAAGBBUAIAAAAAC4ISAAAAAFgQlAAAAADAgqAEAAAAABYEJQAAAACwICgBAAAAgAVBCQAAAAAsCEoAAAAAYEFQAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFgQlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVACAAAAAAuCEgAAAABYEJQAAAAAwIKgBAAAAAAWBCUAAAAAsCAoAQAAAICFW4NSUlKSbDabwysiIsI+3xijpKQkRUVFydfXV4mJidq6dasbKwYAAABwLXD7FaUbb7xR+/fvt7+2bNlinzd69GiNHTtWEyZM0Nq1axUREaEWLVro2LFjbqwYAAAAQEnn9qDk6empiIgI+6t8+fKS/r2aNG7cOL388svq2LGjatasqalTp+rkyZOaPn26m6sGAAAAUJK5PSjt2LFDUVFRio2N1YMPPqjffvtNkpSenq6MjAy1bNnSvqyPj48SEhK0YsUKd5ULAAAA4Brg6c6dN2jQQJ988omqVq2qv/76S8OGDVPjxo21detWZWRkSJLCw8Md1gkPD9eePXsuuM0zZ87ozJkz9umjR48WTvEAAAAASiy3BqU2bdrY/12rVi01atRIVapU0dSpU9WwYUNJks1mc1jHGJOn7XwjRozQkC
FDCqdgAAAAANcEtwYlK39/f9WqVUs7duzQ3XffLUnKyMhQZGSkfZnMzMw8V5nON2jQIPXr188+ffToUUVHRxdazQAAAHCvmJfmubuEArV7ZDt3lwAVg2eUznfmzBlt27ZNkZGRio2NVUREhFJSUuzzs7KylJqaqsaNG19wGz4+PgoKCnJ4AQAAAIAr3HpFacCAAerQoYMqVqyozMxMDRs2TEePHlW3bt1ks9nUp08fJScnKy4uTnFxcUpOTpafn586d+7szrIBAAAAlHBuDUq///67HnroIf39998qX768GjZsqFWrVqlSpUqSpIEDB+rUqVPq2bOnDh8+rAYNGmjRokUKDAx0Z9kAAAAASji3BqUZM2ZcdL7NZlNSUpKSkpKKpiAAAADA3ZKC3V1BwUs64u4KXFasnlECAAAAgOKAoAQAAAAAFgQlAAAAALC4rGeUzp49q4yMDJ08eVLly5dXSEhIQdcFAAAAAG7j9BWl48eP6/3331diYqKCg4MVExOjGjVqqHz58qpUqZKefPJJrV27tjBrBQAAAIAi4VRQevPNNxUTE6MPP/xQt99+u2bNmqW0tDRt375dK1eu1ODBg3Xu3Dm1aNFCrVu31o4dOwq7bgAAAAAoNE7derdixQotXbpUtWrVynd+/fr19dhjj2nixImaNGmSUlNTFRcXV6CFAgAAAEBRcSooffXVV05tzMfHRz179ryiggAAAADA3a7oC2fPnj2rX3/9VdnZ2apWrZp8fHwKqi4AAAAAcJvLHh78hx9+UExMjJo1a6bExERFR0drwYIFBVkbAAAAALiF00HJGOMw3adPH3322WfKzMzUoUOHNGzYMD3zzDMFXiAAAAAAFDWng1L9+vW1YcMG+3RWVpYqVqxon65YsaJOnz5dsNUBAAAAgBs4/YzShAkT9MQTTyghIUHDhg3T4MGDdfPNN6tatWo6e/asfvnlF40fP74wawUAAACAIuF0UGrQoIHWrFmj0aNH6+abb9bo0aO1fft2rV69WtnZ2apfv76ioqIKs1YAAAAAKBIujXrn6emp//znP+rUqZOeeeYZTZ06VePHjycgAQAAAChRXBr17ueff9bMmTOVk5OjlJQUdejQQU2aNNG7775bWPUBAAAAQJFzOiiNGzdO9erV05gxY9SoUSN9+OGH6t69u1avXq2VK1eqUaNG2rJlS2HWCgAAAABFwumgNGrUKM2bN0+rVq3Shg0bNHbsWElSuXLl9Omnn+q1115Tp06dCq1QAAAAACgqLn2PUqlS/y7u4eGR53uVWrRooY0bNxZsdQAAAADgBk4P5jBgwAC1bdtWtWvX1q+//qrk5OQ8y5QuXbpAiwMAAAAAd3ApKLVu3Vrbtm1TrVq1dMMNNxRmXQAAAADgNi4ND16zZk3VrFmzsGoBAAAAgGLBqWeURo4cqRMnTji1wdWrV2vevHlXVBQAAAAAuJNTQennn39WpUqV9Mwzz+jbb7/VgQMH7PPOnTunzZs3691331Xjxo314IMPKigoqNAKBgAAAIDC5tStd5988ok2b96sd955R126dNGRI0fk4eEhHx8fnTx5UpJUt25dPfXUU+rWrZt8fHwKtWgAAAAAKExOP6MUHx+v999/XxMnTtTmzZu1e/dunTp1SuXKlVOdOnVUrly5wqwTAAAAAIqMS4M5SJLNZlPt2rVVu3btwqgHAAAAANzO6S+cBQAAAIBrBUEJAAAAACwISgAAAABgQVACAAAAAIsrDkpHjx7V7NmztW3btoKoBwAAAADczuWg1KlTJ02YMEGSdOrUKdWrV0+dOnVSfHy8Zs6cWeAFAgAAAEBRczkoff/992rSpIkk6euvv5YxRv/884/efvttDRs2rMALBAAAAICi5nJQOnLkiEJCQiRJCxYs0L333is/Pz+1a9dOO3bsKPACAQAAAKCouRyUoqOjtXLlSp04cUILFixQy5YtJUmHDx9W6dKlC7xAAAAAAChqnq6u0KdPH3Xp0k
UBAQGqWLGiEhMTJf17S16tWrUKuj4AAAAAKHIuB6WePXuqfv362rdvn1q0aKFSpf69KFW5cmWeUQIAAABQIrgclCSpXr16io+PV3p6uqpUqSJPT0+1a9euoGsDAAAAALdw+RmlkydP6vHHH5efn59uvPFG7d27V5L03HPPaeTIkQVeIAAAAAAUNZeD0qBBg7Rp0yYtW7bMYfCGO+64Q1988UWBFgcAAAAA7uDyrXezZ8/WF198oYYNG8pms9nba9SooV27dhVocQAAAADgDi5fUTpw4IDCwsLytJ84ccIhOAEAAADA1crloHTLLbdo3rx59unccPThhx+qUaNGBVcZAAAAALiJy7fejRgxQq1bt9bPP/+sc+fO6a233tLWrVu1cuVKpaamFkaNAAAAAFCkXL6i1LhxY/344486efKkqlSpokWLFik8PFwrV67UzTffXBg1AgAAAECRuqzvUapVq5amTp1a0LUAAAAAQLFwWUFJkjIzM5WZmamcnByH9vj4+CsuCgAAAADcyeWgtH79enXr1k3btm2TMcZhns1mU3Z2doEVBwAAAADu4HJQevTRR1W1alVNmjRJ4eHhDAkOAAAAoMRxOSilp6dr1qxZuv766wujHgAAAABwO5dHvWvevLk2bdpU4IWMGDFCNptNffr0sbcZY5SUlKSoqCj5+voqMTFRW7duLfB9AwAAAMD5XL6i9NFHH6lbt2766aefVLNmTXl5eTnMv/POO10uYu3atfrggw/yDAQxevRojR07VlOmTFHVqlU1bNgwtWjRQtu3b1dgYKDL+wEAAAAAZ7gclFasWKHly5fr22+/zTPvcgZzOH78uLp06aIPP/xQw4YNs7cbYzRu3Di9/PLL6tixoyRp6tSpCg8P1/Tp0/X000+7WjoAAAAAOMXlW++ee+45PfLII9q/f79ycnIcXpcz4l2vXr3Url073XHHHQ7t6enpysjIUMuWLe1tPj4+SkhI0IoVKy64vTNnzujo0aMOLwAAAABwhctXlA4ePKi+ffsqPDz8inc+Y8YMbdiwQWvXrs0zLyMjQ5Ly7Cc8PFx79uy54DZHjBihIUOGXHFtAAAAAK5dLl9R6tixo5YuXXrFO963b5+ef/55TZs2TaVLl77gctbhx40xFx2SfNCgQTpy5Ij9tW/fviuuFQAAAMC1xeUrSlWrVtWgQYO0fPly1apVK89gDs8995xT21m/fr0yMzN1880329uys7P1/fffa8KECdq+fbukf68sRUZG2pfJzMy86NUsHx8f+fj4uHJIAAAAAODgska9CwgIUGpqqlJTUx3m2Ww2p4NS8+bNtWXLFoe2Rx99VDfccINefPFFVa5cWREREUpJSVHdunUlSVlZWUpNTdWoUaNcLRsAAAAAnHZZXzhbEAIDA1WzZk2HNn9/f4WGhtrb+/Tpo+TkZMXFxSkuLk7Jycny8/NT586dC6QGAAAAAMiPy0GpKA0cOFCnTp1Sz549dfjwYTVo0ECLFi3iO5QAAAAAFCqXg5IxRv/973+1dOlSZWZmKicnx2H+rFmzLruYZcuWOUzbbDYlJSUpKSnpsrcJAAAAAK5yOSg9//zz+uCDD9SsWTOFh4dfdAQ6AAAAALgauRyUpk2bplmzZqlt27aFUQ8AAAAAuJ3L36MUHBysypUrF0YtAAAAAFAsuByUkpKSNGTIEJ06daow6gEAAAAAt3P51rv7779fn3/+ucLCwhQTE5PnC2c3bNhQYMUBAAAAgDu4HJS6d++u9evX6+GHH2YwBwAAAAAlkstBad68eVq4cKFuu+22wqgHAAAAANzO5WeUoqOjFRQUVBi1AAAAAECx4HJQeuONNzRw4EDt3r27EMoBAAAAAPdz+da7hx9+WCdPnlSVKlXk5+eXZzCHQ4cOFVhxAAAAAOAOLgelcePGFUIZAAAAAFB8uByUunXrVhh1AAAAAECx4fIzSpK0a9cuvfLKK3rooYeUmZkpSVqwYIG2bt1aoMUBAA
AAgDu4HJRSU1NVq1YtrV69WrNmzdLx48clSZs3b9bgwYMLvEAAAAAAKGouB6WXXnpJw4YNU0pKiry9ve3tzZo108qVKwu0OAAAAABwB5eD0pYtW3TPPffkaS9fvrwOHjxYIEUBAAAAgDu5HJTKlCmj/fv352nfuHGjKlSoUCBFAQAAAIA7uRyUOnfurBdffFEZGRmy2WzKycnRjz/+qAEDBqhr166FUSMAAAAAFCmXg9Lw4cNVsWJFVahQQcePH1eNGjXUtGlTNW7cWK+88kph1AgAAAAARcqp71E6evSogoKCJEleXl767LPPNHToUG3YsEE5OTmqW7eu4uLiCrVQAAAAACgqTgWlsmXLav/+/QoLC9Ptt9+uWbNmqXLlyqpcuXJh1wcAAAAARc6pW+8CAgLsI9otW7ZMZ8+eLdSiAAAAAMCdnLqidMcdd6hZs2aqXr26JOmee+5x+A6l8y1ZsqTgqgMAAAAAN3AqKE2bNk1Tp07Vrl27lJqaqhtvvFF+fn6FXRsAAAAAuIVTQcnX11c9evSQJK1bt06jRo1SmTJlCrMuAAAAAHAbp4LS+ZYuXVoYdQAAAABAseHy9ygBAAAAQElHUAIAAAAAC4ISAAAAAFgQlAAAAADA4rKC0g8//KCHH35YjRo10h9//CFJ+vTTT7V8+fICLQ4AAAAA3MHloDRz5ky1atVKvr6+2rhxo86cOSNJOnbsmJKTkwu8QAAAAAAoai4HpWHDhmnixIn68MMP5eXlZW9v3LixNmzYUKDFAQAAAIA7uByUtm/frqZNm+ZpDwoK0j///FMQNQEAAACAW7kclCIjI7Vz58487cuXL1flypULpCgAAAAAcCeXg9LTTz+t559/XqtXr5bNZtOff/6pzz77TAMGDFDPnj0Lo0YAAAAAKFKerq4wcOBAHTlyRM2aNdPp06fVtGlT+fj4aMCAAerdu3dh1AgAAAAARcrloCRJw4cP18svv6yff/5ZOTk5qlGjhgICAgq6NgAAAABwi8sKSpLk5+enevXqFWQtAAAAAFAsOBWUOnbs6PQGZ82addnFAAAAAEBx4NRgDsHBwfZXUFCQFi9erHXr1tnnr1+/XosXL1ZwcHChFQoAAAAARcWpK0qTJ0+2//vFF19Up06dNHHiRHl4eEiSsrOz1bNnTwUFBRVOlQAAAABQhFweHvzjjz/WgAED7CFJkjw8PNSvXz99/PHHBVocAAAAALiDy0Hp3Llz2rZtW572bdu2KScnp0CKAgAAAAB3cnnUu0cffVSPPfaYdu7cqYYNG0qSVq1apZEjR+rRRx8t8AIBAAAAoKi5HJRef/11RURE6M0339T+/fslSZGRkRo4cKD69+9f4AUCAAAAQFFzOSiVKlVKAwcO1MCBA3X06FFJYhAHAAAAACXKZX/hrERAAgAAAFAyuTyYAwAAAACUdAQlAAAAALBwa1B67733FB8fr6CgIAUFBalRo0b69ttv7fONMUpKSlJUVJR8fX2VmJiorVu3urFiAAAAANcCl4PSJ598ojNnzuRpz8rK0ieffOLStq677jqNHDlS69at07p163T77bfrrrvusoeh0aNHa+zYsZowYYLWrl2riIgItWjRQseOHXO1bAAAAABwmstB6dFHH9WRI0fytB87dszl71Hq0KGD2rZtq6pVq6pq1aoaPny4AgICtGrVKhljNG7cOL388svq2LGjatasqalTp+rkyZOaPn26q2UDAAAAgNNcDkrGGNlstjztv//+u4KDgy+7kOzsbM2YMUMnTpxQo0aNlJ6eroyMDLVs2dK+jI+PjxISErRixYrL3g8AAAAAXIrTw4PXrVtXNptNNptNzZs3l6fn/62anZ2t9PR0tW7d2uUCtmzZokaNGun06dMKCAjQ119/rRo1atjDUHh4uMPy4eHh2rNnzwW3d+bMGYdbA3O/6wkAAAAAnOV0ULr77rslSWlpaWrVqpUCAgLs87y9vRUTE6N7773X5QKqVaumtL
Q0/fPPP5o5c6a6deum1NRU+3zr1asLXdHKNWLECA0ZMsTlOgAAAAAgl9NBafDgwcrOzlalSpXUqlUrRUZGFkgB3t7euv766yVJ9erV09q1a/XWW2/pxRdflCRlZGQ47CszMzPPVabzDRo0SP369bNPHz16VNHR0QVSKwAAAIBrg0vPKHl4eKhHjx46ffp0YdUjY4zOnDmj2NhYRUREKCUlxT4vKytLqampaty48QXX9/HxsQ83nvsCAAAAAFc4fUUpV61atfTbb78pNjb2inf+n//8R23atFF0dLSOHTumGTNmaNmyZVqwYIFsNpv69Omj5ORkxcXFKS4uTsnJyfLz81Pnzp2veN8AAAAAcCEuB6Xhw4drwIABGjp0qG6++Wb5+/s7zHflCs5ff/2lRx55RPv371dwcLDi4+O1YMECtWjRQpI0cOBAnTp1Sj179tThw4fVoEEDLVq0SIGBga6WDQAAAABOczko5Y5sd+eddzoMqpA7yEJ2drbT25o0adJF59tsNiUlJSkpKcnVMgEAAADgsrkclJYuXVoYdQAAAABAseFyUEpISCiMOgAAAACg2HA5KOU6efKk9u7dq6ysLIf2+Pj4Ky4KAAAAANzJ5aB04MABPfroo/r222/zne/KM0oAAAAAUBy59D1KktSnTx8dPnxYq1atkq+vrxYsWKCpU6cqLi5Oc+bMKYwaAQAAAKBIuXxFacmSJfrf//6nW265RaVKlVKlSpXUokULBQUFacSIEWrXrl1h1AkAAAAARcblK0onTpxQWFiYJCkkJEQHDhyQ9O8X0W7YsKFgqwMAAAAAN3A5KFWrVk3bt2+XJNWpU0fvv/++/vjjD02cOFGRkZEFXiAAAAAAFDWXb73r06eP/vzzT0nS4MGD1apVK3322Wfy9vbWlClTCro+AAAAAChyLgelLl262P9dt25d7d69W7/88osqVqyocuXKFWhxAAAAAOAOTt96d/LkSfXq1UsVKlRQWFiYOnfurL///lt+fn666aabCEkAAAAASgyng9LgwYM1ZcoUtWvXTg8++KBSUlL0zDPPFGZtAAAAAOAWTt96N2vWLE2aNEkPPvigJOnhhx/WrbfequzsbHl4eBRagQAAAABQ1Jy+orRv3z41adLEPl2/fn15enraB3YAAAAAgJLC6aCUnZ0tb29vhzZPT0+dO3euwIsCAAAAAHdy+tY7Y4y6d+8uHx8fe9vp06fVo0cP+fv729tmzZpVsBUCAAAAQBFzOih169YtT9vDDz9coMUAAAAAQHHgdFCaPHlyYdYBAAAAAMWG088oAQAAAMC1gqAEAAAAABYEJQAAAACwICgBAAAAgAVBCQAAAAAsCEoAAAAAYEFQAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFgQlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVACAAAAAAuCEgAAAABYEJQAAAAAwIKgBAAAAAAWBCUAAAAAsCAoAQAAAIAFQQkAAAAALAhKAAAAAGBBUAIAAAAAC4ISAAAAAFgQlAAAAADAgqAEAAAAABYEJQAAAACwICgBAAAAgAVBCQAAAAAsCEoAAAAAYEFQAgAAAAALghIAAAAAWBCUAAAAAMDCrUFpxIgRuuWWWxQYGKiwsDDdfffd2r59u8MyxhglJSUpKipKvr6+SkxM1NatW91UMQAAAIBrgVuDUmpqqnr16qVVq1YpJSVF586dU8uWLXXixAn7MqNHj9bYsWM1YcIErV27VhEREWrRooWOHTvmxsoBAAAAlGSe7tz5ggULHKYnT56ssLAwrV+/Xk2bNpUxRuPGjdPLL7+sjh07SpKmTp2q8PBwTZ8+XU8//bQ7ygYAAABQwhWrZ5SOHDkiSQoJCZEkpaenKyMjQy1btrQv4+Pjo4SEBK1YsSLfbZw5c0ZHjx51eAEAAACAK4pNUDLGqF+/frrttttUs2ZNSVJGRoYkKTw83GHZ8PBw+zyrESNGKDg42P
6Kjo4u3MIBAAAAlDjFJij17t1bmzdv1ueff55nns1mc5g2xuRpyzVo0CAdOXLE/tq3b1+h1AsAAACg5HLrM0q5nn32Wc2ZM0fff/+9rrvuOnt7RESEpH+vLEVGRtrbMzMz81xlyuXj4yMfH5/CLRgAAABAiebWK0rGGPXu3VuzZs3SkiVLFBsb6zA/NjZWERERSklJsbdlZWUpNTVVjRs3LupyAQAAAFwj3HpFqVevXpo+fbr+97//KTAw0P7cUXBwsHx9fWWz2dSnTx8lJycrLi5OcXFxSk5Olp+fnzp37uzO0gEAAACUYG4NSu+9954kKTEx0aF98uTJ6t69uyRp4MCBOnXqlHr27KnDhw+rQYMGWrRokQIDA4u4WgAAAADXCrcGJWPMJZex2WxKSkpSUlJS4RcEAAAAACpGo94BAAAAQHFBUAIAAAAAC4ISAAAAAFgQlAAAAADAgqAEAAAAABYEJQAAAACwICgBAAAAgAVBCQAAAAAsCEoAAAAAYEFQAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFgQlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVACAAAAAAuCEgAAAABYEJQAAAAAwIKgBAAAAAAWBCUAAAAAsCAoAQAAAIAFQQkAAAAALAhKAAAAAGBBUAIAAAAAC4ISAAAAAFgQlAAAAADAgqAEAAAAABYEJQAAAACwICgBAAAAgAVBCQAAAAAsCEoAAAAAYEFQAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFgQlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVACAAAAAAuCEgAAAABYEJQAAAAAwIKgBAAAAAAWBCUAAAAAsCAoAQAAAIAFQQkAAAAALAhKAAAAAGBBUAIAAAAAC7cGpe+//14dOnRQVFSUbDabZs+e7TDfGKOkpCRFRUXJ19dXiYmJ2rp1q3uKBQAAAHDNcGtQOnHihGrXrq0JEybkO3/06NEaO3asJkyYoLVr1yoiIkItWrTQsWPHirhSAAAAANcST3fuvE2bNmrTpk2+84wxGjdunF5++WV17NhRkjR16lSFh4dr+vTpevrpp4uyVAAAAADXkGL7jFJ6eroyMjLUsmVLe5uPj48SEhK0YsUKN1YGAAAAoKRz6xWli8nIyJAkhYeHO7SHh4drz549F1zvzJkzOnPmjH366NGjhVMgAAAAgBKr2F5RymWz2RymjTF52s43YsQIBQcH21/R0dGFXSIAAACAEqbYBqWIiAhJ/3dlKVdmZmaeq0znGzRokI4cOWJ/7du3r1DrBAAAAFDyFNugFBsbq4iICKWkpNjbsrKylJqaqsaNG19wPR8fHwUFBTm8AAAAAMAVbn1G6fjx49q5c6d9Oj09XWlpaQoJCVHFihXVp08fJScnKy4uTnFxcUpOTpafn586d+7sxqoBAAAAlHRuDUrr1q1Ts2bN7NP9+vWTJHXr1k1TpkzRwIEDderUKfXs2VOHDx9WgwYNtGjRIgUGBrqrZAAAAADXALcGpcTERBljLjjfZrMpKSlJSUlJRVcUAAAAgGtesX1GCQAAAADchaAEAAAAABYEJQAAAACwICgBAAAAgAVBCQAAAAAsCEoAAAAAYEFQAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFgQlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVACAAAAAAuCEgAAAABYEJQAAAAAwIKgBAAAAAAWBCUAAAAAsCAoAQAAAIAFQQkAAAAALAhKAAAAAGBBUAIAAAAAC4ISAAAAAFgQlAAAAADAgqAEAAAAABYEJQAAAACwICgBAAAAgAVBCQAAAAAsCEoAAAAAYEFQAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFgQlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVACAAAAAAuCEgAAAABYEJQAAAAAwIKgBAAAAAAWBCUAAA
AAsCAoAQAAAIAFQQkAAAAALAhKAAAAAGBBUAIAAAAAC4ISAAAAAFgQlAAAAADAgqAEAAAAABYEJQAAAACwuCqC0rvvvqvY2FiVLl1aN998s3744Qd3lwQAAACgBCv2QemLL75Qnz599PLLL2vjxo1q0qSJ2rRpo71797q7NAAAAAAlVLEPSmPHjtXjjz+uJ554QtWrV9e4ceMUHR2t9957z92lAQAAACihPN1dwMVkZWVp/fr1eumllxzaW7ZsqRUrVuS7zpkzZ3TmzBn79JEjRyRJR48eLbxCXZBz5qS7SyhwR23G3SUUrGLSV1xBv7oK0K+KBfqV+5W0flXi+pREvyoG6FeFJzcTGHPpc1ysg9Lff/+t7OxshYeHO7SHh4crIyMj33VGjBihIUOG5GmPjo4ulBohBbu7gII2ssQd0VWpxL0L9KtiocS9C/QrtyuR7wD9yu1K5DtQzPrVsWPHFBx88ZqKdVDKZbPZHKaNMXnacg0aNEj9+vWzT+fk5OjQoUMKDQ294Dq4fEePHlV0dLT27dunoKAgd5eDEoJ+hcJAv0JBo0+hMNCvCpcxRseOHVNUVNQlly3WQalcuXLy8PDIc/UoMzMzz1WmXD4+PvLx8XFoK1OmTGGViP9fUFAQP8wocPQrFAb6FQoafQqFgX5VeC51JSlXsR7MwdvbWzfffLNSUlIc2lNSUtS4cWM3VQUAAACgpCvWV5QkqV+/fnrkkUdUr149NWrUSB988IH27t2rHj16uLs0AAAAACVUsQ9KDzzwgA4ePKjXXntN+/fvV82aNTV//nxVqlTJ3aVB/97qOHjw4Dy3OwJXgn6FwkC/QkGjT6Ew0K+KD5txZmw8AAAAALiGFOtnlAAAAADAHQhKAAAAAGBBUAIAAAAAC4ISAAAAAFgQlHBJ33//vTp06KCoqCjZbDbNnj3bYb4xRklJSYqKipKvr68SExO1detW9xSLq8al+tWsWbPUqlUrlStXTjabTWlpaW6pE1eXi/Wrs2fP6sUXX1StWrXk7++vqKgode3aVX/++af7CsZV4VKfV0lJSbrhhhvk7++vsmXL6o477tDq1avdUyyuGpfqV+d7+umnZbPZNG7cuCKrDwQlOOHEiROqXbu2JkyYkO/80aNHa+zYsZowYYLWrl2riIgItWjRQseOHSviSnE1uVS/OnHihG699VaNHDmyiCvD1exi/erkyZPasGGDXn31VW3YsEGzZs3Sr7/+qjvvvNMNleJqcqnPq6pVq2rChAnasmWLli9frpiYGLVs2VIHDhwo4kpxNblUv8o1e/ZsrV69WlFRUUVUGewM4AJJ5uuvv7ZP5+TkmIiICDNy5Eh72+nTp01wcLCZOHGiGyrE1cjar86Xnp5uJJmNGzcWaU24+l2sX+Vas2aNkWT27NlTNEXhqudMvzpy5IiRZL777ruiKQpXvQv1q99//91UqFDB/PTTT6ZSpUrmzTffLPLarmVcUcIVSU9PV0ZGhlq2bGlv8/HxUUJCglasWOHGygDg0o4cOSKbzaYyZcq4uxSUEFlZWfrggw8UHBys2rVru7scXMVycnL0yCOP6IUXXtCNN97o7nKuSZ7uLgBXt4yMDElSeHi4Q3t4eLj27NnjjpIAwCmnT5/WSy+9pM6dOysoKMjd5eAqN3fuXD344IM6efKkIiMjlZKSonLlyrm7LFzFRo0aJU9PTz333HPuLuWaxRUlFAibzeYwbYzJ0wYAxcXZs2f14IMPKicnR++++667y0EJ0KxZM6WlpWnFihVq3bq1OnXqpMzMTHeXhavU+vXr9dZbb2nKlCn8PeVGBCVckYiICEn/d2UpV2ZmZp6rTABQHJw9e1adOnVSenq6UlJSuJqEAuHv76/rr79eDRs21KRJk+Tp6alJkya5uyxcpX744QdlZmaqYsWK8vT0lKenp/bs2aP+/fsrJibG3eVdMwhKuCKxsbGKiIhQSkqKvS0rK0upqalq3LixGysDgLxyQ9KOHT
v03XffKTQ01N0loYQyxujMmTPuLgNXqUceeUSbN29WWlqa/RUVFaUXXnhBCxcudHd51wyeUcIlHT9+XDt37rRPp6enKy0tTSEhIapYsaL69Omj5ORkxcXFKS4uTsnJyfLz81Pnzp3dWDWKu0v1q0OHDmnv3r3277jZvn27pH+vYuZeyQSsLtavoqKidN9992nDhg2aO3eusrOz7VfDQ0JC5O3t7a6yUcxdrF+FhoZq+PDhuvPOOxUZGamDBw/q3Xff1e+//67777/fjVWjuLvU70Hrf+R4eXkpIiJC1apVK+pSr13uHnYPxd/SpUuNpDyvbt26GWP+HSJ88ODBJiIiwvj4+JimTZuaLVu2uLdoFHuX6leTJ0/Od/7gwYPdWjeKt4v1q9yh5vN7LV261N2loxi7WL86deqUueeee0xUVJTx9vY2kZGR5s477zRr1qxxd9ko5i71e9CK4cGLns0YYwo3igEAAADA1YVnlAAAAADAgqAEAAAAABYEJQAAAACwICgBAAAAgAVBCQAAAAAsCEoAAAAAYEFQAgAAAAALghIA4Irs3r1bw4YN0/Hjx91dCgAABYagBAC4bFlZWerUqZNCQ0MVEBBQJPtctmyZbDab/vnnnyLZX0mVmJioPn36uLsMACi2CEoAcJXp3r27bDabRo4c6dA+e/Zs2Wy2Iq2lf//+atGihZ555pki3S+u3KxZszR06FB3lwEAxZanuwsAALiudOnSGjVqlJ5++mmVLVvWbXWMHz/eqeWysrLk7e1dyNXAFSEhIe4uAQCKNa4oAcBV6I477lBERIRGjBhxwWWSkpJUp04dh7Zx48YpJibGPt29e3fdfffdSk5OVnh4uMqUKaMhQ4bo3LlzeuGFFxQSEqLrrrtOH3/8scN2/vjjDz3wwAMqW7asQkNDddddd2n37t15tjtixAhFRUWpatWqkqQtW7bo9ttvl6+vr0JDQ/XUU09d8tmm+fPnq2rVqvL19VWzZs0c9pNrxYoVatq0qXx9fRUdHa3nnntOJ06cuOh258yZo3r16ql06dIqV66cOnbsaJ93+PBhde3aVWXLlpWfn5/atGmjHTt22OdPmTJFZcqU0dy5c1WtWjX5+fnpvvvu04kTJzR16lTFxMSobNmyevbZZ5WdnW1fLyYmRkOHDlXnzp0VEBCgqKioPGFz7NixqlWrlvz9/RUdHa2ePXvmOUcffvihoqOj5efnp3vuuUdjx45VmTJl7PNz3/tPP/1UMTExCg4O1oMPPqhjx47Zl7HeepeVlaWBAweqQoUK8vf3V4MGDbRs2bKLnkMAKMkISgBwFfLw8FBycrLGjx+v33///Yq2tWTJEv3555/6/vvvNXbsWCUlJal9+/YqW7asVq9erR49eqhHjx7at2+fJOnkyZNq1qyZAgIC9P3332v58uUKCAhQ69atlZWVZd/u4sWLtW3bNqWkpGju3Lk6efKkWrdurbJly2rt2rX66quv9N1336l3794XrG3fvn3q2LGj2rZtq7S0ND3xxBN66aWXHJbZsmWLWrVqpY4dO2rz5s364osvtHz58otud968eerYsaPatWunjRs3avHixapXr559fvfu3bVu3TrNmTNHK1eulDFGbdu21dmzZ+3LnDx5Um+//bZmzJihBQsWaNmyZerYsaPmz5+v+fPn69NPP9UHH3yg//73vw77HjNmjOLj47VhwwYNGjRIffv2VUpKin1+qVKl9Pbbb+unn37S1KlTtWTJEg0cONA+/8cff1SPHj30/PPPKy0tTS1atNDw4cPzHOOuXbs0e/ZszZ07V3PnzlVqamqe2zXP9+ijj+rHH3/UjBkztHnzZt1///1q3bq1Q0AEgGuKAQBcVbp162buuusuY4wxDRs2NI899pgxxpivv/7anP+xPnjwYFO7dm2Hdd98801TqVIlh21VqlTJZGdn29uqVatmmjRpYp8+d+6c8ff3N59//rkxxphJkyaZatWqmZycHPsyZ86cMb
6+vmbhwoX27YaHh5szZ87Yl/nggw9M2bJlzfHjx+1t8+bNM6VKlTIZGRn5HuugQYNM9erVHfb14osvGknm8OHDxhhjHnnkEfPUU085rPfDDz+YUqVKmVOnTuW73UaNGpkuXbrkO+/XX381ksyPP/5ob/v777+Nr6+v+fLLL40xxkyePNlIMjt37rQv8/TTTxs/Pz9z7Ngxe1urVq3M008/bZ+uVKmSad26tcP+HnjgAdOmTZt8azHGmC+//NKEhoY6LN+uXTuHZbp06WKCg4Pt04MHDzZ+fn7m6NGj9rYXXnjBNGjQwD6dkJBgnn/+eWOMMTt37jQ2m8388ccfDttt3ry5GTRo0AVrA4CSjCtKAHAVGzVqlKZOnaqff/75srdx4403qlSp//t1EB4erlq1atmnPTw8FBoaqszMTEnS+vXrtXPnTgUGBiogIEABAQEKCQnR6dOntWvXLvt6tWrVcnguadu2bapdu7b8/f3tbbfeeqtycnK0ffv2fGvbtm2bGjZs6DBIRaNGjRyWWb9+vaZMmWKvJSAgQK1atVJOTo7S09Pz3W5aWpqaN29+wX16enqqQYMG9rbQ0FBVq1ZN27Zts7f5+fmpSpUqDuctJibGYfS/8PBw+3m7UP2NGjVy2O7SpUvVokULVahQQYGBgeratasOHjxov5Vw+/btql+/vsM2rNPSv7f5BQYG2qcjIyPz1JJrw4YNMsaoatWqDucxNTXV4T0FgGsJgzkAwFWsadOmatWqlf7zn/+oe/fuDvNKlSolY4xD2/m3juXy8vJymLbZbPm25eTkSJJycnJ0880367PPPsuzrfLly9v/fX4gkiRjzAVH5btQu7X+/OTk5Ojpp5/Wc889l2dexYoV813H19f3gtu70D6t9bt63i4md7t79uxR27Zt1aNHDw0dOlQhISFavny5Hn/8cft7l995zK9mV2rJycmRh4eH1q9fLw8PD4d5RTXsOwAUNwQlALjKjRw5UnXq1LEPmJCrfPnyysjIcPjDOi0t7Yr3d9NNN+mLL75QWFiYgoKCnF6vRo0amjp1qk6cOGEPUT/++KNKlSqVp/bz15k9e7ZD26pVq/LUs3XrVl1//fVO1xIfH6/Fixfr0UcfzXef586d0+rVq9W4cWNJ0sGDB/Xrr7+qevXqTu/jQqz1r1q1SjfccIMkad26dTp37pzeeOMN+1W+L7/80mH5G264QWvWrHFoW7du3RXVVLduXWVnZyszM1NNmjS5om0BQEnBrXcAcJWrVauWunTpkmf0tMTERB04cECjR4/Wrl279M477+jbb7+94v116dJF5cqV01133aUffvhB6enpSk1N1fPPP3/RgSW6dOmi0qVLq1u3bvrpp5+0dOlSPfvss3rkkUcUHh6e7zo9evTQrl271K9fP23fvl3Tp0/XlClTHJZ58cUXtXLlSvXq1UtpaWnasWOH5syZo2efffaCtQwePFiff/65Bg8erG3btmnLli0aPXq0JCkuLk533XWXnnzySS1fvlybNm3Sww8/rAoVKuiuu+5y/YRZ/Pjjjxo9erR+/fVXvfPOO/rqq6/0/PPPS5KqVKmic+fOafz48frtt9/06aefauLEiQ7rP/vss5o/f77Gjh2rHTt26P3339e33357Rd+hVbVqVXXp0kVdu3bVrFmzlJ6errVr12rUqFGaP3/+FR0vAFytCEoAUAIMHTo0z+1X1atX17vvvqt33nlHtWvX1po1azRgwIAr3pefn5++//57VaxYUR07dlT16tX12GOP6dSpUxe9wuTn56eFCxfq0KFDuuWWW3TfffepefPmmjBhwgXXqVixombOnKlvvvlGtWvX1sSJE5WcnOywTHx8vFJTU7Vjxw41adJEdevW1auvvqrIyMgLbjcxMVFfffWV5syZozp16uj222/X6tWr7fMnT56sm2++We3bt1ejRo1kjNH8+fPz3M52Ofr376/169erbt26Gjp0qN544w21atVKklSnTh
2NHTtWo0aNUs2aNfXZZ5/lGQL+1ltv1cSJEzV27FjVrl1bCxYsUN++fVW6dOkrqmvy5Mnq2rWr+vfvr2rVqunOO+/U6tWrFR0dfUXbBYCrlc04cwM4AAC4YjExMerTp4/D9xcVhCeffFK//PKLfvjhhwLdLgBcy3hGCQCAq8zrr7+uFi1ayN/fX99++62mTp2qd999191lAUCJQlACAOAqs2bNGo0ePVrHjh1T5cqV9fbbb+uJJ55wd1kAUKJw6x0AAAAAWDCYAwAAAABYEJQAAAAAwIKgBAAAAAAWBCUAAAAAsCAoAQAAAIAFQQkAAAAALAhKAAAAAGBBUAIAAAAAC4ISAAAAAFj8f5ZM2qtrjB8WAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar plot of the share of women per company, split by purchase status (built with the generic multiple_barplot helper)\n",
"\n",
"multiple_barplot(company_genders, x=\"number_company\", y=\"share_of_women\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de femmes (%)\", \n",
" title = \"Part de femmes selon les compagnies de spectacle (train set)\")\n",
"\n",
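"# Save the figure to S3. NOTE(review): the file name ends in _music although this notebook covers spectacle companies - confirm this is intended.\n",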
"\n",
"FILE_NAME = \"gender_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "9504e6b6-d97c-4aa9-a56a-f9f97264be05",
"metadata": {},
"source": [
"#### Étude du pays d'origine"
]
},
{
"cell_type": "code",
"execution_count": 220,
"id": "ed6374e5-f36c-4f8e-9dba-602715b726f1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.996136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.994838</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.002119</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.831794</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.993978</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny country_fr\n",
"0 10 0.996136\n",
"1 11 0.994838\n",
"2 12 0.002119\n",
"3 13 0.831794\n",
"4 14 0.993978"
]
},
"execution_count": 220,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
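"# Mean of country_fr per company, i.e. France vs. rest of the world (presumably a 0/1 indicator - TODO confirm)\n",
"# NOTE(review): company 12 comes out near 0 while the others are above 0.8 - check how country is coded for that company\n",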
"\n",
"company_country_fr = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"country_fr\"].mean().reset_index()\n",
"company_country_fr"
]
},
{
"cell_type": "code",
"execution_count": 221,
"id": "8d95cdd9-2ab3-4c9a-8442-bb9b98e0dd18",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHGCAYAAACIDqqPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABINElEQVR4nO3deVxU9f7H8fcAAoKAggliCph7LrmUe7jhkqm3zCXNLe1qWl63NDOXvC7pLTMrtXJBy7pmmql5UzIzS819yy1LwQUXRMUVFc7vDx/Mz3FAZ2Bw9PR6Ph7zeDjf8z3nfM53zgxvzzJjMQzDEAAAgEl4uLsAAAAAVyLcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcIFddvHhRZcqUUZs2bZSenu7ucgAAfwOEm1vExsbKYrHI19dX8fHxdtPr1aun8uXLZ2vZX3zxhSZPnpzpNIvFolGjRmVrua6WMQaHDx+2tnXt2lWRkZE2/caNG6fFixffdXkvvfSSQkND9fnnn8vDI/u7W2RkpLp27Zrt+XPL7XUdPnxYFotFsbGxubreO+1PrnSvtud2FotFr7zyyj1dJ+5/P/30kywWi3766adcXY+79nt32LNnj0aNGmXzmZ8b7tVrl4Fwk4nU1FS9+eabLl3mnf4YrV+/Xj169HDp+lxp+PDh+uabb2zaHAk3H330kXbu3Klvv/1WPj4+uVjh/aNw4cJav369mjdvnqvruVfhBrifVKlSRevXr1eVKlXcXYpp7NmzR2+99Vauh5t7jXCTiaZNm+qLL77Qjh077sn6atSooYcffvierCs7HnnkEVWuXNnp+fr06aPff/9d+fPnd31R9ykfHx/VqFFDDz30kLtLAUwnMDBQNWrUUGBgoLtLwX2OcJOJwYMHKyQkREOGDLlr348++khPPvmkChUqJH9/f1WoUEETJ07U9evXrX3q1aun7777TvHx8bJYLNZHhsxOS+3evVutWrVSgQIF5Ovrq8cee0xz5syx6ZNxmO/LL7/UsGHDFB4ersDAQDVq1Ej79++36RsXF6dWrVrp4Ycflq+vr0qUKKGePXsqKSnprtt4+2kpi8WiS5cuac6cOdZtqVevnnX6iRMn1LNnTz388MPy9vZWVFSU3nrrLd24ceOu67p+/boGDx6ssLAw+fn5qU6dOtq4cWOmfR1dz7Rp01SpUiXly5dPAQEBKlOmjN5444271pKamqrRo0erbNmy8vX1VUhIiOrXr69169ZlOU9Wh7P/+OMPdejQQYUKFZKPj4/Kli2rjz76yKaPo6/n3fan7G7v8ePH1bZtWwUEBCgoKEjt2rXTiRMnMu27efNmtWzZUsHBwfL19VXlypX11Vdf3XUdknPj+tlnn6ls2bLy8/NTpUqVtGzZMpvpBw8eVLdu3VSyZEn5+fmpSJEiatGihXbt2mW3rH379qlp06by8/NTwYIF1atXLy1dutTuUHlWp0Dr1atns59LUkpKigYNGqSoqCh5e3urSJEi6tevny5duuTQWHz//fdq2LChgoKC5Ofnp7Jly2r8+PE2fZYsWaKaNWvKz89PAQEBiomJ0fr16236jBo1ShaLRTt37lSbNm0UFBSk4OBgDRgwQDdu3ND+/fvVtGlTBQQEKDIyUhMnTrSZP2Pf+/zzzzVgwACFhYUpb968io6O1rZt22z6bt68We3bt1dkZKTy5s2ryMhIPf/885meyv/ll19Us2ZN+fr6qkiRIho+fLhmzJhhd+o7MjJSTz/9tL7//ntVqVJFefPmVZkyZTRr1qxM67z91EZO9sd7td/f7X2ZcUlAXFycunXrpuDgYPn7+6tFixb666+/7Jb3ww8/qGHDhgoMDJSfn59q166tVatW2fXbt2+fnn/+eYWGhsrHx0fFihVT586dlZqaqtjYWLVp00aSVL9+fetnScbnlzN/N+60njvJyZjeiVeOl2
BCAQEBevPNN/Wvf/1LP/74oxo0aJBl3z///FMdOnSwfrjt2LFDY8eO1b59+6xvzKlTp+qf//yn/vzzT7vTO5nZv3+/atWqpUKFCmnKlCkKCQnR559/rq5du+rkyZMaPHiwTf833nhDtWvX1owZM5SSkqIhQ4aoRYsW2rt3rzw9Pa111qxZUz169FBQUJAOHz6sSZMmqU6dOtq1a5fy5Mnj8PisX79eDRo0UP369TV8+HBJsv5P6sSJE3riiSfk4eGhESNG6JFHHtH69es1ZswYHT58WLNnz77jsl966SXNnTtXgwYNUkxMjHbv3q1nn31WFy5csOnn6Hr++9//qnfv3nr11Vf1zjvvyMPDQwcPHtSePXvuWMeNGzfUrFkzrV27Vv369VODBg1048YNbdiwQQkJCapVq5bD47Vnzx7VqlVLxYoV07vvvquwsDCtWLFCffv2VVJSkkaOHGnT/26v5532p+xu75UrV9SoUSMdP35c48ePV6lSpfTdd9+pXbt2dn1Xr16tpk2bqnr16po+fbqCgoL03//+V+3atdPly5fveG2UM+P63XffadOmTRo9erTy5cuniRMn6plnntH+/ftVvHhxSTf/MIWEhOjtt9/WQw89pOTkZM2ZM0fVq1fXtm3bVLp0aUnSyZMnFR0drTx58mjq1KkKDQ3VvHnzcnRdz+XLlxUdHa2jR4/qjTfeUMWKFfX7779rxIgR2rVrl3744Qeb0Hm7mTNn6qWXXlJ0dLSmT5+uQoUK6cCBA9q9e7e1zxdffKGOHTuqcePG+vLLL5WamqqJEyeqXr16WrVqlerUqWOzzLZt2+qFF15Qz549FRcXZ/2P1g8//KDevXtr0KBB+uKLLzRkyBCVKFFCzz77rM38b7zxhqpUqaIZM2bo/PnzGjVqlOrVq6dt27ZZx/zw4cMqXbq02rdvr+DgYCUmJmratGl6/PHHtWfPHhUsWFCStHPnTsXExKhUqVKaM2eO/Pz8NH36dH3++eeZjseOHTs0cOBAvf766woNDdWMGTPUvXt3lShRQk8++WSW45iT/fFe7ffOvC+7d++umJgYffHFFzpy5IjefPNN1atXTzt37rQeBf/888/VuXNntWrVSnPmzFGePHn08ccfq0mTJlqxYoUaNmxoHdM6deqoYMGCGj16tEqWLKnExEQtWbJE165dU/PmzTVu3Di98cYb+uijj6yn+x555BFJjv/duNt6srosISdjelcGrGbPnm1IMjZt2mSkpqYaxYsXN6pVq2akp6cbhmEY0dHRxqOPPprl/Glpacb169eNuXPnGp6enkZycrJ1WvPmzY2IiIhM55NkjBw50vq8ffv2ho+Pj5GQkGDTr1mzZoafn59x7tw5wzAMY/Xq1YYk46mnnrLp99VXXxmSjPXr12e6vvT0dOP69etGfHy8Icn49ttv7cbg0KFD1rYuXbrY1e7v72906dLFbtk9e/Y08uXLZ8THx9u0v/POO4Yk4/fff8+0JsMwjL179xqSjP79+9u0z5s3z5Bksz5H1/PKK68Y+fPnz3KdWZk7d64hyfj000/v2C8iIsKmrkOHDhmSjNmzZ1vbmjRpYjz88MPG+fPnbeZ95ZVXDF9fX+t+4szrmdX+lN3tnTZtmt2+YBiG8dJLL9ltT5kyZYzKlSsb169ft+n79NNPG4ULFzbS0tKyXI+j4yrJCA0NNVJSUqxtJ06cMDw8PIzx48dnOd+NGzeMa9euGSVLlrTZj4YMGWJYLBZj+/btNv1jYmIMScbq1autbbe/phmio6ON6Oho6/Px48cbHh4exqZNm2z6ff3114YkY/ny5VnWeeHCBSMwMNCoU6eO9fPldmlpaUZ4eLhRoUIFmzG9cOGCUahQIaNWrVrWtpEjRxqSjHfffddmGY899pghyVi0aJG17fr168ZDDz1kPPvss9a2jH2vSpUqNvUcPnzYyJMnj9GjR48st+XGjRvGxYsXDX9/f+P999+3trdp08bw9/c3Tp
8+bbNN5cqVs/uMiYiIMHx9fW3ez1euXDGCg4ONnj172tV56+uVk/3xXu33jrwvMz57n3nmGZv2X3/91ZBkjBkzxjAMw7h06ZIRHBxstGjRwqZfWlqaUalSJeOJJ56wtjVo0MDInz+/cerUqSzXu2DBArsxzcyd/m44sh5Xv3Z3w2mpLHh7e2vMmDHavHnzHQ+Rbdu2TS1btlRISIg8PT2VJ08ede7cWWlpaTpw4EC21v3jjz+qYcOGKlq0qE17165ddfnyZbtD0i1btrR5XrFiRUmyOUx86tQp9erVS0WLFpWXl5fy5MmjiIgISdLevXuzVWdmli1bpvr16ys8PFw3btywPpo1ayZJWrNmTZbzrl69WpLUsWNHm/a2bdvKy8v2IKOj63niiSd07tw5Pf/88/r2228dOg0nSf/73//k6+urF1980bENz8LVq1e1atUqPfPMM/Lz87Op9amnntLVq1e1YcMGm3kceT2zkt3tXb16tQICAuzW3aFDB5vnBw8e1L59+6yv0e3bk5iYaHdK9FbOjGv9+vUVEBBgfR4aGqpChQrZjMONGzc0btw4lStXTt7e3vLy8pK3t7f++OMPm/169erVevTRR1WpUqU7bp8zli1bpvLly+uxxx6zGYcmTZrc9a6QdevWKSUlRb17987y6M7+/ft1/PhxderUyeZOw3z58ql169basGGDLl++bDPP008/bfO8bNmyslgs1veFJHl5ealEiRKZ7k8dOnSwqSciIkK1atWyvjelm1/vkHHkx8vLS15eXsqXL58uXbpkM+Zr1qxRgwYNrEdyJMnDw0Nt27bNdHsfe+wxFStWzPrc19dXpUqVuuN+n9P98V7t9868L2///KtVq5YiIiKsr8G6deuUnJysLl262NSRnp6upk2batOmTbp06ZIuX76sNWvWqG3bttm+BtCRvxvZXU9Ox/RuOC11B+3bt9c777yjYcOG2R2+laSEhATVrVtXpUuX1vvvv6/IyEj5+vpq48aN6tOnj65cuZKt9Z45c0aFCxe2aw8PD7dOv1VISIjN84xDgBnrT09PV+PGjXX8+HENHz5cFSpUkL+/v9LT01WjRo1s15mZkydPaunSpVme5rrTmzpju8LCwmzavby87LbR0fV06tRJN27c0KeffqrWrVsrPT1djz/+uMaMGaOYmJgsazl9+rTCw8NzdPu6dHObbty4oQ8++EAffPDBHWvNcLfX806yu71nzpxRaGioXfvtr8XJkyclSYMGDdKgQYMc2p5bOTOut4+DdHMsbh2HAQMG6KOPPtKQIUMUHR2tAgUKyMPDQz169LDpd+bMGUVFRdkt7/btc8bJkyd18ODBbO3rp0+flqQ73kiQ8X7I6rMgPT1dZ8+elZ+fn7U9ODjYpp+3t7f8/Pzk6+tr156SkmK33MzGIywszObmig4dOmjVqlUaPny4Hn/8cQUGBspiseipp56yG/PM9qnM2iTHXu/b5XR/vFf7vTPvy6xeg4z9IaOW5557Lsv1JScny8PDQ2lpadm+WcXRvxtnz57N1npyOqZ3Q7i5A4vFogkTJigmJkaffPKJ3fTFixfr0qVLWrRokTXNStL27dtztN6QkBAlJibatR8/flySbP4n5Ijdu3drx44dio2NVZcuXaztBw8ezFGdmSlYsKAqVqyosWPHZjo9I6BlJuPD7cSJEypSpIi1/caNG3aBzpn1dOvWTd26ddOlS5f0888/a+TIkXr66ad14MABm9ftVg899JB++eUXpaen5yjgFChQQJ6enurUqZP69OmTaZ/M/ujmRHa2NyQkJNMLt2+/sDJj3xs6dGimgV+S9TqXzLhqXDNkXHswbtw4m/akpCSbu/RCQkIyvUg0szZfX99ML4JMSkqyee8VLFhQefPmtbvo9dbpWcn4H+7Ro0ez7JPxfsjqs8DDw0MFChTIcv7syGqMMmo5f/68li1bppEjR+r111+39klNTVVycrLNfCEhIdY/YHdbR3bldH
+8V/u95Pj7MqvXoESJEja1fPDBB6pRo0am6woNDVVaWpo8PT3vuI/diaN/N4KDg7O1HleM6Z0Qbu6iUaNGiomJ0ejRo+1OE2Ucvr31YinDMPTpp5/aLedu/wO5VcOGDfXNN9/o+PHjNn+k586dKz8/vyx36KxkVqckffzxx04t51ZZbc/TTz+t5cuX65FHHnH6gzfjTpR58+apatWq1vavvvrK7g6o7KzH399fzZo107Vr1/SPf/xDv//+e5Z/7Js1a6Yvv/xSsbGxOTo15efnp/r162vbtm2qWLGivL29s72sWzmyPzmzvfXr19dXX32lJUuW2Byi/+KLL2z6lS5dWiVLltSOHTvsAoUjXDWuGSwWi91+/d133+nYsWPWPwbSze2bOHGiduzYYXNq6vbtk27eubNz506btgMHDmj//v02geXpp5/WuHHjFBIS4nRArVWrloKCgjR9+nS1b98+01NTpUuXVpEiRfTFF19o0KBB1j6XLl3SwoULrXdQudKXX36pAQMGWNcVHx+vdevWqXPnzpJujrdhGHZjPmPGDKWlpdm0RUdHa/ny5TahMD09XQsWLHBZvTndH+/Vfn+ru70v582bp9atW1ufr1u3TvHx8dbvQqtdu7by58+vPXv23PWC+OjoaC1YsEBjx47NMmxndWTY0b8bGXfV3W09t3PlmGaGcOOACRMmqGrVqjp16pQeffRRa3tMTIy8vb31/PPPa/Dgwbp69aqmTZums2fP2i2jQoUKWrRokaZNm6aqVavKw8ND1apVy3R9I0eOtF5TMmLECAUHB2vevHn67rvvNHHiRAUFBTlVf5kyZfTII4/o9ddfl2EYCg4O1tKlSxUXF+fcQNy2PT/99JOWLl2qwoULKyAgQKVLl9bo0aMVFxenWrVqqW/fvipdurSuXr2qw4cPa/ny5Zo+fXqWhy/Lli2rF154QZMnT1aePHnUqFEj7d69W++8847d91o4up6XXnpJefPmVe3atVW4cGGdOHFC48ePV1BQkB5//PEst+/555/X7Nmz1atXL+3fv1/169dXenq6fvvtN5UtW1bt27d3eKzef/991alTR3Xr1tXLL7+syMhIXbhwQQcPHtTSpUv1448/OrysDFntT9nd3s6dO+u9995T586dNXbsWJUsWVLLly/XihUr7Pp+/PHHatasmZo0aaKuXbuqSJEiSk5O1t69e7V169Y7/vFy5bhKNwNGbGysypQpo4oVK2rLli36z3/+Y7eP9evXT7NmzVLz5s01ZswY691S+/bts1tmp06d9MILL6h3795q3bq14uPjNXHiRLvrCfr166eFCxfqySefVP/+/VWxYkWlp6crISFBK1eu1MCBA1W9evVM686XL5/effdd9ejRQ40aNbJ+k/fBgwe1Y8cOffjhh/Lw8NDEiRPVsWNHPf300+rZs6dSU1P1n//8R+fOndPbb7/t1Fg54tSpU3rmmWf00ksv6fz58xo5cqR8fX01dOhQSTfvinzyySf1n//8RwULFlRkZKTWrFmjmTNn2n2f1bBhw7R06VI1bNhQw4YNU968eTV9+nTrbfKuOHIn5Wx/vFf7vTPvy82bN6tHjx5q06aNjhw5omHDhqlIkSLq3bu3pJv7zgcffKAuXbooOTlZzz33nAoVKqTTp09rx44dOn36tKZNmyZJ1jubqlevrtdff10lSpTQyZMntWTJEn388ccKCAiwfuv+J598ooCAAPn6+ioqKsqpvxuOrMfVr91dZftSZBO69W6p23Xo0MGQZHe31NKlS41KlSoZvr6+RpEiRYzXXnvN+N///md3VXhycrLx3HPPGfnz5zcsFotx69DrtrulDMMwdu3aZbRo0cIICgoyvL29jUqVKtlcuW8Y/3/1+YIFC2zaM7tjZ8+ePUZMTIwREBBgFChQwGjTpo2RkJBgt25H75bavn27Ubt2bcPPz8+QZHMXyenTp42+ffsaUVFRRp48eYzg4GCjatWqxrBhw4yLFy
/aje2tUlNTjYEDBxqFChUyfH19jRo1ahjr16/P9A4WR9YzZ84co379+kZoaKjh7e1thIeHG23btjV27tx5xzoM4+bdGiNGjDBKlixpeHt7GyEhIUaDBg2MdevWWfs4crdURvuLL75oFClSxMiTJ4/x0EMPGbVq1bLeAWEYzr2eWe1POdneo0ePGq1btzby5ctnBAQEGK1btzbWrVuX6fbs2LHDaNu2rVGoUCEjT548RlhYmNGgQQNj+vTpd12PI+MqyejTp4/dvLeP99mzZ43u3bsbhQoVMvz8/Iw6deoYa9eutbuzyTD+/z3g6+trBAcHG927dze+/fZbu/dqenq6MXHiRKN48eKGr6+vUa1aNePHH3/MdJkXL1403nzzTaN06dKGt7e3ERQUZFSoUMHo37+/ceLEibuOxfLly43o6GjD39/f8PPzM8qVK2dMmDDBps/ixYuN6tWrG76+voa/v7/RsGFD49dff7Xpk3G31K13JhnGzfeuv7+/3Xpvv/MzY9/77LPPjL59+xoPPfSQ4ePjY9StW9fYvHmzzbwZ+0mBAgWMgIAAo2nTpsbu3bszfY+uXbvWqF69uuHj42OEhYUZr732mjFhwgRDkvWuT8O4+bo2b9480zpvHfPM7rgxjJztj/div3fkfZnx2bty5UqjU6dORv78+Y28efMaTz31lPHHH3/YLXPNmjVG8+bNjeDgYCNPnjxGkSJFjObNm9t9fuzZs8do06aNERISYnh7exvFihUzunbtaly9etXaZ/LkyUZUVJTh6elps92O/t1wZD258drdicUwDCP70QgAHlw//fST6tevr9WrV9t9Qd/fScY4LFiw4I4XqrpC48aNdfjw4WzfTWpWsbGx6tatmzZt2pTlUX04jtNSAIBcMWDAAFWuXFlFixZVcnKy5s2bp7i4OM2cOdPdpcHkCDcAgFyRlpamESNG6MSJE7JYLCpXrpw+++wzvfDCC+4uDSbHaSkAAGAqfEMxAAAwFcINAAAwFcINAAAwlb/dBcXp6ek6fvy4AgICsvzBOgAAcH8xDEMXLlxw6Pfp/nbh5vjx43Y/owAAAB4MR44cuesPdf7twk3G10AfOXLE7iv9AQDA/SklJUVFixbN8uccbvW3CzcZp6ICAwMJNwAAPGAcuaSEC4oBAICpEG4AAICpEG4AAICpEG4AAICpEG4AAICpEG4AAICpEG4AAICpEG4AAICpEG4AAICpuDXc/Pzzz2rRooXCw8NlsVi0ePHiu86zZs0aVa1aVb6+vipevLimT5+e+4UCAIAHhlvDzaVLl1SpUiV9+OGHDvU/dOiQnnrqKdWtW1fbtm3TG2+8ob59+2rhwoW5XCkAAHhQuPW3pZo1a6ZmzZo53H/69OkqVqyYJk+eLEkqW7asNm/erHfeeUetW7fOpSoBAMCD5IG65mb9+vVq3LixTVuTJk20efNmXb9+PdN5UlNTlZKSYvMAAADm9UCFmxMnTig0NNSmLTQ0VDdu3FBSUlKm84wfP15BQUHWR9GiRe9FqQAAwE0eqHAj2f/UuWEYmbZnGDp0qM6fP299HDlyJNdrBAAA7uPWa26cFRYWphMnTti0nTp1Sl5eXgoJCcl0Hh8fH/n4+NyL8iRJka9/d8/W9aA7/HZzly2LcXecK8cd+Dvhc8Zx7v6ceaCO3NSsWVNxcXE2bStXrlS1atWUJ08eN1UFAADuJ24NNxcvXtT27du1fft2STdv9d6+fbsSEhIk3Tyl1LlzZ2v/Xr16KT4+XgMGDNDevXs1a9YszZw5U4MGDXJH+QAA4D7k1tNSmzdvVv369a3PBwwYIEnq0qWLYmNjlZiYaA06khQVFaXly5erf//++uijjxQeHq4pU6ZwGzgAALBya7ipV6+e9YLgzMTGxtq1RUdHa+vWrblYFQAAeJA9UNfcAAAA3A3hBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBg
AAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmIqXuwsAADgn8vXv3F3CA+Pw283dXQLcgCM3AADAVAg3AADAVAg3AADAVAg3AADAVAg3AADAVAg3AADAVNwebqZOnaqoqCj5+vqqatWqWrt27R37z5s3T5UqVZKfn58KFy6sbt266cyZM/eoWgAAcL9za7iZP3+++vXrp2HDhmnbtm2qW7eumjVrpoSEhEz7//LLL+rcubO6d++u33//XQsWLNCmTZvUo0ePe1w5AAC4X7k13EyaNEndu3dXjx49VLZsWU2ePFlFixbVtGnTMu2/YcMGRUZGqm/fvoqKilKdOnXUs2dPbd68+R5XDgAA7lduCzfXrl3Tli1b1LhxY5v2xo0ba926dZnOU6tWLR09elTLly+XYRg6efKkvv76azVvnvU3UKampiolJcXmAQAAzMtt4SYpKUlpaWkKDQ21aQ8NDdWJEycynadWrVqaN2+e2rVrJ29vb4WFhSl//vz64IMPslzP+PHjFRQUZH0ULVrUpdsBAADuL26/oNhisdg8NwzDri3Dnj171LdvX40YMUJbtmzR999/r0OHDqlXr15ZLn/o0KE6f/689XHkyBGX1g8AAO4vbvvhzIIFC8rT09PuKM2pU6fsjuZkGD9+vGrXrq3XXntNklSxYkX5+/urbt26GjNmjAoXLmw3j4+Pj3x8fFy/AQAA4L7ktiM33t7eqlq1quLi4mza4+LiVKtWrUznuXz5sjw8bEv29PSUdPOIDwAAgFtPSw0YMEAzZszQrFmztHfvXvXv318JCQnW00xDhw5V586drf1btGihRYsWadq0afrrr7/066+/qm/fvnriiScUHh7urs0AAAD3EbedlpKkdu3a6cyZMxo9erQSExNVvnx5LV++XBEREZKkxMREm++86dq1qy5cuKAPP/xQAwcOVP78+dWgQQNNmDDBXZsAAADuM24NN5LUu3dv9e7dO9NpsbGxdm2vvvqqXn311VyuCgAAPKjcfrcUAACAKxFuAACAqRBuAACAqRBuAACAqRBuAACAqRBuAACAqRBuAACAqRBuAACAqRBuAACAqRBuAACAqRBuAACAqRBuAACAqRBuAACAqeQ43KSlpWn79u06e/asK+oBAADIEafDTb9+/TRz5kxJN4NNdHS0qlSpoqJFi+qnn35ydX0AAABOcTrcfP3116pUqZIkaenSpTp06JD27dunfv36adiwYS4vEAAAwBlOh5ukpCSFhYVJkpYvX642bdqoVKlS6t69u3bt2uXyAgEAAJzhdLgJDQ3Vnj17lJaWpu+//16NGjWSJF2+fFmenp4uLxAAAMAZXs7O0K1bN7Vt21aFCxeWxWJRTEyMJOm3335TmTJlXF4gAACAM5wON6NGjVL58uV15MgRtWnTRj4+PpIkT09Pvf766y4vEAAAwBlOhxtJeu655+zaunTpkuNiAAAAcsqhcDNlyhT985//lK+vr6ZMmXLHvn379nVJYQAAANnhULh577331LFjR/n6+uq9997Lsp/FYiHcAAAAt3Io3Bw6dCjTfwMAANxv+G0pAABgKtm6oPjo0aNasmSJEhISdO3aNZtpkyZNcklhAAAA2eF0uFm1apVatmypqKgo7d+/X+XLl9fhw4dlGIaqVKmSGzUCAAA4zOnTUkOHDtXAgQO1e/du+f
r6auHChTpy5Iiio6PVpk2b3KgRAADAYU6Hm71791q/08bLy0tXrlxRvnz5NHr0aE2YMMHlBQIAADjD6XDj7++v1NRUSVJ4eLj+/PNP67SkpCTXVQYAAJANTl9zU6NGDf36668qV66cmjdvroEDB2rXrl1atGiRatSokRs1AgAAOMzpcDNp0iRdvHhR0s3fmbp48aLmz5+vEiVK3PEL/gAAAO4Fp8NN8eLFrf/28/PT1KlTXVoQAABATjh9zc2mTZv022+/2bX/9ttv2rx5s0uKAgAAyC6nw02fPn105MgRu/Zjx46pT58+Onv2rL799lslJia6pEAAAABnOH1aas+ePZl+WV/lypW1adMmtWjRQmlpaUpJSdHvv//ukiIBAAAc5XS48fHx0cmTJ22uvZGkxMRE+fj46JdfftFff/2lcuXKuaxIAAAARzl9WiomJkZDhw7V+fPnrW3nzp3T0KFD1aJFC0k3A9C0adNcVyUAAICDnD5y8+677+rJJ59URESEKleuLEnavn27QkND9fnnn0uSihQpom7durm2UgAAAAc4HW6KFCminTt3at68edqxY4fy5s2rbt266fnnn1eePHlyo0YAAACHOR1upJs/wfDPf/7T1bUAAADkWLbCzYEDB/TTTz/p1KlTSk9Pt5k2YsQIlxQGAACQHU6Hm08//VQvv/yyChYsqLCwMFksFus0i8VCuAEAAG7ldLgZM2aMxo4dqyFDhuRGPQAAADni9K3gZ8+eVZs2bXKjFgAAgBxzOty0adNGK1euzI1aAAAAcszp01IlSpTQ8OHDtWHDBlWoUMHu9u++ffu6rDgAAABnOR1uPvnkE+XLl09r1qzRmjVrbKZZLBbCDQAAcCunw82hQ4dyow4AAACXcPqaGwAAgPtZtr7E7+jRo1qyZIkSEhJ07do1m2mTJk1ySWEAAADZ4XS4WbVqlVq2bKmoqCjt379f5cuX1+HDh2UYhqpUqZIbNQIAADjM6dNSQ4cO1cCBA7V79275+vpq4cKFOnLkiKKjo/n+GwAA4HZOh5u9e/eqS5cukiQvLy9duXJF+fLl0+jRozVhwgSXFwgAAOAMp8ONv7+/UlNTJUnh4eH6888/rdOSkpJcVxkAAEA2OH3NTY0aNfTrr7+qXLlyat68uQYOHKhdu3Zp0aJFqlGjRm7UCAAA4DCnw82kSZN08eJFSdKoUaN08eJFzZ8/XyVKlNB7773n8gIBAACc4VS4SUtL05EjR1SxYkVJkp+fn6ZOnZorhQEAAGSHU9fceHp6qkmTJjp37lwulQMAAJAzTl9QXKFCBf31118uK2Dq1KmKioqSr6+vqlatqrVr196xf2pqqoYNG6aIiAj5+PjokUce0axZs1xWDwAAeLA5fc3N2LFjNWjQIP373/9W1apV5e/vbzM9MDDQ4WXNnz9f/fr109SpU1W7dm19/PHHatasmfbs2aNixYplOk/btm118uRJzZw5UyVKlNCpU6d048YNZzcDAACYlNPhpmnTppKkli1bymKxWNsNw5DFYlFaWprDy5o0aZK6d++uHj16SJImT56sFStWaNq0aRo/frxd/++//15r1qzRX3/9peDgYElSZGSks5sAAABMzOlws3r1apes+Nq1a9qyZYtef/11m/bGjRtr3bp1mc6zZMkSVatWTRMnTtRnn30mf39/tWzZUv/+97+VN29el9QFAAAebA6Fm2effVaxsbEKDAxUfHy82rVrJx8fnxytOCkpSWlpaQoNDbVpDw0N1YkTJzKd56+//tIvv/wiX19fffPNN0pKSlLv3r2VnJyc5XU3qamp1i8dlKSUlJQc1Q0AAO5vDl1QvGzZMl26dEmS1K1bN50/f95lBdx6akv6/9NbmUlPT5fFYtG8efP0xBNP6KmnntKkSZMUGxurK1euZDrP+PHjFRQUZH0ULVrUZbUDAID7j0NHbsqUKaOhQ4eqfv36MgxDX331VZYXDnfu3NmhFRcsWFCenp52R2
lOnTpldzQnQ+HChVWkSBEFBQVZ28qWLSvDMHT06FGVLFnSbp6hQ4dqwIAB1ucpKSkEHAAATMyhcDN9+nQNGDBA3333nSwWi958881Mj65YLBaHw423t7eqVq2quLg4PfPMM9b2uLg4tWrVKtN5ateurQULFujixYvKly+fJOnAgQPy8PDQww8/nOk8Pj4+OT6FBgAAHhwOnZaqVauWNmzYoNOnT8swDB04cEBnz561eyQnJzu18gEDBmjGjBmaNWuW9u7dq/79+yshIUG9evWSdPOoy61hqUOHDgoJCVG3bt20Z88e/fzzz3rttdf04osvckExAACQlI27pQ4dOqSHHnrIJStv166dzpw5o9GjRysxMVHly5fX8uXLFRERIUlKTExUQkKCtX++fPkUFxenV199VdWqVVNISIjatm2rMWPGuKQeAADw4HM63GQED1fp3bu3evfunem02NhYu7YyZcooLi7OpTUAAADzcPrnFwAAAO5nhBsAAGAqhBsAAGAqToebK1eu6PLly9bn8fHxmjx5slauXOnSwgAAALLD6XDTqlUrzZ07V5J07tw5Va9eXe+++65atWqladOmubxAAAAAZzgdbrZu3aq6detKkr7++muFhoYqPj5ec+fO1ZQpU1xeIAAAgDOcDjeXL19WQECAJGnlypV69tln5eHhoRo1aig+Pt7lBQIAADjD6XBTokQJLV68WEeOHNGKFSvUuHFjSTd/Eyqr35sCAAC4V5wONyNGjNCgQYMUGRmp6tWrq2bNmpJuHsWpXLmyywsEAABwhtPfUPzcc8+pTp06SkxMVKVKlaztDRs21LPPPuvS4gAAAJzl9JGbF198Uf7+/qpcubI8PP5/9kcffVQTJkxwaXEAAADOcjrczJkzR1euXLFrv3LlivUWcQAAAHdx+LRUSkqKDMOQYRi6cOGCfH19rdPS0tK0fPlyFSpUKFeKBAAAcJTD4SZ//vyyWCyyWCwqVaqU3XSLxaK33nrLpcUBAAA4y+Fws3r1ahmGoQYNGmjhwoUKDg62TvP29lZERITCw8NzpUgAAABHORxuoqOjJUmHDh1S0aJFbS4mBgAAuF84fSt4RESEzp07p40bN+rUqVNKT0+3md65c2eXFQcAAOAsp8PN0qVL1bFjR126dEkBAQGyWCzWaRaLhXADAADcyulzSwMHDtSLL76oCxcu6Ny5czp79qz1kZycnBs1AgAAOMzpcHPs2DH17dtXfn5+uVEPAABAjjgdbpo0aaLNmzfnRi0AAAA55vQ1N82bN9drr72mPXv2qEKFCsqTJ4/N9JYtW7qsOAAAAGc5HW5eeuklSdLo0aPtplksFqWlpeW8KgAAgGxyOtzcfus3AADA/SRH38R39epVV9UBAADgEk6Hm7S0NP373/9WkSJFlC9fPv3111+SpOHDh2vmzJkuLxAAAMAZToebsWPHKjY2VhMnTpS3t7e1vUKFCpoxY4ZLiwMAAHCW0+Fm7ty5+uSTT9SxY0d5enpa2ytWrKh9+/a5tDgAAABnZetL/EqUKGHXnp6eruvXr7ukKAAAgOxyOtw8+uijWrt2rV37ggULVLlyZZcUBQAAkF1O3wo+cuRIderUSceOHVN6eroWLVqk/fv3a+7cuVq2bFlu1AgAAOAwp4/ctGjRQvPnz9fy5ctlsVg0YsQI7d27V0uXLlVMTExu1AgAAOAwp4/cSDd/X6pJkyaurgUAACDHcvQlfgAAAPcbh47cBAcH68CBAypYsKAKFCggi8WSZd/k5GSXFQcAAOAsh8LNe++9p4CAAEnS5MmTc7MeAACAHHEo3HTp0iXTfwMAANxvHAo3KSkpDi8wMDAw28UAAADklEPhJn/+/He8zkaSDMOQxWJRWlqaSwoDAADIDofCzerVq3O7DgAAAJdwKNxER0fndh0AAAAu4fT33MyePVsLFiywa1+wYIHmzJnjkqIAAACyy+lw8/bbb6tgwYJ27YUKFdK4ceNcUhQAAEB2OR
1u4uPjFRUVZdceERGhhIQElxQFAACQXU6Hm0KFCmnnzp127Tt27FBISIhLigIAAMgup8NN+/bt1bdvX61evVppaWlKS0vTjz/+qH/9619q3759btQIAADgMKd/FXzMmDGKj49Xw4YN5eV1c/b09HR17tyZa24AAIDbOR1uvL29NX/+fI0ZM0bbt29X3rx5VaFCBUVERORGfQAAAE5xOtxkKFmypEqWLOnKWgAAAHLM6WtuAAAA7meEGwAAYCqEGwAAYCqEGwAAYCrZCjdr167VCy+8oJo1a+rYsWOSpM8++0y//PKLS4sDAABwltPhZuHChWrSpIny5s2rbdu2KTU1VZJ04cIFvucGAAC4ndPhZsyYMZo+fbo+/fRT5cmTx9peq1Ytbd261aXFAQAAOMvpcLN//349+eSTdu2BgYE6d+6cK2oCAADINqfDTeHChXXw4EG79l9++UXFixd3SVEAAADZ5XS46dmzp/71r3/pt99+k8Vi0fHjxzVv3jwNGjRIvXv3zo0aAQAAHOb0zy8MHjxY58+fV/369XX16lU9+eST8vHx0aBBg/TKK6/kRo0AAAAOy9at4GPHjlVSUpI2btyoDRs26PTp0/r3v/+drQKmTp2qqKgo+fr6qmrVqlq7dq1D8/3666/y8vLSY489lq31AgAAc8r2l/j5+fmpWrVqeuKJJ5QvX75sLWP+/Pnq16+fhg0bpm3btqlu3bpq1qyZEhIS7jjf+fPn1blzZzVs2DBb6wUAAObl0GmpZ5991uEFLlq0yOG+kyZNUvfu3dWjRw9J0uTJk7VixQpNmzZN48ePz3K+nj17qkOHDvL09NTixYsdXh8AADA/h47cBAUFWR+BgYFatWqVNm/ebJ2+ZcsWrVq1SkFBQQ6v+Nq1a9qyZYsaN25s0964cWOtW7cuy/lmz56tP//8UyNHjnRoPampqUpJSbF5AAAA83LoyM3s2bOt/x4yZIjatm2r6dOny9PTU5KUlpam3r17KzAw0OEVJyUlKS0tTaGhoTbtoaGhOnHiRKbz/PHHH3r99de1du1aeXk5di30+PHj9dZbbzlcFwAAeLA5fc3NrFmzNGjQIGuwkSRPT08NGDBAs2bNcroAi8Vi89wwDLs26WaA6tChg9566y2VKlXK4eUPHTpU58+ftz6OHDnidI0AAODB4fSt4Ddu3NDevXtVunRpm/a9e/cqPT3d4eUULFhQnp6edkdpTp06ZXc0R7r521WbN2/Wtm3brLecp6enyzAMeXl5aeXKlWrQoIHdfD4+PvLx8XG4LgAA8GBzOtx069ZNL774og4ePKgaNWpIkjZs2KC3335b3bp1c3g53t7eqlq1quLi4vTMM89Y2+Pi4tSqVSu7/oGBgdq1a5dN29SpU/Xjjz/q66+/VlRUlLObAgAATMjpcPPOO+8oLCxM7733nhITEyXd/EmGwYMHa+DAgU4ta8CAAerUqZOqVaummjVr6pNPPlFCQoJ69eol6eYppWPHjmnu3Lny8PBQ+fLlbeYvVKiQfH197doBAMDfl9PhxsPDQ4MHD9bgwYOtdx45cyHxrdq1a6czZ85o9OjRSkxMVPny5bV8+XJFRERIkhITE+/6nTcAAAC3cjrc3Cq7oeZWvXv3zvI3qWJjY+8476hRozRq1Kgc1wAAAMwj299QDAAAcD8i3AAAAFMh3AAAAFNxOtzMnTtXqampdu3Xrl3T3LlzXVIUAABAdjkdbrp166bz58/btV+4cMGp77kBAADIDU6Hm6x+HuHo0aNO/XAmAABAbnD4VvDKlSvLYrHIYrGoYcOGNj9cmZaWpkOHDqlp06a5UiQAAICjHA43//jHPyRJ27dvV5MmTZQvXz7rNG9vb0VGRqp169YuLxAAAMAZDoebkSNHKi0tTREREWrSpIkKFy6cm3UBAABki1PX3Hh6eqpXr166evVqbtUDAACQI05fUFyhQgX99ddfuVELAABAjjkdbsaOHatBgwZp2bJlSkxMVE
pKis0DAADAnZz+4cyMO6Jatmxpc0t4xi3iaWlprqsOAADASU6Hm9WrV+dGHQAAAC7hdLiJjo7OjToAAABcwulwk+Hy5ctKSEjQtWvXbNorVqyY46IAAACyy+lwc/r0aXXr1k3/+9//Mp3ONTcAAMCdnL5bql+/fjp79qw2bNigvHnz6vvvv9ecOXNUsmRJLVmyJDdqBAAAcJjTR25+/PFHffvtt3r88cfl4eGhiIgIxcTEKDAwUOPHj1fz5s1zo04AAACHOH3k5tKlSypUqJAkKTg4WKdPn5Z088v9tm7d6trqAAAAnOR0uCldurT2798vSXrsscf08ccf69ixY5o+fTq/NwUAANzO6dNS/fr10/HjxyXd/DHNJk2aaN68efL29lZsbKyr6wMAAHCK0+GmY8eO1n9XrlxZhw8f1r59+1SsWDEVLFjQpcUBAAA4y+HTUpcvX1afPn1UpEgRFSpUSB06dFBSUpL8/PxUpUoVgg0AALgvOBxuRo4cqdjYWDVv3lzt27dXXFycXn755dysDQAAwGkOn5ZatGiRZs6cqfbt20uSXnjhBdWuXVtpaWny9PTMtQIBAACc4fCRmyNHjqhu3brW50888YS8vLysFxcDAADcDxwON2lpafL29rZp8/Ly0o0bN1xeFAAAQHY5fFrKMAx17dpVPj4+1rarV6+qV69e8vf3t7YtWrTItRUCAAA4weFw06VLF7u2F154waXFAAAA5JTD4Wb27Nm5WQcAAIBLOP3zCwAAAPczwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVt4ebqVOnKioqSr6+vqpatarWrl2bZd9FixYpJiZGDz30kAIDA1WzZk2tWLHiHlYLAADud24NN/Pnz1e/fv00bNgwbdu2TXXr1lWzZs2UkJCQaf+ff/5ZMTExWr58ubZs2aL69eurRYsW2rZt2z2uHAAA3K/cGm4mTZqk7t27q0ePHipbtqwmT56sokWLatq0aZn2nzx5sgYPHqzHH39cJUuW1Lhx41SyZEktXbr0HlcOAADuV24LN9euXdOWLVvUuHFjm/bGjRtr3bp1Di0jPT1dFy5cUHBwcJZ9UlNTlZKSYvMAAADm5bZwk5SUpLS0NIWGhtq0h4aG6sSJEw4t491339WlS5fUtm3bLPuMHz9eQUFB1kfRokVzVDcAALi/uf2CYovFYvPcMAy7tsx8+eWXGjVqlObPn69ChQpl2W/o0KE6f/689XHkyJEc1wwAAO5fXu5accGCBeXp6Wl3lObUqVN2R3NuN3/+fHXv3l0LFixQo0aN7tjXx8dHPj4+Oa4XAAA8GNx25Mbb21tVq1ZVXFycTXtcXJxq1aqV5Xxffvmlunbtqi+++ELNmzfP7TIBAMADxm1HbiRpwIAB6tSpk6pVq6aaNWvqk08+UUJCgnr16iXp5imlY8eOae7cuZJuBpvOnTvr/fffV40aNaxHffLmzaugoCC3bQcAALh/uDXctGvXTmfOnNHo0aOVmJio8uXLa/ny5YqIiJAkJSYm2nznzccff6wbN26oT58+6tOnj7W9S5cuio2NvdflAwCA+5Bbw40k9e7dW71798502u2B5aeffsr9ggAAwAPN7XdLAQAAuBLhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBg
AAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmArhBgAAmIrbw83UqVMVFRUlX19fVa1aVWvXrr1j/zVr1qhq1ary9fVV8eLFNX369HtUKQAAeBC4NdzMnz9f/fr107Bhw7Rt2zbVrVtXzZo1U0JCQqb9Dx06pKeeekp169bVtm3b9MYbb6hv375auHDhPa4cAADcr9wabiZNmqTu3burR48eKlu2rCZPnqyiRYtq2rRpmfafPn26ihUrpsmTJ6ts2bLq0aOHXnzxRb3zzjv3uHIAAHC/clu4uXbtmrZs2aLGjRvbtDdu3Fjr1q3LdJ7169fb9W/SpIk2b96s69ev51qtAADgweHlrhUnJSUpLS1NoaGhNu2hoaE6ceJEpvOcOHEi0/43btxQUlKSChcubDdPamqqUlNTrc/Pnz8vSUpJScnpJmQqPfVyrizXjFz5GjDujnPluJcfucJlyzK73W81cdmy2N8dx+eMe+TG39iMZRqGcde+bgs3GSwWi81zwzDs2u7WP7P2DOPHj9dbb71l1160aFFnS4WLBU12dwV/T4y7ezDu7sG4u0dujvuFCxcUFBR0xz5uCzcFCxaUp6en3VGaU6dO2R2dyRAWFpZpfy8vL4WEhGQ6z9ChQzVgwADr8/T0dCUnJyskJOSOIcpMUlJSVLRoUR05ckSBgYHuLudvgTF3D8bdPRh39/i7jbthGLpw4YLCw8Pv2tdt4cbb21tVq1ZVXFycnnnmGWt7XFycWrVqlek8NWvW1NKlS23aVq5cqWrVqilPnjyZzuPj4yMfHx+btvz58+es+AdUYGDg3+INcD9hzN2DcXcPxt09/k7jfrcjNhncerfUgAEDNGPGDM2aNUt79+5V//79lZCQoF69ekm6edSlc+fO1v69evVSfHy8BgwYoL1792rWrFmaOXOmBg0a5K5NAAAA9xm3XnPTrl07nTlzRqNHj1ZiYqLKly+v5cuXKyIiQpKUmJho8503UVFRWr58ufr376+PPvpI4eHhmjJlilq3bu2uTQAAAPcZt19Q3Lt3b/Xu3TvTabGxsXZt0dHR2rp1ay5XZS4+Pj4aOXKk3ek55B7G3D0Yd/dg3N2Dcc+axXDknioAAIAHhNt/WwoAAMCVCDcAAMBUCDcAAMBUCDcAAMBUCDcm8fPPP6tFixYKDw+XxWLR4sWLbaYbhqFRo0YpPDxcefPmVb169fT777+7p1gTudu4L1q0SE2aNFHBggVlsVi0fft2t9RpNnca9+vXr2vIkCGqUKGC/P39FR4ers6dO+v48ePuK9gk7ra/jxo1SmXKlJG/v78KFCigRo0a6bfffnNPsSZyt3G/Vc+ePWWxWDR58uR7Vt/9iHBjEpcuXVKlSpX04YcfZjp94sSJmjRpkj788ENt2rRJYWFhiomJ0YULF+5xpeZyt3G/dOmSateurbfffvseV2Zudxr3y5cva+vWrRo+fLi2bt2qRYsW6cCBA2rZsqUbKjWXu+3vpUqV0ocffqhdu3bpl19+UWRkpBo3bqzTp0/f40rN5W7jnmHx4sX67bffHPp5AtMzYDqSjG+++cb6PD093QgLCzPefvtta9vVq1eNoKAgY/r06W6o0JxuH/dbHTp0yJBkbNu27Z7W9Hdwp3HPsHHjRkOSER8ff2+K+htwZNzPnz9vSDJ++OGHe1PU30BW43706FGjSJEixu7du42IiAjjvffeu+e13U84cvM3cOjQIZ04cUKNGze2tvn4+Cg6Olrr1q1zY2XAvXH+/HlZLJ
a/7e/KucO1a9f0ySefKCgoSJUqVXJ3OaaWnp6uTp066bXXXtOjjz7q7nLuC27/hmLkvoxfUr/919ZDQ0MVHx/vjpKAe+bq1at6/fXX1aFDh7/Njwu607Jly9S+fXtdvnxZhQsXVlxcnAoWLOjuskxtwoQJ8vLyUt++fd1dyn2DIzd/IxaLxea5YRh2bYCZXL9+Xe3bt1d6erqmTp3q7nL+FurXr6/t27dr3bp1atq0qdq2batTp065uyzT2rJli95//33FxsbyeX4Lws3fQFhYmKT/P4KT4dSpU3ZHcwCzuH79utq2batDhw4pLi6Oozb3iL+/v0qUKKEaNWpo5syZ8vLy0syZM91dlmmtXbtWp06dUrFixeTl5SUvLy/Fx8dr4MCBioyMdHd5bkO4+RuIiopSWFiY4uLirG3Xrl3TmjVrVKtWLTdWBuSOjGDzxx9/6IcfflBISIi7S/rbMgxDqamp7i7DtDp16qSdO3dq+/bt1kd4eLhee+01rVixwt3luQ3X3JjExYsXdfDgQevzQ4cOafv27QoODlaxYsXUr18/jRs3TiVLllTJkiU1btw4+fn5qUOHDm6s+sF3t3FPTk5WQkKC9TtW9u/fL+nm0bSMI2pw3p3GPTw8XM8995y2bt2qZcuWKS0tzXrUMjg4WN7e3u4q+4F3p3EPCQnR2LFj1bJlSxUuXFhnzpzR1KlTdfToUbVp08aNVT/47vY5c3t4z5Mnj8LCwlS6dOl7Xer9w923a8E1Vq9ebUiye3Tp0sUwjJu3g48cOdIICwszfHx8jCeffNLYtWuXe4s2gbuN++zZszOdPnLkSLfW/aC707hn3Haf2WP16tXuLv2Bdqdxv3LlivHMM88Y4eHhhre3t1G4cGGjZcuWxsaNG91d9gPvbp8zt+NWcMOwGIZh5G58AgAAuHe45gYAAJgK4QYAAJgK4QYAAJgK4QYAAJgK4QYAAJgK4QYAAJgK4QYAAJgK4QYAAJgK4QZArjhx4oReffVVFS9eXD4+PipatKhatGihVatWubs0ACbHb0sBcLnDhw+rdu3ayp8/vyZOnKiKFSvq+vXrWrFihfr06aN9+/a5u0QAJsaRGwAu17t3b1ksFm3cuFHPPfecSpUqpUcffVQDBgzQhg0bJEkJCQlq1aqV8uXLp8DAQLVt21YnT560LmPUqFF67LHHNGvWLBUrVkz58uXTyy+/rLS0NE2cOFFhYWEqVKiQxo4da7Nui8WiadOmqVmzZsqbN6+ioqK0YMECmz5DhgxRqVKl5Ofnp+LFi2v48OG6fv263bo/++wzRUZGKigoSO3bt9eFCxckSXPnzlVISIjdr123bt1anTt3dulYAnAe4QaASyUnJ+v7779Xnz595O/vbzc9f/78MgxD//jHP5ScnKw1a9YoLi5Of/75p9q1a2fT988//9T//vc/ff/99/ryyy81a9YsNW/eXEePHtWaNWs0YcIEvfnmm9bAlGH48OFq3bq1duzYoRdeeEHPP/+89u7da50eEBCg2NhY7dmzR++//74+/fRTvffee3brXrx4sZYtW6Zly5ZpzZo1evvttyVJbdq0UVpampYsWWLtn5SUpGXLlqlbt245HkMAOeTmH+4EYDK//fabIclYtGhRln1WrlxpeHp6GgkJCda233//3ZBk/RXpkSNHGn5+fkZKSoq1T5MmTYzIyEgjLS3N2la6dGlj/Pjx1ueSjF69etmsr3r16sbLL7+cZT0TJ040qlatan2e2bpfe+01o3r16tbnL7/8stGsWTPr88mTJxvFixc30tPTs1wPgHuDa24AuJRhGJJunh7Kyt69e1W0aFEVLVrU2lauXDnlz59fe/fu1eOPPy5JioyMVEBAgLVPaGioPD095eHhYdN26tQpm+XXrFnT7vn27dutz7/++mtNnjxZBw8e1MWLF3Xjxg0FBgbazHP7ugsXLmyznpdeekmPP/64jh07piJFimj27Nnq2rXrHbcbwL3BaSkALl
WyZElZLBab00C3Mwwj0xBwe3uePHlsplsslkzb0tPT71pXxnI3bNig9u3bq1mzZlq2bJm2bdumYcOG6dq1azb977aeypUrq1KlSpo7d662bt2qXbt2qWvXrnetA0DuI9wAcKng4GA1adJEH330kS5dumQ3/dy5cypXrpwSEhJ05MgRa/uePXt0/vx5lS1bNsc13H4NzoYNG1SmTBlJ0q+//qqIiAgNGzZM1apVU8mSJRUfH5+t9fTo0UOzZ8/WrFmz1KhRI5sjUQDch3ADwOWmTp2qtLQ0PfHEE1q4cKH++OMP7d27V1OmTFHNmjXVqFEjVaxYUR07dtTWrVu1ceNGde7cWdHR0apWrVqO179gwQLNmjVLBw4c0MiRI7Vx40a98sorkqQSJUooISFB//3vf/Xnn39qypQp+uabb7K1no4dO+rYsWP69NNP9eKLL+a4bgCuQbgB4HJRUVHaunWr6tevr4EDB6p8+fKKiYnRqlWrNG3aNFksFi1evFgFChTQk08+qUaNGql48eKaP3++S9b/1ltv6b///a8qVqyoOXPmaN68eSpXrpwkqVWrVurfv79eeeUVPfbYY1q3bp2GDx+erfUEBgaqdevWypcvn/7xj3+4pHYAOWcxMq7+AwATsFgs+uabb+5Z2IiJiVHZsmU1ZcqUe7I+AHfH3VIAkA3JyclauXKlfvzxR3344YfuLgfALQg3AJANVapU0dmzZzVhwgSVLl3a3eUAuAWnpQAAgKlwQTEAADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADAVwg0AADCV/wPJ0nECepCwAAAAAABJRU5ErkJggg==",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_country_fr[\"number_compagny\"], company_country_fr[\"country_fr\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients français\")\n",
"plt.title(\"Nationalité des clients de chaque compagnie de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 222,
"id": "b459f81f-6d30-44fa-ad65-e85acbf12fd2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>99.833259</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>99.935317</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>99.486493</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>99.808521</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.155933</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.079799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>82.894264</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>94.744832</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>99.238475</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>99.032154</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased country_fr\n",
"0 10 0.0 99.833259\n",
"1 10 1.0 99.935317\n",
"2 11 0.0 99.486493\n",
"3 11 1.0 99.808521\n",
"4 12 0.0 0.155933\n",
"5 12 1.0 0.079799\n",
"6 13 0.0 82.894264\n",
"7 13 1.0 94.744832\n",
"8 14 0.0 99.238475\n",
"9 14 1.0 99.032154"
]
},
"execution_count": 222,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# graphique sur le train set\n",
"\n",
"company_country_fr = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"country_fr\"]].mean().reset_index()\n",
"company_country_fr[\"country_fr\"] = 100 * company_country_fr[\"country_fr\"]\n",
"company_country_fr"
]
},
{
"cell_type": "code",
"execution_count": 223,
"id": "4a037b48-1d65-4ed3-a012-7d6f5a312533",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# generic function to generate the barplot ON THE TRAIN SET - nationality\n",
"\n",
"multiple_barplot(company_country_fr, x=\"number_company\", y=\"country_fr\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de clients français (%)\", \n",
" title = \"Part de clients français des compagnies de spectacle (train set)\")\n",
"\n",
"# save in the s3\n",
"\n",
"FILE_NAME = \"nationality_fr_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "ecfd112e-270a-4223-b80f-7e95e57d199d",
"metadata": {},
"source": [
"### 2. campaigns_information"
]
},
{
"cell_type": "code",
"execution_count": 189,
"id": "b37e7ddf-321a-4ebe-9742-9e760a541d29",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 688953\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_campaigns 0\n",
"nb_campaigns_opened 0\n",
"time_to_open 301495\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",campaigns_information_spectacle.shape[0])\n",
"campaigns_information_spectacle.isna().sum()"
]
},
{
"cell_type": "markdown",
"id": "47c15a1d-bef8-4105-87f3-607958667569",
"metadata": {},
"source": [
"#### Part de clients n'ouvrant jamais les mails"
]
},
{
"cell_type": "code",
"execution_count": 224,
"id": "de1ecaac-25bb-4853-b8ab-3ef2ca6917ed",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>number_compagny</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>29</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>37</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>39</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:16:38</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>41</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:12:29</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>44</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254699</th>\n",
" <td>6837769</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 23:42:15</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254700</th>\n",
" <td>6875038</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254701</th>\n",
" <td>6875066</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254702</th>\n",
" <td>6875099</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254703</th>\n",
" <td>6875143</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:17:01</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>688953 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
"0 29 4 0.0 NaT \n",
"1 37 3 0.0 NaT \n",
"2 39 4 1.0 0 days 05:16:38 \n",
"3 41 4 1.0 0 days 01:12:29 \n",
"4 44 4 0.0 NaT \n",
"... ... ... ... ... \n",
"254699 6837769 1 1.0 0 days 23:42:15 \n",
"254700 6875038 1 0.0 NaT \n",
"254701 6875066 1 0.0 NaT \n",
"254702 6875099 1 0.0 NaT \n",
"254703 6875143 1 1.0 0 days 01:17:01 \n",
"\n",
" number_compagny no_campaign_opened \n",
"0 10 True \n",
"1 10 True \n",
"2 10 False \n",
"3 10 False \n",
"4 10 True \n",
"... ... ... \n",
"254699 14 False \n",
"254700 14 True \n",
"254701 14 True \n",
"254702 14 True \n",
"254703 14 False \n",
"\n",
"[688953 rows x 6 columns]"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# part de clients n'ouvrant jamais les mails par compagnie\n",
"\n",
"campaigns_information_spectacle[\"no_campaign_opened\"] = pd.isna(campaigns_information_spectacle[\"time_to_open\"])\n",
"campaigns_information_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 225,
"id": "b5a0060f-a9dd-435b-844f-b24674b8bc27",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.605656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.294001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.475719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.353820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.428148</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny no_campaign_opened\n",
"0 10 0.605656\n",
"1 11 0.294001\n",
"2 12 0.475719\n",
"3 13 0.353820\n",
"4 14 0.428148"
]
},
"execution_count": 225,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_lazy_customers = campaigns_information_spectacle.groupby(\"number_compagny\")[\"no_campaign_opened\"].mean().reset_index()\n",
"company_lazy_customers"
]
},
{
"cell_type": "code",
"execution_count": 226,
"id": "788c90e0-f13a-4804-ace7-e5159fddd7fd",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_lazy_customers[\"number_compagny\"], company_lazy_customers[\"no_campaign_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients n'ayant ouvert aucun mail\")\n",
"plt.title(\"Part de clients n'ayant ouvert aucun mail pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "33233fb9-707d-44c0-80e2-a131756110a1",
"metadata": {},
"source": [
"#### Taux d'ouverture des campagnes de mails"
]
},
{
"cell_type": "code",
"execution_count": 227,
"id": "c48015c2-6451-4089-93b7-6d55d3b2e553",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>ratio_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>734772</td>\n",
" <td>126151.0</td>\n",
" <td>0.171687</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>342396</td>\n",
" <td>129833.0</td>\n",
" <td>0.379190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>3168123</td>\n",
" <td>810722.0</td>\n",
" <td>0.255900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>3218569</td>\n",
" <td>793581.0</td>\n",
" <td>0.246563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>2427043</td>\n",
" <td>723846.0</td>\n",
" <td>0.298242</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
"0 10 734772 126151.0 0.171687\n",
"1 11 342396 129833.0 0.379190\n",
"2 12 3168123 810722.0 0.255900\n",
"3 13 3218569 793581.0 0.246563\n",
"4 14 2427043 723846.0 0.298242"
]
},
"execution_count": 227,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# taux d'ouverture des campaigns\n",
"\n",
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
"company_campaigns_stats"
]
},
{
"cell_type": "code",
"execution_count": 228,
"id": "d06ab865-4832-4fe9-918b-e5ff72bebee4",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Taux d'ouverture (%)\")\n",
"plt.title(\"Taux d'ouverture des campagnes de mails pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 230,
"id": "5c37e063-a717-4a8c-828e-b386b87e8409",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# création d'un barplot permettant de visualiser les 2 indicateurs sur le même graphique\n",
"\n",
"# Création du premier barplot\n",
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"],\n",
" label = \"taux d'ouverture\", alpha = 0.7)\n",
"\n",
"# Création du deuxième barplot à côté du premier\n",
"bar_width = 0.4 # Largeur des barres\n",
"indices2 = company_campaigns_stats[\"number_compagny\"] + bar_width\n",
"plt.bar(indices2, 100 * (1 - company_lazy_customers[\"no_campaign_opened\"]), \n",
" label='Part de clients ouvrant des mails', alpha=0.7, width=bar_width)\n",
"\n",
"# Ajout des étiquettes et de la légende\n",
"plt.xlabel('Compagnie')\n",
"plt.ylabel('Taux (%)')\n",
"plt.title('Lien entre taux d ouverture des mails et nombre de clients actifs')\n",
"plt.legend()\n",
"\n",
"# save in the s3\n",
"\n",
"FILE_NAME = \"stats_mail_opening_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "638ab84b-15a5-4e70-b140-f121c68c82f5",
"metadata": {},
"source": [
"#### on refait les mêmes stats sur le train set"
]
},
{
"cell_type": "code",
"execution_count": 231,
"id": "4fdf4134-d32c-42c3-ab4f-36ad4783332c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_492779</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>13.0</td>\n",
" <td>4.0</td>\n",
" <td>8 days 04:08:27</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_563424</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>0 days 01:39:58.555555555</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_44369</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>14.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_620271</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_687644</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 42 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_492779 0.0 0.0 0.0 0.0 \n",
"1 10_563424 0.0 0.0 0.0 0.0 \n",
"2 10_44369 0.0 0.0 0.0 0.0 \n",
"3 10_620271 0.0 0.0 0.0 0.0 \n",
"4 10_687644 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 550.0 550.0 \n",
"1 0.0 550.0 550.0 \n",
"2 0.0 550.0 550.0 \n",
"3 0.0 550.0 550.0 \n",
"4 0.0 550.0 550.0 \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_female \\\n",
"0 -1.0 0.0 ... 1 \n",
"1 -1.0 0.0 ... 0 \n",
"2 -1.0 0.0 ... 0 \n",
"3 -1.0 0.0 ... 0 \n",
"4 -1.0 0.0 ... 0 \n",
"\n",
" gender_male gender_other country_fr nb_campaigns nb_campaigns_opened \\\n",
"0 0 0 1.0 13.0 4.0 \n",
"1 0 1 1.0 10.0 9.0 \n",
"2 1 0 1.0 14.0 0.0 \n",
"3 0 1 NaN 9.0 0.0 \n",
"4 0 1 NaN 4.0 0.0 \n",
"\n",
" time_to_open y_has_purchased number_company \\\n",
"0 8 days 04:08:27 0.0 10 \n",
"1 0 days 01:39:58.555555555 0.0 10 \n",
"2 NaN 0.0 10 \n",
"3 NaN 0.0 10 \n",
"4 NaN 0.0 10 \n",
"\n",
" no_campaign_opened \n",
"0 False \n",
"1 False \n",
"2 True \n",
"3 True \n",
"4 True \n",
"\n",
"[5 rows x 42 columns]"
]
},
"execution_count": 231,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# same statistics on the train set\n",
"\n",
"train_set_spectacle.head()"
]
},
{
"cell_type": "markdown",
"id": "924300e5-d6a9-4686-a938-f5f99afda70c",
"metadata": {},
"source": [
"#### Part de clients n'ouvrant aucun mail"
]
},
{
"cell_type": "code",
"execution_count": 232,
"id": "14ff9886-742c-4a60-8824-5d31f7c76aea",
"metadata": {},
"outputs": [],
"source": [
"train_set_spectacle[\"no_campaign_opened\"] = train_set_spectacle[\"nb_campaigns_opened\"]==0"
]
},
{
"cell_type": "code",
"execution_count": 235,
"id": "16285593-a0fa-461c-aeb8-c64ffdf9a0d6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>73.553379</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>35.582432</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>42.609537</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>32.887454</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>68.335897</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>52.833256</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>44.334881</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>28.807320</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased no_campaign_opened\n",
"0 10 0.0 73.553379\n",
"1 10 1.0 35.582432\n",
"2 11 0.0 42.609537\n",
"3 11 1.0 32.887454\n",
"4 12 0.0 100.000000\n",
"5 12 1.0 100.000000\n",
"6 13 0.0 68.335897\n",
"7 13 1.0 52.833256\n",
"8 14 0.0 44.334881\n",
"9 14 1.0 28.807320"
]
},
"execution_count": 235,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_lazy_customers = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"no_campaign_opened\"].mean().reset_index()\n",
"company_lazy_customers[\"no_campaign_opened\"] = 100 * company_lazy_customers[\"no_campaign_opened\"] \n",
"company_lazy_customers"
]
},
{
"cell_type": "code",
"execution_count": 236,
"id": "d35f00e3-b9b0-42b3-9dce-785c1ad5506c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# graphic for non opening mails customers for music companies (train set)\n",
"\n",
"multiple_barplot(company_lazy_customers, x=\"number_company\", y=\"no_campaign_opened\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Compagnie\", ylabel = \"Part de clients n'ayant ouvert aucun mail (%)\", \n",
" title = \"Part de clients des compagnies de spectacle n'ouvrant aucun mail (train set)\")\n",
"\n",
"# save in the s3\n",
"\n",
"FILE_NAME = \"no_mail_opened_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "f3407307-7cc1-4f57-a3ae-7c83773b4b81",
"metadata": {},
"source": [
"#### Part globale de mails ouverts pour chaque compagnie"
]
},
{
"cell_type": "code",
"execution_count": 237,
"id": "b391f5b2-2424-4758-8ae5-f0fdacdfae66",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_492779</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>13.0</td>\n",
" <td>4.0</td>\n",
" <td>8 days 04:08:27</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_563424</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>0 days 01:39:58.555555555</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_44369</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>14.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_620271</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_687644</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>354360</th>\n",
" <td>14_4685578</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>354361</th>\n",
" <td>14_4652175</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>2.0</td>\n",
" <td>3 days 06:21:17</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>354362</th>\n",
" <td>14_4736169</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>50.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>91.030556</td>\n",
" <td>91.020139</td>\n",
" <td>0.010417</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>6.0</td>\n",
" <td>0 days 17:30:10.166666666</td>\n",
" <td>1.0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>354363</th>\n",
" <td>14_4957203</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>55.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>52.284028</td>\n",
" <td>52.284028</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>354364</th>\n",
" <td>14_4690653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>354365 rows × 42 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_492779 0.0 0.0 0.0 0.0 \n",
"1 10_563424 0.0 0.0 0.0 0.0 \n",
"2 10_44369 0.0 0.0 0.0 0.0 \n",
"3 10_620271 0.0 0.0 0.0 0.0 \n",
"4 10_687644 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"354360 14_4685578 0.0 0.0 0.0 0.0 \n",
"354361 14_4652175 0.0 0.0 0.0 0.0 \n",
"354362 14_4736169 2.0 2.0 50.0 1.0 \n",
"354363 14_4957203 1.0 1.0 55.0 1.0 \n",
"354364 14_4690653 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 550.000000 550.000000 \n",
"1 0.0 550.000000 550.000000 \n",
"2 0.0 550.000000 550.000000 \n",
"3 0.0 550.000000 550.000000 \n",
"4 0.0 550.000000 550.000000 \n",
"... ... ... ... \n",
"354360 0.0 550.000000 550.000000 \n",
"354361 0.0 550.000000 550.000000 \n",
"354362 0.0 91.030556 91.020139 \n",
"354363 0.0 52.284028 52.284028 \n",
"354364 0.0 550.000000 550.000000 \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_female \\\n",
"0 -1.000000 0.0 ... 1 \n",
"1 -1.000000 0.0 ... 0 \n",
"2 -1.000000 0.0 ... 0 \n",
"3 -1.000000 0.0 ... 0 \n",
"4 -1.000000 0.0 ... 0 \n",
"... ... ... ... ... \n",
"354360 -1.000000 0.0 ... 0 \n",
"354361 -1.000000 0.0 ... 0 \n",
"354362 0.010417 0.0 ... 1 \n",
"354363 0.000000 0.0 ... 0 \n",
"354364 -1.000000 0.0 ... 0 \n",
"\n",
" gender_male gender_other country_fr nb_campaigns \\\n",
"0 0 0 1.0 13.0 \n",
"1 0 1 1.0 10.0 \n",
"2 1 0 1.0 14.0 \n",
"3 0 1 NaN 9.0 \n",
"4 0 1 NaN 4.0 \n",
"... ... ... ... ... \n",
"354360 0 1 NaN 7.0 \n",
"354361 1 0 1.0 11.0 \n",
"354362 0 0 1.0 6.0 \n",
"354363 1 0 1.0 3.0 \n",
"354364 1 0 NaN 7.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
"0 4.0 8 days 04:08:27 0.0 \n",
"1 9.0 0 days 01:39:58.555555555 0.0 \n",
"2 0.0 NaN 0.0 \n",
"3 0.0 NaN 0.0 \n",
"4 0.0 NaN 0.0 \n",
"... ... ... ... \n",
"354360 0.0 NaN 0.0 \n",
"354361 2.0 3 days 06:21:17 0.0 \n",
"354362 6.0 0 days 17:30:10.166666666 1.0 \n",
"354363 0.0 NaN 0.0 \n",
"354364 0.0 NaN 0.0 \n",
"\n",
" number_company no_campaign_opened \n",
"0 10 False \n",
"1 10 False \n",
"2 10 True \n",
"3 10 True \n",
"4 10 True \n",
"... ... ... \n",
"354360 14 True \n",
"354361 14 False \n",
"354362 14 False \n",
"354363 14 True \n",
"354364 14 True \n",
"\n",
"[354365 rows x 42 columns]"
]
},
"execution_count": 237,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# part de mails ouverts de chaque compagnie\n",
"\n",
"train_set_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 238,
"id": "dc8cfd36-0eb2-4ef3-877d-626fd0a9ced4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>ratio_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>734772</td>\n",
" <td>126151.0</td>\n",
" <td>0.171687</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>342396</td>\n",
" <td>129833.0</td>\n",
" <td>0.379190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>3168123</td>\n",
" <td>810722.0</td>\n",
" <td>0.255900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>3218569</td>\n",
" <td>793581.0</td>\n",
" <td>0.246563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>2427043</td>\n",
" <td>723846.0</td>\n",
" <td>0.298242</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
"0 10 734772 126151.0 0.171687\n",
"1 11 342396 129833.0 0.379190\n",
"2 12 3168123 810722.0 0.255900\n",
"3 13 3218569 793581.0 0.246563\n",
"4 14 2427043 723846.0 0.298242"
]
},
"execution_count": 238,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Campaign opening rate per company: summed nb_campaigns_opened / summed nb_campaigns\n",
"\n",
"company_campaigns_stats = (\n",
"    campaigns_information_spectacle\n",
"    .groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]]\n",
"    .sum()\n",
"    .reset_index()\n",
"    .assign(ratio_campaigns_opened=lambda totals: totals[\"nb_campaigns_opened\"] / totals[\"nb_campaigns\"])\n",
")\n",
"company_campaigns_stats"
]
},
{
"cell_type": "code",
"execution_count": 239,
"id": "30b28426-088a-4153-b2aa-c20f11b2b771",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>perc_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>143960.0</td>\n",
" <td>18472.0</td>\n",
" <td>12.831342</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>10609.0</td>\n",
" <td>5177.0</td>\n",
" <td>48.798190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>84676.0</td>\n",
" <td>27658.0</td>\n",
" <td>32.663328</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>20848.0</td>\n",
" <td>10927.0</td>\n",
" <td>52.412701</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>1182992.0</td>\n",
" <td>275366.0</td>\n",
" <td>23.277080</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>107160.0</td>\n",
" <td>41244.0</td>\n",
" <td>38.488242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>822836.0</td>\n",
" <td>219220.0</td>\n",
" <td>26.642004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>92099.0</td>\n",
" <td>34256.0</td>\n",
" <td>37.194758</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased nb_campaigns nb_campaigns_opened \\\n",
"0 10 0.0 143960.0 18472.0 \n",
"1 10 1.0 10609.0 5177.0 \n",
"2 11 0.0 84676.0 27658.0 \n",
"3 11 1.0 20848.0 10927.0 \n",
"4 12 0.0 0.0 0.0 \n",
"5 12 1.0 0.0 0.0 \n",
"6 13 0.0 1182992.0 275366.0 \n",
"7 13 1.0 107160.0 41244.0 \n",
"8 14 0.0 822836.0 219220.0 \n",
"9 14 1.0 92099.0 34256.0 \n",
"\n",
" perc_campaigns_opened \n",
"0 12.831342 \n",
"1 48.798190 \n",
"2 32.663328 \n",
"3 52.412701 \n",
"4 NaN \n",
"5 NaN \n",
"6 23.277080 \n",
"7 38.488242 \n",
"8 26.642004 \n",
"9 37.194758 "
]
},
"execution_count": 239,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share (in %) of opened campaigns per company, split by purchase status (train set)\n",
"company_campaigns_stats = (\n",
"    train_set_spectacle\n",
"    .groupby([\"number_company\", \"y_has_purchased\"])[[\"nb_campaigns\", \"nb_campaigns_opened\"]]\n",
"    .sum()\n",
"    .reset_index()\n",
"    .assign(perc_campaigns_opened=lambda totals: 100* (totals[\"nb_campaigns_opened\"] / totals[\"nb_campaigns\"]))\n",
")\n",
"company_campaigns_stats"
]
},
{
"cell_type": "code",
"execution_count": 240,
"id": "9cebe912-fce1-4f4f-9d87-9649605296c8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>perc_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>143960.0</td>\n",
" <td>18472.0</td>\n",
" <td>12.831342</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>10609.0</td>\n",
" <td>5177.0</td>\n",
" <td>48.798190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>84676.0</td>\n",
" <td>27658.0</td>\n",
" <td>32.663328</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>20848.0</td>\n",
" <td>10927.0</td>\n",
" <td>52.412701</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>1182992.0</td>\n",
" <td>275366.0</td>\n",
" <td>23.277080</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>107160.0</td>\n",
" <td>41244.0</td>\n",
" <td>38.488242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>822836.0</td>\n",
" <td>219220.0</td>\n",
" <td>26.642004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>92099.0</td>\n",
" <td>34256.0</td>\n",
" <td>37.194758</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased nb_campaigns nb_campaigns_opened \\\n",
"0 10 0.0 143960.0 18472.0 \n",
"1 10 1.0 10609.0 5177.0 \n",
"2 11 0.0 84676.0 27658.0 \n",
"3 11 1.0 20848.0 10927.0 \n",
"6 13 0.0 1182992.0 275366.0 \n",
"7 13 1.0 107160.0 41244.0 \n",
"8 14 0.0 822836.0 219220.0 \n",
"9 14 1.0 92099.0 34256.0 \n",
"\n",
" perc_campaigns_opened \n",
"0 12.831342 \n",
"1 48.798190 \n",
"2 32.663328 \n",
"3 52.412701 \n",
"6 23.277080 \n",
"7 38.488242 \n",
"8 26.642004 \n",
"9 37.194758 "
]
},
"execution_count": 240,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop company 12: its summed nb_campaigns is 0 in the train set, so its percentage is NaN\n",
"company_campaigns_stats = company_campaigns_stats.query(\"number_company != 12\")\n",
"company_campaigns_stats"
]
},
{
"cell_type": "code",
"execution_count": 241,
"id": "1c32cd86-e08d-4b8a-90f1-27ad0df0ffeb",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# graphic - overall rate of opened mails (train set for music companies)\n",
"\n",
"FILE_NAME = \"overall_mail_opening_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"multiple_barplot(company_campaigns_stats, x=\"number_company\", y=\"perc_campaigns_opened\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
" xlabel = \"Compagnie\", ylabel = \"Part de mails ouverts (%)\", \n",
" title = \"Taux d'ouverture global des mails envoyés par les compagnies de spectacle (train set)\")\n",
"\n",
"# save in the s3\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "783f6fb2-5f26-42a9-a22d-f4ece44bfaf2",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"### 3. products_purchased_reduced"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "74534ded-8121-43fb-8cf8-af353bed2c77",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 764880\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 0\n",
"purchase_date_max 0\n",
"time_between_purchase 0\n",
"nb_tickets_internet 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Row count of the table, then number of missing values per column\n",
"print(\"Nombre de lignes de la table : \",len(products_purchased_reduced_spectacle))\n",
"products_purchased_reduced_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "6db089d5-5517-4aee-a5fd-53f20ae3f0d7",
"metadata": {},
"outputs": [],
"source": [
"#importation librairies\n",
"import warnings\n",
"warnings.simplefilter(\"ignore\")\n",
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy.stats import shapiro\n",
"from numpy.random import randn\n",
"import scipy.stats as st\n",
"%matplotlib inline\n",
"\n",
"#col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "943b8088-9ca2-40a4-b658-2cfae1589fac",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"30.0\n",
"62.0\n",
"120.0\n",
"90.0\n",
"Moustache inferieure -105.0\n",
"Moustache superieure 255.0\n"
]
}
],
"source": [
"# Outlier bounds for total_amount (quartiles, interquartile range, 1.5 * IQ whiskers)\n",
"\n",
"# Quartiles Q1, Q2 (median) and Q3 of total_amount\n",
"Q1, Q2, Q3 = np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], [25, 50, 75])\n",
"print(Q1, Q2, Q3, sep=\"\\n\")\n",
"\n",
"# Interquartile range\n",
"IQ = Q3 - Q1\n",
"print(IQ)\n",
"\n",
"# Lower and upper whiskers: values outside [M_inf, M_sup] are treated as outliers\n",
"M_inf, M_sup = Q1 - 1.5*IQ, Q3 + 1.5*IQ\n",
"\n",
"print(\"Moustache inferieure\",M_inf)\n",
"print(\"Moustache superieure\",M_sup)\n"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "c3adb0cd-8292-4c6f-9d4e-8352a6967022",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id int64\n",
"nb_tickets int64\n",
"nb_purchases int64\n",
"total_amount float64\n",
"nb_suppliers int64\n",
"vente_internet_max int64\n",
"purchase_date_min float64\n",
"purchase_date_max float64\n",
"time_between_purchase float64\n",
"nb_tickets_internet float64\n",
"number_compagny int64\n",
"dtype: object"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced_spectacle.dtypes"
]
},
{
"cell_type": "markdown",
"id": "a63e6d13-429b-4b01-ad11-27e5eea68cbd",
"metadata": {},
"source": [
"Histogrammes des variables quantitatives (code conservé pour référence, non exécuté car la cellule est en markdown) :\n",
"\n",
"```python\n",
"col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]\n",
"for col in col_purchase:\n",
"    plt.figure()\n",
"    sns.histplot(products_purchased_reduced_spectacle[col], kde=True, color='red')\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "5a08b5a5-7d56-4543-945a-38f6219d831d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Keep strictly positive amounts and exclude outliers above 255\n",
"# (255 is the upper whisker printed by the quartile cell above)\n",
"filtered_products_purchased_reduced_spectacle = products_purchased_reduced_spectacle[(products_purchased_reduced_spectacle['total_amount'] > 0) & (products_purchased_reduced_spectacle['total_amount'] <= 255)]\n",
"\n",
"# Plot the frame filtered just above; it is the only filtered frame this cell defines,\n",
"# so the cell no longer depends on a name left over in the kernel\n",
"sns.boxplot(data=filtered_products_purchased_reduced_spectacle, y=\"total_amount\", x=\"number_compagny\", showfliers=False, showmeans=True)\n",
"\n",
"# Chart title\n",
"plt.title(\"Boite à moustache du chiffre d'affaire selon les compagnies de spectacles\")\n",
"\n",
"# Display the chart\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "76e08ece-0b58-4b3a-abca-53e30ccc907b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Statistique F : 317.1792172580724\n",
"Valeur de p : 3.665389608154993e-273\n",
"Nombre de degrés de liberté entre les groupes : 4\n",
"Nombre de degrés de liberté à l'intérieur des groupes : 670581\n",
"Il y a des différences significatives entre au moins une des entrepries .\n"
]
}
],
"source": [
"# One-way ANOVA: does the mean total_amount differ significantly between companies?\n",
"\n",
"from scipy.stats import f_oneway\n",
"\n",
"# One Series of total_amount per company, companies taken in order of first appearance\n",
"groupes = [\n",
"    montants\n",
"    for _, montants in filtered_products_purchased_reduced_spectacle.groupby('number_compagny', sort=False)['total_amount']\n",
"]\n",
"\n",
"# F-test across all groups\n",
"f_statistic, p_value = f_oneway(*groupes)\n",
"\n",
"# N observations in k groups give k - 1 degrees of freedom between groups and N - k within groups\n",
"N = sum(map(len, groupes))\n",
"k = len(groupes)\n",
"df_between, df_within = k - 1, N - k\n",
"\n",
"# Report the results\n",
"print(\"Statistique F :\", f_statistic)\n",
"print(\"Valeur de p :\", p_value)\n",
"\n",
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
"\n",
"# Conclusion at the 5% significance level\n",
"print(\n",
"    \"Il y a des différences significatives entre au moins une des entrepries .\"\n",
"    if p_value < 0.05\n",
"    else \"Il n'y a pas de différences significatives entre les entreprises .\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 129,
"id": "9ec6e1c5-f3bc-4041-b32e-b62762246eb7",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Distribution of total_amount by purchase status (y_has_purchased)\n",
"\n",
"# Keep strictly positive amounts up to 255 (larger values are excluded as outliers)\n",
"train_set_spectacle_filtered = train_set_spectacle.query(\"0 < total_amount <= 255\")\n",
"\n",
"# Boxplot of the filtered data (mean marker shown, fliers hidden), with its title set on the returned Axes\n",
"sns.boxplot(\n",
"    data=train_set_spectacle_filtered,\n",
"    x=\"y_has_purchased\",\n",
"    y=\"total_amount\",\n",
"    showfliers=False,\n",
"    showmeans=True,\n",
").set_title(\"Boite à moustache du chiffre d'affaire selon le statut d'achat du client\")\n",
"\n",
"# Display the chart\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b55de4b-913e-4bc1-b4f2-cc0b1824d0e2",
"metadata": {},
"outputs": [],
"source": [
"#graphe sur le taux de ticket acheté"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "aacf2c34-f7ea-4d6e-935b-c5db01f03bbe",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>Taux_ticket_internet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>492314</td>\n",
" <td>126262.0</td>\n",
" <td>25.646640</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>318969</td>\n",
" <td>16348.0</td>\n",
" <td>5.125263</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>591028</td>\n",
" <td>42045.0</td>\n",
" <td>7.113876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>7024227</td>\n",
" <td>1247482.0</td>\n",
" <td>17.759705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>335741</td>\n",
" <td>125638.0</td>\n",
" <td>37.421107</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_tickets nb_tickets_internet Taux_ticket_internet\n",
"0 10 492314 126262.0 25.646640\n",
"1 11 318969 16348.0 5.125263\n",
"2 12 591028 42045.0 7.113876\n",
"3 13 7024227 1247482.0 17.759705\n",
"4 14 335741 125638.0 37.421107"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share (in %) of tickets bought online, per company\n",
"\n",
"purchase_spectacle = (\n",
"    products_purchased_reduced_spectacle\n",
"    .groupby(\"number_compagny\")[[\"nb_tickets\", \"nb_tickets_internet\"]]\n",
"    .sum()\n",
"    .reset_index()\n",
"    .assign(Taux_ticket_internet=lambda totals: totals[\"nb_tickets_internet\"]*100 / totals[\"nb_tickets\"])\n",
")\n",
"purchase_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "f71bb53d-724b-454d-8743-305d20eec2b0",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the online ticket purchase rate per company\n",
"plt.bar(x=purchase_spectacle[\"number_compagny\"], height=purchase_spectacle[\"Taux_ticket_internet\"])\n",
"\n",
"# Axis labels and title, set on the current Axes\n",
"plt.gca().set(\n",
"    xlabel='Company',\n",
"    ylabel=\"Taux d'achat de tickets en ligne (%)\",\n",
"    title=\"Taux d'achat des tickets en ligne selon les compagnies de spectacle\",\n",
")\n",
"\n",
"# Display the chart\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 133,
"id": "86fa4d7f-9b5f-4487-beb8-eb23771f724c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>Taux_ticket_internet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>9957.0</td>\n",
" <td>5450.0</td>\n",
" <td>54.735362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>7941.0</td>\n",
" <td>3424.0</td>\n",
" <td>43.117995</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>10361.0</td>\n",
" <td>5.0</td>\n",
" <td>0.048258</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>9638.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>35600.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>11520.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>131759.0</td>\n",
" <td>105406.0</td>\n",
" <td>79.999089</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>1004076.0</td>\n",
" <td>13902.0</td>\n",
" <td>1.384557</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>44596.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>16694.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased nb_tickets nb_tickets_internet \\\n",
"0 10 0.0 9957.0 5450.0 \n",
"1 10 1.0 7941.0 3424.0 \n",
"2 11 0.0 10361.0 5.0 \n",
"3 11 1.0 9638.0 0.0 \n",
"4 12 0.0 35600.0 0.0 \n",
"5 12 1.0 11520.0 0.0 \n",
"6 13 0.0 131759.0 105406.0 \n",
"7 13 1.0 1004076.0 13902.0 \n",
"8 14 0.0 44596.0 0.0 \n",
"9 14 1.0 16694.0 0.0 \n",
"\n",
" Taux_ticket_internet \n",
"0 54.735362 \n",
"1 43.117995 \n",
"2 0.048258 \n",
"3 0.000000 \n",
"4 0.000000 \n",
"5 0.000000 \n",
"6 79.999089 \n",
"7 1.384557 \n",
"8 0.000000 \n",
"9 0.000000 "
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share (in %) of tickets bought online per company and purchase status (train set)\n",
"\n",
"purchase_spectacle_train = (\n",
"    train_set_spectacle\n",
"    .groupby([\"number_company\", \"y_has_purchased\"])[[\"nb_tickets\", \"nb_tickets_internet\"]]\n",
"    .sum()\n",
"    .reset_index()\n",
"    .assign(Taux_ticket_internet=lambda totals: totals[\"nb_tickets_internet\"]*100 / totals[\"nb_tickets\"])\n",
")\n",
"purchase_spectacle_train"
]
},
{
"cell_type": "code",
"execution_count": 106,
"id": "d11335b7-e35a-44c7-8ce4-661216978151",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Online ticket purchase rate per company, one bar per purchase status (train set)\n",
"multiple_barplot(\n",
"    purchase_spectacle_train,\n",
"    x=\"number_company\",\n",
"    y=\"Taux_ticket_internet\",\n",
"    var_labels=\"y_has_purchased\",\n",
"    dico_labels={0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
"    xlabel=\"Numéro de compagnie\",\n",
"    ylabel=\"Taux de ticket acheté par internet (%)\",\n",
"    title=\"Taux de ticket achété en ligne selon y_has_purchased par compagnies de spectacle (train set)\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 140,
"id": "f8444cab-d4c5-4afd-b472-476e702c09cc",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"\n",
"# Bar chart of Taux_ticket_internet by purchase status, aggregated over the rows of\n",
"# purchase_spectacle_train (one row per company and status), without error bars;\n",
"# title and axis labels are set on the Axes returned by seaborn\n",
"sns.barplot(data=purchase_spectacle_train, x=\"y_has_purchased\", y=\"Taux_ticket_internet\",ci=None).set(\n",
"    title=\"Taux moyen de tickets achetés selon le statut d'achat du client\",\n",
"    xlabel=\"Statut d'achat du client\",\n",
"    ylabel=\"Taux de tickets internet\",\n",
")\n",
"\n",
"# Display the chart\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 107,
"id": "9ba02de7-3087-4b0c-884a-dc4a6ca92c3b",
"metadata": {},
"outputs": [],
"source": [
"#stat sur la variable temps ecoulé entre le premier et le dernier achat"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "59a95248-0261-4970-9e91-e43d50cf4d69",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles')"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Distribution, per company, of the time elapsed between a customer's first and last purchase\n",
"\n",
"sns.boxplot(\n",
"    data=products_purchased_reduced_spectacle,\n",
"    x=\"number_compagny\",\n",
"    y=\"time_between_purchase\",\n",
"    showfliers=False,\n",
"    showmeans=True,\n",
").set_title(\"Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles\")"
]
},
{
"cell_type": "code",
"execution_count": 109,
"id": "e2c51e28-6197-48f0-ab6d-9fc7b3b0de74",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Statistique F : 7956.05932109542\n",
"Valeur de p : 0.0\n",
"Nombre de degrés de liberté entre les groupes : 4\n",
"Nombre de degrés de liberté à l'intérieur des groupes : 764875\n",
"Il y a des différences significatives entre au moins une des entrepries .\n"
]
}
],
"source": [
"# One-way ANOVA: does the mean time between first and last purchase differ significantly between companies?\n",
"\n",
"from scipy.stats import f_oneway\n",
"\n",
"# One Series of time_between_purchase per company, companies taken in order of first appearance\n",
"groupes = [\n",
"    durees\n",
"    for _, durees in products_purchased_reduced_spectacle.groupby('number_compagny', sort=False)['time_between_purchase']\n",
"]\n",
"\n",
"# F-test across all groups\n",
"f_statistic, p_value = f_oneway(*groupes)\n",
"\n",
"# N observations in k groups give k - 1 degrees of freedom between groups and N - k within groups\n",
"N = sum(map(len, groupes))\n",
"k = len(groupes)\n",
"df_between, df_within = k - 1, N - k\n",
"\n",
"# Report the results\n",
"print(\"Statistique F :\", f_statistic)\n",
"print(\"Valeur de p :\", p_value)\n",
"\n",
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
"\n",
"# Conclusion at the 5% significance level\n",
"print(\n",
"    \"Il y a des différences significatives entre au moins une des entrepries .\"\n",
"    if p_value < 0.05\n",
"    else \"Il n'y a pas de différences significatives entre les entreprises .\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "75a003ab-f42a-4b2d-a0a8-284e673e71f7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>time_between_purchase</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>45.791114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>193.080793</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>27.640469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>129.853892</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>16.418446</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>58.548598</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>10.012525</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>93.545373</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>3.879196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>10.745213</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased time_between_purchase\n",
"0 10 0.0 45.791114\n",
"1 10 1.0 193.080793\n",
"2 11 0.0 27.640469\n",
"3 11 1.0 129.853892\n",
"4 12 0.0 16.418446\n",
"5 12 1.0 58.548598\n",
"6 13 0.0 10.012525\n",
"7 13 1.0 93.545373\n",
"8 14 0.0 3.879196\n",
"9 14 1.0 10.745213"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean time_between_purchase per company and purchase status (y_has_purchased), computed on the train set\n",
"purchase_train_time = (\n",
"    train_set_spectacle\n",
"    .groupby([\"number_company\", \"y_has_purchased\"])[\"time_between_purchase\"]\n",
"    .mean()\n",
"    .reset_index()\n",
")\n",
"purchase_train_time"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "f27921a9-1253-4c02-9bff-8cd3c4a9a5d9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Grouped bar chart of the mean time between first and last purchase, per company and purchase status.\n",
"# The y-axis label describes the plotted variable (time_between_purchase), not the internet-ticket rate.\n",
"multiple_barplot(purchase_train_time, x=\"number_company\", y=\"time_between_purchase\", var_labels=\"y_has_purchased\",\n",
"                 dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
"                 xlabel = \"Numéro de compagnie\", ylabel = \"Temps moyen entre le premier et le dernier achat\", \n",
"                 title = \"temps moyen entre le premier et le dernier achat selon y_has_purchased par compagnies de spectacle (train set)\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "74f06e96-3c25-4eca-8190-25b0a4ab0d75",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id int64\n",
"nb_tickets int64\n",
"nb_purchases int64\n",
"total_amount float64\n",
"nb_suppliers int64\n",
"vente_internet_max int64\n",
"purchase_date_min float64\n",
"purchase_date_max float64\n",
"time_between_purchase float64\n",
"nb_tickets_internet float64\n",
"number_compagny int64\n",
"dtype: object"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Column dtypes of the purchases table\n",
"products_purchased_reduced_spectacle.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "aa6655c0-c602-4485-8b38-3117227464e1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19482</td>\n",
" <td>88</td>\n",
" <td>29</td>\n",
" <td>872.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2643.092500</td>\n",
" <td>718.149398</td>\n",
" <td>1924.943102</td>\n",
" <td>8.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19484</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>62.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1745.021736</td>\n",
" <td>1743.045035</td>\n",
" <td>1.976701</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19485</td>\n",
" <td>131</td>\n",
" <td>21</td>\n",
" <td>1878.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2649.044745</td>\n",
" <td>85.240845</td>\n",
" <td>2563.803900</td>\n",
" <td>84.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19486</td>\n",
" <td>10</td>\n",
" <td>4</td>\n",
" <td>96.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1944.077604</td>\n",
" <td>1742.794225</td>\n",
" <td>201.283380</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19487</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>33.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1742.877766</td>\n",
" <td>1742.877766</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99580</th>\n",
" <td>6884747</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.193750</td>\n",
" <td>0.193750</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99581</th>\n",
" <td>6884748</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.186806</td>\n",
" <td>0.186806</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99582</th>\n",
" <td>6884750</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>80.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.136111</td>\n",
" <td>0.136111</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99583</th>\n",
" <td>6884751</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.122917</td>\n",
" <td>0.122917</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99584</th>\n",
" <td>6884753</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.047222</td>\n",
" <td>0.047222</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>764880 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 19482 88 29 872.0 2 \n",
"1 19484 3 2 62.0 1 \n",
"2 19485 131 21 1878.0 2 \n",
"3 19486 10 4 96.0 1 \n",
"4 19487 2 1 33.0 1 \n",
"... ... ... ... ... ... \n",
"99580 6884747 2 1 40.0 1 \n",
"99581 6884748 2 1 40.0 1 \n",
"99582 6884750 4 1 80.0 1 \n",
"99583 6884751 2 1 40.0 1 \n",
"99584 6884753 2 1 40.0 1 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 1 2643.092500 718.149398 \n",
"1 0 1745.021736 1743.045035 \n",
"2 1 2649.044745 85.240845 \n",
"3 0 1944.077604 1742.794225 \n",
"4 0 1742.877766 1742.877766 \n",
"... ... ... ... \n",
"99580 0 0.193750 0.193750 \n",
"99581 0 0.186806 0.186806 \n",
"99582 0 0.136111 0.136111 \n",
"99583 0 0.122917 0.122917 \n",
"99584 0 0.047222 0.047222 \n",
"\n",
" time_between_purchase nb_tickets_internet number_compagny \n",
"0 1924.943102 8.0 10 \n",
"1 1.976701 0.0 10 \n",
"2 2563.803900 84.0 10 \n",
"3 201.283380 0.0 10 \n",
"4 0.000000 0.0 10 \n",
"... ... ... ... \n",
"99580 0.000000 0.0 14 \n",
"99581 0.000000 0.0 14 \n",
"99582 0.000000 0.0 14 \n",
"99583 0.000000 0.0 14 \n",
"99584 0.000000 0.0 14 \n",
"\n",
"[764880 rows x 11 columns]"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows only instead of rendering the whole frame; the shape is printed separately.\n",
"print(\"Dimensions de la table :\", products_purchased_reduced_spectacle.shape)\n",
"products_purchased_reduced_spectacle.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be04e2f9-60b9-4b44-ab36-06a365b21e32",
"metadata": {},
"outputs": [],
"source": [
"# Statistiques sur les canaux de vente"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "20a70ec0-38f6-470e-a442-7884a150613a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Mean number of sales channels (nb_suppliers) per company\n",
"\n",
"# Keep only the rows with at least one ticket (nb_tickets > 0)\n",
"purchase_canaux = products_purchased_reduced_spectacle[products_purchased_reduced_spectacle['nb_tickets'] > 0]\n",
"\n",
"plt.figure(figsize=(8, 6))\n",
"# errorbar=None hides the confidence intervals; it replaces the `ci=None` argument deprecated in seaborn 0.12\n",
"sns.barplot(x='number_compagny', y='nb_suppliers', data=purchase_canaux, errorbar=None)\n",
"plt.title('Nombre moyen de canaux de vente par entreprise')\n",
"plt.xlabel('Numéro de compagnie')\n",
"plt.ylabel('Nombre moyen de canaux')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 120,
"id": "ee901539-37d1-4dfa-8e78-38e4947c3d35",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 697297.000000\n",
"mean 0.110917\n",
"std 0.319561\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 8.000000\n",
"Name: nb_suppliers, dtype: float64"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of nb_suppliers on the train set\n",
"train_set_spectacle[\"nb_suppliers\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 125,
"id": "7389053e-54ae-4167-9afd-aa5d194822ef",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>nb_suppliers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>1.118250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>1.340136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>1.033992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>1.155239</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.153296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.220174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>1.007711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>1.083750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased nb_suppliers\n",
"0 10 0.0 1.118250\n",
"1 10 1.0 1.340136\n",
"2 11 0.0 1.033992\n",
"3 11 1.0 1.155239\n",
"4 12 0.0 0.153296\n",
"5 12 1.0 0.220174\n",
"6 13 0.0 1.007711\n",
"7 13 1.0 1.083750\n",
"8 14 0.0 1.000000\n",
"9 14 1.0 1.000000"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean number of purchase channels (nb_suppliers) per company and purchase status, on the train set\n",
"\n",
"# Keep only the rows with at least one ticket. This definition has to stay active: when it is\n",
"# commented out, purchase_train_canaux is undefined on a fresh kernel (Restart & Run All).\n",
"purchase_train_canaux = train_set_spectacle[train_set_spectacle['nb_tickets'] > 0]\n",
"\n",
"purchase_train_canaux_filtered = purchase_train_canaux.groupby([\"number_company\", \"y_has_purchased\"])[\"nb_suppliers\"].mean().reset_index()\n",
"purchase_train_canaux_filtered"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "e4079e46-db8b-4a25-9da6-37b1405c57d9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Grouped bar chart of the mean number of purchase channels, per company and purchase status (train set)\n",
"multiple_barplot(purchase_train_canaux_filtered, x=\"number_company\", y=\"nb_suppliers\", var_labels=\"y_has_purchased\",\n",
"                 dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
"                 xlabel = \"Numéro de compagnie\", ylabel = \"Nombre moyen de canaux d'achat\", \n",
"                 title = \"Nombre moyen de canaux d'achat selon y_has_purchased par compagnies de spectacle (train set)\")"
]
},
{
"cell_type": "markdown",
"id": "b9e84af4-a02b-4f83-81ae-b7a73475d060",
"metadata": {},
"source": [
"### 4. target_information"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2867eceb-1f72-406c-adc2-adfedcaf60e6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 6240166\n"
]
},
{
"data": {
"text/plain": [
"id 0\n",
"customer_id 0\n",
"target_name 0\n",
"target_type_is_import 0\n",
"target_type_name 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Row count of the table, then number of missing values per column\n",
"nb_lignes = len(target_information_spectacle)\n",
"print(\"Nombre de lignes de la table : \", nb_lignes)\n",
"target_information_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "561f361d-7d39-430a-9e27-a32f6c2f7b50",
"metadata": {},
"outputs": [],
"source": [
"# target_information : pas exploitable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "904cbf32-77b6-49dd-a96c-9e7e5a0175c3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}