BDC-team-1/Spectacle/Stat_desc.ipynb
2024-03-08 09:30:12 +00:00

6643 lines
717 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "be628bfc-0bca-48b0-97c9-29063289127e",
"metadata": {},
"source": [
"# Statistiques descriptives : compagnies offrant des spectacles"
]
},
{
"cell_type": "markdown",
"id": "0bf5450b-f44d-430a-aed7-d875dc365048",
"metadata": {},
"source": [
"## Importations et chargement des données"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "aa915888-cede-4eb0-8a26-7df573d29a3e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import s3fs\n",
"import warnings\n",
"from datetime import date, timedelta, datetime\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
"metadata": {},
"outputs": [],
"source": [
"# Import KPI construction functions\n",
"#exec(open('0_KPI_functions.py').read())\n",
"exec(open('../0_KPI_functions.py').read())\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
"metadata": {},
"outputs": [],
"source": [
"# test avec company 10\n",
"\n",
"dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n",
"for nom_base in dic_base:\n",
" FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n",
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
"metadata": {},
"outputs": [],
"source": [
"# fonction permettant d'extraire une table à partir du numéro de la compagnie (directory_path)\n",
"\n",
"def display_databases(directory_path, file_name, datetime_col = None):\n",
" \"\"\"\n",
" This function returns the file from s3 storage \n",
" \"\"\"\n",
" file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + directory_path + \"/\" + file_name + \".csv\"\n",
" print(\"File path : \", file_path)\n",
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser) \n",
" return df \n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c56decc3-de19-4786-82a4-1386c72a6bfb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1165098</td>\n",
" <td>618562</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1165100</td>\n",
" <td>618559</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1165101</td>\n",
" <td>618561</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1165102</td>\n",
" <td>618560</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1165103</td>\n",
" <td>618558</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69253</th>\n",
" <td>1698158</td>\n",
" <td>18580</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69254</th>\n",
" <td>1698159</td>\n",
" <td>18569</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69255</th>\n",
" <td>1698160</td>\n",
" <td>2962</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69256</th>\n",
" <td>1698161</td>\n",
" <td>3825</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69257</th>\n",
" <td>1698162</td>\n",
" <td>5731</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>69258 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id target_name target_type_is_import \\\n",
"0 1165098 618562 Newsletter mensuelle False \n",
"1 1165100 618559 Newsletter mensuelle False \n",
"2 1165101 618561 Newsletter mensuelle False \n",
"3 1165102 618560 Newsletter mensuelle False \n",
"4 1165103 618558 Newsletter mensuelle False \n",
"... ... ... ... ... \n",
"69253 1698158 18580 Newsletter mensuelle False \n",
"69254 1698159 18569 Newsletter mensuelle False \n",
"69255 1698160 2962 Newsletter mensuelle False \n",
"69256 1698161 3825 Newsletter mensuelle False \n",
"69257 1698162 5731 Newsletter mensuelle False \n",
"\n",
" target_type_name \n",
"0 manual_static_filter \n",
"1 manual_static_filter \n",
"2 manual_static_filter \n",
"3 manual_static_filter \n",
"4 manual_static_filter \n",
"... ... \n",
"69253 manual_static_filter \n",
"69254 manual_static_filter \n",
"69255 manual_static_filter \n",
"69256 manual_static_filter \n",
"69257 manual_static_filter \n",
"\n",
"[69258 rows x 5 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"target_information"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c825d64b-356c-4b71-aa3c-90e0dd7ca092",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" <th>start_date_time</th>\n",
" <th>end_date_time</th>\n",
" <th>open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1799177</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>2</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>danse</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>aringa rossa</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-09-27 00:00:00+02:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1799178</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>5èmes hurlants</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-11-18 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1799179</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>dom juan</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-12-07 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1799180</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>vanishing point</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-04 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1799181</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>12.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>la cite des congres</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>a o lang pho</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-03 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492309</th>\n",
" <td>3252232</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492310</th>\n",
" <td>3252233</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492311</th>\n",
" <td>3252234</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492312</th>\n",
" <td>3252235</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492313</th>\n",
" <td>3252236</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>492314 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 1799177 36984 409613 2 guichet \n",
"1 1799178 36984 409613 3 guichet \n",
"2 1799179 36984 409613 1 guichet \n",
"3 1799180 36984 409613 1 guichet \n",
"4 1799181 36984 409613 3 guichet \n",
"... ... ... ... ... ... \n",
"492309 3252232 621716 710062 1 guichet \n",
"492310 3252233 621716 710062 1 guichet \n",
"492311 3252234 621716 710062 1 guichet \n",
"492312 3252235 621716 710062 1 guichet \n",
"492313 3252236 621716 710062 1 guichet \n",
"\n",
" purchase_date amount is_full_price name_event_types \\\n",
"0 2016-04-28 17:58:26+02:00 9.0 False danse \n",
"1 2016-04-28 17:58:26+02:00 9.0 False cirque \n",
"2 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"3 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"4 2016-04-28 17:58:26+02:00 12.0 False cirque \n",
"... ... ... ... ... \n",
"492309 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492310 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492311 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492312 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492313 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"\n",
" name_facilities name_categories \\\n",
"0 le grand t abo t gourmand jeune \n",
"1 le grand t abo t gourmand jeune \n",
"2 le grand t abo t gourmand jeune \n",
"3 le grand t abo t gourmand jeune \n",
"4 la cite des congres abo t gourmand jeune \n",
"... ... ... \n",
"492309 cap nort tarif sco co 1 seance scolaire \n",
"492310 cap nort tarif sco co 1 seance scolaire \n",
"492311 cap nort tarif sco co 1 seance scolaire \n",
"492312 cap nort tarif sco co 1 seance scolaire \n",
"492313 cap nort tarif sco co 1 seance scolaire \n",
"\n",
" name_events name_seasons start_date_time \\\n",
"0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n",
"1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n",
"2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n",
"3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n",
"4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n",
"... ... ... ... \n",
"492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"\n",
" end_date_time open \n",
"0 1901-01-01 00:09:21+00:09 True \n",
"1 1901-01-01 00:09:21+00:09 True \n",
"2 1901-01-01 00:09:21+00:09 True \n",
"3 1901-01-01 00:09:21+00:09 True \n",
"4 1901-01-01 00:09:21+00:09 True \n",
"... ... ... \n",
"492309 1901-01-01 00:09:21+00:09 True \n",
"492310 1901-01-01 00:09:21+00:09 True \n",
"492311 1901-01-01 00:09:21+00:09 True \n",
"492312 1901-01-01 00:09:21+00:09 True \n",
"492313 1901-01-01 00:09:21+00:09 True \n",
"\n",
"[492314 rows x 16 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "afd044b8-ac83-4a35-b959-700cae0b3b41",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 10\n",
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 11\n",
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:13: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 12\n",
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 13\n",
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:13: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:27: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 14\n"
]
}
],
"source": [
"# création des bases contenant les KPI pour les 5 compagnies de spectacle\n",
"\n",
"# liste des compagnies de spectacle\n",
"nb_compagnie=['10','11','12','13','14']\n",
"\n",
"# début de la boucle permettant de générer des datasets agrégés pour les 5 compagnies de spectacle\n",
"for directory_path in nb_compagnie:\n",
" df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
" df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
" df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
" df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
" \n",
" df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n",
" df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
" df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
"\n",
" \n",
"# creation de la colonne Number compagnie, qui permettra d'agréger les résultats\n",
" df_tickets_kpi[\"number_compagny\"]=int(directory_path)\n",
" df_campaigns_kpi[\"number_compagny\"]=int(directory_path)\n",
" df_customerplus_clean[\"number_compagny\"]=int(directory_path)\n",
" df_target_information[\"number_compagny\"]=int(directory_path)\n",
"\n",
" if nb_compagnie.index(directory_path)>=1:\n",
" customerplus_clean_spectacle=pd.concat([customerplus_clean_spectacle,df_customerplus_clean],axis=0)\n",
" campaigns_information_spectacle=pd.concat([campaigns_information_spectacle,df_campaigns_kpi],axis=0)\n",
" products_purchased_reduced_spectacle=pd.concat([products_purchased_reduced_spectacle,df_tickets_kpi],axis=0)\n",
" target_information_spectacle=pd.concat([target_information_spectacle,df_target_information],axis=0)\n",
" else:\n",
" customerplus_clean_spectacle=df_customerplus_clean\n",
" campaigns_information_spectacle=df_campaigns_kpi\n",
" products_purchased_reduced_spectacle=df_tickets_kpi\n",
" target_information_spectacle=df_target_information\n",
"\n",
" print(f\"Tables imported for tenant {directory_path}\")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "b5a4a031-9533-4a50-8569-5f4246691a7a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>2</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18031</th>\n",
" <td>2</td>\n",
" <td>319517</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1556</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>2</td>\n",
" <td>2020-01-01 14:06:52+00:00</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>291642</th>\n",
" <td>2</td>\n",
" <td>757541</td>\n",
" <td>303.0</td>\n",
" <td>5.0</td>\n",
" <td>1</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2016-09-08 14:50:00+00:00</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"17 2 139 NaN NaN 0 \n",
"18031 2 319517 NaN NaN 0 \n",
"291642 2 757541 303.0 5.0 1 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"17 875 False NaN 2 False ... \n",
"18031 1556 False NaN 0 True ... \n",
"291642 862 False NaN 1 True ... \n",
"\n",
" purchase_count first_buying_date country gender_label \\\n",
"17 3 NaN NaN other \n",
"18031 2 2020-01-01 14:06:52+00:00 fr female \n",
"291642 3 2016-09-08 14:50:00+00:00 fr male \n",
"\n",
" gender_female gender_male gender_other country_fr has_tags \\\n",
"17 0 0 1 NaN 0 \n",
"18031 1 0 0 1.0 0 \n",
"291642 0 1 0 1.0 1 \n",
"\n",
" number_compagny \n",
"17 10 \n",
"18031 11 \n",
"291642 14 \n",
"\n",
"[3 rows x 29 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==2]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b9b6ec1f-36fb-4ee9-a1ed-09ff41878005",
"metadata": {},
"outputs": [],
"source": [
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==1]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "a12c1b7d-6f6f-483e-b215-6336d7a51057",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'street_id', 'structure_id', 'mcp_contact_id',\n",
" 'fidelity', 'tenant_id', 'is_partner', 'deleted_at', 'gender',\n",
" 'is_email_true', 'opt_in', 'last_buying_date', 'max_price',\n",
" 'ticket_sum', 'average_price', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'gender_label',\n",
" 'gender_female', 'gender_male', 'gender_other', 'country_fr',\n",
" 'has_tags', 'number_compagny'],\n",
" dtype='object')"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f263e9c-0adf-4f25-8939-7416f3013c04",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 38,
"id": "05b9a396-dcd7-4d3d-8b39-5ca48beba4b0",
"metadata": {},
"outputs": [],
"source": [
"#customerplus_clean_spectacle.isna().sum()\n",
"#campaigns_information_spectacle.isna().sum()\n",
"#products_purchased_reduced_spectacle.isna().sum()\n",
"#target_information_spectacle.isna().sum()"
]
},
{
"cell_type": "markdown",
"id": "81e15508-32ca-46f1-a03d-1febddbbf5b4",
"metadata": {},
"source": [
"### Ajout : importation de la table train_set pour y calculer les statistiques descriptives"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a1fdd6b-ac43-4e90-9a31-4f522bcc44bb",
"metadata": {},
"outputs": [],
"source": [
"# Load the train_set table for the show (or music) companies.\n",
"\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"path_train_set_spectacle = \"projet-bdc2324-team1/Generalization/musique/Train_set.csv\"\n",
"\n",
"# low_memory=False lets pandas infer each column's dtype from the whole file\n",
"# instead of chunk by chunk, which avoids the mixed-type DtypeWarning on read.\n",
"with fs.open(path_train_set_spectacle, mode=\"rb\") as file_in:\n",
"    train_set_spectacle = pd.read_csv(file_in, sep=\",\", low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "3a4c1ff4-2861-4e86-99df-26eea0370dc3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_299341</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 05:47:26.333333333</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_63788</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>62.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>393.205891</td>\n",
" <td>281.017639</td>\n",
" <td>112.188252</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:13:51</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_759946</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_20653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>10.0</td>\n",
" <td>1 days 00:45:54</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_824705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_299341 0.0 0.0 0.0 0.0 \n",
"1 10_63788 3.0 2.0 62.0 1.0 \n",
"2 10_759946 0.0 0.0 0.0 0.0 \n",
"3 10_20653 0.0 0.0 0.0 0.0 \n",
"4 10_824705 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 NaN NaN \n",
"1 1.0 393.205891 281.017639 \n",
"2 0.0 NaN NaN \n",
"3 0.0 NaN NaN \n",
"4 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... country gender_label \\\n",
"0 NaN 0.0 ... fr male \n",
"1 112.188252 3.0 ... fr female \n",
"2 NaN 0.0 ... NaN other \n",
"3 NaN 0.0 ... fr male \n",
"4 NaN 0.0 ... NaN other \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 0 1 0 1.0 12.0 \n",
"1 1 0 0 1.0 3.0 \n",
"2 0 0 1 NaN 0.0 \n",
"3 0 1 0 1.0 11.0 \n",
"4 0 0 1 NaN 0.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \n",
"0 3.0 0 days 05:47:26.333333333 NaN \n",
"1 1.0 0 days 05:13:51 1.0 \n",
"2 0.0 NaN NaN \n",
"3 10.0 1 days 00:45:54 NaN \n",
"4 0.0 NaN NaN \n",
"\n",
"[5 rows x 40 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set_spectacle.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4632384d-2a06-445d-9fdb-b0c91b37ebaf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0., 1.])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# on remplace les valeurs has purchased = NaN par des 0\n",
"train_set_spectacle[\"y_has_purchased\"] = train_set_spectacle[\"y_has_purchased\"].fillna(0)\n",
"train_set_spectacle[\"y_has_purchased\"].unique()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "5fd56696-b479-46c7-8a59-fb8137db5fb5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([10, 11, 12, 13, 14])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rebuild a column holding the company number, i.e. the part of customer_id before \"_\".\n",
"# Vectorised .str operations avoid calling a Python lambda on every row.\n",
"train_set_spectacle[\"number_company\"] = (\n",
"    train_set_spectacle[\"customer_id\"].astype(str).str.split(\"_\", n=1).str[0].astype(int)\n",
")\n",
"train_set_spectacle[\"number_company\"].unique()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "91c6e047-43d2-456c-81f1-087026eef4f0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_299341</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 05:47:26.333333333</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_63788</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>62.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>393.205891</td>\n",
" <td>281.017639</td>\n",
" <td>112.188252</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:13:51</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_759946</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_20653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>10.0</td>\n",
" <td>1 days 00:45:54</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_824705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_299341 0.0 0.0 0.0 0.0 \n",
"1 10_63788 3.0 2.0 62.0 1.0 \n",
"2 10_759946 0.0 0.0 0.0 0.0 \n",
"3 10_20653 0.0 0.0 0.0 0.0 \n",
"4 10_824705 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 NaN NaN \n",
"1 1.0 393.205891 281.017639 \n",
"2 0.0 NaN NaN \n",
"3 0.0 NaN NaN \n",
"4 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
"0 NaN 0.0 ... male \n",
"1 112.188252 3.0 ... female \n",
"2 NaN 0.0 ... other \n",
"3 NaN 0.0 ... male \n",
"4 NaN 0.0 ... other \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 0 1 0 1.0 12.0 \n",
"1 1 0 0 1.0 3.0 \n",
"2 0 0 1 NaN 0.0 \n",
"3 0 1 0 1.0 11.0 \n",
"4 0 0 1 NaN 0.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
"1 1.0 0 days 05:13:51 1.0 \n",
"2 0.0 NaN 0.0 \n",
"3 10.0 1 days 00:45:54 0.0 \n",
"4 0.0 NaN 0.0 \n",
"\n",
" number_company \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"\n",
"[5 rows x 41 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set_spectacle.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21e562d4-035d-4112-9f94-527b7fd935cf",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "fff306c2-1d41-4ef6-867b-ba9a7cf4ee68",
"metadata": {},
"source": [
"## Statistiques descriptives"
]
},
{
"cell_type": "markdown",
"id": "0549bdc4-edd7-4511-916e-26e94b5a30f5",
"metadata": {},
"source": [
"### 0. Détection du client anonyme (outlier) - utile pour la section 3"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "5b460061-f8b5-4a6b-ba59-539446d8487f",
"metadata": {},
"outputs": [],
"source": [
"def outlier_detection(directory_path = \"1\", coupure = 1):\n",
"    \"\"\"Plot the revenue of the top customers against all the others as a pie chart.\n",
"\n",
"    The notebook uses this to spot an outlier customer (the 'anonymous' client)\n",
"    that concentrates an abnormal part of a company's total amount.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str\n",
"        Company identifier forwarded to display_databases.\n",
"    coupure : int\n",
"        Number of top customers drawn as individual slices; every remaining\n",
"        customer is summed into a single 'Autre' slice.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is shown with plt.show().\n",
"    \"\"\"\n",
"    # display_databases / tickets_kpi_function are not defined in this cell;\n",
"    # presumably they come from 0_KPI_functions.py -- TODO confirm.\n",
"    df_tickets = display_databases(directory_path, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n",
"    df_tickets_kpi = tickets_kpi_function(df_tickets)\n",
"\n",
"    # Company \"101\" has a second purchases file whose KPIs are appended.\n",
"    if directory_path == \"101\" :\n",
"        df_tickets_1 = display_databases(directory_path, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])\n",
"        df_tickets_kpi_1 = tickets_kpi_function(df_tickets_1)\n",
"\n",
"        df_tickets_kpi = pd.concat([df_tickets_kpi, df_tickets_kpi_1])\n",
"\n",
"    # Total amount per customer, largest first.\n",
"    revenue_by_customer = (\n",
"        df_tickets_kpi.groupby('customer_id')['total_amount']\n",
"        .sum()\n",
"        .sort_values(ascending = False)\n",
"    )\n",
"\n",
"    # Explicit positional slicing: keep the `coupure` largest customers and\n",
"    # pool everything else into one 'Autre' entry.\n",
"    top = revenue_by_customer.iloc[:coupure]\n",
"    rest_sum = revenue_by_customer.iloc[coupure:].sum()\n",
"    new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
"\n",
"    # Draw the pie chart.\n",
"    plt.figure(figsize=(3, 3))\n",
"    plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
"    plt.axis('equal')  # keep the pie circular\n",
"    plt.title('Répartition des montants totaux')\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "b6417f09-a6c7-4319-95b3-98c95ec5a3b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUwAAAEQCAYAAADbIk3TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA5TUlEQVR4nO3dd3xT9f7H8VeStkkX3dDSySxbliggU9kgyLiKskVUcKIX0SvrOlB/XkXFLVdEkesGUUQ2IgKCWPYqs0DL6KB7Jd/fH4FAaEtTaHvS5vN8PPrQJCcn76Snb8745hydUkohhBCiVHqtAwghRFUhhSmEEA6SwhRCCAdJYQohhIOkMIUQwkFSmEII4SApTCGEcJAUphBCOEgKUwghHFRtCnPnzp14eXnxzjvvaB1FCFFNOVVhzp8/H51OZ/txc3MjLCyMe+65h0OHDpX4vIyMDIYOHcqjjz7Ko48+WomJi1q2bBkzZ84s9rGYmBjGjBlju3369GlmzpxJXFxckWlnzpyJTqermJDXSafTlfjeXMXevXuZOXMmx44dq5TXe/nll1m8eHG5za888l9rGa/2lBP59NNPFaA+/fRTtWnTJrV27Vr14osvKk9PT1WzZk2VkpJS7POGDRum7rvvPmWxWCo5cVGTJk1SJX2s27dvV/Hx8bbbW7dutb3fqyUkJKhNmzZVVMzrAqgZM2ZoHUNT33zzjQLU2rVrK+X1vL291ejRo8ttfuWR/1rLeHXnpmFXl6hZs2a0bdsWgK5du2I2m5kxYwaLFy9m7NixRab/+uuvKztiEdnZ2Xh5eV1zmlatWjk8v4iICCIiIm40lhCiPGnd2Fe6tIa5detWu/t//vlnBajZs2fb3b9161Y1YMAAFRAQoIxGo2rZsqX66quvip3nihUr1JgxY1RAQIDy8vJS/fv3V4cPH7abdsWKFerOO+9U4eHhymg0qnr16qkJEyaoc+fO2U03Y8YMBai//vpLDRkyRPn7+6vQ0FA1evRoBRT5OXr0qFJKqejoaNvawtq1a4ud9tIa3KXXuJLZbFavvvqqio2NVR4eHiokJESNHDlSJSQk2E3XpUsX1bRpU/Xnn3+q2267TXl6eqo6deqo2bNnK7PZXOrv4cKFC2r8+PEqMDBQeXt7q169eqkDBw4Uu4Z58OBBNXz4cBUSEqI8PDxUo0aN1Ny5c4vkfuGFF1TDhg2VyWRSfn5+qnnz5mrOnDnXzHHpM1q4cKGaMmWKCg0NVd7e3qp///4qKSlJpaenqwceeEAFBQWpoKAgNWbMGJWRkWE3j5ycHDV16lQVExOj3N3dVe3atdXEiRNVamqq3XTR0dGqX79+6pdfflGtWrVSJpNJxcbGqnnz5tmmubQsXf1zaQuhrMvP7t271T333KNq1KihatasqcaOHavS0tJs0xX3Wl26dFFKKZWVlaWeeuopFRMTo4xGowoICFBt2rRRX375ZYmfZ2n5lVJq3rx5qkWLFrZ5Dho0SO3du9f2eGnL+Ny5c1WnTp1USEiI8vLyUs2aNVOvvvqqys/PL/J5F7fm3KVLF9t7VEqpBx98UBmNRrVt2zbbfWazWXXv3l3VrFlTnT59usT3WxGqRGHOnTtXAeq7776z3bdmzRrl4eGhOnXqpL766iu1fPlyNWbMmCILwKV5RkZGqnHjxqlffvlFffTRR6pmzZoqMjLS7g/n/fffV7Nnz1Y//vijWr9+vfrss8/UTTfdpGJjY+1+4ZcW+OjoaPXMM8+olStXqsWLF6v4+Hg1dOhQBahNmzbZfnJzc5VS9gvJhQsXbNmef/5527SXyq+4wpwwYYIC1COPPKKWL1+uPvjgAxUSEqIiIyPt/ii7dOmigoKCVIMGDdQHH3ygVq5cqSZOnKgA9dlnn13zd2CxWFS3bt2U0WhUL730klqxYoWaMWOGqlu3bpHC3LNnj638FixYoFasWKGeeuoppdfr1cyZM23TzZ49WxkMBjVjxg
y1evVqtXz5cjVnzhy7aYpzqTCjo6PVmDFjbO/Zx8dHdevWTfXo0UM9/fTTasWKFerVV19VBoNBPfroo3bvpVevXsrNzU1NmzZNrVixQr3++uvK29tbtWrVyvZ7ufS7iYiIUE2aNFELFixQv/76qxo2bJgC1Pr165VSSp09e1a9/PLLClDvvvuu7Xd29uzZ61p+YmNj1fTp09XKlSvVG2+8oYxGoxo7dqxtuk2bNilPT0/Vt29f22vt2bNHKWUtEi8vL/XGG2+otWvXqp9++km98sor6p133inx8ywt/6XHhg8frn7++We1YMECVbduXeXn56cOHjyolFKlLuNPPvmkev/999Xy5cvVmjVr1JtvvqmCg4Pt3telz9uRwszJyVEtW7ZUdevWtf2tTp8+Xen1erVixYoS32tFccrC3Lx5syooKFAZGRlq+fLlKjQ0VHXu3FkVFBTYpm3UqJFq1aqV3X1KKdW/f38VFhZmW5O6NM+77rrLbrqNGzcqQL344ovFZrFYLKqgoEAdP35cAWrJkiW2xy4t8NOnTy/yvGvt37l6IbnWPsyrC3Pfvn0KUBMnTrSbbsuWLQpQzz33nO2+Ll26KEBt2bLFbtomTZqoXr16FZvtkl9++UUB6q233rK7/6WXXipSmL169VIRERHqwoULdtM+8sgjymQy2fY59+/fX7Vs2fKar1ucS4U5YMAAu/ufeOIJBajHHnvM7v5BgwapwMBA2+3ly5crQL322mt203311VcKUB999JHtvujoaGUymdTx48dt9+Xk5KjAwED14IMP2u5zdB+gI8vP1bkmTpyoTCaT3b74kvZhNmvWTA0aNOiaGYpTUv7U1FRbOV/pxIkTymg0qnvvvdd2n6P7MM1msyooKFALFixQBoPB7hiEo4WplFKHDh1SNWrUUIMGDVKrVq1Ser1ePf/886W/2QrgVEfJL7n11ltxd3fH19eX3r17ExAQwJIlS3Bzs+5yjY+PZ//+/dx3330AFBYW2n769u1LYmIiBw4csJvnpWkv6dChA9HR0axdu9Z239mzZ3nooYeIjIzEzc0Nd3d3oqOjAdi3b1+RnEOGDCnX930tl3JeeZQdoF27djRu3JjVq1fb3R8aGkq7du3s7mvRogXHjx936HWu/rzuvfdeu9u5ubmsXr2au+66Cy8vryK/g9zcXDZv3mzLuGPHDiZOnMivv/5Kenq6Y2/6ov79+9vdbty4MQD9+vUrcn9KSgqZmZkArFmzBij6mQ0bNgxvb+8in1nLli2Jioqy3TaZTDRs2LDUz+ySsi4/d955p93tFi1akJuby9mzZ0t9rXbt2vHLL78wdepU1q1bR05OjkMZS7Jp0yZycnKKfFaRkZF07969yGdVkr///ps777yToKAgDAYD7u7ujBo1CrPZzMGDB68rW/369fn4449ZvHgx/fv3p1OnTpodpXfKwlywYAFbt25lzZo1PPjgg+zbt4/hw4fbHj9z5gwATz/9NO7u7nY/EydOBOD8+fN28wwNDS3yOqGhoSQnJwNgsVjo2bMn33//PVOmTGH16tX8+eeftj/64hbIsLCw8nnDDriUs7jXrF27tu3xS4KCgopMZzQaS/3DSk5Oxs3Nrcjzr/78kpOTKSws5J133inyO+jbty9w+Xfw7LPP8vrrr7N582b69OlDUFAQt99+O9u2bSvlXVsFBgba3fbw8Ljm/bm5uXbvJSQkxG46nU5n97u/5Ho/M7i+5efq1zMajSVOe7W3336bZ555hsWLF9OtWzcCAwMZNGjQNYffXUtZl6/inDhxgk6dOnHq1CneeustNmzYwNatW3n33XcBx95XSfr160etWrXIzc1l8uTJGAyG657XjXDKo+SNGze2HSXv1q0bZrOZTz75hG+//ZahQ4cSHBwMWP8QBw8eXOw8YmNj7W4nJSUVmSYpKYn69esDsHv3bnbs2MH8+fMZPXq0bZr4+PgSc1bmOMlLf1yJiYlFjp6fPn3a9pmUx+
sUFhaSnJxs9wd99ecXEBCAwWBg5MiRTJo0qdh51alTBwA3NzcmT57M5MmTSUtLY9WqVTz33HP06tWLhISEUkcX3Oh7OXfunF1pKqVISkri5ptvLrfXup7l50Z4e3sza9YsZs2axZkzZ2xrmwMGDGD//v1lnt+Vy9fVHF2+Fi9eTFZWFt9//71tzRoodpyxyWQiLy+vyP3nz58v9rUeeughMjIyaNq0KY899hidOnUiICCg1EzlzSnXMK/22muvERAQwPTp07FYLMTGxtKgQQN27NhB27Zti/3x9fW1m8fChQvtbv/xxx8cP36crl27ApfL79K/8pd8+OGHZcpalrWEskzbvXt3AL744gu7+7du3cq+ffu4/fbby5SzJN26dQOKfl5ffvml3W0vLy+6devG33//TYsWLYr9HRS3xubv78/QoUOZNGkSKSkpFToA/NJncvVn9t1335GVlXVdn1lJv7PyWn6Ke73Slo9atWoxZswYhg8fzoEDB8jOzr7m/KBo/vbt2+Pp6Vnkszp58iRr1qyx+6zK8hkopfj444+L5IiJiWHnzp129x08eLDIrjSATz75hC+++IK5c+fy448/kpaWVuzwwsrglGuYVwsICODZZ59lypQpfPnll4wYMYIPP/yQPn360KtXL8aMGUN4eDgpKSns27eP7du3880339jNY9u2bYwfP55hw4aRkJDAv/71L8LDw22b8I0aNaJevXpMnToVpRSBgYEsXbqUlStXlilr8+bNAXj11Vfp06cPBoOBFi1a2DYXr1SvXj08PT1ZuHAhjRs3xsfHh9q1a1O7du0i08bGxjJhwgTeeecd9Ho9ffr04dixY0ybNo3IyEiefPLJMuUsSc+ePencuTNTpkwhKyuLtm3bsnHjRj7//PMi07711lvcdtttdOrUiYcffpiYmBgyMjKIj49n6dKltn2IAwYMsI2tDQkJ4fjx48yZM4fo6GgaNGhQLrmL06NHD3r16sUzzzxDeno6HTt2ZOfOncyYMYNWrVoxcuTIMs+zWbNmAHz00Uf4+vpiMpmoU6dOuS0/V2vevDnr1q1j6dKlhIWF4evrS2xsLLfccgv9+/enRYsWBAQEsG/fPj7//HPat29/zTX2kvIHBQUxbdo0nnvuOUaNGsXw4cNJTk5m1qxZmEwmZsyYYZcJii7jPXr0wMPDg+HDhzNlyhRyc3N5//33SU1NLZJj5MiRjBgxgokTJzJkyBCOHz/Oa6+9VmT3ya5du3jssccYPXq0rSTnzZvH0KFDmTNnDk888cQNfb5lpsmhphKUNKxIKesRy6ioKNWgQQNVWFiolFJqx44d6h//+IeqWbOmcnd3V6Ghoap79+7qgw8+KDLPFStWqJEjRyp/f3/b0cBDhw7ZvcbevXtVjx49lK+vrwoICFDDhg1TJ06cKHJ0+NJRzqvH1ymlVF5enho/frwKCQlROp2uxHGYlyxatEg1atRIubu7OzwOs2HDhsrd3V0FBwerESNGlDgO82qjR49W0dHRRe6/Wlpamho3bpzy9/dXXl5eqkePHmr//v3FjsM8evSoGjdunAoPD1fu7u4qJCREdejQwW70wX/+8x/VoUMHFRwcrDw8PFRUVJS6//771bFjx66Z49JR8m+++cbu/pKWk+J+Lzk5OeqZZ55R0dHRyt3dXYWFhamHH364xHGYVyvuqO2cOXNUnTp1lMFgsBvlcKPLz6X3dWl5UUqpuLg41bFjR+Xl5WU3DnPq1Kmqbdu2tjHIdevWVU8++aQ6f/78NT7Ra+dXSqlPPvlEtWjRQnl4eCg/Pz81cOBA21CmS661jC9dulTddNNNymQyqfDwcPXPf/7TNvLiyiPzFotFvfbaa6pu3brKZDKptm3bqjVr1th93pmZmapRo0aqSZMmKisryy7DpEmTlLu7e5GRIBVNp1T1vszu/PnzGTt2LFu3brXtFxVCiOtRJfZhCiGEM5DCFEIIB1X7TXIhhCgvsoYphBAOksIUQg
gHSWEKIYSDpDCFEMJBUphCCOEgKUwhhHCQFKYQQjhIClMIIRwkhSmEEA6SwhRCCAdJYQohhIOkMIUQwkFSmEII4aAqcYkK4UIK8yE3DXIvgKUQ9G6gN1z8rxvoDJfvczOBu0nrxMKFSGGKypGbDsnxkHLE+t/U45CTerkcc9Ks/19Q8gW8iuXuDd5B4BUM3iHgGwo1wqFGbfALh6AG4B9ZAW9IuCI5H6YoX9kpcHIbnN1jLcbkw9afrLPaZTL5Qc0mUKvpxZ9mULMxGH1Lf64QV5DCFNdPKTi7F47/AQl/wsmtkHpU61QO0oF/FIS3hjqdoU4XCKqndSjh5KQwRdmkJcCBZXD0N2tR5qRonaj8+EVB3c5QpyvU7QI+NbVOJJyMFKYo3dn9sH8p7PsJEuO0TlN5ajaBet2h6WCIaKN1GuEEpDBFUUrBqe2XSzL5kNaJtBdYF5oNhebDIKSh1mmERqQwxWUZSbB9AWz/HC6c0DqN8wptYS3O5kOtR+OFy5DCFHBkPWybB/t/to59FI7R6SHmNrh5PDTqbx0bKqo1KUxXlZMKcV/Ctk9lk7s8+EVC23HQZgx4BWqdRlQQKUxXk3oMNvwHdn4DhTlap6l+3L2g5X3Q4REIiNE6jShnUpiuIu0E/PZ/ELcILAVap6n+dAZoMhA6PQWhzbROI8qJFGZ1d+EUbHgd/v4CzPlap3E9Oj00/wd0f16+olkNSGFWV+mJ1k3v7QvAnKd1GmEwwi0ToNPT4OmvdRpxnaQwq5v8bOsa5aZ3oTBX6zTiaiZ/62b6LQ+Cm1HrNKKMpDCrk93fw4ppkH5S6ySiNH5RcPs0aPEPrZOIMpDCrA6SD8NPT8LR9VonEWVVpwvc+bYcUa8ipDCrMnMB/D7Hugkum99Vl7s33D4d2k0AvVwEwZlJYVZVJ7fBkkfg3D6tk4jyEnkrDJwLwQ20TiJKIIVZ1Vgs8PsbsG62fI2xOnIzQdep0OEx+aqlE5LCrEoykuD7B6znohTVW+1WMGSenNTYyUhhVhUHV8DihyH7vNZJRGUx1oBB70Pj/lonERdJYTq7wnxYNRM2vwfIr8r16KDj49aDQrKJrjkpTGeWchS+GQ2JO7ROIrRWpzMM+S/4hGidxKVJYTqrE5vhf/dCdrLWSYSz8K0N/1gAkTdrncRlSWE6o93fwQ8Py3fARVF6d+g9G9o9oHUSlySF6Wx+ex3WvIjsrxTX1P4R6Pki6HRaJ3EpUpjOwlwIPz0Bf3+udRJRVTQbAoM+ADcPrZO4DClMZ5B7Ab4eBUfWaZ1EVDUxnWD4IjD6ap3EJUhhai3rPHx2J5zdo3USUVXVbg0jvpNrCVUCKUwtZafAZwPgzG6tk4iqLqQxjPwBaoRpnaRak8LUSnaKdc3yzC6tk4jqIiAGxvwMfhFaJ6m25FxSWshJhQUDpSxF+Uo9Bp/fBVkydreiSGFWtpw0WDAIknZqnURUR+cPwheDIS9D6yTVkhRmZcq9YF0DSIzTOomozhLjYNFwKJCTSpc3KczKUpALC4fB6e1aJxGu4NgG+HasdXyvKDdSmJVBKfhhAiRs0TqJcCUHlsGPj1iXP1EupDArw6oZsHeJ1imEK9qxCJY/q3WKakMKs4IdifsNNr6ldQzhyra8D9sXaJ2iWpDCrEBbjiTT65tMFtV+FmWQ7/sKDf38NJz6S+sUVZ4MXK8gCSnZDHx3IylZ+QCMrH2KWTmvoM+RMXJCIzUi4MH14B2sdZIqS9YwK0BugZkJn/9lK0uAz0+Hc496ibyAhhomEy4t/aT1yLnFrHWSKksKswK89PM+9iWmF7n/z7QadEl5juSwzhqkEgLrFUdXzdQ6RZUlhVnOVuxJ4vPNx0t8PCnPg1uPP8jeyOGVmEqIK/zxNuxZrHWKKkkKsxwlXshhynelf+WxwKKj76EBLAl/GqV3q4RkQlxlySRIPqx1iipHCrOcWCyKJ/4XR1p2gcPPefxwa14JfBFl9KvAZEIUIz8TFk
8Ei0XrJFWKFGY5eXdtPFuOppT5eR+ejGKM28sU+NWpgFRCXEPCZusYTeEwGVZUDnYkpDHk/T8otFz/RxnlmcvSmh/hd2ZzOSYTohRunvDwRgiqp3WSKkHWMG9QodnC1O933VBZApzIMXHryUnERw4pp2RCOKAwx7o/UzbNHSKFeYP+u/FosUOIrkeO2cAdh4bwa8TjKJ38akQlObEJtnygdYoqQf4qb8DJ1GzmrDpU7vN9MP4W5oT8G+XhU+7zFqJYq/8tR80dIIV5A2Ys2UN2fsV8a+KtE3V50OMVCmtEVsj8hbBTmANLHtE6hdOTwrxOv+xKZPX+sxX6GivOB9IrcyaZNdtU6OsIAcCJP2DXt1qncGpSmNchK6+QWUv3VsprHc725JbTT3AiYkClvJ5wcStnQEGO1imclhTmdfjotyMkpVfe9VKyCg10jh/Ob5EPo9BV2usKF5R+Eja+rXUKpyWFWUbJmXnM+/2oJq896lAnPqw1A+XupcnrCxex8S3ISNI6hVOSwiyjuWvjyczT7sJSrxxvyBNeL2P2CdMsg6jmCrJg/atap3BKUphlcDI1m4VbTmgdgyVnanJn3r/JDm6hdRRRXW1fIMOMiiGFWQZvrjxEfqFzfCNiT4Y37c88TWJ4L62jiOrIUmgdmynsSGE66OCZDH74+6TWMexcKHCjw5FR/Bl5v9ZRRHW0dwmc3ad1Cqcihemgt1cf4ga/Ll4hlNLxj0O3syDseZSbSes4olpR8MdcrUM4FSlMB5xMzeaX3c591HD60SZM9X0Zi1eI1lFEdbLra8g4o3UKpyGF6YD5G49hdsbVy6t8lRjKUPNL5AY21jqKqC7M+XJijitIYZYiM6+Qr7YmaB3DYdsv+NDp/FTO1e6udRRRXWz7L+RnaZ3CKUhhluJ/f54gQ8Nxl9fjXL47tx4dx46okVpHEdVBbhps/1zrFE5BCvMazBbF/D+OaR3jupiVnoEH+/B1+DMovbvWcURVt/k9uZ45UpjX9OueJE6mVu0TEUw5fBOz/F/C4hmodRRRlaUdh30/ap1Cc1KY17DoT+2/1VMe5p+O4D5eIt+/vtZRRFUmm+VSmCU5k57LxvjzWscoN5tS/eiW9i9SQztqHUVUVUfWQWbFngPW2UlhlmBJ3CmnHKh+I07lGrn1xMPsj7xb6yiiKlJmlz/BsBRmCb7ffkrrCBUiz6Kn96GBLA2fjNK7aR1HVDU7v9I6gaakMIuxLzGd/UkZWseoUI8ebstrQS+gjDW0jiKqksQ4OHdQ6xSakcIsxg9/V8+1y6u9nxDNWMNsCvxitI4iqhIXXsuUwryKxaJYEucahQmwLiWAHunTSa91i9ZRRFWx6xtQ1WwHv4OkMK/yd0IaZ9LztI5RqY7lmLjl5CMcibhL6yiiKkg7DglbtE6hCSnMq6w/4JrDJnLMBrrHD2N15KMonSwWohQHl2udQBPyl3GVtQfOaR1BU/cfas/cmrNQHt5aRxHO7PAarRNoQgrzCucy8th9+oLWMTT3n+P1mGicTaFvuNZRhLNK3AlZyVqnqHRSmFdYf/Ccq+7LLuKXc8H0zf43mSGttI4inJKCo+u0DlHppDCvsM5F91+W5GCWJ+0TnyQhop/WUYQzcsHNcinMi8wWxYZD1ee74+Ulo9CNTvH3sTHyQRQ6reMIZ3J4ndYJKp0U5kX7k9K5kFOgdQyndd+hLswLnY5y89Q6inAW6Sfh/CGtU1QqKcyLdiTIwZ7SvHgslqe8Z2P2DtU6inAWLrZZLoV5UVxCqtYRqoTvz9RkUP4L5AQ30zqKcAYnNmmdoFJJYV4ka5iO25XhTYczU0gK76F1FKG1039rnaBSlbkwf/vtNwYMGEDt2rXR6XQsXrzY7vEzZ84wZswYateujZeXF7179+bQIfv9HF27dkWn09n93HPPPbbHjx07xv3330+dOnXw9PSkXr16zJgxg/z8fLv5PP7447Rp0waj0UjLli3L+lZssvIKOXS2ep
+dqLylFrjR/sgYtkWN0zqK0FLqMchxna2zMhdmVlYWN910E3Pnzi3ymFKKQYMGceTIEZYsWcLff/9NdHQ0d9xxB1lZ9pfpfOCBB0hMTLT9fPjhh7bH9u/fj8Vi4cMPP2TPnj28+eabfPDBBzz33HNFXm/cuHHcffeNnRB358kL1e5kwZVBKR1DD97BwtrPoQxGreMIrSTu0DpBpSnzGWT79OlDnz59in3s0KFDbN68md27d9O0aVMA3nvvPWrWrMmiRYsYP368bVovLy9CQ4s/eNC7d2969+5tu123bl0OHDjA+++/z+uvv267/+233wbg3Llz7Ny5s6xvxSYuIe26nyvgX0easTfsJV7IfQV9jgzNcjmJO6FuV61TVIpy3YeZl2c9y4/JZLLdZzAY8PDw4Pfff7ebduHChQQHB9O0aVOefvppMjKuvUl84cIFAgMr5sqHexPTK2S+rmRhYm3+YXmJvMBYraOIynZ2n9YJKk25FmajRo2Ijo7m2WefJTU1lfz8fF555RWSkpJITEy0TXffffexaNEi1q1bx7Rp0/juu+8YPHhwifM9fPgw77zzDg899FB5xrU5ci6zQubrarZd8KVT8nMkh3XROoqoTGf3ap2g0pTrRV3c3d357rvvuP/++wkMDMRgMHDHHXcU2YR/4IEHbP/frFkzGjRoQNu2bdm+fTutW7e2m/b06dP07t2bYcOG2W3SlxelFEfPZ5U+oXDI2Tx32h17gMX1I2iesFDrOKIynD8IFgvoq/+gm3J/h23atCEuLo60tDQSExNZvnw5ycnJ1KlTp8TntG7dGnd39yJH00+fPk23bt1o3749H330UXlHBSApPZfsfHOFzNtVmZWeAYf68X34P1F6d63jiIpWkA0XTmidolJU2D8Jfn5+hISEcOjQIbZt28bAgQNLnHbPnj0UFBQQFhZmu+/UqVN07dqV1q1b8+mnn6KvoH+9jidnV8h8BUw+3IoXA17EYgrQOoqoaOmntU5QKcq8SZ6ZmUl8fLzt9tGjR4mLiyMwMJCoqCi++eYbQkJCiIqKYteuXTz++OMMGjSInj17Atb9kQsXLqRv374EBwezd+9ennrqKVq1akXHjh0B65pl165diYqK4vXXX+fcucsn9b3yyHp8fDyZmZkkJSWRk5NDXFwcAE2aNMHDw8Oh95OQon1h5ibsJn3Ld+SfOYw5M4WQu/6FV8P2tsePv9q/2Of5dx2L3y1DSpxv+tYlZMQtw5x+Dr1nDbxiOxLQZTQ6N+tnk7lnLWnrP0MV5OLToicB3S6PqSy8cIYzX00jbPQc9Eav635v805FciDwZf7r/zoeaYevez7CyWUklj5NNVDmwty2bRvdunWz3Z48eTIAo0ePZv78+SQmJjJ58mTOnDlDWFgYo0aNYtq0abbpPTw8WL16NW+99RaZmZlERkbSr18/ZsyYgcFgAGDFihXEx8cTHx9PRESE3eurK05YOX78eNavX2+73aqV9dyNR48eJSYmxqH3k5CaU7YPoAKo/Fzca9bFp3kPzi1+ucjjEZM+t7udc2Qbyb+8jVdsxxLnmblnLanr5xPc93GM4Y0pSDlF8rI5AATe/gDm7AukLH+HoL5P4OYfytlvZ2GMao5XvZsBSP71PQK6jLmhsrzk9xQ/upme5+fQj/FP+uOG5yecULoUZrG6du1qV1pXe+yxx3jsscdKfDwyMtKu5IozZswYxowZU2qWdevWlTpNaU45QWF61muLZ722JT5u8LHfpM2O34Ipujnu/iWfBCP/9H5MEY3xbtIVADe/Wng17kx+ovWa0oVpSeiMXng37gyAKaoFBedPQL2bydq7Dp3BDa/YDjf4zi47lWvklhMTWVovnIYJ35TbfIWTcJE1zOp/WKsUqdn5pU/kRMxZqeQc3opPi57XnM4Y3oS8pMPknT4AQEFaEjmHt+F5cQ3SLTAcVZBn3Q2Qk0F+4kE8QmIw52SQtmEhgT3KfwhXnkVPz0N38UvEEy
idodznLzSUkaR1gkpRrsOKqqK0KlaYmbtXo/fwxKvhtdf+vJt0wZyTTtLCZwAFFjM+rfrid+swAAwmH4L7Pcn5n95AFebj3aw7nnXbcH7ZHHzb9KfwwhnOfvcCWArx63gv3o1uK7f38HB8Ox6JfIGn0mejy5Pv8FcLLrKG6fKFWdVOGpy5cxXeTbraDtyUJPfETi5s+orAng9jrB1LYeppUlZ9TJr3Ivw7DgfAq2EHu+LNPbGTgnPHCezxEKc/mkDwgH9i8A4gccFkTJHNMHj7l9v7mJsQw76g2XxofA23dNcYklKtuUhhuvwmeVUqzNyE3RSmnMTnpmtvjgOkbfgCn6bd8b2pFx4hMXg17IB/l1Gkb/4WpSxFpleFBaSseJ/AXpMoTE1EWcyYoprjHhSBe2A4eYkHyv39rE4OpGfmTDJqlrz/VlQRLnLQRwqzChVm5s6VeITWx6Nm3VKnVQV5oLO/Bo9OpwcUxV0aM+2P/2Gq2wZjaH1QFrBcHsyvLIXWb3JUgCPZJm499QTHIkoepyuqgMIcMBdqnaLCuXRhZuYVUmDW/rxulvwc8s8cIf/MEcA6BjL/zBEK0y9fxdKSl032gd9LPNhz/qf/kLp+vu22Z/12ZPy9jKy9660HfI7+TdqGL/Csfws6vf0Bl/xzx8ne/xv+t40AwC0wAnR6MnasIPvwVgqST+IR1qCc3/VlWWY9XePvZm3kRJTOpRfJqs1SdVY+rpdL78N0lrXL/KRDnFl0+VyfqWs+AcC72e0E93sSgKx9v4GyHswpTmH6ObiibPw63APoSNvwBebMZPSefnjWb0dA55F2z1NKkfLrXAK6P4Dew3qWKb27kaC+T5Cy8n2UuYDAHg/h5htcnm+5WGMP3cbT0aFMSnkVXYF8v7/KMeeDe/W+SJ5OXWtQZTV39HwW3V5fp3UMcZX+IeeZw6u4ZZzSOoooi38eBu+K/4dVS7L9I5zOT+eC6Z8zi6yQllpHEWVhdo4ttookhSmc0v5ML25NnMyp8OLP7i+ckLlqjWm+Hi5dmLrSJxEayih0Y2zSUPL962sdRTjCUv2Pkrv0QR/hvHoEp/Cs/2rqJC5Dl5andRzhCBdYw5TCFE7l4chjPGD4hcCkDeAkVw4ptChmrstj4a4CkjIVYT46xrT04PnOHuh1OgrMiufX5LEsvpAjqRb8jDruqOvGK3cYqe1b8kbcx3/ls2BnAbvPWse8tgkz8PLtJtqFXx72tXBnAVNX55KVr7i/lQf/1/Py9bKOpVno+Xk22yZ4U8PoBNtLxXwhorqRwhSa83YzMyNqNwNzF2M8V/7fKLpRr/6ezwfbCvhskImmNQ1sO21m7JIc/Izw+K1Gsgtge5KZaZ2N3FRLT2qu4onledy5KJttE3xKnO+644UMb+ZOh0gTJjd4bWM+PT/PYs9EH8Jr6DmfbWH80hzmD/SkboCefl9m0zXGQL+G1rPYP/xzDq/cYXSOsgTwKPm9VhcuXZjubi69C1dzdb1yeSF8C+2Tv0d/+lzpT9DIppNmBsa62Yoqxl/Pot0FbEu0rlH5mXSsHOlt95x3+uho90kWJy5YiPIrfjlbONj+XKMfDzDx7d4CVh8tZNRNHhxJVfgZddzdzPq63eoY2HvOQr+G8OWuAjwMOgY3dqJLgJhqaJ2gwrl0Y/h5OtHC5kK6BqayssH3rNZPomPCh+iznbcsAW6LMrD6aCEHk62bzjuSzPx+wkzf+iWvb1zIU+gAf5Pja3/ZBVBggUBP63MaBOrJLlD8nWgmJUex9ZSZFrUMpOQopq/NZW4fUylzrGTG6l+YLr2G6WN0w92gc4qvR7qC8REJPOS+jKDE39BlV53P/JmOHlzIVTSam4VBD2YLvNTdyPDmxf+Dm1uomLoql3ubu5dpc3nqqlzCfa37PwECPHV8NsiTUYtzyClQjLrJnV713Ri3JIdH23lwNM3Cnf/LpsAMM7saGdpEwxUADx/QV/9znLp0YYJ1LfN8ZvU/uq
cVT4OZaVF7uSv/RzzP79E6znX5ak8hX+wq4MshnjQN0ROXZOaJX/Oo7atjdEv70+wVmBX3fJuDRcF7/RxfA3xtYx6Ldhewbow3JrfLJXtXY3fuumKze92xQnadNTO3r4n6b2eyaIgnoT7Wzf/O0QZqemu00egCa5cghSmFWUGiPHN5MWIrHVN+wJBYtc/G/c+VuUztaOSei/sSm9cycPyCYvbv+XaFWWBW/OPbHI6mWVgzysvhtcvX/8jj5Q15rBrlTYtaJa+l5RUqJv6cyxeDPYlPsVBogS4x1j/hhkF6tpw0MyBWo8J0gf2XIIWJv5cHICd6KC+3BV5gWtA6GiYtRZeg/RU5y0N2Aeiv6j6DDixX7FW4VJaHki2sHe1FkJdjxfV/G/N4cUMev47wom3ta2/SvvBbHn3qu9E6zMDfiWYKrwhQYAZN9yzJGqZr8JcDP+VidO2TTDItJyRxHbqE6jUeb0BDN17akEeUn46mNa1l9cbmfMa1tC47hRbF0G9y2J5o5qfhXpgVJGVaP4NATx0eBmvbjvohh3BfHbPvsG6qv7Yxj2lr8/hysCcx/nrbc3w8dPh42Df0nrNmvtpTSNyD1qPxjYL16HU65m3PJ9RHx/7zFm4upXArlKxhuoZAb8euXy6KMuot/Ct6H0MLfsTr/C6t41SYd/qYmLY2j4nLcjmbpajtq+PBNu5M72IE4GS64scD1q8FtvzQfmtl7Wgvul7cbD5xwYL+ilPwvbc1n3wzDP3G/sqlM7p4MLPr5f2fSikm/JTLm72MeF8sUk93HfMHmZi0LJe8Qpjb10R4DQ0HvfiUfAXTa/njjz/o1KkTPXr0YPny5WV67syZM1m8eDFxcXHX9drXw6VP7wbw1qpDvLnqoNYxqpRwUx4vRG6jS+oPGDJPax1HOIOuz0HXZ8r8tPHjx+Pj48Mnn3zC3r17iYqKcvi5jhZmQUEB7u7lsyXp0uMwAWKCvUqfSABwi386yxos5XePR+ie8K6UpbjM3/GiuyQrK4uvv/6ahx9+mP79+zN//nzbY/Pnz8ff399u+sWLF6O7eNmV+fPnM2vWLHbs2IFOp0On09mer9Pp+OCDDxg4cCDe3t68+OKLACxdupQ2bdpgMpmoW7cus2bNorCwbCcMcflN8ugg79IncnH3hp3mMa9fqXV6dbXbPynKSUB0mZ/y1VdfERsbS2xsLCNGjODRRx9l2rRptlK8lrvvvpvdu3ezfPlyVq1aBYCfn5/t8RkzZjB79mzefPNNDAYDv/76KyNGjODtt9+mU6dOHD58mAkTJtimdZTLF2ZMkKxhFsddr5gavZ97CpfifS4OUrVOJJxaQJ0yP2XevHmMGGG9jlTv3r3JzMxk9erV3HHHHaU+19PTEx8fH9zc3AgNLbr/9N5772XcuHG22yNHjmTq1KmMHj0agLp16/LCCy8wZcoUKcyy8PfywM/T3Wmu76O1UGM+L0T+RbcLP+CWeFLrOKIq8PCFGmFlesqBAwf4888/+f777wFwc3Pj7rvv5r///a9DhVmatm3tL938119/sXXrVl566SXbfWazmdzcXLKzs/HycmzFyeULE6xrmTtOXtA6hqZa+2Uwq+ZvNDv7I7qTGVrHEVVJUL0yP2XevHkUFhYSHh5uu08phbu7O6mpqej1eq4+Hl1Q4PhKjbe3/a42i8XCrFmzGDx4cJFpTSbHv5ElhQnEBHu7bGEOC03iSe8VhJ1eiS7BXPoThLhacMMyTV5YWMiCBQv4z3/+Q8+e9peNHjJkCAsXLqRevXpkZGSQlZVlK7+rj4Z7eHhgNju2zLZu3ZoDBw5Qv/6Nnb1fChNoHFaDJXGuc8TXoLPwTHQ895p/xOfcdkjTOpGo0kJiyzT5Tz/9RGpqKvfff7/dgRqAoUOHMm/ePFavXo2XlxfPPfccjz76KH/++afdUXSAmJgYjh49SlxcHBEREfj6+mI0Got9zenTp9O/f38iIyMZNmwYer2enTt3smvXLt
tRdEe4/LAigJsi/LWOUClCPAp4v/4W9of8iwlJM61lKcSNiri5TJPPmzePO+64o0hZgnUNMy4ujmPHjvHFF1+wbNkymjdvzqJFi5g5c2aRaXv37k23bt0ICQlh0aJFJb5mr169+Omnn1i5ciU333wzt956K2+88QbR0WU7uu/yA9cBsvIKaT7zV7vvBlcnLWtkMqvWBlqcXYIuL13rOKI60Rlg6gkwVv+zrYNskgPgbXSjQU1fDpypXgc7BtU6y9O+Kwk//Su6hOp/RT+hgVpNXKYsQQrT5qZIv2pRmAadhclRhxmpfqLG2a3gmseyRGWJaKd1gkolhXlRy8gAvt5WdccdBrgX8kJ0HL0yfsD9zFGt4whXESmF6ZJuiiy6A7oqaOqbxb9DN9Lq3GL0J9O0jiNcjRSma2oUWqNKfeOnf8h5pvitJPL0cnQJVSOzqGa8giGwrtYpKpUU5kUGvY5ODYL5aWei1lFKpNMpnog8yhjdT/id2QxVf5erqMoib9E6QaWTwrxC19iaTlmYfu6FzIraSd+sH/A4e1jrOEJYNeihdYJKJ4V5ha6xIeh04CwjUxv5ZPNC2B+0Ob8Y/akUreMIcQUdxPbVOkSlk8K8QrCPkWa1/dh1StuxOL1DknnGbzUxib+gS8jTNIsQxYpoC761tE5R6aQwr9ItNkSTwtTpFI9EHGOcYRkBSRtl/6Rwbi64dglSmEV0ia3J22viK+31fN0KmRG1mwE5izGek2sLiSqiUT+tE2hCCvMqrSL9qelr5GxGxW4K1/PK4cXwzdyS/AP60+cr9LWEKFdB9ct8hqLqQgrzKnq9joEta/Pxhor5tsztQSk8F7CGuknL0CXkVshrCFGhXHRzHKQwizW4dUS5F+ZDEcd5wP0XAhM3oMtyksPwQlwPF90cBynMYjUOq0GjUF/2J93YkRdvg4Xp0bsZmLsY0/n95ZROCA0F1nXJAeuXSGGWYHDrcF5edn0lF+OZy4sRW2if8gOG02fLOZkQGmo9Ghy4DG51JScQLsHZ9Fzav7IGcxnOKtwlKJV/Ba6jQeJP6ApzKjCdEBoweMDkfeAdrHUSzcgaZglq1jDRoV4QGw6VfgR7fHgCD3n8QlDietk/KaqvRv1duixBCvOa7rk5qsTC9DSYeT56H4PzfsQzeXclJxNCA23Hap1Ac1KY19CraS3C/EwkXrg8/CfClMdLkX9yW8oPGE4naZhOiEoUWA/qdNY6hebkqpHX4GbQM+JW61XlOgZcYHmDJWxwf4QuCe9jyJKyFC6kzRitEzgFOehTitSsfHL/N5bQk8vQKYvWcYSofAbjxYM9QVon0ZysYZYiwNuDsNAwKUvhulreK2V5kRSmIzo8Yr3+shCuRu8Gtz2pdQqnIYXpiIAYaDpI6xRCVL7mwyAgWusUTkMK01G3PQm47jcchAvS6eG2yVqncCpSmI4KbW7911YIV9FsKIQ01DqFU5HCLIvbp1mPGApR3endoOtUrVM4HSnMsvCPgnYPaJ1CiIp303AIqqd1CqcjhVlWnZ8Gk7/WKYSoOG6e0OUZrVM4JSnMsvIMgE5PaZ1CiIrT+Snwj9Q6hVOSwrwetzwIflFapxCi/AXWgw6Pa53CaUlhXg83o/UAkBDVTd//AzcPrVM4LSnM69V8GMR00jqFEOWn8Z1Q/3atUzg1KczrpdPBne+Au5fWSYS4ce7e0Hu21imcnhTmjQisA7dP1zqFEDeuyz/BL0LrFE5PCvNGtXsQIm/VOoUQ1y84Fto/onWKKkEK80bp9TBwLriZtE4iRNkZPGDIx2Bw1zpJlSCFWR6CG8jXyETVdPt0CLtJ6xRVhhRmeenwGNRupXUKIRxXr7tsipeRFGZ50Rtg8CdgrKF1EiFK5xUMgz6wjvYQDpPCLE/B9WHQe1qnEKJ0g94D31pap6hypDDLW+MB0FG+WiacWLsJ0LCX1imqJLlqZEWwmGHBQDi2QeskQtir1RzGrwJ3GdVxPWQNsyLoDT
D0U/CtrXUSIS7zqQX3/k/K8gZIYVYUnxD4x2egl/Ftwgm4ecLwRfJtnhskhVmRItvJ93OFE9DB4A8hvI3WQao8KcyK1u4BuHWi1imEK7t9GjQZqHWKakEKszL0ehmaDtY6hXBFLe+TKwSUIzlKXlkK8+GLwXLkXFSemE4w8gf5nng5kjXMyuLmYd3pLl+fFJUhtDnc/bmUZTmTwqxMRl8Y8T2ENNY6iajOajaBkUusF+wT5UoKs7J5BcKoxRAQo3USUR0FNYBRS8A7SOsk1ZIUphZ8Q2HMz9aFW4jyEtQARi8Fn5paJ6m25KCPlrLOWw8EJe7QOomo6kIaw+gfpSwrmBSm1nLTYdFwOP671klEVRXa3LrPUjbDK5xskmvNVANGfAcN+2idRFRFMZ2sm+FSlpVCCtMZuJvg7i+gxT1aJxFVScsR1nGWcjS80sgmuTNRCpY/C1ve1zqJcGo667V4Ok3WOojLkcJ0Rn/Nh2VTwJyndRLhbNw84a4PoOkgrZO4JClMZ3XyL/h6JKSf0jqJcBY+teCeRRAhZx3SihSmM8s6D9+Mke+fC+uZ0ocvAv9IrZO4NClMZ2cxw8rpsGmu1kmEVto9CD1fADej1klcnhRmVbH7e/jxUcjP1DqJqCxewdarO8oFy5yGFGZVcv4QLJ4IJ//UOomoaPW6W68bLpfCdSpSmFWNxQKb34U1L0FhjtZpRHkzeFiHDLV/BHQ6rdOIq0hhVlXn42HJJEjYrHUSUV6CY2HwR1C7pdZJRAmkMKsyi8U6yH31C7K2WZW5e0OXKdB+kpzw18lJYVYHyYdhySNw4g+tk4iyanyn9cqicvnbKkEKszrZ+Q2smgnpJ7VOIkoTWA/6vgb179A6iSgDKczqpiAHNr4NG+dAQbbWacTV3DytV3Hs+JiMq6yCpDCrq4wkWPcK/P05WAq1TiP0btDyXug8Rb6tU4VJYVZ35+NhzQuwdwkgv+pKp9NDs6HQdSoE1dM6jbhBUpiu4uw++GMu7PoazPlap6n+9G7Wouz0FIQ01DqNKCdSmK4mIwm2fAjb/gu5aVqnqX4MRmg5HG57Uq4MWg1JYbqq/CzY/jlsfg/SjmudpuoLqg+tR0PL++RyEdWYFKars5hh34+wfQEcWQ/KrHWiqsPgAY36Q9ux1mvryFcZqz0pTHFZxhnY/S3s/BoS47RO47wC61rXJluNAO9grdOISiSFKYp37iDs/Mp6kCjthNZptBdUHxr1g9h+ENlO1iZdlBSmuDalIGELHFgGh9dC0i5cY3iSDsLbWEuyUT8IidU6kHACUpiibLLOw5F1cGQtHF5Xvb6G6R0CEe2gwR0Q2xd8Q7VOJJyMFKa4MecPWdc8j2+07vdMPaZ1IsfoDFCribUgIy/+BNbVOpVwclKYonzlpELiDkjcaR0sf3YvnD+o7ffaPXys+yCDG1o3rSNutm5uG320yySqJClMUfEsFrhwAtJPW38ykiAj8eJ/kyDj4n1lLVV3bzD5gakGGGtY/+tdEwKiwT8a/KMgsA7UqF0x70u4HClM4TzMhdavbVoKwHzpJ9968hBzvnXMqNHHWo7GGmBw0zqxcDFSmEII4SC91gGEEKKqkMIUQggHSWEKIYSDpDCFEMJBUphCCOEgKUwhhHCQFKYQQjhIClMIIRwkhSmEEA6SwhRCCAdJYQohhIOkMIUQwkFSmEII4SApTCGEcJAUphBCOEgKUwghHCSFKYQQDpLCFEIIB/0/tjE/E8JiR/kAAAAASUVORK5CYII=",
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Outlier to remove (which customer it is depends on the descriptive statistics!)\n",
"outlier_detection(directory_path=\"10\") # set 2 to get the first non-anonymous customer (presumably via the `coupure` argument -- TODO confirm)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f08c082e-f76f-41f3-9530-3e6700eb74d9",
"metadata": {},
"outputs": [],
"source": [
"# boucle pour identifier les outliers de chaque compagnie (et le client principal non anonyme)\n",
"\n",
"# nb_compagnie=['10','11','12','13','14']\n",
"for company_number in nb_compagnie :\n",
" print(f\"outlier for tenant {company_number}\")\n",
" outlier_detection(directory_path=company_number, coupure = 2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbe1af6a-79e9-45c7-a810-c6df3bf647f7",
"metadata": {},
"outputs": [],
"source": [
"# print(products_purchased_reduced_spectacle.loc[products_purchased_reduced_spectacle[\"number_compagny\"]==10][\"total_amount\"].describe())\n",
"\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==10) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==19521)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20e2b8a2-f31c-42a4-8ea5-7ad67ab66915",
"metadata": {},
"outputs": [],
"source": [
"# company 11 \n",
"# etrange, pas de vente sur internet, et un seul supplier. Plus de 9k achats\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==11) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==36)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5dbce57c-d091-4ce2-92f9-1201deb2462e",
"metadata": {},
"outputs": [],
"source": [
"# company 12\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==12) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==1706757)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a243b57-19da-4e29-a53d-bb8d03e2ab77",
"metadata": {},
"outputs": [],
"source": [
"# company 13\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==13) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==8422)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d9b01bc-9584-4882-bd06-7de8acb8a88f",
"metadata": {},
"outputs": [],
"source": [
"# company 14\n",
"# a-t-on vrmt un outlier ? A acheté quasi 3k tickets, pr 96 achats\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==14) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==6354)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "033c1e00-52bd-4651-b893-57bda531760e",
"metadata": {},
"outputs": [],
"source": [
"# verifs dans les tables customerplus (outlier incertain pr 11 et 14)\n",
"\n",
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==36) &\n",
"(customerplus_clean_spectacle[\"number_compagny\"]==11)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "28ac8cda-32fa-4fb7-a75b-e1cc24871c39",
"metadata": {},
"outputs": [],
"source": [
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==6354) &\n",
"(customerplus_clean_spectacle[\"number_compagny\"]==14)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3faea297-2cc5-4704-af85-77d95f600cc1",
"metadata": {},
"outputs": [],
"source": [
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==8422) &\n",
"(customerplus_clean_spectacle[\"number_compagny\"]==13)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b165ea79-347b-46fb-8217-635d9e888c65",
"metadata": {},
"outputs": [],
"source": [
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==19521) &\n",
"(customerplus_clean_spectacle[\"number_compagny\"]==10)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "282b0a96-5e78-48aa-9c2c-7d00d3907add",
"metadata": {},
"outputs": [],
"source": [
"customerplus_clean_spectacle.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4918db6e-249b-412e-b646-9a6686989b79",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e866edce-f4bc-4627-89d3-3ec7d9ef26e3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "42f8171c-e80d-4faa-b278-21fcbe3b242c",
"metadata": {},
"source": [
"### 1. customerplus_clean"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "47f98721-53dd-4f8f-85ac-88043ee8d967",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>18441</td>\n",
" <td>11139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9231</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9870</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity tenant_id \\\n",
"0 821538 139 NaN NaN 0 875 \n",
"1 809126 1063 NaN NaN 0 875 \n",
"2 11005 1063 NaN NaN 0 875 \n",
"3 17663 12731 NaN NaN 0 875 \n",
"4 38100 12395 NaN NaN 0 875 \n",
"5 307036 139 NaN NaN 0 875 \n",
"6 2946 1063 NaN NaN 0 875 \n",
"7 18441 11139 NaN NaN 0 875 \n",
"8 9231 139 NaN NaN 0 875 \n",
"9 9870 139 NaN NaN 0 875 \n",
"\n",
" is_partner deleted_at gender is_email_true ... purchase_count \\\n",
"0 False NaN 2 True ... 0 \n",
"1 False NaN 2 True ... 0 \n",
"2 False NaN 2 False ... 14 \n",
"3 False NaN 0 False ... 1 \n",
"4 False NaN 0 True ... 1 \n",
"5 False NaN 2 True ... 1 \n",
"6 False NaN 2 False ... 8 \n",
"7 False NaN 2 False ... 3 \n",
"8 False NaN 0 True ... 1 \n",
"9 False NaN 2 True ... 1 \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"0 NaN NaN other 0 0 \n",
"1 NaN fr other 0 0 \n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"5 NaN NaN other 0 0 \n",
"6 NaN fr other 0 0 \n",
"7 NaN fr other 0 0 \n",
"8 NaN NaN female 1 0 \n",
"9 NaN NaN other 0 0 \n",
"\n",
" gender_other country_fr has_tags number_compagny \n",
"0 1 NaN 0 10 \n",
"1 1 1.0 0 10 \n",
"2 1 1.0 0 10 \n",
"3 0 1.0 0 10 \n",
"4 0 1.0 0 10 \n",
"5 1 NaN 0 10 \n",
"6 1 1.0 0 10 \n",
"7 1 1.0 0 10 \n",
"8 0 NaN 0 10 \n",
"9 1 NaN 0 10 \n",
"\n",
"[10 rows x 29 columns]"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview of the table (first 10 rows)\n",
"customerplus_clean_spectacle.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "738e063b-f84e-4a00-b35d-6d1d657e3c09",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 1523688\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"street_id 0\n",
"structure_id 1460624\n",
"mcp_contact_id 729167\n",
"fidelity 0\n",
"tenant_id 0\n",
"is_partner 0\n",
"deleted_at 1523688\n",
"gender 0\n",
"is_email_true 0\n",
"opt_in 0\n",
"last_buying_date 762879\n",
"max_price 762879\n",
"ticket_sum 0\n",
"average_price 667328\n",
"average_purchase_delay 762915\n",
"average_price_basket 762915\n",
"average_ticket_basket 762915\n",
"total_price 95551\n",
"purchase_count 0\n",
"first_buying_date 762879\n",
"country 429486\n",
"gender_label 0\n",
"gender_female 0\n",
"gender_male 0\n",
"gender_other 0\n",
"country_fr 429486\n",
"has_tags 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of NaN values per column, printed after the total number of rows for comparison\n",
"print(\"Nombre de lignes de la table : \",customerplus_clean_spectacle.shape[0])\n",
"customerplus_clean_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 148,
"id": "296e51c5-30ae-4ade-ba3d-4ba4981a8758",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>customer_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>45264</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>35313</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>216105</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>388731</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>101642</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny customer_id\n",
"0 10 45264\n",
"1 11 35313\n",
"2 12 216105\n",
"3 13 388731\n",
"4 14 101642"
]
},
"execution_count": 148,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of customers per company (only customers with at least one purchase, not those merely targeted by a campaign)\n",
"# Note: the number of customers varies a lot between companies: from 35k to 389k\n",
"company_nb_clients = customerplus_clean_spectacle[customerplus_clean_spectacle[\"purchase_count\"]>0].groupby(\"number_compagny\")[\"customer_id\"].count().reset_index()\n",
"company_nb_clients"
]
},
{
"cell_type": "code",
"execution_count": 151,
"id": "5845aedf-78ca-4d3d-ad61-3561d4fc1886",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar plot of the number of purchasing customers per company (counts divided by 1000)\n",
"plt.bar(company_nb_clients[\"number_compagny\"], company_nb_clients[\"customer_id\"]/1000)\n",
"\n",
"# Add the title and axis labels\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Nombre de clients (milliers)\")\n",
"plt.title(\"Nombre de clients de chaque compagnie de spectacle\")\n",
"\n",
"# Display the bar plot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "884a33d0-c275-4ab4-ab1f-8b53e563fb95",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" number_compagny already_purchased customer_id\n",
"0 10 True 45264\n",
"1 11 True 35313\n",
"2 12 True 216105\n",
"3 13 True 388731\n",
"4 14 True 101642\n",
" number_compagny already_purchased customer_id\n",
"0 10 False 53530\n",
"1 11 False 35994\n",
"2 12 False 26620\n",
"3 13 False 379005\n",
"4 14 False 241484\n"
]
}
],
"source": [
"# New bar plot for customers: look at the total size of the customer base and split customers who purchased / did not purchase\n",
"\n",
"# Purchase flag: True when the customer has at least one purchase\n",
"# (this adds a column to customerplus_clean_spectacle in place)\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"]>0\n",
"\n",
"# Customer counts per company, computed separately for purchasers and non-purchasers\n",
"nb_customers_purchasing_spectacle = customerplus_clean_spectacle[customerplus_clean_spectacle[\"already_purchased\"]].groupby([\"number_compagny\",\"already_purchased\"])[\"customer_id\"].count().reset_index()\n",
"nb_customers_no_purchase_spectacle = customerplus_clean_spectacle[~customerplus_clean_spectacle[\"already_purchased\"]].groupby([\"number_compagny\",\"already_purchased\"])[\"customer_id\"].count().reset_index()\n",
"\n",
"print(nb_customers_purchasing_spectacle)\n",
"print(nb_customers_no_purchase_spectacle)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "41c9fb5a-708b-4f85-9918-00337151f155",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(nb_customers_purchasing_spectacle[\"number_compagny\"], nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"has purchased\")\n",
"plt.bar(nb_customers_no_purchase_spectacle[\"number_compagny\"], nb_customers_no_purchase_spectacle[\"customer_id\"]/1000, \n",
" bottom = nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"has not purchased\")\n",
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Nombre de clients (en milliers)\")\n",
"plt.title(\"Nombre de clients ayant acheté ou été ciblés par des mails pour les compagnies de spectacle\")\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 152,
"id": "fd11c547-7128-4ef6-ad7b-4b7c2a30cd9e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>max_price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>13823.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>5000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>3180.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>456.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny max_price\n",
"0 10 13823.0\n",
"1 11 108.0\n",
"2 12 5000.0\n",
"3 13 3180.0\n",
"4 14 456.0"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Maximum price paid by a customer for each company - highly variable: from 108 to 13823\n",
"\n",
"company_max_price = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"max_price\"].max().reset_index()\n",
"company_max_price"
]
},
{
"cell_type": "code",
"execution_count": 153,
"id": "b8f8f162-4153-4cfe-bfaa-d981d414510d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAAHGCAYAAAC7NbWGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABc0UlEQVR4nO3dd1gUV/828HulCQgrRcBVBCyxgT0iGINGURNKNBoLilhiLyFifRJjSWKPGOUxamKJJWJMwMdYUGxEI1hQYkMTE6yIGMFFLIDLef/wx7yuC8joIgven+vaS/fMmZnvDLvLzZmyCiGEABERERGVWKWyLoCIiIiovGGAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiekH/+c9/4OjoiL///rusSyGiV4wBSk/Wrl0LhUIhPYyNjVGzZk0MGjQIN27cKNEyBg4cCFdX19IttAwV7KPLly+/8nVfvnwZCoUCa9eufaH5Dx48CIVCgYMHD+q1rrKwc+dOzJgxo8zWX/A6OHHiRJnVoA+7du1CREQEtm/fjjp16pR1OVSKFArFK3nPuLq6YuDAgaW+nrL24MEDzJgx45V8npbmz44BSs/WrFmD+Ph4xMbGYujQodi0aRPatWuH+/fvP3feadOmITo6+hVUWTb8/PwQHx+P6tWrl3Upr7WdO3di5syZZV1GuXbt2jUMGjQImzdvxptvvlnW5VApi4+Px0cffVTWZVQYDx48wMyZM8v9H6TGZV1ARePu7o5WrVoBADp06ACNRoMvvvgCW7duRb9+/Qqd58GDB7CwsKjwf8VWq1YN1apVK+syiCQF7z25nJ2dkZaWVgoVvbwX3SYqWps2bcq6BDJAHIEqZQVvvCtXrgB4cpiuSpUqOHPmDDp37gwrKyt07NhRmvb0IbzIyEgoFApERERoLXP69OkwMjJCbGxsset2dXWFv78/tm/fjubNm8Pc3BwNGzbE9u3bATw5lNKwYUNYWlqidevWOodUTpw4gT59+sDV1RXm5uZwdXVF3759pW0BACEE3nvvPdjZ2eHq1atS+4MHD9C4cWM0bNhQGn0r7BBe+/bt4e7ujvj4eHh7e0vrWbNmDQBgx44daNGiBSwsLODh4YGYmBitGi9duoRBgwahXr16sLCwQI0aNRAQEIAzZ84Uu2+Kc+HCBXTt2hUWFhawt7fHiBEjcO/evUL77t27Fx07doS1tTUsLCzQtm1b7Nu3r9jl3759G6amppg2bVqh61YoFFiyZInUlpaWhuHDh6NmzZowNTWFm5sbZs6cicePH0t9Cg5RLly4EIsWLYKbmxuqVKkCLy8vJCQkSP0GDhyI//73vwCgdci54GcihMCyZcvQrFkzmJubw8bGBj179sQ///xTon13+PBhdOzYEVZWVrCwsIC3tzd27NhRaN/MzEwMGjQItra2sLS0REBAgM56Tp06BX9/fzg4OMDMzAwqlQp+fn64fv261KekNRe81n777Td4e3vDwsICgwcPRrdu3eDi4oL8/HydGj09PdGiRQvZ6yrMjBkzoFAocOrUKXzwwQewtraGUqlE//79cfv2ba2+mzdvRufOnVG9enXpfTtlyhSdkeziPk+KcuHCBfTt2xeOjo4wMzNDrVq1MGDAAOTk5Eh9zp49i/fffx82NjaoXLkymjVrhh9++EFrOQWHtX/88UdMnjwZ1atXR5UqVRAQEIBbt27h3r17GDZsGOzt7WFvb49BgwYhOztbaxkKhQJjxozBihUr8MYbb8DMzAyNGjVCZGSkVr/bt29j1KhRaNSoEapUqQIHBwe88847OHTokM72Xb9+HT179oSVlRWqVq2Kfv364fjx4zqH8Av23aVLl/Dee++hSpUqcHZ2RlhYmNa+KKjz2cNAJXlfFiUvLw+TJk2Ck5MTLCws8NZbb+HYsWOF9n2Z9ezfvx/t27eHnZ
0dzM3NUatWLfTo0QMPHjwA8P8/N+bPn4+vvvoKtWrVQuXKldGqVatCP8f++usvBAUFSe/Hhg0bSp8nT7t79y7CwsJQu3ZtmJmZwcHBAe+99x4uXLiAy5cvS39Iz5w5U/oMKjh0Keczvbj1FOdl9qkWQXqxZs0aAUAcP35cq/2bb74RAMTKlSuFEEKEhIQIExMT4erqKubMmSP27dsndu/eLU1zcXHRmn/EiBHC1NRUWu6+fftEpUqVxGefffbcmlxcXETNmjWFu7u72LRpk9i5c6fw9PQUJiYm4vPPPxdt27YVUVFRIjo6WrzxxhvC0dFRPHjwQJp/y5Yt4vPPPxfR0dEiLi5OREZGCh8fH1GtWjVx+/Ztqd+///4ratasKTw9PUVubq60Lebm5uL06dM6+yglJUVq8/HxEXZ2dqJ+/fpi1apVYvfu3cLf318AEDNnzhQeHh5S7W3atBFmZmbixo0b0vxxcXEiLCxM/PzzzyIuLk5ER0eLbt26CXNzc3HhwgWpX0pKigAg1qxZU+w+S0tLEw4ODqJGjRpizZo1YufOnaJfv36iVq1aAoA4cOCA1Hf9+vVCoVCIbt26iaioKPHrr78Kf39/YWRkJPbu3Vvserp37y6cnZ2FRqPRap80aZIwNTUV//77rxBCiJs3bwpnZ2fh4uIiVqxYIfbu3Su++OILYWZmJgYOHKizfa6urqJr165i69atYuvWrcLDw0PY2NiIu3fvCiGEuHTpkujZs6cAIOLj46XHo0ePhBBCDB06VJiYmIiwsDARExMjfvzxR9GgQQPh6Ogo0tLSit2mgwcPChMTE9GyZUuxefNmsXXrVtG5c2ehUChEZGSk1K/gdeDs7CwGDx4sdu3aJVauXCkcHByEs7OzyMzMFEIIkZ2dLezs7ESrVq3ETz/9JOLi4sTmzZvFiBEjxPnz56XllbRmHx8fYWtrK5ydncXSpUvFgQMHRFxcnPjf//4nAIjY2Fit7UlOThYAxJIlS2SvqzDTp08XAISLi4uYOHGi2L17t1i0aJGwtLQUzZs3l947QgjxxRdfiPDwcLFjxw5x8OBBsXz5cuHm5iY6dOigtcziPk8Kk5SUJKpUqSJcXV3F8uXLxb59+8SGDRtEr169RFZWlhBCiAsXLggrKytRp04dsW7dOrFjxw7Rt29fAUDMmzdPWtaBAwek7Rk4cKCIiYkRy5cvF1WqVBEdOnQQvr6+YsKECWLPnj1i3rx5wsjISIwdO1arnoLXQaNGjcSmTZvEtm3bRNeuXQUAsWXLFqnfhQsXxMiRI0VkZKQ4ePCg2L59uxgyZIioVKmS1nsyOztb1K1bV9ja2or//ve/Yvfu3eKTTz4Rbm5uOu//kJAQYWpqKho2bCgWLlwo9u7dKz7//HOhUCjEzJkzdeqcPn269Lyk78uihISECIVCISZOnCj27NkjFi1aJGrUqCGsra1FSEiIXtaTkpIiKleuLHx9fcXWrVvFwYMHxcaNG0VwcLD0Hiv43HB2dhZvvfWW+OWXX8SWLVvEm2++KUxMTMSRI0ek5Z07d04olUrh4eEh1q1bJ/bs2SPCwsJEpUqVxIwZM6R+WVlZonHjxsLS0lLMmjVL7N69W/zyyy/i448/Fvv37xePHj0SMTExAoAYMmSI9Bl06dIlIUTJP9Oft57S+tk9jQFKTwp+KSQkJIi8vDxx7949sX37dlGtWjVhZWUlfbiGhIQIAGL16tU6yygsQD169Eg0b95cuLm5ifPnzwtHR0fh4+MjHj9+/NyaXFxchLm5ubh+/brUlpSUJACI6tWri/v370vtW7duFQDEtm3bilze48ePRXZ2trC0tBTffPON1rTDhw8LY2NjERoaKlavXi0AiO+//77QffRsgAIgTpw4IbXduXNHGBkZCXNzc62wVFD707/QCqsxNzdX1KtXT3zyySdSe0kD1OTJk4VCoRBJSUla7b6+vloB6v79+8LW1lYEBARo9dNoNKJp06aidevWxa
5n27ZtAoDYs2ePVu0qlUr06NFDahs+fLioUqWKuHLlitb8CxcuFADEuXPntLbPw8ND67Vx7NgxAUBs2rRJahs9erQo7G+n+Ph4AUB8/fXXWu3Xrl0T5ubmYtKkScVuU5s2bYSDg4O4d++e1ja5u7uLmjVrivz8fCHE/38ddO/eXWv+33//XQAQX375pRBCiBMnTggAYuvWrUWuU07NBa+1ffv2afXNy8sTjo6OIigoSKv92TD7svunIEA9/boUQoiNGzcKAGLDhg2Fzpefny/y8vJEXFycACD++OMPaVpxnyeFeeedd0TVqlVFenp6kX369OkjzMzMxNWrV7Xa3333XWFhYSGF8YIA9ex7IDQ0VAAQ48aN02rv1q2bsLW11WoDIMzNzbXC5+PHj0WDBg1E3bp1i6zx8ePHIi8vT3Ts2FHrdfTf//5XABC7du3S6j98+PBCAxQA8dNPP2n1fe+990T9+vV16nz6l3BJ35eFKQjmRb0Ong5QL7Oen3/+WQDQ+Sx7WsHnhkqlEg8fPpTas7KyhK2trejUqZPU1qVLF1GzZk2hVqu1ljFmzBhRuXJlkZGRIYQQYtasWYX+QfK027dv6+zTohT1mV6S9Qih35/ds3gIT8/atGkDExMTWFlZwd/fH05OTti1axccHR21+vXo0aNEyzMzM8NPP/2EO3fuoEWLFhBCYNOmTTAyMirR/M2aNUONGjWk5w0bNgTw5HDG0+dJFLQ/fXguOzsbkydPRt26dWFsbAxjY2NUqVIF9+/fR3JystZ62rZti6+++gqLFy/GyJEj0b9/fwwZMqRENVavXh0tW7aUntva2sLBwQHNmjWDSqUqtsbHjx9j9uzZaNSoEUxNTWFsbAxTU1P89ddfOjWWxIEDB9C4cWM0bdpUqz0oKEjr+ZEjR5CRkYGQkBA8fvxYeuTn56Nr1644fvx4sRcOvPvuu3BycpIOVQLA7t27kZqaisGDB0tt27dvR4cOHaBSqbTW8+677wIA4uLitJbr5+en9dpo0qQJAO19VpTt27dDoVCgf//+WutycnJC06ZNiz3h8/79+zh69Ch69uyJKlWqSO1GRkYIDg7G9evXcfHiRa15nj0n0NvbGy4uLjhw4AAAoG7durCxscHkyZOxfPlynD9//qVrtrGxwTvvvKPVZmxsjP79+yMqKgpqtRoAoNFosH79erz//vuws7N76f1T3Hb36tULxsbG0nYDwD///IOgoCA4OTnByMgIJiYm8PHxAYBCX9cl+Tx58OAB4uLi0KtXr2LPRdy/fz86duwIZ2dnrfaBAwfiwYMHiI+P12r39/fXel7wPvXz89Npz8jI0DmM17FjR63PRyMjI/Tu3RuXLl3SOlS7fPlytGjRApUrV4axsTFMTEywb98+rf0RFxcHKysrdO3aVWsdffv2LXRbFQoFAgICtNqaNGny3PeL3Pfl0wp+zkW9DvS1nmbNmsHU1BTDhg3DDz/8UOxh5g8++ACVK1eWnltZWSEgIAC//fYbNBoNHj16hH379qF79+6wsLDQquW9997Do0ePpFMFdu3ahTfeeAOdOnUqcn3FKeln+ouu52X26bMYoPRs3bp1OH78OE6dOoXU1FScPn0abdu21epjYWEBa2vrEi+zbt26aNeuHR49eoR+/frJuorN1tZW67mpqWmx7Y8ePZLagoKCEBERgY8++gi7d+/GsWPHcPz4cVSrVg0PHz7UWVe/fv1gamqKnJwcTJw48YVrLKinJDWOHz8e06ZNQ7du3fDrr7/i6NGjOH78OJo2bVpojc9z584dODk56bQ/23br1i0AQM+ePWFiYqL1mDdvHoQQyMjIKHI9xsbGCA4ORnR0NO7evQvgyTli1atXR5cuXbTW8+uvv+qso3HjxgCAf//9V2u5Bb/sC5iZmQFAifbFrVu3IISAo6OjzvoSEhJ01vW0zMxMCCEKfW0WhOA7d+5otRe1nwv6KZVKxMXFoVmzZvjPf/6Dxo0bQ6VSYf
r06cjLy3uhmot67wwePBiPHj2Szr3ZvXs3bt68iUGDBull/xS33cbGxrCzs5O2Ozs7G+3atcPRo0fx5Zdf4uDBgzh+/DiioqIA6P4sS/p5kpmZCY1Gg5o1axbb786dO7J+ji/zGQMU/Tp4el2LFi3CyJEj4enpiV9++QUJCQk4fvw4unbtqrU/7ty5o/PHKoBC24An++7p4AA8ec88W+Oz5L4vn1awTUW9DvS1njp16mDv3r1wcHDA6NGjUadOHdSpUwfffPONTt+ifga5ubnIzs7GnTt38PjxYyxdulSnlvfee0+rltu3bz/3NVackn6mv+h6XmafPotX4elZw4YNpavwiqJQKGQt8/vvv8eOHTvQunVrREREoHfv3vD09HyZMp9LrVZj+/btmD59OqZMmSK15+TkFBoMNBoN+vXrBxsbG5iZmWHIkCH4/fffpQ/N0rJhwwYMGDAAs2fP1mr/999/UbVqVdnLs7OzK/Tqqmfb7O3tAQBLly4t8gqdoj60CwwaNAgLFixAZGQkevfujW3btiE0NFRrBMne3h5NmjTBV199Vegynh6he1n29vZQKBQ4dOiQFLyeVlhbARsbG1SqVAk3b97UmZaamiot/2lF7ee6detKzz08PBAZGQkhBE6fPo21a9di1qxZMDc3x5QpU2TXXNR7r1GjRmjdujXWrFmD4cOHY82aNVCpVOjcubPU52X2z7Pb+PSo8OPHj3Hnzh3pl+f+/fuRmpqKgwcPSqNOAKSg/aySfp7Y2trCyMhIa1SnMHZ2drJ+ji+ruPdbwT7ZsGED2rdvj2+//Var37MXd9jZ2RV6Mra+r5h8mfdlwTYV9TrQ13oAoF27dmjXrh00Gg1OnDiBpUuXIjQ0FI6OjujTp4/Ur6ifgampKapUqQITExNpNHn06NGFrsvNzQ3Ak6utn/caK05JP9NfdD36/ExlgDJwZ86cwbhx4zBgwAB899138Pb2Ru/evXHq1CnY2NiU2noVCgWEEDq/FL7//ntoNBqd/tOnT8ehQ4ewZ88eWFpa4u2338bEiRML/WtH33U+W+OOHTtw48YNrV/EJdWhQwfMnz8ff/zxh9ZhvB9//FGrX9u2bVG1alWcP38eY8aMeaHaGzZsCE9PT6xZswYajQY5OTlaIx7Ak8MjO3fuRJ06dfT28356VMrc3FxrXXPnzsWNGzfQq1cvWcu0tLSEp6cnoqKisHDhQmm5+fn52LBhA2rWrIk33nhDa56NGzdqHXo6cuQIrly5Uuj9dhQKBZo2bYrw8HCsXbsWJ0+efOmanzVo0CCMHDkShw8fxq+//orx48drhVl9rWvjxo1ah6x/+uknPH78GO3btwfw/wPRs6/rFStWvPA6AcDc3Bw+Pj7YsmULvvrqqyKDUMeOHREdHY3U1FStXybr1q2DhYWF3i/p37dvH27duiX9waHRaLB582bUqVNHGmEo7H1++vRpxMfHax1q9PHxwU8//YRdu3ZJh2QA6FzV97Je5n1Z8HMu6nWgr/U8zcjICJ6enmjQoAE2btyIkydPagWoqKgoLFiwQBqNu3fvHn799Ve0a9cORkZGsLCwQIcOHXDq1Ck0adKk2D+M3333XXz++efYv3+/zuHyAsWNjJf0M70k6ymMPj9TGaAM2P3799GrVy+4ublh2bJlMDU1xU8//YQWLVpg0KBB2Lp1a6mt29raGm+//TYWLFgAe3t7uLq6Ii4uDqtWrdIZ2YmNjcWcOXMwbdo06RLqOXPmYMKECWjfvj26d+9eanX6+/tj7dq1aNCgAZo0aYLExEQsWLDghYeQQ0NDsXr1avj5+eHLL7+Eo6MjNm7cqHNZbJUqVbB06VKEhIQgIyMDPXv2hIODA27fvo0//vgDt2/f1vlruTCDBw/G8OHDkZqaCm9vb9SvX19r+qxZsxAbGwtvb2+MGzcO9evXx6NHj3D58mXs3LkTy5cvl72tHh4eAIB58+bh3XffhZGREZo0aYK2bdti2LBhGD
RoEE6cOIG3334blpaWuHnzJg4fPgwPDw+MHDmyyOXOmTMHvr6+6NChAyZMmABTU1MsW7YMZ8+exaZNm3RGSk6cOIGPPvoIH374Ia5du4ZPP/0UNWrUwKhRowA8OVdh2bJl6NatG2rXrg0hBKKionD37l34+voCwEvX/LS+ffti/Pjx6Nu3L3JycnTuCK2vdUVFRcHY2Bi+vr44d+4cpk2bhqZNm0qhzNvbGzY2NhgxYgSmT58OExMTbNy4EX/88UeJtqM4ixYtwltvvQVPT09MmTIFdevWxa1bt7Bt2zasWLECVlZWmD59unSeyOeffw5bW1ts3LgRO3bswPz586FUKl+6jqfZ29vjnXfewbRp02BpaYlly5bhwoULWqHH398fX3zxBaZPnw4fHx9cvHgRs2bNgpubm1boCAkJQXh4OPr3748vv/wSdevWxa5du7B7924AQKVK+jlr5WXelw0bNkT//v2xePFimJiYoFOnTjh79iwWLlyocyj2ZdazfPly7N+/H35+fqhVqxYePXqE1atXA4DOeUNGRkbw9fXF+PHjkZ+fj3nz5iErK0vrhrvffPMN3nrrLbRr1w4jR46Eq6sr7t27h0uXLuHXX3/F/v37ATz5DN28eTPef/99TJkyBa1bt8bDhw8RFxcHf39/dOjQAVZWVnBxccH//vc/dOzYEba2ttLvmZJ+ppdkPfr+2eko8enmVKyibmPwrJCQEGFpaVnktKevwuvfv7+wsLDQuSpgy5YtAoAIDw8vdl0uLi7Cz89Ppx2AGD16tFZbwdUYCxYskNquX78uevToIWxsbISVlZXo2rWrOHv2rHBxcZGuFElNTRUODg7inXfe0bokPz8/XwQEBIiqVatKV90VdRVe48aNX7j2zMxMMWTIEOHg4CAsLCzEW2+9JQ4dOiR8fHyEj4+PzvY97yo8IYQ4f/688PX1FZUrVxa2trZiyJAh0qXuT18yLcSTS279/PyEra2tMDExETVq1BB+fn5al2AXR61WC3NzcwFAfPfdd4X2uX37thg3bpxwc3MTJiYmwtbWVrRs2VJ8+umnIjs7W2v7nv75FcAzV6Hk5OSIjz76SFSrVk0oFAqdn8nq1auFp6ensLS0FObm5qJOnTpiwIABWldKFuXQoUPinXfekeZt06aN+PXXX7X6FLwO9uzZI4KDg0XVqlWFubm5eO+998Rff/0l9btw4YLo27evqFOnjjA3NxdKpVK0bt1arF27Vme9Jam5qNfa04KCggQA0bZt2yL7vOj+KbgKLzExUQQEBIgqVaoIKysr0bdvX3Hr1i2tvkeOHBFeXl7CwsJCVKtWTXz00Ufi5MmThV5JVtTnSVHOnz8vPvzwQ2FnZydMTU1FrVq1xMCBA6VbWQghxJkzZ0RAQIBQKpXC1NRUNG3aVOe9U3AV3rOv9aI+Cwu2/+lboBS8n5ctWybq1KkjTExMRIMGDcTGjRu15s3JyRETJkwQNWrUEJUrVxYtWrQQW7duLfTK5atXr4oPPvhA2r89evQQO3fuFADE//73v+fuu4I6n/bse0iIkr0vi5KTkyPCwsKEg4ODqFy5smjTpo2Ij4/X+mx92fXEx8eL7t27CxcXF2FmZibs7OyEj4+P1pXWBZ8b8+bNEzNnzhQ1a9YUpqamonnz5oXeDiMlJUUMHjxY1KhRQ5iYmIhq1aoJb29v6crZApmZmeLjjz8WtWrVEiYmJsLBwUH4+flp3YZg7969onnz5sLMzEzr6sOSfqaXdD36/tk9TfF/KyAiolI0Y8YMzJw5E7dv39b7eUTllUKhwOjRo3VuFqxvs2fPxmeffYarV6++1AnOFc3ly5fh5uaGBQsWYMKECWVdTrnDQ3hERFRhFISxBg0aIC8vD/v378eSJUvQv39/hifSKwYoIiKqMCwsLBAeHo7Lly8jJycHtWrVwuTJk/HZZ5+VdWlUwfAQHhEREZFMvJEmERERkUwMUEREREQyMUARERERycSTyPUoPz
8fqampsLKykv11LURERFQ2hBC4d+8eVCpViW+4ygClR6mpqTrfYE5ERETlw7Vr10p8uwsGKD2ysrIC8OQHUJJvRyciIqKyl5WVBWdnZ+n3eEkwQOlRwWE7a2trBigiIqJyRs7pNzyJnIiIiEgmBigiIiIimRigiIiIiGRigCIiIiKSiQGKiIiISCYGKCIiIiKZyjRA/fbbbwgICIBKpYJCocDWrVuL7Dt8+HAoFAosXrxYqz0nJwdjx46Fvb09LC0tERgYiOvXr2v1yczMRHBwMJRKJZRKJYKDg3H37l2tPlevXkVAQAAsLS1hb2+PcePGITc3V09bSkRERBVJmQao+/fvo2nTpoiIiCi239atW3H06FGoVCqdaaGhoYiOjkZkZCQOHz6M7Oxs+Pv7Q6PRSH2CgoKQlJSEmJgYxMTEICkpCcHBwdJ0jUYDPz8/3L9/H4cPH0ZkZCR++eUXhIWF6W9jiYiIqOIQBgKAiI6O1mm/fv26qFGjhjh79qxwcXER4eHh0rS7d+8KExMTERkZKbXduHFDVKpUScTExAghhDh//rwAIBISEqQ+8fHxAoC4cOGCEEKInTt3ikqVKokbN25IfTZt2iTMzMyEWq0u8Tao1WoBQNY8REREVLZe5Pe3QZ8DlZ+fj+DgYEycOBGNGzfWmZ6YmIi8vDx07txZalOpVHB3d8eRI0cAAPHx8VAqlfD09JT6tGnTBkqlUquPu7u71ghXly5dkJOTg8TExCLry8nJQVZWltaDiIiIKj6DDlDz5s2DsbExxo0bV+j0tLQ0mJqawsbGRqvd0dERaWlpUh8HBwedeR0cHLT6ODo6ak23sbGBqamp1Kcwc+bMkc6rUiqV/CJhIiKi14TBBqjExER88803WLt2razvpgEAIYTWPIXN/yJ9njV16lSo1Wrpce3aNVl1EhERUflksAHq0KFDSE9PR61atWBsbAxjY2NcuXIFYWFhcHV1BQA4OTkhNzcXmZmZWvOmp6dLI0pOTk64deuWzvJv376t1efZkabMzEzk5eXpjEw9zczMTPriYH6BMBER0evDYANUcHAwTp8+jaSkJOmhUqkwceJE7N69GwDQsmVLmJiYIDY2Vprv5s2bOHv2LLy9vQEAXl5eUKvVOHbsmNTn6NGjUKvVWn3Onj2LmzdvSn327NkDMzMztGzZ8lVsLhEREZUjxmW58uzsbFy6dEl6npKSgqSkJNja2qJWrVqws7PT6m9iYgInJyfUr18fAKBUKjFkyBCEhYXBzs4Otra2mDBhAjw8PNCpUycAQMOGDdG1a1cMHToUK1asAAAMGzYM/v7+0nI6d+6MRo0aITg4GAsWLEBGRgYmTJiAoUOHclSJiIiIdJRpgDpx4gQ6dOggPR8/fjwAICQkBGvXri3RMsLDw2FsbIxevXrh4cOH6NixI9auXQsjIyOpz8aNGzFu3Djpar3AwECte08ZGRlhx44dGDVqFNq2bQtzc3MEBQVh4cKFethK/XCdsqOsSyg3Ls/1K+sSiIioglMIIURZF1FRZGVlQalUQq1W633kigGq5BigiIhIjhf5/W2w50ARERERGSoGKCIiIiKZGKCIiIiIZGKAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiIiKSiQGKiIiISCYGKCIiIiKZGKCIiIiIZGKAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiIiKSiQGKiIiISCYGKCIiIiKZGKCIiIiIZGKAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiIiKSiQGKiIiISCYGKCIiIiKZGKCIiIiIZGKAIiIiIpKpTAPUb7/9hoCAAKhUKigUCmzdulWalpeXh8mTJ8PDww
OWlpZQqVQYMGAAUlNTtZaRk5ODsWPHwt7eHpaWlggMDMT169e1+mRmZiI4OBhKpRJKpRLBwcG4e/euVp+rV68iICAAlpaWsLe3x7hx45Cbm1tam05ERETlWJkGqPv376Np06aIiIjQmfbgwQOcPHkS06ZNw8mTJxEVFYU///wTgYGBWv1CQ0MRHR2NyMhIHD58GNnZ2fD394dGo5H6BAUFISkpCTExMYiJiUFSUhKCg4Ol6RqNBn5+frh//z4OHz6MyMhI/PLLLwgLCyu9jSciIqJySyGEEGVdBAAoFApER0ejW7duRfY5fvw4WrdujStXrqBWrVpQq9WoVq0a1q9fj969ewMAUlNT4ezsjJ07d6JLly5ITk5Go0aNkJCQAE9PTwBAQkICvLy8cOHCBdSvXx+7du2Cv78/rl27BpVKBQCIjIzEwIEDkZ6eDmtr6xJtQ1ZWFpRKJdRqdYnnKSnXKTv0uryK7PJcv7IugYiIypEX+f1drs6BUqvVUCgUqFq1KgAgMTEReXl56Ny5s9RHpVLB3d0dR44cAQDEx8dDqVRK4QkA2rRpA6VSqdXH3d1dCk8A0KVLF+Tk5CAxMbHIenJycpCVlaX1ICIiooqv3ASoR48eYcqUKQgKCpLSYVpaGkxNTWFjY6PV19HREWlpaVIfBwcHneU5ODho9XF0dNSabmNjA1NTU6lPYebMmSOdV6VUKuHs7PxS20hERETlQ7kIUHl5eejTpw/y8/OxbNmy5/YXQkChUEjPn/7/y/R51tSpU6FWq6XHtWvXnlsbERERlX8GH6Dy8vLQq1cvpKSkIDY2VuvYpJOTE3Jzc5GZmak1T3p6ujSi5OTkhFu3buks9/bt21p9nh1pyszMRF5ens7I1NPMzMxgbW2t9SAiIqKKz6ADVEF4+uuvv7B3717Y2dlpTW/ZsiVMTEwQGxsrtd28eRNnz56Ft7c3AMDLywtqtRrHjh2T+hw9ehRqtVqrz9mzZ3Hz5k2pz549e2BmZoaWLVuW5iYSERFROWRclivPzs7GpUuXpOcpKSlISkqCra0tVCoVevbsiZMnT2L79u3QaDTSKJGtrS1MTU2hVCoxZMgQhIWFwc7ODra2tpgwYQI8PDzQqVMnAEDDhg3RtWtXDB06FCtWrAAADBs2DP7+/qhfvz4AoHPnzmjUqBGCg4OxYMECZGRkYMKECRg6dChHlYiIiEhHmQaoEydOoEOHDtLz8ePHAwBCQkIwY8YMbNu2DQDQrFkzrfkOHDiA9u3bAwDCw8NhbGyMXr164eHDh+jYsSPWrl0LIyMjqf/GjRsxbtw46Wq9wMBArXtPGRkZYceOHRg1ahTatm0Lc3NzBAUFYeHChaWx2URERFTOGcx9oCoC3gfKMPA+UEREJEeFvw8UERERkSFggCIiIiKSiQGKiIiISCYGKCIiIiKZGKCIiIiIZGKAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikkn2d+HNmjWr2Omff/75CxdDREREVB7IDlDR0dFaz/Py8pCSkgJjY2PUqVOHAYqIiIgqPNkB6tSpUzptWVlZGDhwILp3766XooiIiIgMmV7OgbK2tsasWbMwbdo0fSyOiIiIyKDp7STyu3fvQq1W62txRERERAZL9iG8JUuWaD0XQuDmzZtYv349unbtqrfCiIiIiAyV7AAVHh6u9bxSpUqoVq0aQkJCMHXqVL0VRkRERGSoZAeolJSU0qiDiIiIqNzgjTSJiIiIZCrRCNQHH3xQ4gVGRUW9cDFERERE5UGJRqCUSqX0sLa2xr59+3DixAlpemJiIvbt2welUllqhRIREREZihKNQK1Zs0b6/+TJk9GrVy8sX74cRkZGAACNRoNRo0bB2tq6dKokIiIiMiCyz4FavXo1JkyYIIUnADAyMsL48eOxevVqvRZHREREZIhkB6jHjx8jOTlZpz05ORn5+fl6KYqIiIjIkMm+jcGgQYMwePBgXLp0CW3atA
EAJCQkYO7cuRg0aJDeCyQiIiIyNLID1MKFC+Hk5ITw8HDcvHkTAFC9enVMmjQJYWFhei+QiIiIyNDIDlCVKlXCpEmTMGnSJGRlZQEATx4nIiKi14rsAPU0BiciIiJ6Hck+ifzWrVsIDg6GSqWCsbExjIyMtB5EREREFZ3sEaiBAwfi6tWrmDZtGqpXrw6FQlEadREREREZLNkB6vDhwzh06BCaNWtWCuUQERERGT7Zh/CcnZ0hhCiNWoiIiIjKBdkBavHixZgyZQouX75cCuUQERERGT7Zh/B69+6NBw8eoE6dOrCwsICJiYnW9IyMDL0VR0RERGSIZAeoxYsXl0IZREREROWH7AAVEhJSGnUQERERlRuyz4ECgL///hufffYZ+vbti/T0dABATEwMzp07p9fiiIiIiAyR7AAVFxcHDw8PHD16FFFRUcjOzgYAnD59GtOnT9d7gURERESGRnaAmjJlCr788kvExsbC1NRUau/QoQPi4+P1WhwRERGRIZIdoM6cOYPu3bvrtFerVg137tzRS1FEREREhkx2gKpatSpu3ryp037q1CnUqFFD1rJ+++03BAQEQKVSQaFQYOvWrVrThRCYMWMGVCoVzM3N0b59e53zrHJycjB27FjY29vD0tISgYGBuH79ulafzMxMBAcHQ6lUQqlUIjg4GHfv3tXqc/XqVQQEBMDS0hL29vYYN24ccnNzZW0PERERvR5kB6igoCBMnjwZaWlpUCgUyM/Px++//44JEyZgwIABspZ1//59NG3aFBEREYVOnz9/PhYtWoSIiAgcP34cTk5O8PX1xb1796Q+oaGhiI6ORmRkJA4fPozs7Gz4+/tDo9Fo1ZyUlISYmBjExMQgKSkJwcHB0nSNRgM/Pz/cv38fhw8fRmRkJH755ReEhYXJ3DtERET0OlAImd/LkpeXh4EDByIyMhJCCBgbG0Oj0SAoKAhr166FkZHRixWiUCA6OhrdunUD8GT0SaVSITQ0FJMnTwbwZLTJ0dER8+bNw/Dhw6FWq1GtWjWsX78evXv3BgCkpqbC2dkZO3fuRJcuXZCcnIxGjRohISEBnp6eAICEhAR4eXnhwoULqF+/Pnbt2gV/f39cu3YNKpUKABAZGYmBAwciPT0d1tbWJdqGrKwsKJVKqNXqEs9TUq5Tduh1eRXZ5bl+ZV0CERGVIy/y+1v2CJSJiQk2btyIP//8Ez/99BM2bNiACxcuYP369S8cngqTkpKCtLQ0dO7cWWozMzODj48Pjhw5AgBITExEXl6eVh+VSgV3d3epT3x8PJRKpRSeAKBNmzZQKpVafdzd3aXwBABdunRBTk4OEhMTi6wxJycHWVlZWg8iIiKq+GTfSDMuLg4+Pj6oU6cO6tSpUxo1AQDS0tIAAI6Ojlrtjo6OuHLlitTH1NQUNjY2On0K5k9LS4ODg4PO8h0cHLT6PLseGxsbmJqaSn0KM2fOHMycOVPmlhEREVF5J3sEytfXF7Vq1cKUKVNw9uzZ0qhJi0Kh0HouhNBpe9azfQrr/yJ9njV16lSo1Wrpce3atWLrIiIioopBdoBKTU3FpEmTcOjQITRp0gRNmjTB/Pnzda58e1lOTk4AoDMClJ6eLo0WOTk5ITc3F5mZmcX2uXXrls7yb9++rdXn2fVkZmYiLy9PZ2TqaWZmZrC2ttZ6EBERUcUnO0DZ29tjzJgx+P333/H333+jd+/eWLduHVxdXfHOO+/orTA3Nzc4OTkhNjZWasvNzUVcXBy8vb0BAC1btoSJiYlWn5s3b+Ls2bNSHy8vL6jVahw7dkzqc/ToUajVaq0+Z8+e1bo9w549e2BmZoaWLVvqbZuIiIioYpB9DtTT3NzcMGXKFDRt2hTTpk1DXFycrPmzs7Nx6dIl6XlKSgqSkpJga2uLWrVqITQ0FLNnz0a9evVQr149zJ49GxYWFggKCgIAKJVKDBkyBGFhYbCzs4OtrS0mTJgADw8PdOrUCQDQsGFDdO3aFUOHDs
WKFSsAAMOGDYO/vz/q168PAOjcuTMaNWqE4OBgLFiwABkZGZgwYQKGDh3KUSUiIiLS8cIB6vfff8fGjRvx888/49GjRwgMDMTs2bNlLePEiRPo0KGD9Hz8+PEAgJCQEKxduxaTJk3Cw4cPMWrUKGRmZsLT0xN79uyBlZWVNE94eDiMjY3Rq1cvPHz4EB07dtS5ncLGjRsxbtw46Wq9wMBArXtPGRkZYceOHRg1ahTatm0Lc3NzBAUFYeHChS+0b4iIiKhik30fqP/85z/YtGkTUlNT0alTJ/Tr1w/dunWDhYVFadVYbvA+UIaB94EiIiI5XuT3t+wRqIMHD2LChAno3bs37O3tZRdJREREVN7JDlAFN58kIiIiel3JvgqPiIiI6HXHAEVEREQkEwMUERERkUwMUEREREQyyQ5QtWvXxp07d3Ta7969i9q1a+ulKCIiIiJDJjtAXb58GRqNRqc9JycHN27c0EtRRERERIasxLcx2LZtm/T/3bt3Q6lUSs81Gg327dsHV1dXvRZHREREZIhKHKC6desGAFAoFAgJCdGaZmJiAldXV3z99dd6LY6IiIjIEJU4QOXn5wN48gXCx48f513IiYiI6LUl+07kKSkp0v8fPXqEypUr67UgIiIiIkMn+yTy/Px8fPHFF6hRowaqVKmCf/75BwAwbdo0rFq1Su8FEhERERka2QHqyy+/xNq1azF//nyYmppK7R4eHvj+++/1WhwRERGRIZIdoNatW4eVK1eiX79+MDIyktqbNGmCCxcu6LU4IiIiIkMkO0DduHEDdevW1WnPz89HXl6eXooiIiIiMmSyA1Tjxo1x6NAhnfYtW7agefPmeimKiIiIyJDJvgpv+vTpCA4Oxo0bN5Cfn4+oqChcvHgR69atw/bt20ujRiIiIiKDInsEKiAgAJs3b8bOnTuhUCjw+eefIzk5Gb/++it8fX1Lo0YiIiIigyJ7BAoAunTpgi5duui7FiIiIqJyQfYIFBEREdHrrkQjUDY2NlAoFCVaYEZGxksVRERERGToShSgFi9eXMplEBEREZUfJQpQISEhpV0HERERUblRogCVlZVV4gVaW1u/cDFERERE5UGJAlTVqlWfew6UEAIKhQIajUYvhREREREZqhIFqAMHDpR2HURERETlRokClI+PT2nXQURERFRulChAnT59Gu7u7qhUqRJOnz5dbN8mTZropTAiIiIiQ1WiANWsWTOkpaXBwcEBzZo1g0KhgBBCpx/PgSIiIqLXQYkCVEpKCqpVqyb9n4iIiOh1VqIA5eLiUuj/iYiIiF5HL/RlwhcvXsTSpUuRnJwMhUKBBg0aYOzYsahfv76+6yMiIiIyOLK/TPjnn3+Gu7s7EhMT0bRpUzRp0gQnT56Eu7s7tmzZUho1EhERERkU2SNQkyZNwtSpUzFr1iyt9unTp2Py5Mn48MMP9VYcERERkSGSPQKVlpaGAQMG6LT3798faWlpeimKiIiIyJDJDlDt27fHoUOHdNoPHz6Mdu3a6aUoIiIiIkNWokN427Ztk/4fGBiIyZMnIzExEW3atAEAJCQkYMuWLZg5c2bpVElERERkQBSisDtiPqNSpZINVL3uN9LMysqCUqmEWq2GtbW1XpftOmWHXpdXkV2e61fWJRARUTnyIr+/SzQClZ+f/1KFEREREVUkss+BepUeP36Mzz77DG5ubjA3N0ft2rUxa9YsrUAnhMCMGTOgUqlgbm6O9u3b49y5c1rLycnJwdixY2Fvbw9LS0sEBgbi+vXrWn0yMzMRHBwMpVIJpVKJ4OBg3L1791VsJhEREZUzBh2g5s2bh+XLlyMiIgLJycmYP38+FixYgKVLl0p95s+fj0WLFiEiIgLHjx+Hk5MTfH19ce/ePalPaGgooqOjERkZicOHDyM7Oxv+/v5ahxuDgoKQlJSEmJgYxMTEICkpCcHBwa90e4mIiKh8KNE5UGXF398fjo6OWLVqldTWo0cPWFhYYP369RBCQK
VSITQ0FJMnTwbwZLTJ0dER8+bNw/Dhw6FWq1GtWjWsX78evXv3BgCkpqbC2dkZO3fuRJcuXZCcnIxGjRohISEBnp6eAJ6cGO/l5YULFy6U+A7rPAfKMPAcKCIikuNFfn8b9AjUW2+9hX379uHPP/8EAPzxxx84fPgw3nvvPQBPvtg4LS0NnTt3luYxMzODj48Pjhw5AgBITExEXl6eVh+VSgV3d3epT3x8PJRKpRSeAKBNmzZQKpVSn8Lk5OQgKytL60FEREQV3wt9F96rMnnyZKjVajRo0ABGRkbQaDT46quv0LdvXwCQbtzp6OioNZ+joyOuXLki9TE1NYWNjY1On4L509LS4ODgoLN+BweHYm8OOmfOHN66gYiI6DX0QgEqPz8fly5dQnp6us4Vem+//bZeCgOAzZs3Y8OGDfjxxx/RuHFjJCUlITQ0FCqVCiEhIVI/hUKhNZ8QQqftWc/2Kaz/85YzdepUjB8/XnqelZUFZ2fn524XERERlW+yA1RCQgKCgoJw5coVPHv6lL7vAzVx4kRMmTIFffr0AQB4eHjgypUrmDNnDkJCQuDk5ATgyQhS9erVpfnS09OlUSknJyfk5uYiMzNTaxQqPT0d3t7eUp9bt27prP/27ds6o1tPMzMzg5mZ2ctvKBEREZUrss+BGjFiBFq1aoWzZ88iIyMDmZmZ0iMjI0OvxT148EDnJp5GRkbSqJebmxucnJwQGxsrTc/NzUVcXJwUjlq2bAkTExOtPjdv3sTZs2elPl5eXlCr1Th27JjU5+jRo1Cr1VIfIiIiogKyR6D++usv/Pzzz6hbt25p1KMlICAAX331FWrVqoXGjRvj1KlTWLRoEQYPHgzgyYhXaGgoZs+ejXr16qFevXqYPXs2LCwsEBQUBABQKpUYMmQIwsLCYGdnB1tbW0yYMAEeHh7o1KkTAKBhw4bo2rUrhg4dihUrVgAAhg0bBn9//xJfgUdERESvD9kBytPTE5cuXXolAWrp0qWYNm0aRo0ahfT0dKhUKgwfPhyff/651GfSpEl4+PAhRo0ahczMTHh6emLPnj2wsrKS+oSHh8PY2Bi9evXCw4cP0bFjR6xduxZGRkZSn40bN2LcuHHS1XqBgYGIiIgo9W0kIiKi8kf2faCio6Px2WefYeLEifDw8ICJiYnW9CZNmui1wPKE94EyDLwPFBERyVFq34X3tB49egCAdBgNeHIoreCKtdf5y4SJiIjo9SA7QKWkpJRGHURERETlhuwA5eLiUhp1EBEREZUbsgPUunXrip0+YMCAFy6GiIiIqDyQHaA+/vhjred5eXl48OABTE1NYWFhwQBFREREFZ7sG2k+fePMzMxMZGdn4+LFi3jrrbewadOm0qiRiIiIyKDIDlCFqVevHubOnaszOkVERERUEeklQAFPvmIlNTVVX4sjIiIiMliyz4Hatm2b1nMhBG7evImIiAi0bdtWb4URERERGSrZAapbt25azxUKBapVq4Z33nkHX3/9tb7qIiIiIjJYsgNUfn5+adRBREREVG7o7RwoIiIiotdFiUegZs2apfX8888/13sxREREROVBiQPU09+Bp1AoSqUYIiIiovKgxAFqzZo1pVkHERERUbnBc6CIiIiIZCrRCNQHH3xQ4gVGRUW9cDFERERE5UGJRqCUSqX0sLa2xr59+3DixAlpemJiIvbt2welUllqhRIREREZihKNQD19/tPkyZPRq1cvLF++HEZGRgAAjUaDUaNGwdraunSqJCIiIjIgss+BWr16NSZMmCCFJ+DJ9+CNHz8eq1ev1mtxRERERIZIdoB6/PgxkpOTddqTk5N5l3IiIiJ6Lcj+KpdBgwZh8ODBuHTpEtq0aQMASEhIwNy5czFo0CC9F0hERERkaGQHqIULF8LJyQnh4eG4efMmAKB69eqYNGkSwsLC9F4gERERkaGRHaAqVaqESZMmYdKkScjKygIAnjxORERErxXZAeppDE5ERET0OuKdyImIiI
hkYoAiIiIikokBioiIiEgmBigiIiIimUp0EvmSJUtKvMBx48a9cDFERERE5UGJAlR4eHiJFqZQKBigiIiIqMIrUYBKSUkp7TqIiIiIyg2eA0VEREQk0wvdSPP69evYtm0brl69itzcXK1pixYt0kthRERERIZKdoDat28fAgMD4ebmhosXL8Ld3R2XL1+GEAItWrQojRqJiIiIDIrsQ3hTp05FWFgYzp49i8qVK+OXX37BtWvX4OPjgw8//LA0aiQiIiIyKLIDVHJyMkJCQgAAxsbGePjwIapUqYJZs2Zh3rx5ei+QiIiIyNDIDlCWlpbIyckBAKhUKvz999/StH///Vd/lREREREZKNnnQLVp0wa///47GjVqBD8/P4SFheHMmTOIiopCmzZtSqNGIiIiIoMiO0AtWrQI2dnZAIAZM2YgOzsbmzdvRt26dUt8w00iIiKi8kz2IbzatWujSZMmAAALCwssW7YMp0+fRlRUFFxcXPRe4I0bN9C/f3/Y2dnBwsICzZo1Q2JiojRdCIEZM2ZApVLB3Nwc7du3x7lz57SWkZOTg7Fjx8Le3h6WlpYIDAzE9evXtfpkZmYiODgYSqUSSqUSwcHBuHv3rt63h4iIiMq/l7qRZnZ2NrKysrQe+pSZmYm2bdvCxMQEu3btwvnz5/H111+jatWqUp/58+dj0aJFiIiIwPHjx+Hk5ARfX1/cu3dP6hMaGoro6GhERkbi8OHDyM7Ohr+/PzQajdQnKCgISUlJiImJQUxMDJKSkhAcHKzX7SEiIqKKQSGEEHJmSElJwZgxY3Dw4EE8evRIahdCQKFQaIWSlzVlyhT8/vvvOHToUKHThRBQqVQIDQ3F5MmTATwZbXJ0dMS8efMwfPhwqNVqVKtWDevXr0fv3r0BAKmpqXB2dsbOnTvRpUsXJCcno1GjRkhISICnpycAICEhAV5eXrhw4QLq169fonqzsrKgVCqhVqthbW2thz3w/7lO2aHX5VVkl+f6lXUJRERUjrzI72/Z50D169cPALB69Wo4OjpCoVDIXUSJbdu2DV26dMGHH36IuLg41KhRA6NGjcLQoUMBPAlzaWlp6Ny5szSPmZkZfHx8cOTIEQwfPhyJiYnIy8vT6qNSqeDu7o4jR46gS5cuiI+Ph1KplMIT8ORkeaVSiSNHjhQZoHJycqQrEgHofQSOiIiIDJPsAHX69GkkJiaWeFTmZfzzzz/49ttvMX78ePznP//BsWPHMG7cOJiZmWHAgAFIS0sDADg6OmrN5+joiCtXrgAA0tLSYGpqChsbG50+BfOnpaXBwcFBZ/0ODg5Sn8LMmTMHM2fOfKltJCIiovJH9jlQb775Jq5du1YatejIz89HixYtMHv2bDRv3hzDhw/H0KFD8e2332r1e3YUrOBwYnGe7VNY/+ctZ+rUqVCr1dLjVe0XIiIiKluyR6C+//57jBgxAjdu3IC7uztMTEy0phdcoacP1atXR6NGjbTaGjZsiF9++QUA4OTkBODJCFL16tWlPunp6dKolJOTE3Jzc5GZmak1CpWeng5vb2+pz61bt3TWf/v2bZ3RraeZmZnBzMzsBbeOiIiIyivZI1C3b9/G33//jUGDBuHNN99Es2bN0Lx5c+lffWrbti0uXryo1fbnn39Kt0twc3ODk5MTYmNjpem5ubmIi4uTwlHLli1hYmKi1efmzZs4e/as1MfLywtqtRrHjh2T+hw9ehRqtVrqQ0RERFRA9gjU4MGD0bx5c2zatKnUTyL/5JNP4O3tjdmzZ6NXr144duwYVq5ciZUrVwJ4ctgtNDQUs2fPRr169VCvXj3Mnj0bFhYWCAoKAgAolUoMGTIEYWFhsLOzg62tLSZMmAAPDw906tQJwJNRra5du2Lo0KFYsWIFAGDYsGHw9/d/Jed6ERERUfkiO0BduXIF27ZtQ926dUujHi1vvvkmoqOjMXXqVMyaNQtubm5YvHixdCUgAEyaNAkPHz7EqFGjkJmZCU9PT+
zZswdWVlZSn/DwcBgbG6NXr154+PAhOnbsiLVr18LIyEjqs3HjRowbN066Wi8wMBARERGlvo1ERERU/si+D1RAQAAGDhyIHj16lFZN5RbvA2UYeB8oIiKS45XcByogIACffPIJzpw5Aw8PD52TyAMDA+UukoiIiKhckR2gRowYAQCYNWuWzjR934mciIiIyBDJDlD5+fmlUQcRERFRufFSXyZMRERE9Doq0QjUkiVLMGzYMFSuXBlLliwptu+4ceP0UhgRERGRoSpRgAoPD0e/fv1QuXJlhIeHF9lPoVAwQBEREVGFV6IAlZKSUuj/iYiIiF5Hss+BevjwYZHTbt68+VLFEBEREZUHsgNU8+bNcfLkSZ32n3/+Wa9fJExERERkqGQHKF9fX3h7e2Pu3LkQQiA7OxsDBw5ESEgIPv/889KokYiIiMigyL4P1NKlS+Hn54dBgwZhx44dSE1NhbW1NY4fP45GjRqVRo1EREREBkV2gAKAzp0744MPPsC3334LY2Nj/PrrrwxPRERE9NqQfQjv77//hpeXF7Zv347du3dj0qRJeP/99zFp0iTk5eWVRo1EREREBkV2gGrWrBnc3Nzwxx9/wNfXF19++SX279+PqKgotG7dujRqJCIiIjIosgPUsmXLEBkZiapVq0pt3t7eOHXqFFq0aKHP2oiIiIgMkuwAFRwcXGi7lZUVVq1a9dIFERERERm6FzqJHADOnz+Pq1evIjc3V2pTKBQICAjQS2FEREREhkp2gPrnn3/QvXt3nDlzBgqFAkIIAE/CEwBoNBr9VkhERERkYGQHqI8//hhubm7Yu3cvateujWPHjuHOnTsICwvDwoULS6NGInrNuE7ZUdYllBuX5/qVdQlEryXZASo+Ph779+9HtWrVUKlSJVSqVAlvvfUW5syZg3HjxuHUqVOlUScRERGRwZB9ErlGo0GVKlUAAPb29khNTQUAuLi44OLFi/qtjoiIiMgAyR6Bcnd3x+nTp1G7dm14enpi/vz5MDU1xcqVK1G7du3SqJGIiIjIoMgOUJ999hnu378PAPjyyy/h7++Pdu3awc7ODps3b9Z7gURERESGRnaA6tKli/T/2rVr4/z588jIyICNjY10JR4RERFRRfbC94F6mq2trT4WQ0RERFQuyA5Qjx49wtKlS3HgwAGkp6cjPz9fa/rJkyf1VhwRERGRIZIdoAYPHozY2Fj07NkTrVu35mE7IiIieu3IDlA7duzAzp070bZt29Koh4iIiMjgyb4PVI0aNWBlZVUatRARERGVC7ID1Ndff43JkyfjypUrpVEPERERkcGTfQivVatWePToEWrXrg0LCwuYmJhoTc/IyNBbcURERESGSHaA6tu3L27cuIHZs2fD0dGRJ5ETERHRa0d2gDpy5Aji4+PRtGnT0qiHiIiIyODJPgeqQYMGePjwYWnUQkRERFQuyA5Qc+fORVhYGA4ePIg7d+4gKytL60FERERU0ck+hNe1a1cAQMeOHbXahRBQKBTQaDT6qYyIiIjIQMkOUAcOHCiNOoiIiIjKDdkBysfHpzTqICIiIio3ZJ8DRURERPS6Y4AiIiIikqlcBag5c+ZAoVAgNDRUahNCYMaMGVCpVDA3N0f79u1x7tw5rflycnIwduxY2Nvbw9LSEoGBgbh+/bpWn8zMTAQHB0OpVEKpVCI4OBh37959BVtFRERE5U25CVDHjx/HypUr0aRJE632+fPnY9GiRYiIiMDx48fh5OQEX19f3Lt3T+oTGhqK6OhoREZG4vDhw8jOzoa/v7/WFYNBQUFISkpCTEwMYmJikJSUhODg4Fe2fURERFR+yA5Qz47uPC0mJualiilKdnY2+vXrh++++w42NjZSuxACixcvxqeffooPPvgA7u7u+OGHH/DgwQP8+OOPAAC1Wo1Vq1bh66+/RqdOndC8eXNs2LABZ86cwd69ewEAycnJiImJwffffw8vLy94eXnhu+++w/bt23Hx4sVS2SYiIi
Iqv2QHqFatWmHp0qVabTk5ORgzZgy6d++ut8KeNnr0aPj5+aFTp05a7SkpKUhLS0Pnzp2lNjMzM/j4+ODIkSMAgMTEROTl5Wn1UalUcHd3l/rEx8dDqVTC09NT6tOmTRsolUqpT2FycnJ4I1EiIqLXkOzbGGzcuBHDhg3Dzp07sWbNGqSlpSEoKAgA8Pvvv+u9wMjISJw8eRLHjx/XmZaWlgYAcHR01Gp3dHTElStXpD6mpqZaI1cFfQrmT0tLg4ODg87yHRwcpD6FmTNnDmbOnClvg4iIiKjckz0C9cEHH+D06dN4/Pgx3N3d4eXlhfbt2yMxMREtWrTQa3HXrl3Dxx9/jA0bNqBy5cpF9lMoFFrPC+6KXpxn+xTW/3nLmTp1KtRqtfS4du1aseskIiKiiuGFTiLXaDTIzc2FRqOBRqOBk5MTzMzM9F0bEhMTkZ6ejpYtW8LY2BjGxsaIi4vDkiVLYGxsLI08PTtKlJ6eLk1zcnJCbm4uMjMzi+1z69YtnfXfvn1bZ3TraWZmZrC2ttZ6EBERUcUnO0BFRkaiSZMmUCqV+PPPP7Fjxw6sXLkS7dq1wz///KPX4jp27IgzZ84gKSlJerRq1Qr9+vVDUlISateuDScnJ8TGxkrz5ObmIi4uDt7e3gCAli1bwsTERKvPzZs3cfbsWamPl5cX1Go1jh07JvU5evQo1Gq11IeIiIiogOxzoIYMGYKFCxdi5MiRAABfX1+cOXMGw4cPR7NmzfR6IrWVlRXc3d212iwtLWFnZye1h4aGYvbs2ahXrx7q1auH2bNnw8LCQjovS6lUYsiQIQgLC4OdnR1sbW0xYcIEeHh4SCelN2zYEF27dsXQoUOxYsUKAMCwYcPg7++P+vXr6217iIiIqGKQHaBOnjypEypsbGzw008/Yf369XorrKQmTZqEhw8fYtSoUcjMzISnpyf27NkDKysrqU94eDiMjY3Rq1cvPHz4EB07dsTatWthZGQk9dm4cSPGjRsnXa0XGBiIiIiIV749REREZPgUQghR1kVUFFlZWVAqlVCr1Xo/H8p1yg69Lq8iuzzXr6xLoJfE13vJ8fVO9PJe5Pd3iUagxo8fjy+++AKWlpYYP358sX0XLVpUohUTERERlVclClCnTp1CXl4egCeH8Iq6tP95tw4gIiIiqghKFKAOHDgg/f/gwYOlVQsRERFRuSDrNgaPHz+GsbExzp49W1r1EBERERk8WQHK2NgYLi4u0Gg0pVUPERERkcGTfSPNzz77DFOnTkVGRkZp1ENERERk8GTfB2rJkiW4dOkSVCoVXFxcYGlpqTX95MmTeiuOiIiIyBDJDlDvv/8+r7YjIiKi15rsADVjxoxSKIOIiIio/CjxOVAPHjzA6NGjUaNGDTg4OCAoKAj//vtvadZGREREZJBKHKCmT5+OtWvXws/PD3369EFsbKz0hcJEREREr5MSH8KLiorCqlWr0KdPHwBA//790bZtW2g0Gq0v5SUiIiKq6Eo8AnXt2jW0a9dOet66dWsYGxsjNTW1VAojIiIiMlQlDlAajQampqZabcbGxnj8+LHeiyIiIiIyZCU+hCeEwMCBA2FmZia1PXr0CCNGjNC6F1RUVJR+KyQiolfCdcqOsi6h3Lg816+sS6AyVuIAFRISotPWv39/vRZDREREVB6UOECtWbOmNOsgIiIiKjdkfxceERER0euOAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiIiKSiQGKiIiISCYGKCIiIiKZGKCIiIiIZGKAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiIiKSiQGKiIiISCYGKCIiIiKZDDpAzZkzB2+++SasrKzg4OCAbt264eLFi1p9hBCYMWMGVCoVzM3N0b59e5w7d06rT05ODsaOHQt7e3tYWl
oiMDAQ169f1+qTmZmJ4OBgKJVKKJVKBAcH4+7du6W9iURERFQOGXSAiouLw+jRo5GQkIDY2Fg8fvwYnTt3xv3796U+8+fPx6JFixAREYHjx4/DyckJvr6+uHfvntQnNDQU0dHRiIyMxOHDh5GdnQ1/f39oNBqpT1BQEJKSkhATE4OYmBgkJSUhODj4lW4vERERlQ/GZV1AcWJiYrSer1mzBg4ODkhMTMTbb78NIQQWL16MTz/9FB988AEA4IcffoCjoyN+/PFHDB8+HGq1GqtWrcL69evRqVMnAMCGDRvg7OyMvXv3okuXLkhOTkZMTAwSEhLg6ekJAPjuu+/g5eWFixcvon79+q92w4mIiMigGfQI1LPUajUAwNbWFgCQkpKCtLQ0dO7cWepjZmYGHx8fHDlyBACQmJiIvLw8rT4qlQru7u5Sn/j4eCiVSik8AUCbNm2gVCqlPoXJyclBVlaW1oOIiIgqvnIToIQQGD9+PN566y24u7sDANLS0gAAjo6OWn0dHR2laWlpaTA1NYWNjU2xfRwcHHTW6eDgIPUpzJw5c6RzppRKJZydnV98A4mIiKjcKDcBasyYMTh9+jQ2bdqkM02hUGg9F0LotD3r2T6F9X/ecqZOnQq1Wi09rl279rzNICIiogqgXASosWPHYtu2bThw4ABq1qwptTs5OQGAzihRenq6NCrl5OSE3NxcZGZmFtvn1q1bOuu9ffu2zujW08zMzGBtba31ICIioorPoAOUEAJjxoxBVFQU9u/fDzc3N63pbm5ucHJyQmxsrNSWm5uLuLg4eHt7AwBatmwJExMTrT43b97E2bNnpT5eXl5Qq9U4duyY1Ofo0aNQq9VSHyIiIqICBn0V3ujRo/Hjjz/if//7H6ysrKSRJqVSCXNzcygUCoSGhmL27NmoV68e6tWrh9mzZ8PCwgJBQUFS3yFDhiAsLAx2dnawtbXFhAkT4OHhIV2V17BhQ3Tt2hVDhw7FihUrAADDhg2Dv78/r8AjIiIiHQYdoL799lsAQPv27bXa16xZg4EDBwIAJk2ahIcPH2LUqFHIzMyEp6cn9uzZAysrK6l/eHg4jI2N0atXLzx8+BAdO3bE2rVrYWRkJPXZuHEjxo0bJ12tFxgYiIiIiNLdQCIiIiqXFEIIUdZFVBRZWVlQKpVQq9V6Px/KdcoOvS6vIrs816+sS6CXxNd7yenz9c79XnL8nKlYXuT3t0GfA0VERERkiBigiIiIiGRigCIiIiKSiQGKiIiISCYGKCIiIiKZGKCIiIiIZGKAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiIiKSiQGKiIiISCYGKCIiIiKZGKCIiIiIZGKAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiIiKSiQGKiIiISCYGKCIiIiKZGKCIiIiIZGKAIiIiIpKJAYqIiIhIJuOyLoCIiOh15jplR1mXUG5cnutX1iVIOAJFREREJBMDFBEREZFMDFBEREREMjFAEREREcnEAEVEREQkEwMUERERkUwMUM9YtmwZ3NzcULlyZbRs2RKHDh0q65KIiIjIwDBAPWXz5s0IDQ3Fp59+ilOnTqFdu3Z49913cfXq1bIujYiIiAwIb6T5lEWLFmHIkCH46KOPAACLFy/G7t278e2332LOnDllXB2VBd7gruQM6QZ3RESljSNQ/yc3NxeJiYno3LmzVnvnzp1x5MiRMqqKiIiIDBFHoP7Pv//+C41GA0dHR612R0dHpKWlFTpPTk4OcnJypOdqtRoAkJWVpff68nMe6H2ZFZU+9z/3e8lxv5cN7veywf1eNkrj9+vTyxVClHgeBqhnKBQKredCCJ22AnPmzMHMmTN12p2dnUulNioZ5eKyruD1xP1eNrjfywb3e9ko7f1+7949KJ
XKEvVlgPo/9vb2MDIy0hltSk9P1xmVKjB16lSMHz9eep6fn4+MjAzY2dkVGboqkqysLDg7O+PatWuwtrYu63JeG9zvZYP7vWxwv5eN122/CyFw7949qFSqEs/DAPV/TE1N0bJlS8TGxqJ79+5Se2xsLN5///1C5zEzM4OZmZlWW9WqVUuzTINkbW39WrzBDA33e9ngfi8b3O9l43Xa7yUdeSrAAPWU8ePHIzg4GK1atYKXlxdWrlyJq1evYsSIEWVdGhERERkQBqin9O7dG3fu3MGsWbNw8+ZNuLu7Y+fOnXBxcSnr0oiIiMiAMEA9Y9SoURg1alRZl1EumJmZYfr06TqHMal0cb+XDe73ssH9Xja4359PIeRcs0dEREREvJEmERERkVwMUEREREQyMUARERERycQARURERCQTAxQV67fffkNAQABUKhUUCgW2bt2qNV0IgRkzZkClUsHc3Bzt27fHuXPnyqbYCuR5+z0qKgpdunSBvb09FAoFkpKSyqTOiqa4/Z6Xl4fJkyfDw8MDlpaWUKlUGDBgAFJTU8uu4Ariea/3GTNmoEGDBrC0tISNjQ06deqEo0ePlk2xFcjz9vvThg8fDoVCgcWLF7+y+gwdAxQV6/79+2jatCkiIiIKnT5//nwsWrQIEREROH78OJycnODr64t79+694korluft9/v376Nt27aYO3fuK66sYituvz948AAnT57EtGnTcPLkSURFReHPP/9EYGBgGVRasTzv9f7GG28gIiICZ86cweHDh+Hq6orOnTvj9u3br7jSiuV5+73A1q1bcfToUVlfc/JaEEQlBEBER0dLz/Pz84WTk5OYO3eu1Pbo0SOhVCrF8uXLy6DCiunZ/f60lJQUAUCcOnXqldb0Oihuvxc4duyYACCuXLnyaop6DZRkv6vVagFA7N2799UU9Rooar9fv35d1KhRQ5w9e1a4uLiI8PDwV16boeIIFL2wlJQUpKWloXPnzlKbmZkZfHx8cOTIkTKsjOjVUKvVUCgUr+V3YJaV3NxcrFy5EkqlEk2bNi3rciq0/Px8BAcHY+LEiWjcuHFZl2NweCdyemFpaWkAAEdHR612R0dHXLlypSxKInplHj16hClTpiAoKOi1+bLVsrR9+3b06dMHDx48QPXq1REbGwt7e/uyLqtCmzdvHoyNjTFu3LiyLsUgcQSKXppCodB6LoTQaSOqSPLy8tCnTx/k5+dj2bJlZV3Oa6FDhw5ISkrCkSNH0LVrV/Tq1Qvp6ellXVaFlZiYiG+++QZr167l53kRGKDohTk5OQH4/yNRBdLT03VGpYgqiry8PPTq1QspKSmIjY3l6NMrYmlpibp166JNmzZYtWoVjI2NsWrVqrIuq8I6dOgQ0tPTUatWLRgbG8PY2BhXrlxBWFgYXF1dy7o8g8AARS/Mzc0NTk5OiI2Nldpyc3MRFxcHb2/vMqyMqHQUhKe//voLe/fuhZ2dXVmX9NoSQiAnJ6esy6iwgoODcfr0aSQlJUkPlUqFiRMnYvfu3WVdnkHgOVBUrOzsbFy6dEl6npKSgqSkJNja2qJWrVoIDQ3F7NmzUa9ePdSrVw+zZ8+GhYUFgoKCyrDq8u95+z0jIwNXr16V7kF08eJFAE9GBQtGBkm+4va7SqVCz549cfLkSWzfvh0ajUYafbW1tYWpqWlZlV3uFbff7ezs8NVXXyEwMBDVq1fHnTt3sGzZMly/fh0ffvhhGVZd/j3vc+bZPxBMTEzg5OSE+vXrv+pSDVNZXwZIhu3AgQMCgM4jJCRECPHkVgbTp08XTk5OwszMTLz99tvizJkzZVt0BfC8/b5mzZpCp0+fPr1M6y7vitvvBbeMKOxx4MCBsi69XCtuvz98+FB0795dqFQqYWpqKqpXry4CAwPFsWPHyrrscu95nzPP4m0MtCmEEKJ0IxoRERFRxcJzoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiIiKSiQ
GKiIiISCYGKCIiIiKZGKCIqFxJS0vD2LFjUbt2bZiZmcHZ2RkBAQHYt29fWZdGRK8RfhceEZUbly9fRtu2bVG1alXMnz8fTZo0QV5eHnbv3o3Ro0fjwoULZV0iEb0mOAJFROXGqFGjoFAocOzYMfTs2RNvvPEGGjdujPHjxyMhIQEAcPXqVbz//vuoUqUKrK2t0atXL9y6dUtaxowZM9CsWTOsXr0atWrVQpUqVTBy5EhoNBrMnz8fTk5OcHBwwFdffaW1boVCgW+//RbvvvsuzM3N4ebmhi1btmj1mTx5Mt544w1YWFigdu3amDZtGvLy8nTWvX79eri6ukKpVKJPnz64d+8eAGDdunWws7NDTk6O1nJ79OiBAQMG6HVfEtHLYYAionIhIyMDMTExGD16NCwtLXWmV61aFUIIdOvWDRkZGYiLi0NsbCz+/vtv9O7dW6vv33//jV27diEmJgabNm3C6tWr4efnh+vXryMuLg7z5s3DZ599JoWyAtOmTUOPHj3wxx9/oH///ujbty+Sk5Ol6VZWVli7di3Onz+Pb775Bt999x3Cw8N11r1161Zs374d27dvR1xcHObOnQsA+PDDD6HRaLBt2zap/7///ovt27dj0KBBL70PiUiPyvjLjImISuTo0aMCgIiKiiqyz549e4SRkZG4evWq1Hbu3DkBQBw7dkwIIcT06dOFhYWFyMrKkvp06dJFuLq6Co1GI7XVr19fzJkzR3oOQIwYMUJrfZ6enmLkyJFF1jN//nzRsmVL6Xlh6544caLw9PSUno8cOVK8++670vPFixeL2rVri/z8/CLXQ0SvHs+BIqJyQQgB4MmhtKIkJyfD2dkZzs7OUlujRo1QtWpVJCcn48033wQAuLq6wsrKSurj6OgIIyMjVKpUSastPT1da/leXl46z5OSkqTnP//8MxYvXoxLly4hOzsbjx8/hrW1tdY8z667evXqWusZOnQo3nzzTdy4cQM1atTAmjVrMHDgwGK3m4hePR7CI6JyoV69elAoFFqHzJ4lhCg0aDzbbmJiojVdoVAU2pafn//cugqWm5CQgD59+uDdd9/F9u3bcerUKXz66afIzc3V6v+89TRv3hxNmzbFunXrcPLkSZw5cwYDBw58bh1E9GoxQBFRuWBra4suXbrgv//9L+7fv68z/e7du2jUqBGuXr2Ka9euSe3nz5+HWq1Gw4YNX7qGZ8+JSkhIQIMGDQAAv//+O1xcXPDpp5+iVatWqFevHq5cufJC6/noo4+wZs0arF69Gp06ddIaUSMiw8AARUTlxrJly6DRaNC6dWv88ssv+Ouvv5CcnIwlS5bAy8sLnTp1QpMmTdCvXz+cPHkSx44dw4ABA+Dj44NWrVq99Pq3bNmC1atX488//8T06dNx7NgxjBkzBgBQt25dXL16FZGRkfj777+xZMkSREdHv9B6+vXrhxs3buC7777D4MGDX7puItI/BigiKjfc3Nxw8uRJdOjQAWFhYXB3d4evry/27duHb7/9FgqFAlu3boWNjQ3efvttdOrUCbVr18bmzZv1sv6ZM2ciMjISTZo0wQ8//ICNGzeiUaNGAID3338fn3zyCcaMGYNmzZrhyJEjmDZt2gutx9raGj169ECVKlXQrVs3vdRORPqlEAVnZhIRUZEUCgWio6NfWaDx9fVFw4YNsWTJkleyPiKSh1fhEREZkIyMDOzZswf79+9HREREWZdDREVggCIiMiAtWrRAZmYm5s2bh/r165d1OURUBB7CIyIiIpKJJ5ETERERycQARURERCQTAxQRERGRTAxQRERERDIxQBERERHJxABFREREJBMDFBEREZFMDFBEREREMjFAEREREcn0/wD+PdVJx8K92QAAAABJRU5ErkJggg==",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the highest ticket price observed for each company\n",
"fig, ax = plt.subplots()\n",
"ax.bar(company_max_price[\"number_compagny\"], company_max_price[\"max_price\"])\n",
"\n",
"# Axis labels and title (user-facing strings kept unchanged)\n",
"ax.set_xlabel('Company')\n",
"ax.set_ylabel(\"Prix maximal d'un billet vendu\")\n",
"ax.set_title(\"Prix maximal de vente observé par compagnie de spectacle\")\n",
"\n",
"# Render the figure\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "bff23e5d-d7ed-4092-ae3c-5df503e54a6d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 762879.000000\n",
"mean 0.079068\n",
"std 3.969729\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 3334.000000\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of purchase_count for customers with no first_buying_date\n",
"(\n",
"    customerplus_clean_spectacle\n",
"    .loc[lambda df: df[\"first_buying_date\"].isna(), \"purchase_count\"]\n",
"    .describe()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "89466dbd-14d2-4ede-9ca0-b9c32b764e25",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 7.608090e+05\n",
"mean 3.863940e+00\n",
"std 1.685825e+03\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 1.469325e+06\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of purchase_count for customers that have a first_buying_date\n",
"(\n",
"    customerplus_clean_spectacle\n",
"    .loc[lambda df: df[\"first_buying_date\"].notna(), \"purchase_count\"]\n",
"    .describe()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "5f9feae4-35f4-43b6-adeb-f75773900a2d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343121</th>\n",
" <td>4667645</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534181.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343122</th>\n",
" <td>4667649</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534177.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343123</th>\n",
" <td>4667660</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534165.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343124</th>\n",
" <td>4667679</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534132.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343125</th>\n",
" <td>4667686</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1567949.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1523688 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 821538 139 NaN NaN 0 \n",
"1 809126 1063 NaN NaN 0 \n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"343121 4667645 122 NaN 1534181.0 0 \n",
"343122 4667649 122 NaN 1534177.0 0 \n",
"343123 4667660 122 NaN 1534165.0 0 \n",
"343124 4667679 122 NaN 1534132.0 0 \n",
"343125 4667686 122 NaN 1567949.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 875 False NaN 2 True ... \n",
"1 875 False NaN 2 True ... \n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"... ... ... ... ... ... ... \n",
"343121 862 False NaN 2 True ... \n",
"343122 862 False NaN 2 True ... \n",
"343123 862 False NaN 0 True ... \n",
"343124 862 False NaN 2 True ... \n",
"343125 862 False NaN 0 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"0 NaN NaN other 0 0 \n",
"1 NaN fr other 0 0 \n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"... ... ... ... ... ... \n",
"343121 NaN NaN other 0 0 \n",
"343122 NaN NaN other 0 0 \n",
"343123 NaN NaN female 1 0 \n",
"343124 NaN NaN other 0 0 \n",
"343125 NaN NaN female 1 0 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"0 1 NaN 0 10 False \n",
"1 1 1.0 0 10 False \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"343121 1 NaN 0 14 False \n",
"343122 1 NaN 0 14 False \n",
"343123 0 NaN 0 14 False \n",
"343124 1 NaN 0 14 False \n",
"343125 0 NaN 0 14 False \n",
"\n",
"[1523688 rows x 30 columns]"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Flag customers that have a recorded first purchase date.\n",
"# NOTE(review): the describe() output above shows rows with a missing first_buying_date\n",
"# but purchase_count > 0, so this flag may under-count buyers; confirm the intended definition.\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"first_buying_date\"].notna()\n",
"customerplus_clean_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "cec4f1eb-cec8-409d-8b2c-1e01f1bf81ff",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338933</th>\n",
" <td>3625705</td>\n",
" <td>648752</td>\n",
" <td>NaN</td>\n",
" <td>1253864.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338954</th>\n",
" <td>3627626</td>\n",
" <td>636890</td>\n",
" <td>NaN</td>\n",
" <td>1253887.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338959</th>\n",
" <td>3628124</td>\n",
" <td>653042</td>\n",
" <td>NaN</td>\n",
" <td>1253899.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338986</th>\n",
" <td>3631189</td>\n",
" <td>648423</td>\n",
" <td>NaN</td>\n",
" <td>1253928.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339039</th>\n",
" <td>3635380</td>\n",
" <td>659417</td>\n",
" <td>NaN</td>\n",
" <td>1253975.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26246 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"5 307036 139 NaN NaN 0 \n",
"6 2946 1063 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"338933 3625705 648752 NaN 1253864.0 0 \n",
"338954 3627626 636890 NaN 1253887.0 0 \n",
"338959 3628124 653042 NaN 1253899.0 0 \n",
"338986 3631189 648423 NaN 1253928.0 0 \n",
"339039 3635380 659417 NaN 1253975.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"5 875 False NaN 2 True ... \n",
"6 875 False NaN 2 False ... \n",
"... ... ... ... ... ... ... \n",
"338933 862 False NaN 0 True ... \n",
"338954 862 False NaN 0 True ... \n",
"338959 862 False NaN 0 True ... \n",
"338986 862 False NaN 0 True ... \n",
"339039 862 False NaN 1 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"5 NaN NaN other 0 0 \n",
"6 NaN fr other 0 0 \n",
"... ... ... ... ... ... \n",
"338933 NaN fr female 1 0 \n",
"338954 NaN fr female 1 0 \n",
"338959 NaN fr female 1 0 \n",
"338986 NaN fr female 1 0 \n",
"339039 NaN fr male 0 1 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"5 1 NaN 0 10 False \n",
"6 1 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"338933 0 1.0 0 14 False \n",
"338954 0 1.0 0 14 False \n",
"338959 0 1.0 0 14 False \n",
"338986 0 1.0 0 14 False \n",
"339039 0 1.0 0 14 False \n",
"\n",
"[26246 rows x 30 columns]"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# attention, on a des cas où le client a pas de première date d'achat alors qu'il compte plusieurs achats\n",
"# on peut donc avoir une date de première achat valant NaN non pas parce que l'individu n'a jamais acheté \n",
"# mais simplement car elle n'est pas renseignée\n",
"\n",
"customerplus_clean_spectacle[(customerplus_clean_spectacle[\"already_purchased\"]==False) &\n",
"(customerplus_clean_spectacle[\"purchase_count\"]>0)]"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "b5904039-a967-47d5-ba13-1b805bcd76ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [customer_id, street_id, structure_id, mcp_contact_id, fidelity, tenant_id, is_partner, deleted_at, gender, is_email_true, opt_in, last_buying_date, max_price, ticket_sum, average_price, average_purchase_delay, average_price_basket, average_ticket_basket, total_price, purchase_count, first_buying_date, country, gender_label, gender_female, gender_male, gender_other, country_fr, has_tags, number_compagny, already_purchased]\n",
"Index: []\n",
"\n",
"[0 rows x 30 columns]"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# cpdt, si un client a un nombre d'achats nul, il a bien une date de premier achat valant NaN, OK\n",
"customerplus_clean_spectacle[(customerplus_clean_spectacle[\"already_purchased\"]) &\n",
"(customerplus_clean_spectacle[\"purchase_count\"]==0)]"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "e940bfcf-29cc-4d4c-ae5e-e2a8cecf28af",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"number_compagny already_purchased\n",
"10 False 0.234840\n",
" True 0.236236\n",
"11 False 0.141746\n",
" True 0.002804\n",
"12 False 0.485950\n",
" True 0.244779\n",
"13 False 0.084057\n",
" True 0.177213\n",
"14 False 0.885553\n",
" True 0.308859\n",
"Name: opt_in, dtype: float64"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# différence de consentement aux campagnes de mails (opt in)\n",
"\n",
"# en se restreignant au personnes n'ayant pas acheté, on a quand même des individus acceptant d'être ciblés\n",
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"first_buying_date\"].isna()][\"opt_in\"].unique()\n",
"\n",
"# taux de consentement variés\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"] > 0\n",
"customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"])[\"opt_in\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "a5e79beb-9ba0-4c89-b084-e27ff0d65dcc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" <td>0.234840</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" <td>0.236236</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>False</td>\n",
" <td>0.141746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>True</td>\n",
" <td>0.002804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>False</td>\n",
" <td>0.485950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>True</td>\n",
" <td>0.244779</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>False</td>\n",
" <td>0.084057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>True</td>\n",
" <td>0.177213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" <td>0.885553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" <td>0.308859</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny already_purchased opt_in\n",
"0 10 False 0.234840\n",
"1 10 True 0.236236\n",
"2 11 False 0.141746\n",
"3 11 True 0.002804\n",
"4 12 False 0.485950\n",
"5 12 True 0.244779\n",
"6 13 False 0.084057\n",
"7 13 True 0.177213\n",
"8 14 False 0.885553\n",
"9 14 True 0.308859"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_graph = customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"])[\"opt_in\"].mean().reset_index()\n",
"df_graph"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "5be56c41-7697-481a-84ea-f77a2041484b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot groupé\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"categories = df_graph[\"number_compagny\"].unique()\n",
"bar_width = 0.35\n",
"bar_positions = np.arange(len(categories))\n",
"\n",
"# Grouper les données par label et créer les barres groupées\n",
"for label in df_graph[\"already_purchased\"].unique():\n",
" label_data = df_graph[df_graph['already_purchased'] == label]\n",
" values = [label_data[label_data['number_compagny'] == category]['opt_in'].values[0]*100 for category in categories]\n",
"\n",
" label_printed = \"purchased\" if label else \"no purchase\"\n",
" ax.bar(bar_positions, values, bar_width, label=label_printed)\n",
"\n",
" # Mise à jour des positions des barres pour le prochain groupe\n",
" bar_positions = [pos + bar_width for pos in bar_positions]\n",
"\n",
"# Ajout des étiquettes, de la légende, etc.\n",
"ax.set_xlabel('Numero de compagnie')\n",
"ax.set_ylabel('Part de consentement (%)')\n",
"ax.set_title('Part de consentement au mailing selon les compagnies')\n",
"ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Affichage du plot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "91b743c4-5473-41e1-b97e-cf06904f0fa8",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>0.226815</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>0.456172</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>0.086818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>0.000347</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.387308</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.000461</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>0.125966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>0.167097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>0.777891</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>0.175614</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased opt_in\n",
"0 10 0.0 0.226815\n",
"1 10 1.0 0.456172\n",
"2 11 0.0 0.086818\n",
"3 11 1.0 0.000347\n",
"4 12 0.0 0.387308\n",
"5 12 1.0 0.000461\n",
"6 13 0.0 0.125966\n",
"7 13 1.0 0.167097\n",
"8 14 0.0 0.777891\n",
"9 14 1.0 0.175614"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# on refait le graphique sur train set \n",
"\n",
"df_graph = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"opt_in\"].mean().reset_index()\n",
"df_graph"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "728e0021-4f95-4601-bb01-032db2cf6571",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.43578991448407206\n",
"0.2889600758160463\n"
]
}
],
"source": [
"# pourquoi une telle différence sur la variable opt in ??\n",
"print(train_set_spectacle[\"opt_in\"].mean())\n",
"print(customerplus_clean_spectacle[\"opt_in\"].mean())"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "274b4bc5-277f-476a-8bc1-c1764b1df2de",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8473746548562269\n",
"0.7573747808905485\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 76,
"id": "e1d837e1-c445-424b-867a-48b1e790f703",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"genre = homme : \n",
"0.3175633912091978\n",
"0.3103916287323914\n",
"email vérifié : \n",
"0.9581971527197163\n",
"0.9360131470484772\n",
"nationalité française : \n",
"0.8473746548562269\n",
"0.7573747808905485\n",
"nbre d'achats : \n",
"2.925387603847428\n",
"1.968932616126136\n"
]
}
],
"source": [
"# pour les autres variables, la distribution semble similaire\n",
"\n",
"print(\"genre = homme : \")\n",
"print(train_set_spectacle[\"gender_male\"].mean())\n",
"print(customerplus_clean_spectacle[\"gender_male\"].mean())\n",
"\n",
"print(\"email vérifié : \")\n",
"print(train_set_spectacle[\"is_email_true\"].mean())\n",
"print(customerplus_clean_spectacle[\"is_email_true\"].mean())\n",
"\n",
"print(\"nationalité française : \")\n",
"print(train_set_spectacle[\"country_fr\"].mean())\n",
"print(customerplus_clean_spectacle[\"country_fr\"].mean())\n",
"\n",
"# sauf pr nbre d'achats - à verif\n",
"print(\"nbre d'achats : \")\n",
"print(train_set_spectacle[\"purchase_count\"].mean())\n",
"print(customerplus_clean_spectacle[\"purchase_count\"].mean())"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "ec31d69c-846e-4d52-9ea9-f6712187b028",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343121</th>\n",
" <td>4667645</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534181.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343122</th>\n",
" <td>4667649</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534177.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343123</th>\n",
" <td>4667660</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534165.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343124</th>\n",
" <td>4667679</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534132.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343125</th>\n",
" <td>4667686</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1567949.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1523688 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 821538 139 NaN NaN 0 \n",
"1 809126 1063 NaN NaN 0 \n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"343121 4667645 122 NaN 1534181.0 0 \n",
"343122 4667649 122 NaN 1534177.0 0 \n",
"343123 4667660 122 NaN 1534165.0 0 \n",
"343124 4667679 122 NaN 1534132.0 0 \n",
"343125 4667686 122 NaN 1567949.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 875 False NaN 2 True ... \n",
"1 875 False NaN 2 True ... \n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"... ... ... ... ... ... ... \n",
"343121 862 False NaN 2 True ... \n",
"343122 862 False NaN 2 True ... \n",
"343123 862 False NaN 0 True ... \n",
"343124 862 False NaN 2 True ... \n",
"343125 862 False NaN 0 True ... \n",
"\n",
" purchase_count first_buying_date country gender_label \\\n",
"0 0 NaN NaN other \n",
"1 0 NaN fr other \n",
"2 14 NaN fr other \n",
"3 1 NaN fr female \n",
"4 1 NaN fr female \n",
"... ... ... ... ... \n",
"343121 0 NaN NaN other \n",
"343122 0 NaN NaN other \n",
"343123 0 NaN NaN female \n",
"343124 0 NaN NaN other \n",
"343125 0 NaN NaN female \n",
"\n",
" gender_female gender_male gender_other country_fr number_compagny \\\n",
"0 0 0 1 NaN 10 \n",
"1 0 0 1 1.0 10 \n",
"2 0 0 1 1.0 10 \n",
"3 1 0 0 1.0 10 \n",
"4 1 0 0 1.0 10 \n",
"... ... ... ... ... ... \n",
"343121 0 0 1 NaN 14 \n",
"343122 0 0 1 NaN 14 \n",
"343123 1 0 0 NaN 14 \n",
"343124 0 0 1 NaN 14 \n",
"343125 1 0 0 NaN 14 \n",
"\n",
" already_purchased \n",
"0 False \n",
"1 False \n",
"2 True \n",
"3 True \n",
"4 True \n",
"... ... \n",
"343121 False \n",
"343122 False \n",
"343123 False \n",
"343124 False \n",
"343125 False \n",
"\n",
"[1523688 rows x 29 columns]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "e8872cac-bde9-41ad-9297-0f2e02c7f0e8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_299341</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 05:47:26.333333333</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_63788</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>62.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>393.205891</td>\n",
" <td>281.017639</td>\n",
" <td>112.188252</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:13:51</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_759946</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_20653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>10.0</td>\n",
" <td>1 days 00:45:54</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_824705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697292</th>\n",
" <td>14_119950</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697293</th>\n",
" <td>14_938</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697294</th>\n",
" <td>14_5004707</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>2 days 16:42:51</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697295</th>\n",
" <td>14_108184</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697296</th>\n",
" <td>14_4663981</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>697297 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_299341 0.0 0.0 0.0 0.0 \n",
"1 10_63788 3.0 2.0 62.0 1.0 \n",
"2 10_759946 0.0 0.0 0.0 0.0 \n",
"3 10_20653 0.0 0.0 0.0 0.0 \n",
"4 10_824705 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"697292 14_119950 0.0 0.0 0.0 0.0 \n",
"697293 14_938 0.0 0.0 0.0 0.0 \n",
"697294 14_5004707 0.0 0.0 0.0 0.0 \n",
"697295 14_108184 0.0 0.0 0.0 0.0 \n",
"697296 14_4663981 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 NaN NaN \n",
"1 1.0 393.205891 281.017639 \n",
"2 0.0 NaN NaN \n",
"3 0.0 NaN NaN \n",
"4 0.0 NaN NaN \n",
"... ... ... ... \n",
"697292 0.0 NaN NaN \n",
"697293 0.0 NaN NaN \n",
"697294 0.0 NaN NaN \n",
"697295 0.0 NaN NaN \n",
"697296 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
"0 NaN 0.0 ... male \n",
"1 112.188252 3.0 ... female \n",
"2 NaN 0.0 ... other \n",
"3 NaN 0.0 ... male \n",
"4 NaN 0.0 ... other \n",
"... ... ... ... ... \n",
"697292 NaN 0.0 ... male \n",
"697293 NaN 0.0 ... male \n",
"697294 NaN 0.0 ... male \n",
"697295 NaN 0.0 ... other \n",
"697296 NaN 0.0 ... other \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 0 1 0 1.0 12.0 \n",
"1 1 0 0 1.0 3.0 \n",
"2 0 0 1 NaN 0.0 \n",
"3 0 1 0 1.0 11.0 \n",
"4 0 0 1 NaN 0.0 \n",
"... ... ... ... ... ... \n",
"697292 0 1 0 1.0 0.0 \n",
"697293 0 1 0 1.0 0.0 \n",
"697294 0 1 0 1.0 2.0 \n",
"697295 0 0 1 1.0 0.0 \n",
"697296 0 0 1 NaN 0.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
"1 1.0 0 days 05:13:51 1.0 \n",
"2 0.0 NaN 0.0 \n",
"3 10.0 1 days 00:45:54 0.0 \n",
"4 0.0 NaN 0.0 \n",
"... ... ... ... \n",
"697292 0.0 NaN 0.0 \n",
"697293 0.0 NaN 0.0 \n",
"697294 1.0 2 days 16:42:51 0.0 \n",
"697295 0.0 NaN 0.0 \n",
"697296 0.0 NaN 0.0 \n",
"\n",
" number_company \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"... ... \n",
"697292 14 \n",
"697293 14 \n",
"697294 14 \n",
"697295 14 \n",
"697296 14 \n",
"\n",
"[697297 rows x 41 columns]"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "d972ade5-974a-4fc9-8f83-bdf8503e1469",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot groupé\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"categories = df_graph[\"number_company\"].unique()\n",
"bar_width = 0.35\n",
"bar_positions = np.arange(len(categories))\n",
"\n",
"# Grouper les données par label et créer les barres groupées\n",
"for label in df_graph[\"y_has_purchased\"].unique():\n",
" label_data = df_graph[df_graph['y_has_purchased'] == label]\n",
" values = [label_data[label_data['number_company'] == category]['opt_in'].values[0]*100 for category in categories]\n",
"\n",
" label_printed = \"achat durant la période\" if label else \"aucun achat\"\n",
" ax.bar(bar_positions, values, bar_width, label=label_printed)\n",
"\n",
" # Mise à jour des positions des barres pour le prochain groupe\n",
" bar_positions = [pos + bar_width for pos in bar_positions]\n",
"\n",
"# Ajout des étiquettes, de la légende, etc.\n",
"ax.set_xlabel('Numero de compagnie')\n",
"ax.set_ylabel('Part de consentement (%)')\n",
"ax.set_title('Part de consentement au mailing selon les compagnies (train set)')\n",
"ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Affichage du plot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43deeeb5-8092-42fc-b80b-59d2c58093de",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 124,
"id": "32960530-cb46-4eeb-a6d2-1dcf5fb640d8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>gender_male</th>\n",
" <th>gender_female</th>\n",
" <th>gender_other</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.181580</td>\n",
" <td>0.343837</td>\n",
" <td>0.474583</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.179520</td>\n",
" <td>0.314443</td>\n",
" <td>0.506037</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.346380</td>\n",
" <td>0.454036</td>\n",
" <td>0.199584</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.318108</td>\n",
" <td>0.503092</td>\n",
" <td>0.178800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.331954</td>\n",
" <td>0.316181</td>\n",
" <td>0.351865</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny gender_male gender_female gender_other\n",
"0 10 0.181580 0.343837 0.474583\n",
"1 11 0.179520 0.314443 0.506037\n",
"2 12 0.346380 0.454036 0.199584\n",
"3 13 0.318108 0.503092 0.178800\n",
"4 14 0.331954 0.316181 0.351865"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# genre \n",
"\n",
"company_genders = customerplus_clean_spectacle.groupby(\"number_compagny\")[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
"company_genders"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "1b4a49d7-7bfe-4e80-aa7e-c9c6d4bc46e2",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_male\"], label = \"Homme\")\n",
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_female\"], \n",
" bottom = company_genders[\"gender_male\"], label = \"Femme\")\n",
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients de chaque sexe\")\n",
"plt.title(\"Sexe des clients de chaque compagnie de spectacle\")\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "c7348c95-e506-4002-90d9-d3b6768af985",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>gender_male</th>\n",
" <th>gender_female</th>\n",
" <th>gender_other</th>\n",
" <th>share_of_women</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>0.171838</td>\n",
" <td>0.333929</td>\n",
" <td>0.494232</td>\n",
" <td>0.660243</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>0.312165</td>\n",
" <td>0.683363</td>\n",
" <td>0.004472</td>\n",
" <td>0.686433</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>0.151162</td>\n",
" <td>0.273204</td>\n",
" <td>0.575635</td>\n",
" <td>0.643794</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>0.328477</td>\n",
" <td>0.597641</td>\n",
" <td>0.073881</td>\n",
" <td>0.645318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.334546</td>\n",
" <td>0.433672</td>\n",
" <td>0.231782</td>\n",
" <td>0.564517</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.366020</td>\n",
" <td>0.506659</td>\n",
" <td>0.127321</td>\n",
" <td>0.580579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>0.314243</td>\n",
" <td>0.503242</td>\n",
" <td>0.182515</td>\n",
" <td>0.615598</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>0.351721</td>\n",
" <td>0.504910</td>\n",
" <td>0.143369</td>\n",
" <td>0.589414</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>0.317971</td>\n",
" <td>0.296388</td>\n",
" <td>0.385641</td>\n",
" <td>0.482434</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>0.451289</td>\n",
" <td>0.485106</td>\n",
" <td>0.063605</td>\n",
" <td>0.518057</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased gender_male gender_female gender_other \\\n",
"0 10 0.0 0.171838 0.333929 0.494232 \n",
"1 10 1.0 0.312165 0.683363 0.004472 \n",
"2 11 0.0 0.151162 0.273204 0.575635 \n",
"3 11 1.0 0.328477 0.597641 0.073881 \n",
"4 12 0.0 0.334546 0.433672 0.231782 \n",
"5 12 1.0 0.366020 0.506659 0.127321 \n",
"6 13 0.0 0.314243 0.503242 0.182515 \n",
"7 13 1.0 0.351721 0.504910 0.143369 \n",
"8 14 0.0 0.317971 0.296388 0.385641 \n",
"9 14 1.0 0.451289 0.485106 0.063605 \n",
"\n",
" share_of_women \n",
"0 0.660243 \n",
"1 0.686433 \n",
"2 0.643794 \n",
"3 0.645318 \n",
"4 0.564517 \n",
"5 0.580579 \n",
"6 0.615598 \n",
"7 0.589414 \n",
"8 0.482434 \n",
"9 0.518057 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_genders = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
"company_genders[\"share_of_women\"] = company_genders[\"gender_female\"]/(1-company_genders[\"gender_other\"])\n",
"company_genders"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "799db5a6-24e3-43e9-a5ff-c8a7168a2897",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot groupé\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"categories = company_genders[\"number_company\"].unique()\n",
"bar_width = 0.35\n",
"bar_positions = np.arange(len(categories))\n",
"\n",
"# Grouper les données par label et créer les barres groupées\n",
"for label in company_genders[\"y_has_purchased\"].unique():\n",
" label_data = company_genders[df_graph['y_has_purchased'] == label]\n",
" values = [label_data[label_data['number_company'] == category]['share_of_women'].values[0]*100 for category in categories]\n",
"\n",
" label_printed = \"achat durant la période\" if label else \"aucun achat\"\n",
" ax.bar(bar_positions, values, bar_width, label=label_printed)\n",
"\n",
" # Mise à jour des positions des barres pour le prochain groupe\n",
" bar_positions = [pos + bar_width for pos in bar_positions]\n",
"\n",
"# Ajout des étiquettes, de la légende, etc.\n",
"ax.set_xlabel('Numero de compagnie')\n",
"ax.set_ylabel('Part de femmes (%)')\n",
"ax.set_title('Part de femmes selon les compagnies de spectacle (train set)')\n",
"ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Affichage du plot - la proportion de femmes est la même selon qu'il y ait achat sur la période ou non\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 144,
"id": "ed6374e5-f36c-4f8e-9dba-602715b726f1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.996136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.994838</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.002119</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.831795</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.993978</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny country_fr\n",
"0 10 0.996136\n",
"1 11 0.994838\n",
"2 12 0.002119\n",
"3 13 0.831795\n",
"4 14 0.993978"
]
},
"execution_count": 144,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pays d'origine (France VS reste du monde)\n",
"\n",
"company_country_fr = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"country_fr\"].mean().reset_index()\n",
"company_country_fr"
]
},
{
"cell_type": "code",
"execution_count": 147,
"id": "8d95cdd9-2ab3-4c9a-8442-bb9b98e0dd18",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_country_fr[\"number_compagny\"], company_country_fr[\"country_fr\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients français\")\n",
"plt.title(\"Nationalité des clients de chaque compagnie de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "b459f81f-6d30-44fa-ad65-e85acbf12fd2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>0.995421</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>0.999097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>0.995433</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>0.995016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.001565</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.002656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>0.843896</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>0.775967</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>0.995202</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>0.984715</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased country_fr\n",
"0 10 0.0 0.995421\n",
"1 10 1.0 0.999097\n",
"2 11 0.0 0.995433\n",
"3 11 1.0 0.995016\n",
"4 12 0.0 0.001565\n",
"5 12 1.0 0.002656\n",
"6 13 0.0 0.843896\n",
"7 13 1.0 0.775967\n",
"8 14 0.0 0.995202\n",
"9 14 1.0 0.984715"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# graphique sur le train set\n",
"\n",
"company_country_fr = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"country_fr\"]].mean().reset_index()\n",
"company_country_fr"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "357a6cd6-b1f2-41b8-9d92-155de84858cf",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot groupé\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"categories = company_country_fr[\"number_company\"].unique()\n",
"bar_width = 0.35\n",
"bar_positions = np.arange(len(categories))\n",
"\n",
"# Grouper les données par label et créer les barres groupées\n",
"for label in company_country_fr[\"y_has_purchased\"].unique():\n",
" label_data = company_country_fr[df_graph['y_has_purchased'] == label]\n",
" values = [label_data[label_data['number_company'] == category]['country_fr'].values[0]*100 for category in categories]\n",
"\n",
" label_printed = \"achat durant la période\" if label else \"aucun achat\"\n",
" ax.bar(bar_positions, values, bar_width, label=label_printed)\n",
"\n",
" # Mise à jour des positions des barres pour le prochain groupe\n",
" bar_positions = [pos + bar_width for pos in bar_positions]\n",
"\n",
"# Ajout des étiquettes, de la légende, etc.\n",
"ax.set_xlabel('Numero de compagnie')\n",
"ax.set_ylabel('Part de clients frnaçais (%)')\n",
"ax.set_title('Part de clients français des compagnies de spectacle (train set)')\n",
"ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Affichage du plot - la proportion de français est la même selon qu'il y ait achat sur la période ou non\n",
"# sauf compagnie 12, et peut-être 13\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "ecfd112e-270a-4223-b80f-7e95e57d199d",
"metadata": {},
"source": [
"### 2. campaigns_information"
]
},
{
"cell_type": "code",
"execution_count": 189,
"id": "b37e7ddf-321a-4ebe-9742-9e760a541d29",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 688953\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_campaigns 0\n",
"nb_campaigns_opened 0\n",
"time_to_open 301495\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",campaigns_information_spectacle.shape[0])\n",
"campaigns_information_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 192,
"id": "de1ecaac-25bb-4853-b8ab-3ef2ca6917ed",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>number_compagny</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>29</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>37</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>39</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:16:38</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>41</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:12:29</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>44</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254699</th>\n",
" <td>6837769</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 23:42:15</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254700</th>\n",
" <td>6875038</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254701</th>\n",
" <td>6875066</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254702</th>\n",
" <td>6875099</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254703</th>\n",
" <td>6875143</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:17:01</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>688953 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
"0 29 4 0.0 NaT \n",
"1 37 3 0.0 NaT \n",
"2 39 4 1.0 0 days 05:16:38 \n",
"3 41 4 1.0 0 days 01:12:29 \n",
"4 44 4 0.0 NaT \n",
"... ... ... ... ... \n",
"254699 6837769 1 1.0 0 days 23:42:15 \n",
"254700 6875038 1 0.0 NaT \n",
"254701 6875066 1 0.0 NaT \n",
"254702 6875099 1 0.0 NaT \n",
"254703 6875143 1 1.0 0 days 01:17:01 \n",
"\n",
" number_compagny no_campaign_opened \n",
"0 10 True \n",
"1 10 True \n",
"2 10 False \n",
"3 10 False \n",
"4 10 True \n",
"... ... ... \n",
"254699 14 False \n",
"254700 14 True \n",
"254701 14 True \n",
"254702 14 True \n",
"254703 14 False \n",
"\n",
"[688953 rows x 6 columns]"
]
},
"execution_count": 192,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# part de clients n'ouvrant jamais les mails par compagnie\n",
"\n",
"campaigns_information_spectacle[\"no_campaign_opened\"] = pd.isna(campaigns_information_spectacle[\"time_to_open\"])\n",
"campaigns_information_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 197,
"id": "b5a0060f-a9dd-435b-844f-b24674b8bc27",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.605656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.294001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.475719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.353820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.428148</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny no_campaign_opened\n",
"0 10 0.605656\n",
"1 11 0.294001\n",
"2 12 0.475719\n",
"3 13 0.353820\n",
"4 14 0.428148"
]
},
"execution_count": 197,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_lazy_customers = campaigns_information_spectacle.groupby(\"number_compagny\")[\"no_campaign_opened\"].mean().reset_index()\n",
"company_lazy_customers"
]
},
{
"cell_type": "code",
"execution_count": 198,
"id": "788c90e0-f13a-4804-ace7-e5159fddd7fd",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_lazy_customers[\"number_compagny\"], company_lazy_customers[\"no_campaign_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients n'ayant ouvert aucun mail\")\n",
"plt.title(\"Part de clients n'ayant ouvert aucun mail pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 203,
"id": "c48015c2-6451-4089-93b7-6d55d3b2e553",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>ratio_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>734772</td>\n",
" <td>126151.0</td>\n",
" <td>0.171687</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>342396</td>\n",
" <td>129833.0</td>\n",
" <td>0.379190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>3168123</td>\n",
" <td>810722.0</td>\n",
" <td>0.255900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>3218569</td>\n",
" <td>793581.0</td>\n",
" <td>0.246563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>2427043</td>\n",
" <td>723846.0</td>\n",
" <td>0.298242</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
"0 10 734772 126151.0 0.171687\n",
"1 11 342396 129833.0 0.379190\n",
"2 12 3168123 810722.0 0.255900\n",
"3 13 3218569 793581.0 0.246563\n",
"4 14 2427043 723846.0 0.298242"
]
},
"execution_count": 203,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# taux d'ouverture des campaigns\n",
"\n",
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
"company_campaigns_stats"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "d06ab865-4832-4fe9-918b-e5ff72bebee4",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'company_campaigns_stats' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[43], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Création du barplot\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mbar(\u001b[43mcompany_campaigns_stats\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnumber_compagny\u001b[39m\u001b[38;5;124m\"\u001b[39m], \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m*\u001b[39m company_campaigns_stats[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mratio_campaigns_opened\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Ajout de titres et d'étiquettes\u001b[39;00m\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mxlabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCompany\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"\u001b[0;31mNameError\u001b[0m: name 'company_campaigns_stats' is not defined"
]
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Taux d'ouverture (%)\")\n",
"plt.title(\"Taux d'ouverture des campagnes de mails pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 219,
"id": "5c37e063-a717-4a8c-828e-b386b87e8409",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# création d'un barplot permettant de visualiser les 2 indicateurs sur le même graphique\n",
"\n",
"# Création du premier barplot\n",
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"],\n",
" label = \"taux d'ouverture\", alpha = 0.7)\n",
"\n",
"# Création du deuxième barplot à côté du premier\n",
"bar_width = 0.4 # Largeur des barres\n",
"indices2 = company_campaigns_stats[\"number_compagny\"] + bar_width\n",
"plt.bar(indices2, 100 * (1 - company_lazy_customers[\"no_campaign_opened\"]), \n",
" label='Part de clients ouvrant des mails', alpha=0.7, width=bar_width)\n",
"\n",
"# Ajout des étiquettes et de la légende\n",
"plt.xlabel('Compagnie')\n",
"plt.ylabel('Taux (%)')\n",
"plt.title('Lien entre taux d ouverture des mails et nombre de clients actifs')\n",
"plt.legend()\n",
"\n",
"# Affichage du graphique\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "783f6fb2-5f26-42a9-a22d-f4ece44bfaf2",
"metadata": {},
"source": [
"### 3. products_purchased_reduced"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "74534ded-8121-43fb-8cf8-af353bed2c77",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 764880\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 0\n",
"purchase_date_max 0\n",
"time_between_purchase 0\n",
"nb_tickets_internet 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",products_purchased_reduced_spectacle.shape[0])\n",
"products_purchased_reduced_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "d64979ba-fccf-45f2-8a15-40ef1b49c74f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_430/3239820253.py:6: DtypeWarning: Columns (39) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" dataset_train = pd.read_csv(file_in, sep=\",\")\n"
]
}
],
"source": [
"#base d'entrainement\n",
"\n",
"#FILE_PATH_S3='projet-bdc2324-team1/Generalization/musique/Train_test/dataset_train14.csv'\n",
"\n",
"#with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" #dataset_train = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "635d60cd-2dbc-49da-b0f4-94e16667882f",
"metadata": {},
"outputs": [],
"source": [
"#Creation de la variable dependante fictive: 1 si l'individu a effectué un achat au cours de la periode de train et 0 sinon\n",
"\n",
"#dataset_train_modif=dataset_train\n",
"\n",
"#dataset_train_modif[\"y_purchase_fictive\"]=np.random.randint(2, size=dataset_train_modif.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "ea63e5d6-70f9-4685-8b08-673a47108954",
"metadata": {},
"outputs": [],
"source": [
"#dataset_train_modif[\"y_purchase_fictive\"].value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "98f7645f-ffe6-4d7b-8032-15e65f36af87",
"metadata": {},
"outputs": [],
"source": [
"\n",
"#dataset_train_modif[\"y_purchase_fictive\"]=dataset_train_modif[\"y_purchase_fictive\"].replace([0,1],[\"Purchase_train\",\"no_purchase_train\"])"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "6db089d5-5517-4aee-a5fd-53f20ae3f0d7",
"metadata": {},
"outputs": [],
"source": [
"#importation librairies\n",
"import warnings\n",
"warnings.simplefilter(\"ignore\")\n",
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy.stats import shapiro\n",
"from numpy.random import randn\n",
"import scipy.stats as st\n",
"%matplotlib inline\n",
"\n",
"#col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c532f884-3f44-4ce7-8924-9b4542bc3c10",
"metadata": {},
"outputs": [],
"source": [
"#histogrames des variable quantitatives\n",
"col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]\n",
"for col in col_purchase:\n",
" plt.figure()\n",
" sns.histplot(products_purchased_reduced_spectacle[col], kde=True, color='red')"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "eb6355e0-3f8c-47d9-a5ee-d349040dcf51",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, \"Boite à moustache du chiffre d'affaire selon les compagnies de spectacles\")"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#repartition Chiffre d'affaire selon les compagnie de spectacle\n",
"\n",
"sns.boxplot(data=products_purchased_reduced_spectacle, y=\"total_amount\",x=\"number_compagny\",showfliers=False,showmeans=True)\n",
"plt.title(\"Boite à moustache du chiffre d'affaire selon les compagnies de spectacles\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "76e08ece-0b58-4b3a-abca-53e30ccc907b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Statistique F : 0.6726212699019267\n",
"Valeur de p : 0.6108808380730608\n",
"Nombre de degrés de liberté entre les groupes : 4\n",
"Nombre de degrés de liberté à l'intérieur des groupes : 764875\n",
"Il n'y a pas de différences significatives entre les entreprises .\n"
]
}
],
"source": [
"#test d'anova pour voir si la difference de chiffre d'affaire est statistiquement significative\n",
"\n",
"from scipy.stats import f_oneway\n",
"\n",
"# Créez une liste pour stocker les données de chaque groupe\n",
"groupes = []\n",
"\n",
"# Parcourez chaque modalité de la variable catégorielle et divisez les données en groupes\n",
"for modalite in products_purchased_reduced_spectacle['number_compagny'].unique():\n",
" groupe = products_purchased_reduced_spectacle[products_purchased_reduced_spectacle['number_compagny'] == modalite]['total_amount']\n",
" groupes.append(groupe)\n",
"\n",
"# Effectuez le test ANOVA\n",
"f_statistic, p_value = f_oneway(*groupes)\n",
"\n",
"# Nombre total d'observations\n",
"N = sum(len(groupe) for groupe in groupes)\n",
"\n",
"# Nombre de groupes ou de catégories\n",
"k = len(groupes)\n",
"\n",
"# Degrés de liberté entre les groupes\n",
"df_between = k - 1\n",
"\n",
"# Degrés de liberté à l'intérieur des groupes\n",
"df_within = N - k\n",
"\n",
"# Affichez les résultats\n",
"print(\"Statistique F :\", f_statistic)\n",
"print(\"Valeur de p :\", p_value)\n",
"\n",
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
"\n",
"if p_value < 0.05:\n",
" print(\"Il y a des différences significatives entre au moins une des entrepries .\")\n",
"else:\n",
" print(\"Il n'y a pas de différences significatives entre les entreprises .\")"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "aacf2c34-f7ea-4d6e-935b-c5db01f03bbe",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>Taux_ticket_internet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>492314</td>\n",
" <td>126262.0</td>\n",
" <td>25.646640</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>318969</td>\n",
" <td>16348.0</td>\n",
" <td>5.125263</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>591028</td>\n",
" <td>42045.0</td>\n",
" <td>7.113876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>7024227</td>\n",
" <td>1247482.0</td>\n",
" <td>17.759705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>335741</td>\n",
" <td>125638.0</td>\n",
" <td>37.421107</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_tickets nb_tickets_internet Taux_ticket_internet\n",
"0 10 492314 126262.0 25.646640\n",
"1 11 318969 16348.0 5.125263\n",
"2 12 591028 42045.0 7.113876\n",
"3 13 7024227 1247482.0 17.759705\n",
"4 14 335741 125638.0 37.421107"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Taux de ticket payé par internet selon les compagnies\n",
"\n",
"purchase_spectacle = products_purchased_reduced_spectacle.groupby(\"number_compagny\")[[\"nb_tickets\", \"nb_tickets_internet\"]].sum().reset_index()\n",
"purchase_spectacle[\"Taux_ticket_internet\"] = purchase_spectacle[\"nb_tickets_internet\"]*100 / purchase_spectacle[\"nb_tickets\"]\n",
"purchase_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "f71bb53d-724b-454d-8743-305d20eec2b0",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(purchase_spectacle[\"number_compagny\"], purchase_spectacle[\"Taux_ticket_internet\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Taux d'achat de tickets en ligne (%)\")\n",
"plt.title(\"Taux d'achat des tickets en ligne selon les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "59a95248-0261-4970-9e91-e43d50cf4d69",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles')"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#repartition Chiffre d'affaire selon le numero de la compagnie\n",
"\n",
"sns.boxplot(data=products_purchased_reduced_spectacle, y=\"time_between_purchase\",x=\"number_compagny\",showfliers=False,showmeans=True)\n",
"plt.title(\"Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "e2c51e28-6197-48f0-ab6d-9fc7b3b0de74",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Statistique F : 7956.05932109542\n",
"Valeur de p : 0.0\n",
"Nombre de degrés de liberté entre les groupes : 4\n",
"Nombre de degrés de liberté à l'intérieur des groupes : 764875\n",
"Il y a des différences significatives entre au moins une des entrepries .\n"
]
}
],
"source": [
"#test d'anova pour voir si la difference de temps entre le premier et le dernier achat est statistiquement significative\n",
"\n",
"from scipy.stats import f_oneway\n",
"\n",
"# Créez une liste pour stocker les données de chaque groupe\n",
"groupes = []\n",
"\n",
"# Parcourez chaque modalité de la variable catégorielle et divisez les données en groupes\n",
"for modalite in products_purchased_reduced_spectacle['number_compagny'].unique():\n",
" groupe = products_purchased_reduced_spectacle[products_purchased_reduced_spectacle['number_compagny'] == modalite]['time_between_purchase']\n",
" groupes.append(groupe)\n",
"\n",
"# Effectuez le test ANOVA\n",
"f_statistic, p_value = f_oneway(*groupes)\n",
"\n",
"# Nombre total d'observations\n",
"N = sum(len(groupe) for groupe in groupes)\n",
"\n",
"# Nombre de groupes ou de catégories\n",
"k = len(groupes)\n",
"\n",
"# Degrés de liberté entre les groupes\n",
"df_between = k - 1\n",
"\n",
"# Degrés de liberté à l'intérieur des groupes\n",
"df_within = N - k\n",
"\n",
"# Affichez les résultats\n",
"print(\"Statistique F :\", f_statistic)\n",
"print(\"Valeur de p :\", p_value)\n",
"\n",
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
"\n",
"if p_value < 0.05:\n",
" print(\"Il y a des différences significatives entre au moins une des entrepries .\")\n",
"else:\n",
" print(\"Il n'y a pas de différences significatives entre les entreprises .\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "74f06e96-3c25-4eca-8190-25b0a4ab0d75",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id int64\n",
"nb_tickets int64\n",
"nb_purchases int64\n",
"total_amount float64\n",
"nb_suppliers int64\n",
"vente_internet_max int64\n",
"purchase_date_min float64\n",
"purchase_date_max float64\n",
"time_between_purchase float64\n",
"nb_tickets_internet float64\n",
"number_compagny int64\n",
"dtype: object"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced_spectacle.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "20a70ec0-38f6-470e-a442-7884a150613a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#Repartition du nombre de canaux de vente selon les entreprise\n",
"plt.figure(figsize=(8, 6))\n",
"sns.barplot(x='number_compagny', y='nb_suppliers', data=products_purchased_reduced_spectacle, ci=None) # ci=None pour ne pas afficher les intervalles de confiance\n",
"plt.title('Nombre moyen de canaux de vente par entreprise')\n",
"plt.xlabel('number_compagny')\n",
"plt.ylabel('Nombre moyen de caneaux ')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "b9e84af4-a02b-4f83-81ae-b7a73475d060",
"metadata": {},
"source": [
"### 4. target_information"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2867eceb-1f72-406c-adc2-adfedcaf60e6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 6240166\n"
]
},
{
"data": {
"text/plain": [
"id 0\n",
"customer_id 0\n",
"target_name 0\n",
"target_type_is_import 0\n",
"target_type_name 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",target_information_spectacle.shape[0])\n",
"target_information_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "561f361d-7d39-430a-9e27-a32f6c2f7b50",
"metadata": {},
"outputs": [],
"source": [
"# pas exploitable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "904cbf32-77b6-49dd-a96c-9e7e5a0175c3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}