7025 lines
805 KiB
Plaintext
7025 lines
805 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "be628bfc-0bca-48b0-97c9-29063289127e",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Statistiques descriptives : compagnies offrant des spectacles"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "0bf5450b-f44d-430a-aed7-d875dc365048",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Importations et chargement des données"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "aa915888-cede-4eb0-8a26-7df573d29a3e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"import warnings\n",
|
||
"from datetime import date, timedelta, datetime\n",
|
||
"import numpy as np\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import re"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Import KPI construction functions\n",
|
||
"#exec(open('0_KPI_functions.py').read())\n",
|
||
"exec(open('../0_KPI_functions.py').read())\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
|
||
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
|
||
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
|
||
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||
"\n",
|
||
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
|
||
"fs.ls(BUCKET)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# test avec company 10\n",
|
||
"\n",
|
||
"dic_base=['campaigns_information','customerplus_cleaned','products_purchased_reduced','target_information']\n",
|
||
"for nom_base in dic_base:\n",
|
||
" FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n",
|
||
" with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
|
||
" globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# fonction permettant d'extraire une table à partir du numéro de la compagnie (directory_path)\n",
|
||
"\n",
|
||
"def display_databases(directory_path, file_name, datetime_col = None):\n",
|
||
" \"\"\"\n",
|
||
" This function returns the file from s3 storage \n",
|
||
" \"\"\"\n",
|
||
" file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + directory_path + \"/\" + file_name + \".csv\"\n",
|
||
" print(\"File path : \", file_path)\n",
|
||
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser) \n",
|
||
" return df \n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "c56decc3-de19-4786-82a4-1386c72a6bfb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>target_name</th>\n",
|
||
" <th>target_type_is_import</th>\n",
|
||
" <th>target_type_name</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1165098</td>\n",
|
||
" <td>618562</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1165100</td>\n",
|
||
" <td>618559</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1165101</td>\n",
|
||
" <td>618561</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1165102</td>\n",
|
||
" <td>618560</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1165103</td>\n",
|
||
" <td>618558</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69253</th>\n",
|
||
" <td>1698158</td>\n",
|
||
" <td>18580</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69254</th>\n",
|
||
" <td>1698159</td>\n",
|
||
" <td>18569</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69255</th>\n",
|
||
" <td>1698160</td>\n",
|
||
" <td>2962</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69256</th>\n",
|
||
" <td>1698161</td>\n",
|
||
" <td>3825</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>69257</th>\n",
|
||
" <td>1698162</td>\n",
|
||
" <td>5731</td>\n",
|
||
" <td>Newsletter mensuelle</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>manual_static_filter</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>69258 rows × 5 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id customer_id target_name target_type_is_import \\\n",
|
||
"0 1165098 618562 Newsletter mensuelle False \n",
|
||
"1 1165100 618559 Newsletter mensuelle False \n",
|
||
"2 1165101 618561 Newsletter mensuelle False \n",
|
||
"3 1165102 618560 Newsletter mensuelle False \n",
|
||
"4 1165103 618558 Newsletter mensuelle False \n",
|
||
"... ... ... ... ... \n",
|
||
"69253 1698158 18580 Newsletter mensuelle False \n",
|
||
"69254 1698159 18569 Newsletter mensuelle False \n",
|
||
"69255 1698160 2962 Newsletter mensuelle False \n",
|
||
"69256 1698161 3825 Newsletter mensuelle False \n",
|
||
"69257 1698162 5731 Newsletter mensuelle False \n",
|
||
"\n",
|
||
" target_type_name \n",
|
||
"0 manual_static_filter \n",
|
||
"1 manual_static_filter \n",
|
||
"2 manual_static_filter \n",
|
||
"3 manual_static_filter \n",
|
||
"4 manual_static_filter \n",
|
||
"... ... \n",
|
||
"69253 manual_static_filter \n",
|
||
"69254 manual_static_filter \n",
|
||
"69255 manual_static_filter \n",
|
||
"69256 manual_static_filter \n",
|
||
"69257 manual_static_filter \n",
|
||
"\n",
|
||
"[69258 rows x 5 columns]"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"target_information"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "c825d64b-356c-4b71-aa3c-90e0dd7ca092",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>ticket_id</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>purchase_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>supplier_name</th>\n",
|
||
" <th>purchase_date</th>\n",
|
||
" <th>amount</th>\n",
|
||
" <th>is_full_price</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" <th>name_facilities</th>\n",
|
||
" <th>name_categories</th>\n",
|
||
" <th>name_events</th>\n",
|
||
" <th>name_seasons</th>\n",
|
||
" <th>start_date_time</th>\n",
|
||
" <th>end_date_time</th>\n",
|
||
" <th>open</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1799177</td>\n",
|
||
" <td>36984</td>\n",
|
||
" <td>409613</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2016-04-28 17:58:26+02:00</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>danse</td>\n",
|
||
" <td>le grand t</td>\n",
|
||
" <td>abo t gourmand jeune</td>\n",
|
||
" <td>aringa rossa</td>\n",
|
||
" <td>test 2016/2017</td>\n",
|
||
" <td>2016-09-27 00:00:00+02:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1799178</td>\n",
|
||
" <td>36984</td>\n",
|
||
" <td>409613</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2016-04-28 17:58:26+02:00</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>cirque</td>\n",
|
||
" <td>le grand t</td>\n",
|
||
" <td>abo t gourmand jeune</td>\n",
|
||
" <td>5èmes hurlants</td>\n",
|
||
" <td>test 2016/2017</td>\n",
|
||
" <td>2016-11-18 00:00:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1799179</td>\n",
|
||
" <td>36984</td>\n",
|
||
" <td>409613</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2016-04-28 17:58:26+02:00</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>théâtre</td>\n",
|
||
" <td>le grand t</td>\n",
|
||
" <td>abo t gourmand jeune</td>\n",
|
||
" <td>dom juan</td>\n",
|
||
" <td>test 2016/2017</td>\n",
|
||
" <td>2016-12-07 00:00:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1799180</td>\n",
|
||
" <td>36984</td>\n",
|
||
" <td>409613</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2016-04-28 17:58:26+02:00</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>théâtre</td>\n",
|
||
" <td>le grand t</td>\n",
|
||
" <td>abo t gourmand jeune</td>\n",
|
||
" <td>vanishing point</td>\n",
|
||
" <td>test 2016/2017</td>\n",
|
||
" <td>2017-01-04 00:00:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1799181</td>\n",
|
||
" <td>36984</td>\n",
|
||
" <td>409613</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2016-04-28 17:58:26+02:00</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>cirque</td>\n",
|
||
" <td>la cite des congres</td>\n",
|
||
" <td>abo t gourmand jeune</td>\n",
|
||
" <td>a o lang pho</td>\n",
|
||
" <td>test 2016/2017</td>\n",
|
||
" <td>2017-01-03 00:00:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>492309</th>\n",
|
||
" <td>3252232</td>\n",
|
||
" <td>621716</td>\n",
|
||
" <td>710062</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2023-03-09 12:08:45+01:00</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>théâtre</td>\n",
|
||
" <td>cap nort</td>\n",
|
||
" <td>tarif sco co 1 seance scolaire</td>\n",
|
||
" <td>sur moi, le temps</td>\n",
|
||
" <td>2022/2023</td>\n",
|
||
" <td>2023-03-13 14:00:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>492310</th>\n",
|
||
" <td>3252233</td>\n",
|
||
" <td>621716</td>\n",
|
||
" <td>710062</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2023-03-09 12:08:45+01:00</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>théâtre</td>\n",
|
||
" <td>cap nort</td>\n",
|
||
" <td>tarif sco co 1 seance scolaire</td>\n",
|
||
" <td>sur moi, le temps</td>\n",
|
||
" <td>2022/2023</td>\n",
|
||
" <td>2023-03-13 14:00:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>492311</th>\n",
|
||
" <td>3252234</td>\n",
|
||
" <td>621716</td>\n",
|
||
" <td>710062</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2023-03-09 12:08:45+01:00</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>théâtre</td>\n",
|
||
" <td>cap nort</td>\n",
|
||
" <td>tarif sco co 1 seance scolaire</td>\n",
|
||
" <td>sur moi, le temps</td>\n",
|
||
" <td>2022/2023</td>\n",
|
||
" <td>2023-03-13 14:00:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>492312</th>\n",
|
||
" <td>3252235</td>\n",
|
||
" <td>621716</td>\n",
|
||
" <td>710062</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2023-03-09 12:08:45+01:00</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>théâtre</td>\n",
|
||
" <td>cap nort</td>\n",
|
||
" <td>tarif sco co 1 seance scolaire</td>\n",
|
||
" <td>sur moi, le temps</td>\n",
|
||
" <td>2022/2023</td>\n",
|
||
" <td>2023-03-13 14:00:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>492313</th>\n",
|
||
" <td>3252236</td>\n",
|
||
" <td>621716</td>\n",
|
||
" <td>710062</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>guichet</td>\n",
|
||
" <td>2023-03-09 12:08:45+01:00</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>théâtre</td>\n",
|
||
" <td>cap nort</td>\n",
|
||
" <td>tarif sco co 1 seance scolaire</td>\n",
|
||
" <td>sur moi, le temps</td>\n",
|
||
" <td>2022/2023</td>\n",
|
||
" <td>2023-03-13 14:00:00+01:00</td>\n",
|
||
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>492314 rows × 16 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
|
||
"0 1799177 36984 409613 2 guichet \n",
|
||
"1 1799178 36984 409613 3 guichet \n",
|
||
"2 1799179 36984 409613 1 guichet \n",
|
||
"3 1799180 36984 409613 1 guichet \n",
|
||
"4 1799181 36984 409613 3 guichet \n",
|
||
"... ... ... ... ... ... \n",
|
||
"492309 3252232 621716 710062 1 guichet \n",
|
||
"492310 3252233 621716 710062 1 guichet \n",
|
||
"492311 3252234 621716 710062 1 guichet \n",
|
||
"492312 3252235 621716 710062 1 guichet \n",
|
||
"492313 3252236 621716 710062 1 guichet \n",
|
||
"\n",
|
||
" purchase_date amount is_full_price name_event_types \\\n",
|
||
"0 2016-04-28 17:58:26+02:00 9.0 False danse \n",
|
||
"1 2016-04-28 17:58:26+02:00 9.0 False cirque \n",
|
||
"2 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
|
||
"3 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
|
||
"4 2016-04-28 17:58:26+02:00 12.0 False cirque \n",
|
||
"... ... ... ... ... \n",
|
||
"492309 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
|
||
"492310 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
|
||
"492311 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
|
||
"492312 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
|
||
"492313 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
|
||
"\n",
|
||
" name_facilities name_categories \\\n",
|
||
"0 le grand t abo t gourmand jeune \n",
|
||
"1 le grand t abo t gourmand jeune \n",
|
||
"2 le grand t abo t gourmand jeune \n",
|
||
"3 le grand t abo t gourmand jeune \n",
|
||
"4 la cite des congres abo t gourmand jeune \n",
|
||
"... ... ... \n",
|
||
"492309 cap nort tarif sco co 1 seance scolaire \n",
|
||
"492310 cap nort tarif sco co 1 seance scolaire \n",
|
||
"492311 cap nort tarif sco co 1 seance scolaire \n",
|
||
"492312 cap nort tarif sco co 1 seance scolaire \n",
|
||
"492313 cap nort tarif sco co 1 seance scolaire \n",
|
||
"\n",
|
||
" name_events name_seasons start_date_time \\\n",
|
||
"0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n",
|
||
"1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n",
|
||
"2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n",
|
||
"3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n",
|
||
"4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n",
|
||
"... ... ... ... \n",
|
||
"492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
|
||
"492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
|
||
"492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
|
||
"492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
|
||
"492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
|
||
"\n",
|
||
" end_date_time open \n",
|
||
"0 1901-01-01 00:09:21+00:09 True \n",
|
||
"1 1901-01-01 00:09:21+00:09 True \n",
|
||
"2 1901-01-01 00:09:21+00:09 True \n",
|
||
"3 1901-01-01 00:09:21+00:09 True \n",
|
||
"4 1901-01-01 00:09:21+00:09 True \n",
|
||
"... ... ... \n",
|
||
"492309 1901-01-01 00:09:21+00:09 True \n",
|
||
"492310 1901-01-01 00:09:21+00:09 True \n",
|
||
"492311 1901-01-01 00:09:21+00:09 True \n",
|
||
"492312 1901-01-01 00:09:21+00:09 True \n",
|
||
"492313 1901-01-01 00:09:21+00:09 True \n",
|
||
"\n",
|
||
"[492314 rows x 16 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"products_purchased_reduced"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "afd044b8-ac83-4a35-b959-700cae0b3b41",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_10/target_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
|
||
"<string>:28: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Tables imported for tenant 10\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_11/target_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
|
||
"<string>:28: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Tables imported for tenant 11\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
|
||
"/tmp/ipykernel_427/3170175140.py:10: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_12/target_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
|
||
"<string>:28: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Tables imported for tenant 12\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_13/target_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
|
||
"<string>:28: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Tables imported for tenant 13\n",
|
||
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
|
||
"/tmp/ipykernel_427/3170175140.py:10: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_14/target_information.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
|
||
"<string>:28: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Tables imported for tenant 14\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# création des bases contenant les KPI pour les 5 compagnies de spectacle\n",
|
||
"\n",
|
||
"# liste des compagnies de spectacle\n",
|
||
"nb_compagnie=['10','11','12','13','14']\n",
|
||
"\n",
|
||
"# début de la boucle permettant de générer des datasets agrégés pour les 5 compagnies de spectacle\n",
|
||
"for directory_path in nb_compagnie:\n",
|
||
" df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
|
||
" df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
|
||
" df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
|
||
" df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
|
||
" \n",
|
||
" df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n",
|
||
" df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
|
||
" df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
|
||
"\n",
|
||
" \n",
|
||
"# création de la colonne number_compagny (numéro de la compagnie), qui permettra d'agréger les résultats\n",
|
||
" df_tickets_kpi[\"number_compagny\"]=int(directory_path)\n",
|
||
" df_campaigns_kpi[\"number_compagny\"]=int(directory_path)\n",
|
||
" df_customerplus_clean[\"number_compagny\"]=int(directory_path)\n",
|
||
" df_target_information[\"number_compagny\"]=int(directory_path)\n",
|
||
"\n",
|
||
" if nb_compagnie.index(directory_path)>=1:\n",
|
||
" customerplus_clean_spectacle=pd.concat([customerplus_clean_spectacle,df_customerplus_clean],axis=0)\n",
|
||
" campaigns_information_spectacle=pd.concat([campaigns_information_spectacle,df_campaigns_kpi],axis=0)\n",
|
||
" products_purchased_reduced_spectacle=pd.concat([products_purchased_reduced_spectacle,df_tickets_kpi],axis=0)\n",
|
||
" target_information_spectacle=pd.concat([target_information_spectacle,df_target_information],axis=0)\n",
|
||
" else:\n",
|
||
" customerplus_clean_spectacle=df_customerplus_clean\n",
|
||
" campaigns_information_spectacle=df_campaigns_kpi\n",
|
||
" products_purchased_reduced_spectacle=df_tickets_kpi\n",
|
||
" target_information_spectacle=df_target_information\n",
|
||
"\n",
|
||
" print(f\"Tables imported for tenant {directory_path}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"id": "b5a4a031-9533-4a50-8569-5f4246691a7a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>mcp_contact_id</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>deleted_at</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>purchase_count</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>has_tags</th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18031</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>319517</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1556</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2020-01-01 14:06:52+00:00</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>291642</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>757541</td>\n",
|
||
" <td>303.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2016-09-08 14:50:00+00:00</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>3 rows × 29 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
|
||
"17 2 139 NaN NaN 0 \n",
|
||
"18031 2 319517 NaN NaN 0 \n",
|
||
"291642 2 757541 303.0 5.0 1 \n",
|
||
"\n",
|
||
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
|
||
"17 875 False NaN 2 False ... \n",
|
||
"18031 1556 False NaN 0 True ... \n",
|
||
"291642 862 False NaN 1 True ... \n",
|
||
"\n",
|
||
" purchase_count first_buying_date country gender_label \\\n",
|
||
"17 3 NaN NaN other \n",
|
||
"18031 2 2020-01-01 14:06:52+00:00 fr female \n",
|
||
"291642 3 2016-09-08 14:50:00+00:00 fr male \n",
|
||
"\n",
|
||
" gender_female gender_male gender_other country_fr has_tags \\\n",
|
||
"17 0 0 1 NaN 0 \n",
|
||
"18031 1 0 0 1.0 0 \n",
|
||
"291642 0 1 0 1.0 1 \n",
|
||
"\n",
|
||
" number_compagny \n",
|
||
"17 10 \n",
|
||
"18031 11 \n",
|
||
"291642 14 \n",
|
||
"\n",
|
||
"[3 rows x 29 columns]"
|
||
]
|
||
},
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==2]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "b9b6ec1f-36fb-4ee9-a1ed-09ff41878005",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "NameError",
|
||
"evalue": "name 'customerplus_clean_spectacle' is not defined",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcustomerplus_clean_spectacle\u001b[49m[customerplus_clean_spectacle[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcustomer_id\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m==\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
|
||
"\u001b[0;31mNameError\u001b[0m: name 'customerplus_clean_spectacle' is not defined"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"id": "a12c1b7d-6f6f-483e-b215-6336d7a51057",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['customer_id', 'street_id', 'structure_id', 'mcp_contact_id',\n",
|
||
" 'fidelity', 'tenant_id', 'is_partner', 'deleted_at', 'gender',\n",
|
||
" 'is_email_true', 'opt_in', 'last_buying_date', 'max_price',\n",
|
||
" 'ticket_sum', 'average_price', 'average_purchase_delay',\n",
|
||
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
|
||
" 'purchase_count', 'first_buying_date', 'country', 'gender_label',\n",
|
||
" 'gender_female', 'gender_male', 'gender_other', 'country_fr',\n",
|
||
" 'has_tags', 'number_compagny'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customerplus_clean_spectacle.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"id": "05b9a396-dcd7-4d3d-8b39-5ca48beba4b0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#customerplus_clean_spectacle.isna().sum()\n",
|
||
"#campaigns_information_spectacle.isna().sum()\n",
|
||
"#products_purchased_reduced_spectacle.isna().sum()\n",
|
||
"#target_information_spectacle.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "81e15508-32ca-46f1-a03d-1febddbbf5b4",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Ajout : importation de la table train_set pour y calculer les statistiques descriptives"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "3a1fdd6b-ac43-4e90-9a31-4f522bcc44bb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_427/3450421856.py:9: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" train_set_spectacle = pd.read_csv(file_in, sep=\",\")\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# importation de la table train_set des compagnies de spectacle (fichier rangé sous le dossier 'musique' du bucket S3)\n",
|
||
"\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
|
||
"\n",
|
||
"path_train_set_spectacle = \"projet-bdc2324-team1/Generalization/musique/Train_set.csv\"\n",
|
||
"\n",
|
||
"with fs.open(path_train_set_spectacle, mode=\"rb\") as file_in:\n",
|
||
" train_set_spectacle = pd.read_csv(file_in, sep=\",\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "3a4c1ff4-2861-4e86-99df-26eea0370dc3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10_299341</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0 days 05:47:26.333333333</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10_63788</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>62.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>393.205891</td>\n",
|
||
" <td>281.017639</td>\n",
|
||
" <td>112.188252</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 05:13:51</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>10_759946</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>10_20653</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>1 days 00:45:54</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>10_824705</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 40 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 10_299341 0.0 0.0 0.0 0.0 \n",
|
||
"1 10_63788 3.0 2.0 62.0 1.0 \n",
|
||
"2 10_759946 0.0 0.0 0.0 0.0 \n",
|
||
"3 10_20653 0.0 0.0 0.0 0.0 \n",
|
||
"4 10_824705 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 NaN NaN \n",
|
||
"1 1.0 393.205891 281.017639 \n",
|
||
"2 0.0 NaN NaN \n",
|
||
"3 0.0 NaN NaN \n",
|
||
"4 0.0 NaN NaN \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet ... country gender_label \\\n",
|
||
"0 NaN 0.0 ... fr male \n",
|
||
"1 112.188252 3.0 ... fr female \n",
|
||
"2 NaN 0.0 ... NaN other \n",
|
||
"3 NaN 0.0 ... fr male \n",
|
||
"4 NaN 0.0 ... NaN other \n",
|
||
"\n",
|
||
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
|
||
"0 0 1 0 1.0 12.0 \n",
|
||
"1 1 0 0 1.0 3.0 \n",
|
||
"2 0 0 1 NaN 0.0 \n",
|
||
"3 0 1 0 1.0 11.0 \n",
|
||
"4 0 0 1 NaN 0.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened time_to_open y_has_purchased \n",
|
||
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
|
||
"1 1.0 0 days 05:13:51 1.0 \n",
|
||
"2 0.0 NaN 0.0 \n",
|
||
"3 10.0 1 days 00:45:54 0.0 \n",
|
||
"4 0.0 NaN 0.0 \n",
|
||
"\n",
|
||
"[5 rows x 40 columns]"
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"train_set_spectacle.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "4632384d-2a06-445d-9fdb-b0c91b37ebaf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([0., 1.])"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# on remplace les valeurs manquantes (NaN) de y_has_purchased par 0\n",
|
||
"train_set_spectacle[\"y_has_purchased\"] = train_set_spectacle[\"y_has_purchased\"].fillna(0)\n",
|
||
"train_set_spectacle[\"y_has_purchased\"].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "5fd56696-b479-46c7-8a59-fb8137db5fb5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([10, 11, 12, 13, 14])"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# on crée la colonne number_company : numéro de la compagnie extrait du préfixe de customer_id (ex. '10_299341' -> 10)\n",
|
||
"\n",
|
||
"train_set_spectacle[\"number_company\"] = train_set_spectacle[\"customer_id\"].apply(lambda x : int(re.split(\"_\", str(x))[0]))\n",
|
||
"train_set_spectacle[\"number_company\"].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "91c6e047-43d2-456c-81f1-087026eef4f0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" <th>number_company</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10_299341</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0 days 05:47:26.333333333</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10_63788</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>62.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>393.205891</td>\n",
|
||
" <td>281.017639</td>\n",
|
||
" <td>112.188252</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 05:13:51</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>10_759946</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>10_20653</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>1 days 00:45:54</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>10_824705</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 41 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 10_299341 0.0 0.0 0.0 0.0 \n",
|
||
"1 10_63788 3.0 2.0 62.0 1.0 \n",
|
||
"2 10_759946 0.0 0.0 0.0 0.0 \n",
|
||
"3 10_20653 0.0 0.0 0.0 0.0 \n",
|
||
"4 10_824705 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 NaN NaN \n",
|
||
"1 1.0 393.205891 281.017639 \n",
|
||
"2 0.0 NaN NaN \n",
|
||
"3 0.0 NaN NaN \n",
|
||
"4 0.0 NaN NaN \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
|
||
"0 NaN 0.0 ... male \n",
|
||
"1 112.188252 3.0 ... female \n",
|
||
"2 NaN 0.0 ... other \n",
|
||
"3 NaN 0.0 ... male \n",
|
||
"4 NaN 0.0 ... other \n",
|
||
"\n",
|
||
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
|
||
"0 0 1 0 1.0 12.0 \n",
|
||
"1 1 0 0 1.0 3.0 \n",
|
||
"2 0 0 1 NaN 0.0 \n",
|
||
"3 0 1 0 1.0 11.0 \n",
|
||
"4 0 0 1 NaN 0.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
|
||
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
|
||
"1 1.0 0 days 05:13:51 1.0 \n",
|
||
"2 0.0 NaN 0.0 \n",
|
||
"3 10.0 1 days 00:45:54 0.0 \n",
|
||
"4 0.0 NaN 0.0 \n",
|
||
"\n",
|
||
" number_company \n",
|
||
"0 10 \n",
|
||
"1 10 \n",
|
||
"2 10 \n",
|
||
"3 10 \n",
|
||
"4 10 \n",
|
||
"\n",
|
||
"[5 rows x 41 columns]"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"train_set_spectacle.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "21e562d4-035d-4112-9f94-527b7fd935cf",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "fff306c2-1d41-4ef6-867b-ba9a7cf4ee68",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Statistiques descriptives"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "0549bdc4-edd7-4511-916e-26e94b5a30f5",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 0. Détection du client anonyme (outlier) - utile pour la section 3"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"id": "5b460061-f8b5-4a6b-ba59-539446d8487f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def outlier_detection(directory_path = \"1\", coupure = 1):\n",
|
||
" df_tickets = display_databases(directory_path, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n",
|
||
" df_tickets_kpi = tickets_kpi_function(df_tickets)\n",
|
||
"\n",
|
||
" if directory_path == \"101\" :\n",
|
||
" df_tickets_1 = display_databases(directory_path, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])\n",
|
||
" df_tickets_kpi_1 = tickets_kpi_function(df_tickets_1)\n",
|
||
"\n",
|
||
" df_tickets_kpi = pd.concat([df_tickets_kpi, df_tickets_kpi_1])\n",
|
||
" # Part du CA par customer\n",
|
||
" total_amount_share = df_tickets_kpi.groupby('customer_id')['total_amount'].sum().reset_index()\n",
|
||
" total_amount_share['total_amount_entreprise'] = total_amount_share['total_amount'].sum()\n",
|
||
" total_amount_share['share_total_amount'] = total_amount_share['total_amount']/total_amount_share['total_amount_entreprise']\n",
|
||
" \n",
|
||
" total_amount_share_index = total_amount_share.set_index('customer_id')\n",
|
||
" df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)\n",
|
||
" \n",
|
||
" top = df_circulaire[:coupure]\n",
|
||
" rest = df_circulaire[coupure:]\n",
|
||
" \n",
|
||
" # Calculez la somme du reste\n",
|
||
" rest_sum = rest.sum()\n",
|
||
" \n",
|
||
" # Nouvelle série : les `coupure` plus gros montants, plus une catégorie 'Autre' regroupant le reste\n",
|
||
" new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
|
||
" \n",
|
||
" # Créez le graphique circulaire\n",
|
||
" plt.figure(figsize=(3, 3))\n",
|
||
" plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
|
||
" plt.axis('equal') # Assurez-vous que le graphique est un cercle\n",
|
||
" plt.title('Répartition des montants totaux')\n",
|
||
" plt.show()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 69,
|
||
"id": "cccee90c-67d1-4e14-8410-1210a5ef97d9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# définition d'une fonction générant un diagramme en barres groupées : une série de barres par modalité de var_labels\n",
|
||
"\n",
|
||
"def multiple_barplot(data, x, y, var_labels, bar_width=0.35,\n",
|
||
" figsize=(10, 6), xlabel=None, ylabel=None, title=None, dico_labels = None) :\n",
|
||
"\n",
|
||
" # si aucun libellé d'axe n'est fourni, le graphique reprend les noms des variables x et y\n",
|
||
" xlabel = x if xlabel==None else xlabel\n",
|
||
" ylabel = y if ylabel==None else ylabel\n",
|
||
" \n",
|
||
" fig, ax = plt.subplots(figsize=figsize)\n",
|
||
" \n",
|
||
" categories = data[x].unique()\n",
|
||
" bar_width = bar_width\n",
|
||
" bar_positions = np.arange(len(categories))\n",
|
||
" \n",
|
||
" # Grouper les données par label et créer les barres groupées\n",
|
||
" for label in data[var_labels].unique():\n",
|
||
" label_data = data[data[var_labels] == label]\n",
|
||
" values = [label_data[label_data[x] == category][y].values[0] for category in categories]\n",
|
||
" \n",
|
||
" # label_printed = \"achat durant la période\" if label else \"aucun achat\"\n",
|
||
" label_printed = f\"{var_labels}={label}\" if dico_labels==None else dico_labels[label]\n",
|
||
" \n",
|
||
" ax.bar(bar_positions, values, bar_width, label=label_printed)\n",
|
||
" \n",
|
||
" # Mise à jour des positions des barres pour le prochain groupe\n",
|
||
" bar_positions = [pos + bar_width for pos in bar_positions]\n",
|
||
"\n",
|
||
" # Ajout des étiquettes, de la légende, etc.\n",
|
||
" ax.set_xlabel(xlabel)\n",
|
||
" ax.set_ylabel(ylabel)\n",
|
||
" ax.set_title(title)\n",
|
||
" ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])\n",
|
||
" ax.set_xticklabels(categories)\n",
|
||
" ax.legend()\n",
|
||
" \n",
|
||
" # Affichage du plot - remarque d'analyse (ne décrit pas la fonction) : la proportion de français est la même selon qu'il y ait achat sur la période ou non\n",
|
||
" # sauf compagnie 12, et peut-être 13\n",
|
||
" plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"id": "b6417f09-a6c7-4319-95b3-98c95ec5a3b7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
||
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 300x300 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# identification de l'outlier à retirer (à adapter selon les statistiques descriptives !)\n",
|
||
"outlier_detection(directory_path=\"10\") # mettre coupure = 2 si on veut aussi le 1er client non anonyme"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f08c082e-f76f-41f3-9530-3e6700eb74d9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# boucle pour identifier les outliers de chaque compagnie (et le client principal non anonyme)\n",
|
||
"\n",
|
||
"# nb_compagnie=['10','11','12','13','14']\n",
|
||
"for company_number in nb_compagnie :\n",
|
||
" print(f\"outlier for tenant {company_number}\")\n",
|
||
" outlier_detection(directory_path=company_number, coupure = 2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "dbe1af6a-79e9-45c7-a810-c6df3bf647f7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# print(products_purchased_reduced_spectacle.loc[products_purchased_reduced_spectacle[\"number_compagny\"]==10][\"total_amount\"].describe())\n",
|
||
"\n",
|
||
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==10) & \n",
|
||
"(products_purchased_reduced_spectacle[\"customer_id\"]==19521)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "20e2b8a2-f31c-42a4-8ea5-7ad67ab66915",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# company 11 \n",
|
||
"# etrange, pas de vente sur internet, et un seul supplier. Plus de 9k achats\n",
|
||
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==11) & \n",
|
||
"(products_purchased_reduced_spectacle[\"customer_id\"]==36)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5dbce57c-d091-4ce2-92f9-1201deb2462e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# company 12\n",
|
||
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==12) & \n",
|
||
"(products_purchased_reduced_spectacle[\"customer_id\"]==1706757)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "0a243b57-19da-4e29-a53d-bb8d03e2ab77",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# company 13\n",
|
||
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==13) & \n",
|
||
"(products_purchased_reduced_spectacle[\"customer_id\"]==8422)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "3d9b01bc-9584-4882-bd06-7de8acb8a88f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# company 14\n",
|
||
"# a-t-on vrmt un outlier ? A acheté quasi 3k tickets, pr 96 achats\n",
|
||
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==14) & \n",
|
||
"(products_purchased_reduced_spectacle[\"customer_id\"]==6354)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "033c1e00-52bd-4651-b893-57bda531760e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# verifs dans les tables customerplus (outlier incertain pr 11 et 14)\n",
|
||
"\n",
|
||
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==36) &\n",
|
||
"(customerplus_clean_spectacle[\"number_compagny\"]==11)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "28ac8cda-32fa-4fb7-a75b-e1cc24871c39",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==6354) &\n",
|
||
"(customerplus_clean_spectacle[\"number_compagny\"]==14)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "3faea297-2cc5-4704-af85-77d95f600cc1",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==8422) &\n",
|
||
"(customerplus_clean_spectacle[\"number_compagny\"]==13)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "b165ea79-347b-46fb-8217-635d9e888c65",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==19521) &\n",
|
||
"(customerplus_clean_spectacle[\"number_compagny\"]==10)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "282b0a96-5e78-48aa-9c2c-7d00d3907add",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"customerplus_clean_spectacle.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "4918db6e-249b-412e-b646-9a6686989b79",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e866edce-f4bc-4627-89d3-3ec7d9ef26e3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "42f8171c-e80d-4faa-b278-21fcbe3b242c",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 1. customerplus_clean"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"id": "47f98721-53dd-4f8f-85ac-88043ee8d967",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>mcp_contact_id</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>deleted_at</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>purchase_count</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>has_tags</th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>821538</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>809126</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11005</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>17663</td>\n",
|
||
" <td>12731</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>38100</td>\n",
|
||
" <td>12395</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>307036</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>2946</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>18441</td>\n",
|
||
" <td>11139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>9231</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>9870</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>10 rows × 29 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id street_id structure_id mcp_contact_id fidelity tenant_id \\\n",
|
||
"0 821538 139 NaN NaN 0 875 \n",
|
||
"1 809126 1063 NaN NaN 0 875 \n",
|
||
"2 11005 1063 NaN NaN 0 875 \n",
|
||
"3 17663 12731 NaN NaN 0 875 \n",
|
||
"4 38100 12395 NaN NaN 0 875 \n",
|
||
"5 307036 139 NaN NaN 0 875 \n",
|
||
"6 2946 1063 NaN NaN 0 875 \n",
|
||
"7 18441 11139 NaN NaN 0 875 \n",
|
||
"8 9231 139 NaN NaN 0 875 \n",
|
||
"9 9870 139 NaN NaN 0 875 \n",
|
||
"\n",
|
||
" is_partner deleted_at gender is_email_true ... purchase_count \\\n",
|
||
"0 False NaN 2 True ... 0 \n",
|
||
"1 False NaN 2 True ... 0 \n",
|
||
"2 False NaN 2 False ... 14 \n",
|
||
"3 False NaN 0 False ... 1 \n",
|
||
"4 False NaN 0 True ... 1 \n",
|
||
"5 False NaN 2 True ... 1 \n",
|
||
"6 False NaN 2 False ... 8 \n",
|
||
"7 False NaN 2 False ... 3 \n",
|
||
"8 False NaN 0 True ... 1 \n",
|
||
"9 False NaN 2 True ... 1 \n",
|
||
"\n",
|
||
" first_buying_date country gender_label gender_female gender_male \\\n",
|
||
"0 NaN NaN other 0 0 \n",
|
||
"1 NaN fr other 0 0 \n",
|
||
"2 NaN fr other 0 0 \n",
|
||
"3 NaN fr female 1 0 \n",
|
||
"4 NaN fr female 1 0 \n",
|
||
"5 NaN NaN other 0 0 \n",
|
||
"6 NaN fr other 0 0 \n",
|
||
"7 NaN fr other 0 0 \n",
|
||
"8 NaN NaN female 1 0 \n",
|
||
"9 NaN NaN other 0 0 \n",
|
||
"\n",
|
||
" gender_other country_fr has_tags number_compagny \n",
|
||
"0 1 NaN 0 10 \n",
|
||
"1 1 1.0 0 10 \n",
|
||
"2 1 1.0 0 10 \n",
|
||
"3 0 1.0 0 10 \n",
|
||
"4 0 1.0 0 10 \n",
|
||
"5 1 NaN 0 10 \n",
|
||
"6 1 1.0 0 10 \n",
|
||
"7 1 1.0 0 10 \n",
|
||
"8 0 NaN 0 10 \n",
|
||
"9 1 NaN 0 10 \n",
|
||
"\n",
|
||
"[10 rows x 29 columns]"
|
||
]
|
||
},
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# visu de la table\n",
|
||
"customerplus_clean_spectacle.head(10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"id": "738e063b-f84e-4a00-b35d-6d1d657e3c09",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de lignes de la table : 1523688\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"customer_id 0\n",
|
||
"street_id 0\n",
|
||
"structure_id 1460624\n",
|
||
"mcp_contact_id 729167\n",
|
||
"fidelity 0\n",
|
||
"tenant_id 0\n",
|
||
"is_partner 0\n",
|
||
"deleted_at 1523688\n",
|
||
"gender 0\n",
|
||
"is_email_true 0\n",
|
||
"opt_in 0\n",
|
||
"last_buying_date 762879\n",
|
||
"max_price 762879\n",
|
||
"ticket_sum 0\n",
|
||
"average_price 667328\n",
|
||
"average_purchase_delay 762915\n",
|
||
"average_price_basket 762915\n",
|
||
"average_ticket_basket 762915\n",
|
||
"total_price 95551\n",
|
||
"purchase_count 0\n",
|
||
"first_buying_date 762879\n",
|
||
"country 429486\n",
|
||
"gender_label 0\n",
|
||
"gender_female 0\n",
|
||
"gender_male 0\n",
|
||
"gender_other 0\n",
|
||
"country_fr 429486\n",
|
||
"has_tags 0\n",
|
||
"number_compagny 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# nombre de NaN\n",
|
||
"print(\"Nombre de lignes de la table : \",customerplus_clean_spectacle.shape[0])\n",
|
||
"customerplus_clean_spectacle.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 148,
|
||
"id": "296e51c5-30ae-4ade-ba3d-4ba4981a8758",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>45264</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>35313</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>216105</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>388731</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>101642</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_compagny customer_id\n",
|
||
"0 10 45264\n",
|
||
"1 11 35313\n",
|
||
"2 12 216105\n",
|
||
"3 13 388731\n",
|
||
"4 14 101642"
|
||
]
|
||
},
|
||
"execution_count": 148,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# nombre de clients de la compagnie (pas les clients visés par une campagne mais ceux ayant acheté)\n",
|
||
"# on rq le nbre de clients est très variable : de 35k à 389k\n",
|
||
"company_nb_clients = customerplus_clean_spectacle[customerplus_clean_spectacle[\"purchase_count\"]>0].groupby(\"number_compagny\")[\"customer_id\"].count().reset_index()\n",
|
||
"company_nb_clients"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 151,
|
||
"id": "5845aedf-78ca-4d3d-ad61-3561d4fc1886",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHFCAYAAAAUpjivAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABREUlEQVR4nO3dd1RU1/428OdIGYqAAsqAIqBiC2LDqBhFFFRi14jGXEssMdZLwFhijJir2BL7VVOMNYrJVbyxiw1jsCBK7InmYoegiCCIQ9vvH76cnyOgDAwOHJ/PWrOWZ58953xnDwyP+5SRhBACRERERApVydAFEBEREZUlhh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGnTK2bt06SJIEMzMz3Lx5s8D6Dh06wMPDwwCVAcOGDUPlypUNsu9XkSQJoaGhr3WfHTp0QIcOHV57HXv27Hltr9UQ4+rq6oru3bu/1n1S+Xfjxg1IkoR169aV+b4M8XNvCPfu3UNoaCji4uLKdD+v873TF4ad10Sj0eDzzz83dBmkoxMnTmDkyJFluo89e/Zg1qxZZboPovLG0dERJ06cQLdu3QxdimLcu3cPs2bNKvOwUxEx7LwmXbt2xebNm/H7778buhS9EEIgMzPT0GWUudatW6NmzZqGLoNIcVQqFVq3bo1q1aoZuhR6AzDsvCaTJ0+GnZ0dpkyZ8sq+T58+xbRp0+Dm5gZTU1PUqFED48aNw6NHj7T65R8e2LVrF5o1awZzc3M0bNgQu3btAvDsEFrDhg1haWmJt99+G2fOnCl0f5cuXUKnTp1gaWmJatWqYfz48Xjy5IlWH0mSMH78eKxevRoNGzaESqXC+vXrAQDXrl3DoEGDUL16dahUKjRs2BD//ve/izUuaWlpGDVqFOzs7FC5cmV07doVf/75Z6F9S7OfvLw8LF++HE2bNoW5uTmqVKmC1q1b45dffnnp8wqb/k5MTMTo0aNRs2ZNmJqaws3NDbNmzUJOTo7cJ3+a96uvvsKiRYvg5uaGypUro02bNjh58qTcb9iwYfJrkCRJfty4cQMA8PPPP6NVq1awsbGBhYUFateujeHDh7/y9ZbHcd23bx+aN28Oc3NzNGjQAD/88IPW+vv372Ps2LFo1KgRKleujOrVq6Njx4749ddfC2zr3r17CAwMhJWVFWxsbDBgwACcPHmywNR6YYcmgWfj7urqqtWWlZWF2bNno0GDBlCpVKhWrRo+/PBD3L9/v1hjcerUKfTo0QN2dnYwMzNDnTp1EBQUpNXn+PHj6NSpE6ysrGBhYQFvb2/s3r1bq0/+oe/Dhw/L76G1tTWGDBmCjIwMJCYmIjAwEFWqVIGjoyMmTZqE7Oxs+fn5P3sLFizAnDlzUKtWLZiZmcHLywuHDh3S2tf169fx4Ycfwt3dHRYWFqhRowZ69OiBCxcuFHh9ly5dQufOnWFhYYFq1aph3Lhx2L17NyRJwtGjR7XG3MPDAzExMWjXrp38cztv3jzk5eUVqPPFQyEV4fPkVb+XR48ehSRJ2LRpE4KDg6FWq2Fubg4fHx+cO3euwPbOnDmDnj17wtbWFmZmZmjWrBl++umnAv3u3r2Ljz76CM7OzjA1NYWTkxPee+89/P333zh69ChatmwJAPjwww/lz5L8z68zZ85g4MCBcHV1hbm5OVxdXfH+++8XenrFy/bzMqUZ0zInqEytXbtWABAxMTFi6dKlAoA4dOiQvN7Hx0e89dZb8nJeXp7o0qWLMDY2FjNmzBAHDhwQX331lbC0tBTNmjUTT58+lfu6uLiImjVrCg8PD7FlyxaxZ88e0apVK2FiYiK++OIL0bZtW7F9+3YREREh6tWrJxwcHMSTJ0/k5w8dOlSYmpqKWrVqiTlz5ogDBw6I0NBQYWxsLLp37671OgCIGjVqCE9PT7F582Zx+PBhcfHiRXHp0i
VhY2MjGjduLDZs2CAOHDggQkJCRKVKlURoaOhLxyYvL0/4+voKlUol73/mzJmidu3aAoCYOXOm3Lc0+xFCiMGDBwtJksTIkSPFf//7X7F3714xZ84csXTpUq33wsfHp8Drfr6OhIQE4ezsLFxcXMQ333wjDh48KP71r38JlUolhg0bJveLj48XAISrq6vo2rWr2LFjh9ixY4do3LixqFq1qnj06JEQQojr16+L9957TwAQJ06ckB9Pnz4V0dHRQpIkMXDgQLFnzx5x+PBhsXbtWjF48OAKNa75P6eNGjUSGzZsEPv37xf9+/cXAERUVJTc7+rVq2LMmDEiPDxcHD16VOzatUuMGDFCVKpUSRw5ckTu9+TJE9GwYUNhY2Mjli9fLvbv3y8mTpwoatWqJQCItWvXvvQ9FeLZz76Li4u8nJubK7p27SosLS3FrFmzRGRkpPj+++9FjRo1RKNGjbR+bwqzb98+YWJiIjw9PcW6devE4cOHxQ8//CAGDhwo9zl69KgwMTERLVq0EFu3bhU7duwQnTt3FpIkifDwcLlf/meGm5ubCAkJEQcOHBDz588XRkZG4v333xfNmzcXs2fPFpGRkWLKlCkCgPj666/l5+f/7Dk7O4t33nlHbNu2Tfz888+iZcuWwsTERERHR8t9o6KiREhIiPjPf/4joqKiREREhOjdu7cwNzcXV69elfvdu3dP2NnZiVq1aol169aJPXv2iMGDBwtXV1cBQOv98fHxEXZ2dsLd3V2sXr1aREZGirFjxwoAYv369QXqfP79qgifJ8X5vTxy5Ij8HvTq1Uvs3LlTbNq0SdStW1dYW1uLv/76S+57+PBhYWpqKtq1aye2bt0q9u3bJ4YNG1ZgbO7cuSMcHR2Fvb29WLRokTh48KDYunWrGD58uLhy5YpITU2Vf3Y+//xz+bPk9u3bQgghfv75Z/HFF1+IiIgIERUVJcLDw4WPj4+oVq2auH//frH3Uxbv3evAsFPGng87Go1G1K5dW3h5eYm8vDwhRMGws2/fPgFALFiwQGs7W7duFQDEt99+K7e5uLgIc3NzcefOHbktLi5OABCOjo4iIyNDbt+xY4cAIH755Re5bejQoQKA1h8mIYSYM2eOACCOHz8utwEQNjY24uHDh1p9u3TpImrWrClSU1O12sePHy/MzMwK9H/e3r17X7r/5z+cSrOfY8eOCQBi+vTpRfYRonhhZ/To0aJy5cri5s2bWv2++uorAUBcunRJCPF/HwaNGzcWOTk5cr/Tp08LAGLLli1y27hx40Rh/+/I32Z+MCqu8jauLi4uwszMTGvMMjMzha2trRg9enSRz8vJyRHZ2dmiU6dOok+fPnL7qlWrBADx3//+V6v/qFGjShx2tmzZIgCIbdu2afWLiYkRAMTKlStf+hrr1Kkj6tSpIzIzM4vs07p1a1G9enXx+PFjrdfo4eEhatasKX8m5H9mTJgwQev5vXv3FgDEokWLtNqbNm0qmjdvLi/n/+w5OTlp1ZOWliZsbW2Fn59fkTXm5OSIrKws4e7uLj755BO5/dNPPxWSJMk/3/m6dOlSaNgBIE6dOqXVt1GjRqJLly4F6nz+/aoInyfF+b3MDzvNmzeX31chhLhx44YwMTERI0eOlNsaNGggmjVrJrKzs7W20b17d+Ho6Chyc3OFEEIMHz5cmJiYiMuXLxe53/yf1+fHtCg5OTkiPT1dWFpaao1Zcfaj7/fudeBhrNfI1NQUs2fPxpkzZwqdogSAw4cPA3g2zf68/v37w9LSssA0dNOmTVGjRg15uWHDhgCeTSVbWFgUaC9syvKDDz7QWh40aBAA4MiRI1rtHTt2RNWqVeXlp0+f4tChQ+jTpw8sLCyQk5MjP9599108ffpU65DNi/K3X9T+9bWfvXv3AgDGjRtXZJ/i2rVrF3x9feHk5KRVR0BAAAAgKipKq3+3bt1gZGQkL3t6egIo/H14Uf6UdGBgIH766SfcvXu3WDWWx3Ft2r
QpatWqJS+bmZmhXr16BcZh9erVaN68OczMzGBsbAwTExMcOnQIV65c0Xp9VlZW6Nmz50tfny527dqFKlWqoEePHlrj0LRpU6jVaq3DNC/6888/8ddff2HEiBEwMzMrtE9GRgZOnTqF9957T+sKSCMjIwwePBh37tzBH3/8ofWcF69gy/8dfvGE3oYNGxb689S3b1+teqysrNCjRw8cO3YMubm5AICcnByEhYWhUaNGMDU1hbGxMUxNTXHt2jWtMY+KioKHhwcaNWqktY/333+/0NerVqvx9ttva7V5enq+9Oe+onye6PJ7OWjQIEiSJC+7uLjA29tbrvX69eu4evWqXPOLtSQkJMg/F3v37oWvr6/8c6Cr9PR0TJkyBXXr1oWxsTGMjY1RuXJlZGRkaL3XJdlPacf0dWDYec0GDhyI5s2bY/r06VrH2fMlJyfD2Ni4wEl7kiRBrVYjOTlZq93W1lZr2dTU9KXtT58+1Wo3NjaGnZ2dVptarZZreZ6jo2OBWnNycrB8+XKYmJhoPd59910AwIMHDwq8xhdfa1H719d+7t+/DyMjowLbLYm///4bO3fuLFDHW2+9VWgdL742lUoFAMU6ubt9+/bYsWMHcnJyMGTIENSsWRMeHh7YsmXLS59XHsf1xVqAZ2Px/DgsWrQIY8aMQatWrbBt2zacPHkSMTEx6Nq1q1a/5ORkODg4FNhead7fv//+G48ePYKpqWmBsUhMTHzlOAB46YnsKSkpEEIU+B0CACcnJwAFf990+d1+8fcaKHw81Go1srKykJ6eDgAIDg7GjBkz0Lt3b+zcuROnTp1CTEwMmjRpUqwxL6wNKN77/aKK8nmiy+9lUe9B/nudfw7MpEmTCtQyduxYrVru379fqoslBg0ahBUrVmDkyJHYv38/Tp8+jZiYGFSrVk3rfSnJfko7pq+DsUH3/gaSJAnz58+Hv78/vv322wLr7ezskJOTg/v372sFHiEEEhMT5f9V6EtOTg6Sk5O1PiASExPlWl6s/XlVq1aV/2da1P/u3dzcitx3/mstav/62k+1atWQm5uLxMTEQv/Y6MLe3h6enp6YM2dOoevz/3DpS69evdCrVy9oNBqcPHkSc+fOxaBBg+Dq6oo2bdoU+pyKOK4AsGnTJnTo0AGrVq3San/8+LHWsp2dHU6fPl3g+S++PuDZDFJqamqB9hc/eO3t7WFnZ4d9+/YVWpuVlVWRdef/nt65c6fIPlWrVkWlSpWQkJBQYN29e/fkGvSpsPFITEyEqampPLu0adMmDBkyBGFhYVr9Hjx4gCpVqsjLdnZ2hZ6cWtg+SqqifJ4Axf+9LOo9yK8v/z2fNm0a+vbtW+i+6tevD+DZz9nLfsZeJjU1Fbt27cLMmTMxdepUuV2j0eDhw4dafUuyH32MaVnjzI4B+Pn5wd/fH19++aX8P6x8nTp1AvDsQ+h527ZtQ0ZGhrxen3788Uet5c2bNwNAoVexPM/CwgK+vr44d+4cPD094eXlVeBR2P/w8vn6+r50//raT/4hphf/iJZE9+7dcfHiRdSpU6fQOkoSdooz26NSqeDj44P58+cDQKFXdOSriOMKPAvT+WOR7/z58zhx4oRWm6+vLx4/flzgiq8XXx/w7IrFP//8ExqNRm5LTk5GdHS0Vr/u3bsjOTkZubm5hY5D/h+cwtSrVw916tTBDz/8oLWf51laWqJVq1bYvn271vucl5eHTZs2oWbNmqhXr16R+yiJ7du3a834PH78GDt37kS7du3kQ6uFjfnu3bsLHJrx8fHBxYsXcfnyZa328PBwvdVbUT5Pnveq38stW7ZACCEv37x5E9HR0fJna/369eHu7o7ff/+90Dq8vLzkoB0QEIAjR44UONz5Yj1Awc8SSZIghCjwXn///ffyIc18xdnPi/Q5pmWFMzsGMn/+fLRo0QJJSUnyIRAA8Pf3R5cuXTBlyhSkpaWhbdu2OH/+PGbOnIlmzZph8ODBeq3D1NQUX3/9Nd
LT09GyZUtER0dj9uzZCAgIwDvvvPPK5y9duhTvvPMO2rVrhzFjxsDV1RWPHz/G9evXsXPnTvkcpMJ07twZ7du3x+TJk5GRkQEvLy/89ttv2Lhxo173065dOwwePBizZ8/G33//je7du0OlUuHcuXOwsLDAhAkTijdYAL788ktERkbC29sbEydORP369fH06VPcuHEDe/bswerVq3WeAm7cuDGAZz8TAQEBMDIygqenJ2bPno07d+6gU6dOqFmzJh49eoSlS5fCxMQEPj4+RW6vIo4r8Cxw/Otf/8LMmTPh4+ODP/74A19++SXc3Ny0LusfMmQIFi9ejCFDhmDOnDlwd3fHnj17sH///gLbHDx4ML755hv84x//wKhRo5CcnIwFCxbA2tpaq9/AgQPx448/4t1338U///lPvP322zAxMcGdO3dw5MgR9OrVC3369Cmy9n//+9/o0aMHWrdujU8++QS1atXCrVu3sH//fvmP79y5c+Hv7w9fX19MmjQJpqamWLlyJS5evIgtW7YUmDktLSMjI/j7+yM4OBh5eXmYP38+0tLStG5g2b17d6xbtw4NGjSAp6cnYmNjsXDhwgI/w0FBQfjhhx8QEBCAL7/8Eg4ODti8eTOuXr0KAKhUST//b64InydffPFFsX8vk5KS0KdPH4waNQqpqamYOXMmzMzMMG3aNLnPN998g4CAAHTp0gXDhg1DjRo18PDhQ1y5cgVnz57Fzz//DODZZ8/evXvRvn17fPbZZ2jcuDEePXqEffv2ITg4GA0aNECdOnVgbm6OH3/8EQ0bNkTlypXh5OQEJycntG/fHgsXLoS9vT1cXV0RFRWFNWvWaM3gFXc/+n7vXguDnh79Bnj+aqwXDRo0SADQuhpLiGdXqkyZMkW4uLgIExMT4ejoKMaMGSNSUlK0+rm4uIhu3boV2C4AMW7cOK22/LPnFy5cKLcNHTpUWFpaivPnz4sOHToIc3NzYWtrK8aMGSPS09Nfuc3ntz18+HBRo0YNYWJiIqpVqya8vb3F7NmzXzo2Qgjx6NEjMXz4cFGlShVhYWEh/P39xdWrVwtcPVHa/eTm5orFixcLDw8PYWpqKmxsbESbNm3Ezp075T7FuRpLCCHu378vJk6cKNzc3ISJiYmwtbUVLVq0ENOnT5fHrbDxLmqbGo1GjBw5UlSrVk1IkiQAiPj4eLFr1y4REBAgatSoIUxNTUX16tXFu+++K3799ddXvt7yNK5F/Zy+ON4ajUZMmjRJ1KhRQ5iZmYnmzZuLHTt2FLhySohnl8f269dPVK5cWVhZWYl+/fqJ6OjoQq9EWb9+vWjYsKEwMzMTjRo1Elu3bi10m9nZ2eKrr74STZo0EWZmZqJy5cqiQYMGYvTo0eLatWuvHIsTJ06IgIAAYWNjI1QqlahTp47WFU1CCPHrr7+Kjh07CktLS2Fubi5at26tNVZCFP2ZMXPmTAFA6zJhIf7v9zhf/s/e/PnzxaxZs0TNmjWFqampaNasmdi/f7/Wc1NSUsSIESNE9erVhYWFhXjnnXfEr7/+WujvwsWLF4Wfn58wMzMTtra2YsSIEWL9+vUCgPj999/lfi9eYfp8nc+PeWFX9OS3l+fPk+L8XuZfjbVx40YxceJEUa1aNaFSqUS7du3EmTNnCmzz999/F4GBgaJ69erCxMREqNVq0bFjR7F69Wqtfrdv3xbDhw8XarVamJiYCCcnJxEYGCj+/vtvuc+WLVtEgwYNhImJidbrzv+dqVq1qrCyshJdu3YVFy9eFC4uLmLo0KE67acs3ruyJgnx3BwbEVEFdePGDbi5uWHt2rUFrmZ8k+SPw8KFCzFp0qQy3ddHH32ELVu2IDk5WT6Bmp7dVNDX1xc///wz3nvvPUOXQ+BhLCIiKoYvv/wSTk5OqF27NtLT07Fr1y58//33+Pzzzxl0qNxj2CEiolcyMTHBwoULcefOHeTk5MDd3R2LFi3CP//5T0OXRvRKPIxFREREisZLz4
mIiEjRGHaIiIhI0Rh2iIiISNF4gjKe3cX03r17sLKy0vuNvYiIiKhsCCHw+PFjODk5vfTmlgw7ePbdNM7OzoYug4iIiErg9u3bL717PcMO/u9L/m7fvl3gNvJERERUPqWlpcHZ2fmlX9YLMOwA+L9v87a2tmbYISIiqmBedQpKuTlBee7cuZAkCUFBQXKbEAKhoaFwcnKCubk5OnTogEuXLmk9T6PRYMKECbC3t4elpSV69uyp89fTExERkXKVi7ATExODb7/9Fp6enlrtCxYswKJFi7BixQrExMRArVbD398fjx8/lvsEBQUhIiIC4eHhOH78ONLT09G9e/cCX1tPREREbyaDh5309HR88MEH+O6771C1alW5XQiBJUuWYPr06ejbty88PDywfv16PHnyBJs3bwYApKamYs2aNfj666/h5+eHZs2aYdOmTbhw4QIOHjxoqJdERERE5YjBw864cePQrVs3+Pn5abXHx8cjMTERnTt3lttUKhV8fHwQHR0NAIiNjUV2drZWHycnJ3h4eMh9iIiI6M1m0BOUw8PDcfbsWcTExBRYl5iYCABwcHDQandwcMDNmzflPqamplozQvl98p9fGI1GA41GIy+npaWV+DUQERFR+WawmZ3bt2/jn//8JzZt2gQzM7Mi+714hrUQ4pVnXb+qz9y5c2FjYyM/eI8dIiIi5TJY2ImNjUVSUhJatGgBY2NjGBsbIyoqCsuWLYOxsbE8o/PiDE1SUpK8Tq1WIysrCykpKUX2Kcy0adOQmpoqP27fvq3nV0dERETlhcHCTqdOnXDhwgXExcXJDy8vL3zwwQeIi4tD7dq1oVarERkZKT8nKysLUVFR8Pb2BgC0aNECJiYmWn0SEhJw8eJFuU9hVCqVfE8d3luHiIhI2Qx2zo6VlRU8PDy02iwtLWFnZye3BwUFISwsDO7u7nB3d0dYWBgsLCwwaNAgAICNjQ1GjBiBkJAQ2NnZwdbWFpMmTULjxo0LnPBMREREb6ZyfQflyZMnIzMzE2PHjkVKSgpatWqFAwcOaN0WevHixTA2NkZgYCAyMzPRqVMnrFu3DkZGRgasnIiIiMoLSQghDF2EoaWlpcHGxgapqak8pEVERFRBFPfvt8Hvs0NERERUlhh2iIiISNEYdoiIiEjRGHaIiIhI0cr11VhERPRqrlN3G7qECuPGvG6GLoEMgDM7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAYNO6tWrYKnpyesra1hbW2NNm3aYO/evfL6YcOGQZIkrUfr1q21tqHRaDBhwgTY29vD0tISPXv2xJ07d173SyEiIqJyyqBhp2bNmpg3bx7OnDmDM2fOoGPHjujVqxcuXbok9+natSsSEhLkx549e7S2ERQUhIiICISHh+P48eNIT09H9+7dkZub+7pfDhEREZVDxobceY8ePbSW58yZg1WrVuHkyZN46623AAAqlQpqtbrQ56empmLNmjXYuHEj/Pz8AACbNm2Cs7MzDh48iC5dupTtCyAiIqJyr9ycs5Obm4vw8HBkZGSgTZs2cvvRo0dRvXp11KtXD6NGjUJSUpK8LjY2FtnZ2ejcubPc5uTkBA8PD0RHRxe5L41Gg7S0NK0HERERKZPBw86FCxdQuXJlqFQqfPzxx4iIiECjRo0AAAEBAfjxxx9x+PBhfP3114iJiUHHjh2h0WgAAImJiTA1NUXVqlW1tung4IDExMQi9zl37lzY2NjID2dn57J7gURERGRQBj
2MBQD169dHXFwcHj16hG3btmHo0KGIiopCo0aNMGDAALmfh4cHvLy84OLigt27d6Nv375FblMIAUmSilw/bdo0BAcHy8tpaWkMPERERApl8LBjamqKunXrAgC8vLwQExODpUuX4ptvvinQ19HRES4uLrh27RoAQK1WIysrCykpKVqzO0lJSfD29i5ynyqVCiqVSs+vhIiIiMojgx/GepEQQj5M9aLk5GTcvn0bjo6OAIAWLVrAxMQEkZGRcp+EhARcvHjxpWGHiIiI3hwGndn57LPPEBAQAGdnZzx+/Bjh4eE4evQo9u3bh/T0dISGhqJfv35wdHTEjRs38Nlnn8He3h59+vQBANjY2GDEiBEICQmBnZ0dbG1tMWnSJDRu3Fi+OouIiIjebAYNO3///TcGDx6MhIQE2NjYwNPTE/v27YO/vz8yMzNx4cIFbNiwAY8ePYKjoyN8fX2xdetWWFlZydtYvHgxjI2NERgYiMzMTHTq1Anr1q2DkZGRAV8ZERERlReSEEIYughDS0tLg42NDVJTU2FtbW3ocoiIdOI6dbehS6gwbszrZugSSI+K+/e73J2zQ0RERKRPDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGgMO0RERKRoDDtERESkaAw7REREpGilCjsajUZfdRARERGVCZ3Czv79+zFs2DDUqVMHJiYmsLCwgJWVFXx8fDBnzhzcu3evrOokIiIiKpFihZ0dO3agfv36GDp0KCpVqoRPP/0U27dvx/79+7FmzRr4+Pjg4MGDqF27Nj7++GPcv3+/rOsmIiIiKhbj4nQKCwvDV199hW7duqFSpYL5KDAwEABw9+5dLF26FBs2bEBISIh+KyUiIiIqgWLN7Jw+fRo9evQoNOg8r0aNGliwYEGxg86qVavg6ekJa2trWFtbo02bNti7d6+8XgiB0NBQODk5wdzcHB06dMClS5e0tqHRaDBhwgTY29vD0tISPXv2xJ07d4q1fyIiIlK+Ul+NlZubi7i4OKSkpOj83Jo1a2LevHk4c+YMzpw5g44dO6JXr15yoFmwYAEWLVqEFStWICYmBmq1Gv7+/nj8+LG8jaCgIERERCA8PBzHjx9Heno6unfvjtzc3NK+NCIiIlIAncNOUFAQ1qxZA+BZ0PHx8UHz5s3h7OyMo0eP6rStHj164N1330W9evVQr149zJkzB5UrV8bJkychhMCSJUswffp09O3bFx4eHli/fj2ePHmCzZs3AwBSU1OxZs0afP311/Dz80OzZs2wadMmXLhwAQcPHtT1pREREZEC6Rx2/vOf/6BJkyYAgJ07dyI+Ph5Xr15FUFAQpk+fXuJCcnNzER4ejoyMDLRp0wbx8fFITExE586d5T4qlQo+Pj6Ijo4GAMTGxiI7O1urj5OTEzw8POQ+hdFoNEhLS9N6EBERkTLpHHYePHgAtVoNANizZw/69++PevXqYcSIEbhw4YLOBVy4cAGVK1eGSqXCxx9/jIiICDRq1AiJiYkAAAcHB63+Dg4O8rrExESYmpqiatWqRfYpzNy5c2FjYyM/nJ2dda6biIiIKgadw46DgwMuX76M3Nxc7Nu3D35+fgCAJ0+ewMjISOcC6tevj7i4OJw8eRJjxozB0KFDcfnyZXm9JEla/YUQBdpe9Ko+06ZNQ2pqqvy4ffu2znUTERFRxVCsS8+f9+GHHyIwMBCOjo6QJAn+/v4AgFOnTqFBgwY6F2Bqaoq6desCALy8vBATE4OlS5diypQpAJ7N3jg6Osr9k5KS5NketVqNrKwspKSkaM
3uJCUlwdvbu8h9qlQqqFQqnWslIiKiikfnmZ3Q0FCsWbMGH330EX777Tc5NBgZGWHq1KmlLkgIAY1GAzc3N6jVakRGRsrrsrKyEBUVJQeZFi1awMTERKtPQkICLl68+NKwQ0RERG8OnWZ28k8G/uabb9CvXz+tdUOHDtV555999hkCAgLg7OyMx48fIzw8HEePHsW+ffsgSRKCgoIQFhYGd3d3uLu7IywsDBYWFhg0aBAAwMbGBiNGjEBISAjs7Oxga2uLSZMmoXHjxvLhNSIiInqz6RR2TExMcPHixVeeM1Ncf//9NwYPHoyEhATY2NjA09MT+/btkw+NTZ48GZmZmRg7dixSUlLQqlUrHDhwAFZWVvI2Fi9eDGNjYwQGBiIzMxOdOnXCunXrSnT+EBERESmPJIQQujwhJCQEJiYmmDdvXlnV9NqlpaXBxsYGqampsLa2NnQ5REQ6cZ2629AlVBg35nUzdAmkR8X9+63zCcpZWVn4/vvvERkZCS8vL1haWmqtX7Roke7VEhEREZURncPOxYsX0bx5cwDAn3/+qbVOX4e3iIiIiPRF57Bz5MiRsqiDiIiIqEyU+ItAr1+/jv379yMzMxPAs0vGiYiIiMobncNOcnIyOnXqhHr16uHdd99FQkICAGDkyJEICQnRe4FEREREpaFz2Pnkk09gYmKCW7duwcLCQm4fMGAA9u3bp9fiiIiIiEpL53N2Dhw4gP3796NmzZpa7e7u7rh586beCiMiIiLSB51ndjIyMrRmdPI9ePCA3zdFRERE5Y7OYad9+/bYsGGDvCxJEvLy8rBw4UL4+vrqtTgiIiKi0tL5MNbChQvRoUMHnDlzBllZWZg8eTIuXbqEhw8f4rfffiuLGomIiIhKTOeZnUaNGuH8+fN4++234e/vj4yMDPTt2xfnzp1DnTp1yqJGIiIiohLTeWYHANRqNWbNmqXvWoiIiIj0rlhh5/z58/Dw8EClSpVw/vz5l/b19PTUS2FERERE+lCssNO0aVMkJiaievXqaNq0KSRJKvSOyZIkITc3V+9FEhEREZVUscJOfHw8qlWrJv+biIiIqKIoVthxcXEp9N9ERERE5V2xws4vv/xS7A327NmzxMUQERER6Vuxwk7v3r2LtTGes0NERETlTbHCTl5eXlnXQURERFQmdL6pIBEREVFFUqyZnWXLlhV7gxMnTixxMURERET6Vqyws3jx4mJtTJIkhh0iIiIqV4p9nx0iIiKiiojn7BAREZGiFWtmJzg4GP/6179gaWmJ4ODgl/ZdtGiRXgojIiIi0odihZ1z584hOztb/ndRJEnST1VEREREelKssHPkyJFC/01ERERU3vGcHSIiIlK0Ys3sPO/p06dYvnw5jhw5gqSkpAJ3Vz579qzeiiMiIiIqLZ3DzvDhwxEZGYn33nsPb7/9Ns/TISIionJN57Cze/du7NmzB23bti2LeoiIiIj0SudzdmrUqAErK6uyqIWIiIhI73QOO19//TWmTJmCmzdvlkU9RERERHql82EsLy8vPH36FLVr14aFhQVMTEy01j98+FBvxRERERGVls5h5/3338fdu3cRFhYGBweHUp2gPHfuXGzfvh1Xr16Fubk5vL29MX/+fNSvX1/uM2zYMKxfv17rea1atcLJkyflZY1Gg0mTJmHLli3IzMxEp06dsHLlStSsWbPEtREREZEy6Bx2oqOjceLECTRp0qTUO4+KisK4cePQsmVL5OTkYPr06ejcuTMuX74MS0tLuV/Xrl2xdu1aednU1FRrO0FBQdi5cyfCw8NhZ2eHkJAQdO/eHbGxsTAyMip1nURERFRx6Rx2GjRogMzMTL3sfN++fVrLa9euRfXq1REbG4v27dvL7SqVCmq1utBtpKamYs2aNdi4cSP8/PwAAJs2bYKzszMOHjyILl266KVWIiIiqph0PkF53rx5CAkJwdGjR5GcnIy0tDStR2mkpqYCAGxtbbXajx49iurVq6NevXoYNW
oUkpKS5HWxsbHIzs5G586d5TYnJyd4eHggOjq60P1oNBq91k1ERETll84zO127dgUAdOrUSatdCAFJkpCbm1uiQoQQCA4OxjvvvAMPDw+5PSAgAP3794eLiwvi4+MxY8YMdOzYEbGxsVCpVEhMTISpqSmqVq2qtT0HBwckJiYWuq+5c+di1qxZJaqTiIiIKhadw05ZfRHo+PHjcf78eRw/flyrfcCAAfK/PTw84OXlBRcXF+zevRt9+/Ytcnv54asw06ZNQ3BwsLyclpYGZ2fnUr4CIiIiKo90Djs+Pj56L2LChAn45ZdfcOzYsVdeQeXo6AgXFxdcu3YNAKBWq5GVlYWUlBSt2Z2kpCR4e3sXug2VSgWVSqW/F0BERETlVrHO2bl165ZOG717926x+gkhMH78eGzfvh2HDx+Gm5vbK5+TnJyM27dvw9HREQDQokULmJiYIDIyUu6TkJCAixcvFhl2iIiI6M1RrLDTsmVLjBo1CqdPny6yT2pqKr777jt4eHhg+/btxdr5uHHjsGnTJmzevBlWVlZITExEYmKifLVXeno6Jk2ahBMnTuDGjRs4evQoevToAXt7e/Tp0wcAYGNjgxEjRiAkJASHDh3CuXPn8I9//AONGzeWr84iIiKiN1exDmNduXIFYWFh6Nq1K0xMTODl5QUnJyeYmZkhJSUFly9fxqVLl+Dl5YWFCxciICCgWDtftWoVAKBDhw5a7WvXrsWwYcNgZGSECxcuYMOGDXj06BEcHR3h6+uLrVu3an0/1+LFi2FsbIzAwED5poLr1q3jPXaIiIgIkhBCFLfz06dPsWfPHvz666+4ceMGMjMzYW9vj2bNmqFLly5aV1FVJGlpabCxsUFqaiqsra0NXQ4RkU5cp+42dAkVxo153QxdAulRcf9+63SCspmZGfr27fvSq6CIiIiIyhOdbypIREREVJEw7BAREZGiMewQERGRojHsEBERkaIx7BAREZGi6Rx21q9fj927/+8yx8mTJ6NKlSrw9vbGzZs39VocERERUWnpHHbCwsJgbm4OADhx4gRWrFiBBQsWwN7eHp988oneCyQiIiIqDZ2/CPT27duoW7cuAGDHjh1477338NFHH6Ft27YF7oRMREREZGg6z+xUrlwZycnJAIADBw7I3z9lZmYmf6cVERERUXmh88yOv78/Ro4ciWbNmuHPP/9Et27Pbr196dIluLq66rs+IiIiolLReWbn3//+N9q0aYP79+9j27ZtsLOzAwDExsbi/fff13uBRERERKWh88xOWloali1bhkqVtHNSaGgobt++rbfCiIiIiPRB55kdNzc3PHjwoED7w4cP4ebmppeiiIiIiPRF57AjhCi0PT09HWZmZqUuiIiIiEifin0YKzg4GAAgSRK++OILWFhYyOtyc3Nx6tQpNG3aVO8FEhEREZVGscPOuXPnADyb2blw4QJMTU3ldaampmjSpAkmTZqk/wqJiIiISqHYYefIkSMAgA8//BBLly6FtbV1mRVFREREpC86X421du3asqiDiIiIqEzoHHYyMjIwb948HDp0CElJScjLy9Na/7///U9vxRERERGVls5hZ+TIkYiKisLgwYPh6OgISZLKoi4iIiIivdA57Ozduxe7d+9G27Zty6IeIiIiIr3S+T47VatWha2tbVnUQkRERKR3Ooedf/3rX/jiiy/w5MmTsqiHiIiISK90Poz19ddf46+//oKDgwNcXV1hYmKitf7s2bN6K46IKhbXqbsNXUKFcWNeN0OXQPTG0Dns9O7duwzKICIiIiobOoedmTNnlkUdRERERGVC53N2AODRo0f4/vvvMW3aNDx8+BDAs8NXd+/e1WtxRERERKWl88zO+fPn4efnBxsbG9y4cQOjRo2Cra0tIiIicPPmTWzYsKEs6iQiIiIqEZ1ndoKDgzFs2DBcu3YNZmZmcntAQACOHTum1+KIiIiISkvnsBMTE4PRo0cXaK9RowYSExP1UhQRERGRvu
gcdszMzJCWllag/Y8//kC1atX0UhQRERGRvugcdnr16oUvv/wS2dnZAABJknDr1i1MnToV/fr103uBRERERKWhc9j56quvcP/+fVSvXh2ZmZnw8fFB3bp1YWVlhTlz5pRFjUREREQlpnPYsba2xvHjx7Ft2zbMmzcP48ePx549exAVFQVLS0udtjV37ly0bNkSVlZWqF69Onr37o0//vhDq48QAqGhoXBycoK5uTk6dOiAS5cuafXRaDSYMGEC7O3tYWlpiZ49e+LOnTu6vjQiIiJSoBLdZwcAOnbsiEmTJmHy5Mnw8/Mr0TaioqIwbtw4nDx5EpGRkcjJyUHnzp2RkZEh91mwYAEWLVqEFStWICYmBmq1Gv7+/nj8+LHcJygoCBEREQgPD8fx48eRnp6O7t27Izc3t6Qvj4iIiBSiWPfZWbZsGT766COYmZlh2bJlL+07ceLEYu983759Wstr165F9erVERsbi/bt20MIgSVLlmD69Ono27cvAGD9+vVwcHDA5s2bMXr0aKSmpmLNmjXYuHGjHLo2bdoEZ2dnHDx4EF26dCl2PURERKQ8xQo7ixcvxgcffAAzMzMsXry4yH6SJOkUdl6UmpoKALC1tQUAxMfHIzExEZ07d5b7qFQq+Pj4IDo6GqNHj0ZsbCyys7O1+jg5OcHDwwPR0dEMO0RERG+4YoWd+Pj4Qv+tT0IIBAcH45133oGHhwcAyPftcXBw0Orr4OCAmzdvyn1MTU1RtWrVAn2Kuu+PRqOBRqORlwu7lJ6IiIiUocTn7Ojb+PHjcf78eWzZsqXAOkmStJaFEAXaXvSyPnPnzoWNjY38cHZ2LnnhREREVK4Va2YnODi42BtctGiRzkVMmDABv/zyC44dO4aaNWvK7Wq1GsCz2RtHR0e5PSkpSZ7tUavVyMrKQkpKitbsTlJSEry9vQvd37Rp07ReU1paGgMPERGRQhUr7Jw7d65YG3vVbMuLhBCYMGECIiIicPToUbi5uWmtd3Nzg1qtRmRkJJo1awYAyMrKQlRUFObPnw8AaNGiBUxMTBAZGYnAwEAAQEJCAi5evIgFCxYUul+VSgWVSqVTrURERFQxFSvsHDlypEx2Pm7cOGzevBn//e9/YWVlJZ9jY2NjA3Nzc0iShKCgIISFhcHd3R3u7u4ICwuDhYUFBg0aJPcdMWIEQkJCYGdnB1tbW0yaNAmNGzcu8SXxREREpBzFCjvPS01NRW5urnzFVL6HDx/C2NgY1tbWxd7WqlWrAAAdOnTQal+7di2GDRsGAJg8eTIyMzMxduxYpKSkoFWrVjhw4ACsrKzk/osXL4axsTECAwORmZmJTp06Yd26dTAyMtL15REREZHCSEIIocsTAgIC0KNHD4wdO1arffXq1fjll1+wZ88evRb4OqSlpcHGxgapqak6hTUi0uY6dbehS6gwbszrprdtcdyLT5/jToZX3L/fOl+NderUKfj6+hZo79ChA06dOqXr5oiIiIjKlM5hR6PRICcnp0B7dnY2MjMz9VIUERERkb7oHHZatmyJb7/9tkD76tWr0aJFC70URURERKQvOp+gPGfOHPj5+eH3339Hp06dAACHDh1CTEwMDhw4oPcCiYiIiEpD55mdtm3b4sSJE3B2dsZPP/2EnTt3om7dujh//jzatWtXFjUSERERlZjOMzsA0LRpU/z444/6roWIiIhI78rNd2MRERERlQWGHSIiIlI0hh0iIiJSNIYdIiIiUrQSh53r169j//798o0EdfzWCSIiIqLXQuewk5ycDD8/P9SrVw/vvvsuEhISAAAjR45ESEiI3gskIiIiKg2dw84nn3wCY2Nj3Lp1CxYWFnL7gAEDsG/fPr0WR0RERFRaOt9n58CBA9i/fz9q1qyp1e7u7o6bN2/qrTAiIiIifdB5ZicjI0NrRiffgwcPoFKp9FIUERERkb7oHHbat2+PDRs2yMuSJCEvLw8LFy6Er6+vXosjIiIiKi2dD2MtXLgQHT
p0wJkzZ5CVlYXJkyfj0qVLePjwIX777beyqJGIiIioxHSe2WnUqBHOnz+Pt99+G/7+/sjIyEDfvn1x7tw51KlTpyxqJCIiIiqxEn0RqFqtxqxZs/RdCxEREZHeFSvsnD9/vtgb9PT0LHExRERERPpWrLDTtGlTSJIEIQQkSZLb8++a/Hxbbm6unkskIiIiKrlinbMTHx+P//3vf4iPj8e2bdvg5uaGlStXIi4uDnFxcVi5ciXq1KmDbdu2lXW9RERERDop1syOi4uL/O/+/ftj2bJlePfdd+U2T09PODs7Y8aMGejdu7feiyQiIiIqKZ2vxrpw4QLc3NwKtLu5ueHy5ct6KYqIiIhIX3QOOw0bNsTs2bPx9OlTuU2j0WD27Nlo2LChXosjIiIiKi2dLz1fvXo1evToAWdnZzRp0gQA8Pvvv0OSJOzatUvvBRIRERGVhs5h5+2330Z8fDw2bdqEq1evQgiBAQMGYNCgQbC0tCyLGomIiIhKrEQ3FbSwsMBHH32k71qIiIiI9E7nc3aIiIiIKhKGHSIiIlI0hh0iIiJSNIYdIiIiUrQShZ1Hjx7h+++/x7Rp0/Dw4UMAwNmzZ3H37l29FkdERERUWjpfjXX+/Hn4+fnBxsYGN27cwKhRo2Bra4uIiAjcvHkTGzZsKIs6iYiIiEpE55md4OBgDBs2DNeuXYOZmZncHhAQgGPHjum1OCIiIqLS0jnsxMTEYPTo0QXaa9SogcTERJ22dezYMfTo0QNOTk6QJAk7duzQWj9s2DBIkqT1aN26tVYfjUaDCRMmwN7eHpaWlujZsyfu3Lmj68siIiIihdI57JiZmSEtLa1A+x9//IFq1arptK2MjAw0adIEK1asKLJP165dkZCQID/27NmjtT4oKAgREREIDw/H8ePHkZ6eju7duyM3N1enWoiIiEiZdD5np1evXvjyyy/x008/AQAkScKtW7cwdepU9OvXT6dtBQQEICAg4KV9VCoV1Gp1oetSU1OxZs0abNy4EX5+fgCATZs2wdnZGQcPHkSXLl10qoeIiIiUR+eZna+++gr3799H9erVkZmZCR8fH9StWxdWVlaYM2eO3gs8evQoqlevjnr16mHUqFFISkqS18XGxiI7OxudO3eW25ycnODh4YHo6Ogit6nRaJCWlqb1ICIiImXSeWbH2toax48fx+HDh3H27Fnk5eWhefPm8syKPgUEBKB///5wcXFBfHw8ZsyYgY4dOyI2NhYqlQqJiYkwNTVF1apVtZ7n4ODw0vOH5s6di1mzZum9XiIiIip/dAo7OTk5MDMzQ1xcHDp27IiOHTuWVV0AgAEDBsj/9vDwgJeXF1xcXLB792707du3yOcJISBJUpHrp02bhuDgYHk5LS0Nzs7O+imaiIiIyhWdDmMZGxvDxcXFYCf/Ojo6wsXFBdeuXQMAqNVqZGVlISUlRatfUlISHBwcityOSqWCtbW11oOIiIiUSedzdj7//HOtOye/TsnJybh9+zYcHR0BAC1atICJiQkiIyPlPgkJCbh48SK8vb1fe31ERERU/uh8zs6yZctw/fp1ODk5wcXFBZaWllrrz549W+xtpaen4/r16/JyfHw84uLiYGtrC1tbW4SGhqJfv35wdHTEjRs38Nlnn8He3h59+vQBANjY2GDEiBEICQmBnZ0dbG1tMWnSJDRu3LhMziEiIiKiikfnsNO7d2+97fzMmTPw9fWVl/PPoxk6dChWrVqFCxcuYMOGDXj06BEcHR3h6+uLrVu3wsrKSn7O4sWLYWxsjMDAQGRmZqJTp05Yt24djIyM9FYnERERVVySEEIYughDS0tLg42NDVJTU3n+DlEpuE7dbegSKowb87rpbVsc9+LT57iT4RX377fOMzv5zpw5gytXrkCSJDRs2BAtWrQo6aaIiIiIyozOYefOnTt4//338dtvv6FKlSoAgEePHsHb2xtbtmzhJdxERERUruh8Ndbw4cORnZ2NK1eu4OHDh3j48C
GuXLkCIQRGjBhRFjUSERERlZjOMzu//voroqOjUb9+fbmtfv36WL58Odq2bavX4oiIiIhKS+eZnVq1aiE7O7tAe05ODmrUqKGXooiIiIj0Reews2DBAkyYMAFnzpxB/oVcZ86cwT//+U989dVXei+QiIiIqDSKdRiratWqWt81lZGRgVatWsHY+NnTc3JyYGxsjOHDh+v1PjxEREREpVWssLNkyZIyLoOIiIiobBQr7AwdOrSs6yAiIiIqEyW+qWBSUhKSkpKQl5en1e7p6VnqooiIiIj0ReewExsbi6FDh8r31nmeJEnIzc3VW3FEREREpaVz2Pnwww9Rr149rFmzBg4ODlonLhMRERGVNzqHnfj4eGzfvh1169Yti3qIiIiI9Ern++x06tQJv//+e1nUQkRERKR3Os/sfP/99xg6dCguXrwIDw8PmJiYaK3v2bOn3oojIiIiKi2dw050dDSOHz+OvXv3FljHE5SJiIiovNH5MNbEiRMxePBgJCQkIC8vT+vBoENERETljc5hJzk5GZ988gkcHBzKoh4iIiIivdI57PTt2xdHjhwpi1qIiIiI9E7nc3bq1auHadOm4fjx42jcuHGBE5QnTpyot+KIiIiISqtEV2NVrlwZUVFRiIqK0lonSRLDDhEREZUrJbqpIBEREVFFofM5O88TQhT4fiwiIiKi8qREYWfDhg1o3LgxzM3NYW5uDk9PT2zcuFHftRERERGVms6HsRYtWoQZM2Zg/PjxaNu2LYQQ+O233/Dxxx/jwYMH+OSTT8qiTiIiIqIS0TnsLF++HKtWrcKQIUPktl69euGtt95CaGgoww4RERGVKzofxkpISIC3t3eBdm9vbyQkJOilKCIiIiJ90Tns1K1bFz/99FOB9q1bt8Ld3V0vRRERERHpi86HsWbNmoUBAwbg2LFjaNu2LSRJwvHjx3Ho0KFCQxARERGRIek8s9OvXz+cOnUK9vb22LFjB7Zv3w57e3ucPn0affr0KYsaiYiIiEpM55kdAGjRogU2bdqk71qIiIiI9K5UNxUkIiIiKu+KPbNTqVIlSJL00j6SJCEnJ6fURRERERHpS7HDTkRERJHroqOjsXz5cn51BBEREZU7xT6M1atXrwKP+vXrY926dfj666/Rv39//PHHHzrt/NixY+jRowecnJwgSRJ27NihtV4IgdDQUDg5OcHc3BwdOnTApUuXtPpoNBpMmDAB9vb2sLS0RM+ePXHnzh2d6iAiIiLlKtE5O/fu3cOoUaPg6emJnJwcxMXFYf369ahVq5ZO28nIyECTJk2wYsWKQtcvWLAAixYtwooVKxATEwO1Wg1/f388fvxY7hMUFISIiAiEh4fj+PHjSE9PR/fu3ZGbm1uSl0ZEREQKo9PVWKmpqQgLC8Py5cvRtGlTHDp0CO3atSvxzgMCAhAQEFDoOiEElixZgunTp6Nv374AgPXr18PBwQGbN2/G6NGjkZqaijVr1mDjxo3w8/MDAGzatAnOzs44ePAgunTpUuLaiIiISBmKHXYWLFiA+fPnQ61WY8uWLejVq1dZ1oX4+HgkJiaic+fOcptKpYKPjw+io6MxevRoxMbGIjs7W6uPk5MTPDw8EB0dXWTY0Wg00Gg08nJaWlrZvRAiIlIk16m7DV1ChXFjXjeD7r/YYWfq1KkwNzdH3bp1sX79eqxfv77Qftu3b9dLYYmJiQAABwcHrXYHBwfcvHlT7mNqaoqqVasW6JP//MLMnTsXs2bN0kudREREVL4VO+wMGTLklZeel4UX9ymEeGUdr+ozbdo0BAcHy8tpaWlwdnYuXaFERERULhU77Kxbt64MyyhIrVYDeDZ74+joKLcnJSXJsz1qtRpZWVlISUnRmt1JSkoq9JvZ86lUKqhUqjKqnIiIiMqTcnsHZTc3N6jVakRGRsptWVlZiIqKkoNMixYtYGJiotUnISEBFy9efGnYISIiojdHib4bS1/S09Nx/fp1eTk+Ph5xcXGwtbVFrV
q1EBQUhLCwMLi7u8Pd3R1hYWGwsLDAoEGDAAA2NjYYMWIEQkJCYGdnB1tbW0yaNAmNGzeWr84iIiKiN5tBw86ZM2fg6+srL+efRzN06FCsW7cOkydPRmZmJsaOHYuUlBS0atUKBw4cgJWVlfycxYsXw9jYGIGBgcjMzESnTp2wbt06GBkZvfbXQ0REROWPQcNOhw4dXvoVE5IkITQ0FKGhoUX2MTMzw/Lly7F8+fIyqJCIiIgqunJ7zg4RERGRPjDsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRojHsEBERkaIx7BAREZGiMewQERGRopXrsBMaGgpJkrQearVaXi+EQGhoKJycnGBubo4OHTrg0qVLBqyYiIiIyptyHXYA4K233kJCQoL8uHDhgrxuwYIFWLRoEVasWIGYmBio1Wr4+/vj8ePHBqyYiIiIypNyH3aMjY2hVqvlR7Vq1QA8m9VZsmQJpk+fjr59+8LDwwPr16/HkydPsHnzZgNXTUREROVFuQ87165dg5OTE9zc3DBw4ED873//AwDEx8cjMTERnTt3lvuqVCr4+PggOjr6pdvUaDRIS0vTehAREZEyleuw06pVK2zYsAH79+/Hd999h8TERHh7eyM5ORmJiYkAAAcHB63nODg4yOuKMnfuXNjY2MgPZ2fnMnsNREREZFjlOuwEBASgX79+aNy4Mfz8/LB7924AwPr16+U+kiRpPUcIUaDtRdOmTUNqaqr8uH37tv6LJyIionKhXIedF1laWqJx48a4du2afFXWi7M4SUlJBWZ7XqRSqWBtba31ICIiImWqUGFHo9HgypUrcHR0hJubG9RqNSIjI+X1WVlZiIqKgre3twGrJCIiovLE2NAFvMykSZPQo0cP1KpVC0lJSZg9ezbS0tIwdOhQSJKEoKAghIWFwd3dHe7u7ggLC4OFhQUGDRpk6NKJiIionCjXYefOnTt4//338eDBA1SrVg2tW7fGyZMn4eLiAgCYPHkyMjMzMXbsWKSkpKBVq1Y4cOAArKysDFz5/3GdutvQJVQoN+Z1M3QJRESkMOU67ISHh790vSRJCA0NRWho6OspiIiIiCqcCnXODhEREZGuGHaIiIhI0Rh2iIiISNEYdoiIiEjRGHaIiIhI0Rh2iIiISNEYdoiIiEjRyvV9dohKijdz1A1v5khESsaZHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNMWEnZUrV8LNzQ1mZmZo0aIFfv31V0OXREREROWAIsLO1q1bERQUhOnTp+PcuXNo164dAgICcOvWLUOXRkRERAamiLCzaNEijBgxAiNHjkTDhg2xZMkSODs7Y9WqVYYujYiIiAyswoedrKwsxMbGonPnzlrtnTt3RnR0tIGqIiIiovLC2NAFlNaDBw+Qm5sLBwcHrXYHBwckJiYW+hyNRgONRiMvp6amAgDS0tL0Xl+e5onet6lk+noPOO664bi/fvr8vOG4Fx/H3TDK4u/r89sVQry0X4UPO/kkSdJaFkIUaMs3d+5czJo1q0C7s7NzmdRGxWezxN
AVvJk47q8fx9wwOO6GUdbj/vjxY9jY2BS5vsKHHXt7exgZGRWYxUlKSiow25Nv2rRpCA4Olpfz8vLw8OFD2NnZFRmQlCQtLQ3Ozs64ffs2rK2tDV3OG4Pjbhgcd8PguBvGmzbuQgg8fvwYTk5OL+1X4cOOqakpWrRogcjISPTp00duj4yMRK9evQp9jkqlgkql0mqrUqVKWZZZLllbW78RvwzlDcfdMDjuhsFxN4w3adxfNqOTr8KHHQAIDg7G4MGD4eXlhTZt2uDbb7/FrVu38PHHHxu6NCIiIjIwRYSdAQMGIDk5GV9++SUSEhLg4eGBPXv2wMXFxdClERERkYEpIuwAwNixYzF27FhDl1EhqFQqzJw5s8ChPCpbHHfD4LgbBsfdMDjuhZPEq67XIiIiIqrAKvxNBYmIiIhehmGHiIiIFI1hh4iIiBSNYYeIiIgUjWFHwY4dO4YePXrAyckJkiRhx44dWuuFEAgNDYWTkxPMzc3RoUMHXLp0yTDFKsirxn379u3o0qUL7O3tIUkS4uLiDFKn0rxs3LOzszFlyhQ0btwYlpaWcHJywpAhQ3Dv3j3DFawQr/p5Dw0NRYMGDWBpaYmqVavCz88Pp06dMkyxCvGqMX/e6NGjIUkSlixZ8trqK48YdhQsIyMDTZo0wYoVKwpdv2DBAixatAgrVqxATEwM1Go1/P398fjx49dcqbK8atwzMjLQtm1bzJs37zVXpmwvG/cnT57g7NmzmDFjBs6ePYvt27fjzz//RM+ePQ1QqbK86ue9Xr16WLFiBS5cuIDjx4/D1dUVnTt3xv37919zpcrxqjHPt2PHDpw6deqVX6XwRhD0RgAgIiIi5OW8vDyhVqvFvHnz5LanT58KGxsbsXr1agNUqEwvjvvz4uPjBQBx7ty511rTm+Bl457v9OnTAoC4efPm6ynqDVCccU9NTRUAxMGDB19PUQpX1JjfuXNH1KhRQ1y8eFG4uLiIxYsXv/bayhPO7Lyh4uPjkZiYiM6dO8ttKpUKPj4+iI6ONmBlRK9HamoqJEl6I78Xz1CysrLw7bffwsbGBk2aNDF0OYqVl5eHwYMH49NPP8Vbb71l6HLKBcXcQZl0k/8t8S9+M7yDgwNu3rxpiJKIXpunT59i6tSpGDRo0BvzZYmGtGvXLgwcOBBPnjyBo6MjIiMjYW9vb+iyFGv+/PkwNjbGxIkTDV1KucGZnTecJElay0KIAm1ESpKdnY2BAwciLy8PK1euNHQ5bwRfX1/ExcUhOjoaXbt2RWBgIJKSkgxdliLFxsZi6dKlWLduHT/Ln8Ow84ZSq9UA/m+GJ19SUlKB2R4ipcjOzkZgYCDi4+MRGRnJWZ3XxNLSEnXr1kXr1q2xZs0aGBsbY82aNYYuS5F+/fVXJCUloVatWjA2NoaxsTFu3ryJkJAQuLq6Gro8g2HYeUO5ublBrVYjMjJSbsvKykJUVBS8vb0NWBlR2cgPOteuXcPBgwdhZ2dn6JLeWEIIaDQaQ5ehSIMHD8b58+cRFxcnP5ycnPDpp59i//79hi7PYHjOjoKlp6fj+vXr8nJ8fDzi4uJga2uLWrVqISgoCGFhYXB3d4e7uzvCwsJgYWGBQYMGGbDqiu9V4/7w4UPcunVLvsfLH3/8AeDZbFv+jBvp7mXj7uTkhPfeew9nz57Frl27kJubK89q2trawtTU1FBlV3gvG3c7OzvMmTMHPXv2hKOjI5KTk7Fy5UrcuXMH/fv3N2DVFdurPmNeDPImJiZQq9WoX7/+6y61/DD05WBUdo4cOSIAFHgMHTpUCPHs8vOZM2cKtVotVCqVaN++vbhw4YJhi1aAV4372rVrC10/c+ZMg9Zd0b1s3PMv8y/sceTIEUOXXqG9bNwzMzNFnz59hJOTkzA1NRWOjo6iZ8+e4vTp04Yuu0J71WfMi3jpuRCSEEKUbZwiIiIiMhyes0NERESKxrBDREREisawQ0RERIrGsENERESKxrBDREREisawQ0
RERIrGsENERESKxrBDREREisawQ0SvTWJiIiZMmIDatWtDpVLB2dkZPXr0wKFDhwxdGhEpGL8bi4heixs3bqBt27aoUqUKFixYAE9PT2RnZ2P//v0YN24crl69augSiUihOLNDRK/F2LFjIUkSTp8+jffeew/16tXDW2+9heDgYJw8eRIAcOvWLfTq1QuVK1eGtbU1AgMD8ffff8vbCA0NRdOmTfHDDz+gVq1aqFy5MsaMGYPc3FwsWLAAarUa1atXx5w5c7T2LUkSVq1ahYCAAJibm8PNzQ0///yzVp8pU6agXr16sLCwQO3atTFjxgxkZ2cX2PfGjRvh6uoKGxsbDBw4EI8fPwYAbNiwAXZ2dgW+zbtfv34YMmSIXseSiHTDsENEZe7hw4fYt28fxo0bB0tLywLrq1SpAiEEevfujYcPHyIqKgqRkZH466+/MGDAAK2+f/31F/bu3Yt9+/Zhy5Yt+OGHH9CtWzfcuXMHUVFRmD9/Pj7//HM5QOWbMWMG+vXrh99//x3/+Mc/8P777+PKlSvyeisrK6xbtw6XL1/G0qVL8d1332Hx4sUF9r1jxw7s2rULu3btQlRUFObNmwcA6N+/P3Jzc/HLL7/I/R88eIBdu3bhww8/LPUYElEpGPiLSInoDXDq1CkBQGzfvr3IPgcOHBBGRkbi1q1bctulS5cEAPlbsmfOnCksLCxEWlqa3KdLly7C1dVV5Obmym3169cXc+fOlZcBiI8//lhrf61atRJjxowpsp4FCxaIFi1ayMuF7fvTTz8VrVq1kpfHjBkjAgIC5OUlS5aI2rVri7y8vCL3Q0Rlj+fsEFGZE0IAeHY4qShXrlyBs7MznJ2d5bZGjRqhSpUquHLlClq2bAkAcHV1hZWVldzHwcEBRkZGqFSpklZbUlKS1vbbtGlTYDkuLk5e/s9//oMlS5bg+vXrSE9PR05ODqytrbWe8+K+HR0dtfYzatQotGzZEnfv3kWNGjWwdu1aDBs27KWvm4jKHg9jEVGZc3d3hyRJWoeNXiSEKDQUvNhuYmKitV6SpELb8vLyXllX/nZPnjyJgQMHIiAgALt27cK5c+cwffp0ZGVlafV/1X6aNWuGJk2aYMOGDTh79iwuXLiAYcOGvbIOIipbDDtEVOZsbW3RpUsX/Pvf/0ZGRkaB9Y8ePUKjRo1w69Yt3L59W26/fPkyUlNT0bBhw1LX8OI5PCdPnkSDBg0AAL/99htcXFwwffp0eHl5wd3dHTdv3izRfkaOHIm1a9fihx9+gJ+fn9ZMFREZBsMOEb0WK1euRG5uLt5++21s27YN165dw5UrV7Bs2TK0adMGfn5+8PT0xAcffICzZ8/i9OnTGDJkCHx8fODl5VXq/f/888/44Ycf8Oeff2LmzJk4ffo0xo8fDwCoW7cubt26hfDwcPz1119YtmwZIiIiSrSfDz74AHfv3sV3332H4cOHl7puIio9hh0iei3c3Nxw9uxZ+Pr6IiQkBB4eHvD398ehQ4ewatUqSJKEHTt2oGrVqmjfvj38/PxQu3ZtbN26VS/7nzVrFsLDw+Hp6Yn169fjxx9/RKNGjQAAvXr1wieffILx48ejadOmiI6OxowZM0q0H2tra/Tr1w+VK1dG79699VI7EZWOJPLPHCQiUihJkhAREfHawoe/vz8aNmyIZcuWvZb9EdHL8WosIiI9efjwIQ4cOIDDhw9jxYoVhi6HiP4/hh0iIj1p3rw5UlJSMH/+fNSvX9/Q5RDR/8fDWERERKRoPEGZiIiIFI1hh4iIiBSNYYeIiIgUjWGHiIiIFI1hh4iIiBSNYYeIiIgUjWGHiIiIFI1hh4iIiBSNYYeIiIgU7f8BQbKjHMIMgWAAAAAASUVORK5CYII=",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du barplot\n",
|
||
"plt.bar(company_nb_clients[\"number_compagny\"], company_nb_clients[\"customer_id\"]/1000)\n",
|
||
"\n",
|
||
"# Ajout de titres et d'étiquettes\n",
|
||
"plt.xlabel('Company')\n",
|
||
"plt.ylabel(\"Nombre de clients (milliers)\")\n",
|
||
"plt.title(\"Nombre de clients de chaque compagnie de spectacle\")\n",
|
||
"\n",
|
||
"# Affichage du barplot\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"id": "884a33d0-c275-4ab4-ab1f-8b53e563fb95",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" number_compagny already_purchased customer_id\n",
|
||
"0 10 True 45264\n",
|
||
"1 11 True 35313\n",
|
||
"2 12 True 216105\n",
|
||
"3 13 True 388731\n",
|
||
"4 14 True 101642\n",
|
||
" number_compagny already_purchased customer_id\n",
|
||
"0 10 False 53530\n",
|
||
"1 11 False 35994\n",
|
||
"2 12 False 26620\n",
|
||
"3 13 False 379005\n",
|
||
"4 14 False 241484\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# nouveau barplot pr les clients : on regarde la taille totale de la base et on distingue clients ayant acheté / pas acheté\n",
|
||
"\n",
|
||
"# variable relative à l'achat\n",
|
||
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"]>0\n",
|
||
"\n",
|
||
"nb_customers_purchasing_spectacle = customerplus_clean_spectacle[customerplus_clean_spectacle[\"already_purchased\"]].groupby([\"number_compagny\",\"already_purchased\"])[\"customer_id\"].count().reset_index()\n",
|
||
"nb_customers_no_purchase_spectacle = customerplus_clean_spectacle[~customerplus_clean_spectacle[\"already_purchased\"]].groupby([\"number_compagny\",\"already_purchased\"])[\"customer_id\"].count().reset_index()\n",
|
||
"\n",
|
||
"print(nb_customers_purchasing_spectacle)\n",
|
||
"print(nb_customers_no_purchase_spectacle)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"id": "41c9fb5a-708b-4f85-9918-00337151f155",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du barplot\n",
|
||
"plt.bar(nb_customers_purchasing_spectacle[\"number_compagny\"], nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"has purchased\")\n",
|
||
"plt.bar(nb_customers_no_purchase_spectacle[\"number_compagny\"], nb_customers_no_purchase_spectacle[\"customer_id\"]/1000, \n",
|
||
" bottom = nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"has not purchased\")\n",
|
||
"\n",
|
||
"\n",
|
||
"# Ajout de titres et d'étiquettes\n",
|
||
"plt.xlabel('Company')\n",
|
||
"plt.ylabel(\"Nombre de clients (en milliers)\")\n",
|
||
"plt.title(\"Nombre de clients ayant acheté ou été ciblés par des mails pour les compagnies de spectacle\")\n",
|
||
"plt.legend()\n",
|
||
"\n",
|
||
"# Affichage du barplot\n",
|
||
"plt.show()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 152,
|
||
"id": "fd11c547-7128-4ef6-ad7b-4b7c2a30cd9e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>max_price</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>13823.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>108.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>5000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>3180.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>456.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_compagny max_price\n",
|
||
"0 10 13823.0\n",
|
||
"1 11 108.0\n",
|
||
"2 12 5000.0\n",
|
||
"3 13 3180.0\n",
|
||
"4 14 456.0"
|
||
]
|
||
},
|
||
"execution_count": 152,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# prix maximal payé par un client pour chaque compagnie - très variable : de 108 à 13823\n",
|
||
"\n",
|
||
"company_max_price = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"max_price\"].max().reset_index()\n",
|
||
"company_max_price"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 153,
|
||
"id": "b8f8f162-4153-4cfe-bfaa-d981d414510d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du barplot\n",
|
||
"plt.bar(company_max_price[\"number_compagny\"], company_max_price[\"max_price\"])\n",
|
||
"\n",
|
||
"# Ajout de titres et d'étiquettes\n",
|
||
"plt.xlabel('Company')\n",
|
||
"plt.ylabel(\"Prix maximal d'un billet vendu\")\n",
|
||
"plt.title(\"Prix maximal de vente observé par compagnie de spectacle\")\n",
|
||
"\n",
|
||
"# Affichage du barplot\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"id": "bff23e5d-d7ed-4092-ae3c-5df503e54a6d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 762879.000000\n",
|
||
"mean 0.079068\n",
|
||
"std 3.969729\n",
|
||
"min 0.000000\n",
|
||
"25% 0.000000\n",
|
||
"50% 0.000000\n",
|
||
"75% 0.000000\n",
|
||
"max 3334.000000\n",
|
||
"Name: purchase_count, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 71,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"first_buying_date\"].isna()][\"purchase_count\"].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"id": "89466dbd-14d2-4ede-9ca0-b9c32b764e25",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 7.608090e+05\n",
|
||
"mean 3.863940e+00\n",
|
||
"std 1.685825e+03\n",
|
||
"min 1.000000e+00\n",
|
||
"25% 1.000000e+00\n",
|
||
"50% 1.000000e+00\n",
|
||
"75% 2.000000e+00\n",
|
||
"max 1.469325e+06\n",
|
||
"Name: purchase_count, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 72,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customerplus_clean_spectacle[~customerplus_clean_spectacle[\"first_buying_date\"].isna()][\"purchase_count\"].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 77,
|
||
"id": "5f9feae4-35f4-43b6-adeb-f75773900a2d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>mcp_contact_id</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>deleted_at</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>has_tags</th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>already_purchased</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>821538</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>809126</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11005</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>17663</td>\n",
|
||
" <td>12731</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>38100</td>\n",
|
||
" <td>12395</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>343121</th>\n",
|
||
" <td>4667645</td>\n",
|
||
" <td>122</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1534181.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>343122</th>\n",
|
||
" <td>4667649</td>\n",
|
||
" <td>122</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1534177.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>343123</th>\n",
|
||
" <td>4667660</td>\n",
|
||
" <td>122</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1534165.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>343124</th>\n",
|
||
" <td>4667679</td>\n",
|
||
" <td>122</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1534132.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>343125</th>\n",
|
||
" <td>4667686</td>\n",
|
||
" <td>122</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1567949.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1523688 rows × 30 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
|
||
"0 821538 139 NaN NaN 0 \n",
|
||
"1 809126 1063 NaN NaN 0 \n",
|
||
"2 11005 1063 NaN NaN 0 \n",
|
||
"3 17663 12731 NaN NaN 0 \n",
|
||
"4 38100 12395 NaN NaN 0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"343121 4667645 122 NaN 1534181.0 0 \n",
|
||
"343122 4667649 122 NaN 1534177.0 0 \n",
|
||
"343123 4667660 122 NaN 1534165.0 0 \n",
|
||
"343124 4667679 122 NaN 1534132.0 0 \n",
|
||
"343125 4667686 122 NaN 1567949.0 0 \n",
|
||
"\n",
|
||
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
|
||
"0 875 False NaN 2 True ... \n",
|
||
"1 875 False NaN 2 True ... \n",
|
||
"2 875 False NaN 2 False ... \n",
|
||
"3 875 False NaN 0 False ... \n",
|
||
"4 875 False NaN 0 True ... \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"343121 862 False NaN 2 True ... \n",
|
||
"343122 862 False NaN 2 True ... \n",
|
||
"343123 862 False NaN 0 True ... \n",
|
||
"343124 862 False NaN 2 True ... \n",
|
||
"343125 862 False NaN 0 True ... \n",
|
||
"\n",
|
||
" first_buying_date country gender_label gender_female gender_male \\\n",
|
||
"0 NaN NaN other 0 0 \n",
|
||
"1 NaN fr other 0 0 \n",
|
||
"2 NaN fr other 0 0 \n",
|
||
"3 NaN fr female 1 0 \n",
|
||
"4 NaN fr female 1 0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"343121 NaN NaN other 0 0 \n",
|
||
"343122 NaN NaN other 0 0 \n",
|
||
"343123 NaN NaN female 1 0 \n",
|
||
"343124 NaN NaN other 0 0 \n",
|
||
"343125 NaN NaN female 1 0 \n",
|
||
"\n",
|
||
" gender_other country_fr has_tags number_compagny already_purchased \n",
|
||
"0 1 NaN 0 10 False \n",
|
||
"1 1 1.0 0 10 False \n",
|
||
"2 1 1.0 0 10 False \n",
|
||
"3 0 1.0 0 10 False \n",
|
||
"4 0 1.0 0 10 False \n",
|
||
"... ... ... ... ... ... \n",
|
||
"343121 1 NaN 0 14 False \n",
|
||
"343122 1 NaN 0 14 False \n",
|
||
"343123 0 NaN 0 14 False \n",
|
||
"343124 1 NaN 0 14 False \n",
|
||
"343125 0 NaN 0 14 False \n",
|
||
"\n",
|
||
"[1523688 rows x 30 columns]"
|
||
]
|
||
},
|
||
"execution_count": 77,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"first_buying_date\"].isna()==False\n",
|
||
"customerplus_clean_spectacle"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 83,
|
||
"id": "cec4f1eb-cec8-409d-8b2c-1e01f1bf81ff",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>mcp_contact_id</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>deleted_at</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>has_tags</th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>already_purchased</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11005</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>17663</td>\n",
|
||
" <td>12731</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>38100</td>\n",
|
||
" <td>12395</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>307036</td>\n",
|
||
" <td>139</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>2946</td>\n",
|
||
" <td>1063</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>875</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>338933</th>\n",
|
||
" <td>3625705</td>\n",
|
||
" <td>648752</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1253864.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>338954</th>\n",
|
||
" <td>3627626</td>\n",
|
||
" <td>636890</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1253887.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>338959</th>\n",
|
||
" <td>3628124</td>\n",
|
||
" <td>653042</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1253899.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>338986</th>\n",
|
||
" <td>3631189</td>\n",
|
||
" <td>648423</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1253928.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>339039</th>\n",
|
||
" <td>3635380</td>\n",
|
||
" <td>659417</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1253975.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>862</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fr</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>26246 rows × 30 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
|
||
"2 11005 1063 NaN NaN 0 \n",
|
||
"3 17663 12731 NaN NaN 0 \n",
|
||
"4 38100 12395 NaN NaN 0 \n",
|
||
"5 307036 139 NaN NaN 0 \n",
|
||
"6 2946 1063 NaN NaN 0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"338933 3625705 648752 NaN 1253864.0 0 \n",
|
||
"338954 3627626 636890 NaN 1253887.0 0 \n",
|
||
"338959 3628124 653042 NaN 1253899.0 0 \n",
|
||
"338986 3631189 648423 NaN 1253928.0 0 \n",
|
||
"339039 3635380 659417 NaN 1253975.0 0 \n",
|
||
"\n",
|
||
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
|
||
"2 875 False NaN 2 False ... \n",
|
||
"3 875 False NaN 0 False ... \n",
|
||
"4 875 False NaN 0 True ... \n",
|
||
"5 875 False NaN 2 True ... \n",
|
||
"6 875 False NaN 2 False ... \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"338933 862 False NaN 0 True ... \n",
|
||
"338954 862 False NaN 0 True ... \n",
|
||
"338959 862 False NaN 0 True ... \n",
|
||
"338986 862 False NaN 0 True ... \n",
|
||
"339039 862 False NaN 1 True ... \n",
|
||
"\n",
|
||
" first_buying_date country gender_label gender_female gender_male \\\n",
|
||
"2 NaN fr other 0 0 \n",
|
||
"3 NaN fr female 1 0 \n",
|
||
"4 NaN fr female 1 0 \n",
|
||
"5 NaN NaN other 0 0 \n",
|
||
"6 NaN fr other 0 0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"338933 NaN fr female 1 0 \n",
|
||
"338954 NaN fr female 1 0 \n",
|
||
"338959 NaN fr female 1 0 \n",
|
||
"338986 NaN fr female 1 0 \n",
|
||
"339039 NaN fr male 0 1 \n",
|
||
"\n",
|
||
" gender_other country_fr has_tags number_compagny already_purchased \n",
|
||
"2 1 1.0 0 10 False \n",
|
||
"3 0 1.0 0 10 False \n",
|
||
"4 0 1.0 0 10 False \n",
|
||
"5 1 NaN 0 10 False \n",
|
||
"6 1 1.0 0 10 False \n",
|
||
"... ... ... ... ... ... \n",
|
||
"338933 0 1.0 0 14 False \n",
|
||
"338954 0 1.0 0 14 False \n",
|
||
"338959 0 1.0 0 14 False \n",
|
||
"338986 0 1.0 0 14 False \n",
|
||
"339039 0 1.0 0 14 False \n",
|
||
"\n",
|
||
"[26246 rows x 30 columns]"
|
||
]
|
||
},
|
||
"execution_count": 83,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# attention, on a des cas où le client a pas de première date d'achat alors qu'il compte plusieurs achats\n",
|
||
"# on peut donc avoir une date de première achat valant NaN non pas parce que l'individu n'a jamais acheté \n",
|
||
"# mais simplement car elle n'est pas renseignée\n",
|
||
"\n",
|
||
"customerplus_clean_spectacle[(customerplus_clean_spectacle[\"already_purchased\"]==False) &\n",
|
||
"(customerplus_clean_spectacle[\"purchase_count\"]>0)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"id": "b5904039-a967-47d5-ba13-1b805bcd76ca",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>street_id</th>\n",
|
||
" <th>structure_id</th>\n",
|
||
" <th>mcp_contact_id</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>is_partner</th>\n",
|
||
" <th>deleted_at</th>\n",
|
||
" <th>gender</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>first_buying_date</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>has_tags</th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>already_purchased</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>0 rows × 30 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [customer_id, street_id, structure_id, mcp_contact_id, fidelity, tenant_id, is_partner, deleted_at, gender, is_email_true, opt_in, last_buying_date, max_price, ticket_sum, average_price, average_purchase_delay, average_price_basket, average_ticket_basket, total_price, purchase_count, first_buying_date, country, gender_label, gender_female, gender_male, gender_other, country_fr, has_tags, number_compagny, already_purchased]\n",
|
||
"Index: []\n",
|
||
"\n",
|
||
"[0 rows x 30 columns]"
|
||
]
|
||
},
|
||
"execution_count": 80,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# cpdt, si un client a un nombre d'achats nul, il a bien une date de premier achat valant NaN, OK\n",
|
||
"customerplus_clean_spectacle[(customerplus_clean_spectacle[\"already_purchased\"]) &\n",
|
||
"(customerplus_clean_spectacle[\"purchase_count\"]==0)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 89,
|
||
"id": "e940bfcf-29cc-4d4c-ae5e-e2a8cecf28af",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"number_compagny already_purchased\n",
|
||
"10 False 0.234840\n",
|
||
" True 0.236236\n",
|
||
"11 False 0.141746\n",
|
||
" True 0.002804\n",
|
||
"12 False 0.485950\n",
|
||
" True 0.244779\n",
|
||
"13 False 0.084057\n",
|
||
" True 0.177213\n",
|
||
"14 False 0.885553\n",
|
||
" True 0.308859\n",
|
||
"Name: opt_in, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 89,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# différence de consentement aux campagnes de mails (opt in)\n",
|
||
"\n",
|
||
"# en se restreignant au personnes n'ayant pas acheté, on a quand même des individus acceptant d'être ciblés\n",
|
||
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"first_buying_date\"].isna()][\"opt_in\"].unique()\n",
|
||
"\n",
|
||
"# taux de consentement variés\n",
|
||
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"] > 0\n",
|
||
"customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"])[\"opt_in\"].mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"id": "a5e79beb-9ba0-4c89-b084-e27ff0d65dcc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>already_purchased</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.234840</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.236236</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.141746</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.002804</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.485950</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.244779</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.084057</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.177213</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.885553</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.308859</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_compagny already_purchased opt_in\n",
|
||
"0 10 False 0.234840\n",
|
||
"1 10 True 0.236236\n",
|
||
"2 11 False 0.141746\n",
|
||
"3 11 True 0.002804\n",
|
||
"4 12 False 0.485950\n",
|
||
"5 12 True 0.244779\n",
|
||
"6 13 False 0.084057\n",
|
||
"7 13 True 0.177213\n",
|
||
"8 14 False 0.885553\n",
|
||
"9 14 True 0.308859"
|
||
]
|
||
},
|
||
"execution_count": 94,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_graph = customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"])[\"opt_in\"].mean().reset_index()\n",
|
||
"df_graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 127,
|
||
"id": "5be56c41-7697-481a-84ea-f77a2041484b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1000x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du barplot groupé\n",
|
||
"fig, ax = plt.subplots(figsize=(10, 6))\n",
|
||
"\n",
|
||
"categories = df_graph[\"number_compagny\"].unique()\n",
|
||
"bar_width = 0.35\n",
|
||
"bar_positions = np.arange(len(categories))\n",
|
||
"\n",
|
||
"# Grouper les données par label et créer les barres groupées\n",
|
||
"for label in df_graph[\"already_purchased\"].unique():\n",
|
||
" label_data = df_graph[df_graph['already_purchased'] == label]\n",
|
||
" values = [label_data[label_data['number_compagny'] == category]['opt_in'].values[0]*100 for category in categories]\n",
|
||
"\n",
|
||
" label_printed = \"purchased\" if label else \"no purchase\"\n",
|
||
" ax.bar(bar_positions, values, bar_width, label=label_printed)\n",
|
||
"\n",
|
||
" # Mise à jour des positions des barres pour le prochain groupe\n",
|
||
" bar_positions = [pos + bar_width for pos in bar_positions]\n",
|
||
"\n",
|
||
"# Ajout des étiquettes, de la légende, etc.\n",
|
||
"ax.set_xlabel('Numero de compagnie')\n",
|
||
"ax.set_ylabel('Part de consentement (%)')\n",
|
||
"ax.set_title('Part de consentement au mailing selon les compagnies')\n",
|
||
"ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])\n",
|
||
"ax.set_xticklabels(categories)\n",
|
||
"ax.legend()\n",
|
||
"\n",
|
||
"# Affichage du plot\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"id": "91b743c4-5473-41e1-b97e-cf06904f0fa8",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_company</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>22.681533</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>45.617174</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>8.681794</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.034686</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>38.730755</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.046081</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>12.596642</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>16.709675</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>77.789137</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>17.561409</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_company y_has_purchased opt_in\n",
|
||
"0 10 0.0 22.681533\n",
|
||
"1 10 1.0 45.617174\n",
|
||
"2 11 0.0 8.681794\n",
|
||
"3 11 1.0 0.034686\n",
|
||
"4 12 0.0 38.730755\n",
|
||
"5 12 1.0 0.046081\n",
|
||
"6 13 0.0 12.596642\n",
|
||
"7 13 1.0 16.709675\n",
|
||
"8 14 0.0 77.789137\n",
|
||
"9 14 1.0 17.561409"
|
||
]
|
||
},
|
||
"execution_count": 94,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# on refait le graphique sur train set \n",
|
||
"\n",
|
||
"df_graph = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"opt_in\"].mean().reset_index()\n",
|
||
"df_graph[\"opt_in\"] = 100 * df_graph[\"opt_in\"]\n",
|
||
"df_graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"id": "728e0021-4f95-4601-bb01-032db2cf6571",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"0.43578991448407206\n",
|
||
"0.2889600758160463\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# pourquoi une telle différence sur la variable opt in ??\n",
|
||
"print(train_set_spectacle[\"opt_in\"].mean())\n",
|
||
"print(customerplus_clean_spectacle[\"opt_in\"].mean())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"id": "274b4bc5-277f-476a-8bc1-c1764b1df2de",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"0.8473746548562269\n",
|
||
"0.7573747808905485\n"
|
||
]
|
||
}
|
||
],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"id": "e1d837e1-c445-424b-867a-48b1e790f703",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"genre = homme : \n",
|
||
"0.3175633912091978\n",
|
||
"0.3103916287323914\n",
|
||
"email vérifié : \n",
|
||
"0.9581971527197163\n",
|
||
"0.9360131470484772\n",
|
||
"nationalité française : \n",
|
||
"0.8473746548562269\n",
|
||
"0.7573747808905485\n",
|
||
"nbre d'achats : \n",
|
||
"2.925387603847428\n",
|
||
"1.968932616126136\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# pour les autres variables, la distribution semble similaire\n",
|
||
"\n",
|
||
"print(\"genre = homme : \")\n",
|
||
"print(train_set_spectacle[\"gender_male\"].mean())\n",
|
||
"print(customerplus_clean_spectacle[\"gender_male\"].mean())\n",
|
||
"\n",
|
||
"print(\"email vérifié : \")\n",
|
||
"print(train_set_spectacle[\"is_email_true\"].mean())\n",
|
||
"print(customerplus_clean_spectacle[\"is_email_true\"].mean())\n",
|
||
"\n",
|
||
"print(\"nationalité française : \")\n",
|
||
"print(train_set_spectacle[\"country_fr\"].mean())\n",
|
||
"print(customerplus_clean_spectacle[\"country_fr\"].mean())\n",
|
||
"\n",
|
||
"# sauf pr nbre d'achats - à verif\n",
|
||
"print(\"nbre d'achats : \")\n",
|
||
"print(train_set_spectacle[\"purchase_count\"].mean())\n",
|
||
"print(customerplus_clean_spectacle[\"purchase_count\"].mean())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"id": "43deeeb5-8092-42fc-b80b-59d2c58093de",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1000x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# with the generic function\n",
|
||
"multiple_barplot(df_graph, x=\"number_company\", y=\"opt_in\", var_labels=\"y_has_purchased\",\n",
|
||
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
|
||
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de consentement (%)\", \n",
|
||
" title = \"Part de consentement au mailing selon les compagnies (train set)\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"id": "32960530-cb46-4eeb-a6d2-1dcf5fb640d8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0.181580</td>\n",
|
||
" <td>0.343837</td>\n",
|
||
" <td>0.474583</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>0.179520</td>\n",
|
||
" <td>0.314443</td>\n",
|
||
" <td>0.506037</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0.346380</td>\n",
|
||
" <td>0.454036</td>\n",
|
||
" <td>0.199584</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>0.318108</td>\n",
|
||
" <td>0.503092</td>\n",
|
||
" <td>0.178800</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0.331954</td>\n",
|
||
" <td>0.316181</td>\n",
|
||
" <td>0.351865</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_compagny gender_male gender_female gender_other\n",
|
||
"0 10 0.181580 0.343837 0.474583\n",
|
||
"1 11 0.179520 0.314443 0.506037\n",
|
||
"2 12 0.346380 0.454036 0.199584\n",
|
||
"3 13 0.318108 0.503092 0.178800\n",
|
||
"4 14 0.331954 0.316181 0.351865"
|
||
]
|
||
},
|
||
"execution_count": 79,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# genre \n",
|
||
"\n",
|
||
"company_genders = customerplus_clean_spectacle.groupby(\"number_compagny\")[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
|
||
"company_genders"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"id": "1b4a49d7-7bfe-4e80-aa7e-c9c6d4bc46e2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du barplot\n",
|
||
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_male\"], label = \"Homme\")\n",
|
||
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_female\"], \n",
|
||
" bottom = company_genders[\"gender_male\"], label = \"Femme\")\n",
|
||
"\n",
|
||
"\n",
|
||
"# Ajout de titres et d'étiquettes\n",
|
||
"plt.xlabel('Company')\n",
|
||
"plt.ylabel(\"Part de clients de chaque sexe\")\n",
|
||
"plt.title(\"Sexe des clients de chaque compagnie de spectacle\")\n",
|
||
"plt.legend()\n",
|
||
"\n",
|
||
"# Affichage du barplot\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 82,
|
||
"id": "c7348c95-e506-4002-90d9-d3b6768af985",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_company</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>share_of_women</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.171838</td>\n",
|
||
" <td>0.333929</td>\n",
|
||
" <td>0.494232</td>\n",
|
||
" <td>66.024263</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.312165</td>\n",
|
||
" <td>0.683363</td>\n",
|
||
" <td>0.004472</td>\n",
|
||
" <td>68.643306</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.151162</td>\n",
|
||
" <td>0.273204</td>\n",
|
||
" <td>0.575635</td>\n",
|
||
" <td>64.379376</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.328477</td>\n",
|
||
" <td>0.597641</td>\n",
|
||
" <td>0.073881</td>\n",
|
||
" <td>64.531835</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.334546</td>\n",
|
||
" <td>0.433672</td>\n",
|
||
" <td>0.231782</td>\n",
|
||
" <td>56.451654</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.366020</td>\n",
|
||
" <td>0.506659</td>\n",
|
||
" <td>0.127321</td>\n",
|
||
" <td>58.057873</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.314243</td>\n",
|
||
" <td>0.503242</td>\n",
|
||
" <td>0.182515</td>\n",
|
||
" <td>61.559817</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.351721</td>\n",
|
||
" <td>0.504910</td>\n",
|
||
" <td>0.143369</td>\n",
|
||
" <td>58.941356</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.317971</td>\n",
|
||
" <td>0.296388</td>\n",
|
||
" <td>0.385641</td>\n",
|
||
" <td>48.243443</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.451289</td>\n",
|
||
" <td>0.485106</td>\n",
|
||
" <td>0.063605</td>\n",
|
||
" <td>51.805692</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_company y_has_purchased gender_male gender_female gender_other \\\n",
|
||
"0 10 0.0 0.171838 0.333929 0.494232 \n",
|
||
"1 10 1.0 0.312165 0.683363 0.004472 \n",
|
||
"2 11 0.0 0.151162 0.273204 0.575635 \n",
|
||
"3 11 1.0 0.328477 0.597641 0.073881 \n",
|
||
"4 12 0.0 0.334546 0.433672 0.231782 \n",
|
||
"5 12 1.0 0.366020 0.506659 0.127321 \n",
|
||
"6 13 0.0 0.314243 0.503242 0.182515 \n",
|
||
"7 13 1.0 0.351721 0.504910 0.143369 \n",
|
||
"8 14 0.0 0.317971 0.296388 0.385641 \n",
|
||
"9 14 1.0 0.451289 0.485106 0.063605 \n",
|
||
"\n",
|
||
" share_of_women \n",
|
||
"0 66.024263 \n",
|
||
"1 68.643306 \n",
|
||
"2 64.379376 \n",
|
||
"3 64.531835 \n",
|
||
"4 56.451654 \n",
|
||
"5 58.057873 \n",
|
||
"6 61.559817 \n",
|
||
"7 58.941356 \n",
|
||
"8 48.243443 \n",
|
||
"9 51.805692 "
|
||
]
|
||
},
|
||
"execution_count": 82,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"company_genders = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
|
||
"company_genders[\"share_of_women\"] = 100 * (company_genders[\"gender_female\"]/(1-company_genders[\"gender_other\"]))\n",
|
||
"company_genders"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"id": "b36e5a8f-45dc-4b74-8137-80b7e916aa84",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1000x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# création barplot avec la fonction générique\n",
|
||
"\n",
|
||
"multiple_barplot(company_genders, x=\"number_company\", y=\"share_of_women\", var_labels=\"y_has_purchased\",\n",
|
||
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
|
||
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de femmes (%)\", \n",
|
||
" title = \"Part de femmes selon les compagnies de spectacle (train set)\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 87,
|
||
"id": "ed6374e5-f36c-4f8e-9dba-602715b726f1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0.996136</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>0.994838</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0.002119</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>0.831795</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0.993978</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_compagny country_fr\n",
|
||
"0 10 0.996136\n",
|
||
"1 11 0.994838\n",
|
||
"2 12 0.002119\n",
|
||
"3 13 0.831795\n",
|
||
"4 14 0.993978"
|
||
]
|
||
},
|
||
"execution_count": 87,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# pays d'origine (France VS reste du monde)\n",
|
||
"\n",
|
||
"company_country_fr = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"country_fr\"].mean().reset_index()\n",
|
||
"company_country_fr"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 88,
|
||
"id": "8d95cdd9-2ab3-4c9a-8442-bb9b98e0dd18",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du barplot\n",
|
||
"plt.bar(company_country_fr[\"number_compagny\"], company_country_fr[\"country_fr\"])\n",
|
||
"\n",
|
||
"# Ajout de titres et d'étiquettes\n",
|
||
"plt.xlabel('Company')\n",
|
||
"plt.ylabel(\"Part de clients français\")\n",
|
||
"plt.title(\"Nationalité des clients de chaque compagnie de spectacle\")\n",
|
||
"\n",
|
||
"# Affichage du barplot\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 90,
|
||
"id": "b459f81f-6d30-44fa-ad65-e85acbf12fd2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_company</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>99.542095</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>99.909747</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>99.543280</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>99.501602</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.156470</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.265579</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>84.389610</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>77.596741</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>99.520205</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>98.471506</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_company y_has_purchased country_fr\n",
|
||
"0 10 0.0 99.542095\n",
|
||
"1 10 1.0 99.909747\n",
|
||
"2 11 0.0 99.543280\n",
|
||
"3 11 1.0 99.501602\n",
|
||
"4 12 0.0 0.156470\n",
|
||
"5 12 1.0 0.265579\n",
|
||
"6 13 0.0 84.389610\n",
|
||
"7 13 1.0 77.596741\n",
|
||
"8 14 0.0 99.520205\n",
|
||
"9 14 1.0 98.471506"
|
||
]
|
||
},
|
||
"execution_count": 90,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# graphique sur le train set\n",
|
||
"\n",
|
||
"company_country_fr = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"country_fr\"]].mean().reset_index()\n",
|
||
"company_country_fr[\"country_fr\"] = 100 * company_country_fr[\"country_fr\"]\n",
|
||
"company_country_fr"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 92,
|
||
"id": "4a037b48-1d65-4ed3-a012-7d6f5a312533",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1000x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# generic function to generate the barplot - nationality\n",
|
||
"\n",
|
||
"multiple_barplot(company_country_fr, x=\"number_company\", y=\"country_fr\", var_labels=\"y_has_purchased\",\n",
|
||
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
|
||
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de clients français (%)\", \n",
|
||
" title = \"Part de clients français des compagnies de spectacle (train set)\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ecfd112e-270a-4223-b80f-7e95e57d199d",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 2. campaigns_information"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 189,
|
||
"id": "b37e7ddf-321a-4ebe-9742-9e760a541d29",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de lignes de la table : 688953\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"customer_id 0\n",
|
||
"nb_campaigns 0\n",
|
||
"nb_campaigns_opened 0\n",
|
||
"time_to_open 301495\n",
|
||
"number_compagny 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 189,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# nombre de valeurs manquantes (NaN) par colonne\n",
|
||
"print(\"Nombre de lignes de la table : \",campaigns_information_spectacle.shape[0])\n",
|
||
"campaigns_information_spectacle.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 192,
|
||
"id": "de1ecaac-25bb-4853-b8ab-3ef2ca6917ed",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>no_campaign_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>29</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>37</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>39</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 05:16:38</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>41</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 01:12:29</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>44</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>254699</th>\n",
|
||
" <td>6837769</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 23:42:15</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>254700</th>\n",
|
||
" <td>6875038</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>254701</th>\n",
|
||
" <td>6875066</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>254702</th>\n",
|
||
" <td>6875099</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaT</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>254703</th>\n",
|
||
" <td>6875143</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 01:17:01</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>688953 rows × 6 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
|
||
"0 29 4 0.0 NaT \n",
|
||
"1 37 3 0.0 NaT \n",
|
||
"2 39 4 1.0 0 days 05:16:38 \n",
|
||
"3 41 4 1.0 0 days 01:12:29 \n",
|
||
"4 44 4 0.0 NaT \n",
|
||
"... ... ... ... ... \n",
|
||
"254699 6837769 1 1.0 0 days 23:42:15 \n",
|
||
"254700 6875038 1 0.0 NaT \n",
|
||
"254701 6875066 1 0.0 NaT \n",
|
||
"254702 6875099 1 0.0 NaT \n",
|
||
"254703 6875143 1 1.0 0 days 01:17:01 \n",
|
||
"\n",
|
||
" number_compagny no_campaign_opened \n",
|
||
"0 10 True \n",
|
||
"1 10 True \n",
|
||
"2 10 False \n",
|
||
"3 10 False \n",
|
||
"4 10 True \n",
|
||
"... ... ... \n",
|
||
"254699 14 False \n",
|
||
"254700 14 True \n",
|
||
"254701 14 True \n",
|
||
"254702 14 True \n",
|
||
"254703 14 False \n",
|
||
"\n",
|
||
"[688953 rows x 6 columns]"
|
||
]
|
||
},
|
||
"execution_count": 192,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# part de clients n'ouvrant jamais les mails par compagnie\n",
|
||
"\n",
|
||
"campaigns_information_spectacle[\"no_campaign_opened\"] = pd.isna(campaigns_information_spectacle[\"time_to_open\"])\n",
|
||
"campaigns_information_spectacle"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 197,
|
||
"id": "b5a0060f-a9dd-435b-844f-b24674b8bc27",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>no_campaign_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0.605656</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>0.294001</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0.475719</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>0.353820</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0.428148</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_compagny no_campaign_opened\n",
|
||
"0 10 0.605656\n",
|
||
"1 11 0.294001\n",
|
||
"2 12 0.475719\n",
|
||
"3 13 0.353820\n",
|
||
"4 14 0.428148"
|
||
]
|
||
},
|
||
"execution_count": 197,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"company_lazy_customers = campaigns_information_spectacle.groupby(\"number_compagny\")[\"no_campaign_opened\"].mean().reset_index()\n",
|
||
"company_lazy_customers"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 198,
|
||
"id": "788c90e0-f13a-4804-ace7-e5159fddd7fd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du barplot\n",
|
||
"plt.bar(company_lazy_customers[\"number_compagny\"], company_lazy_customers[\"no_campaign_opened\"])\n",
|
||
"\n",
|
||
"# Ajout de titres et d'étiquettes\n",
|
||
"plt.xlabel('Company')\n",
|
||
"plt.ylabel(\"Part de clients n'ayant ouvert aucun mail\")\n",
|
||
"plt.title(\"Part de clients n'ayant ouvert aucun mail pour les compagnies de spectacle\")\n",
|
||
"\n",
|
||
"# Affichage du barplot\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 203,
|
||
"id": "c48015c2-6451-4089-93b7-6d55d3b2e553",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>ratio_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>734772</td>\n",
|
||
" <td>126151.0</td>\n",
|
||
" <td>0.171687</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>342396</td>\n",
|
||
" <td>129833.0</td>\n",
|
||
" <td>0.379190</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>3168123</td>\n",
|
||
" <td>810722.0</td>\n",
|
||
" <td>0.255900</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>3218569</td>\n",
|
||
" <td>793581.0</td>\n",
|
||
" <td>0.246563</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>2427043</td>\n",
|
||
" <td>723846.0</td>\n",
|
||
" <td>0.298242</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
|
||
"0 10 734772 126151.0 0.171687\n",
|
||
"1 11 342396 129833.0 0.379190\n",
|
||
"2 12 3168123 810722.0 0.255900\n",
|
||
"3 13 3218569 793581.0 0.246563\n",
|
||
"4 14 2427043 723846.0 0.298242"
|
||
]
|
||
},
|
||
"execution_count": 203,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# taux d'ouverture des campagnes de mails par compagnie\n",
|
||
"\n",
|
||
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
|
||
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
|
||
"company_campaigns_stats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "d06ab865-4832-4fe9-918b-e5ff72bebee4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "NameError",
|
||
"evalue": "name 'company_campaigns_stats' is not defined",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||
"Cell \u001b[0;32mIn[15], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Création du barplot\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mbar(\u001b[43mcompany_campaigns_stats\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnumber_compagny\u001b[39m\u001b[38;5;124m\"\u001b[39m], \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m*\u001b[39m company_campaigns_stats[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mratio_campaigns_opened\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Ajout de titres et d'étiquettes\u001b[39;00m\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mxlabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCompany\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
|
||
"\u001b[0;31mNameError\u001b[0m: name 'company_campaigns_stats' is not defined"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du barplot\n",
|
||
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"])\n",
|
||
"\n",
|
||
"# Ajout de titres et d'étiquettes\n",
|
||
"plt.xlabel('Company')\n",
|
||
"plt.ylabel(\"Taux d'ouverture (%)\")\n",
|
||
"plt.title(\"Taux d'ouverture des campagnes de mails pour les compagnies de spectacle\")\n",
|
||
"\n",
|
||
"# Affichage du barplot\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 219,
|
||
"id": "5c37e063-a717-4a8c-828e-b386b87e8409",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# création d'un barplot permettant de visualiser les 2 indicateurs sur le même graphique\n",
|
||
"\n",
|
||
"# Shared bar width. Both series must use it: without an explicit width the first\n",
|
||
"# series falls back to matplotlib's default (0.8) and overlaps the second one.\n",
|
||
"bar_width = 0.4\n",
|
||
"\n",
|
||
"# First series: e-mail campaign open rate, centered on the company number\n",
|
||
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"],\n",
|
||
"        label = \"taux d'ouverture\", alpha = 0.7, width = bar_width)\n",
|
||
"\n",
|
||
"# Second series: share of customers who opened at least one e-mail, shifted by one\n",
|
||
"# bar width so that it sits right next to the first series.\n",
|
||
"# NOTE(review): assumes company_lazy_customers still holds one row per company, as a\n",
|
||
"# fraction and in the same row order as company_campaigns_stats; the name is reassigned\n",
|
||
"# further down with a different grouping and in percent - confirm before re-running out of order.\n",
|
||
"indices2 = company_campaigns_stats[\"number_compagny\"] + bar_width\n",
|
||
"plt.bar(indices2, 100 * (1 - company_lazy_customers[\"no_campaign_opened\"]),\n",
|
||
"        label='Part de clients ouvrant des mails', alpha=0.7, width=bar_width)\n",
|
||
"\n",
|
||
"# Ajout des étiquettes et de la légende\n",
|
||
"plt.xlabel('Compagnie')\n",
|
||
"plt.ylabel('Taux (%)')\n",
|
||
"plt.title('Lien entre taux d ouverture des mails et nombre de clients actifs')\n",
|
||
"plt.legend()\n",
|
||
"\n",
|
||
"# Affichage du graphique\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 102,
|
||
"id": "4fdf4134-d32c-42c3-ab4f-36ad4783332c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" <th>number_company</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10_299341</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0 days 05:47:26.333333333</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10_63788</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>62.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>393.205891</td>\n",
|
||
" <td>281.017639</td>\n",
|
||
" <td>112.188252</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 05:13:51</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>10_759946</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>10_20653</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>1 days 00:45:54</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>10_824705</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 41 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 10_299341 0.0 0.0 0.0 0.0 \n",
|
||
"1 10_63788 3.0 2.0 62.0 1.0 \n",
|
||
"2 10_759946 0.0 0.0 0.0 0.0 \n",
|
||
"3 10_20653 0.0 0.0 0.0 0.0 \n",
|
||
"4 10_824705 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 NaN NaN \n",
|
||
"1 1.0 393.205891 281.017639 \n",
|
||
"2 0.0 NaN NaN \n",
|
||
"3 0.0 NaN NaN \n",
|
||
"4 0.0 NaN NaN \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
|
||
"0 NaN 0.0 ... male \n",
|
||
"1 112.188252 3.0 ... female \n",
|
||
"2 NaN 0.0 ... other \n",
|
||
"3 NaN 0.0 ... male \n",
|
||
"4 NaN 0.0 ... other \n",
|
||
"\n",
|
||
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
|
||
"0 0 1 0 1.0 12.0 \n",
|
||
"1 1 0 0 1.0 3.0 \n",
|
||
"2 0 0 1 NaN 0.0 \n",
|
||
"3 0 1 0 1.0 11.0 \n",
|
||
"4 0 0 1 NaN 0.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
|
||
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
|
||
"1 1.0 0 days 05:13:51 1.0 \n",
|
||
"2 0.0 NaN 0.0 \n",
|
||
"3 10.0 1 days 00:45:54 0.0 \n",
|
||
"4 0.0 NaN 0.0 \n",
|
||
"\n",
|
||
" number_company \n",
|
||
"0 10 \n",
|
||
"1 10 \n",
|
||
"2 10 \n",
|
||
"3 10 \n",
|
||
"4 10 \n",
|
||
"\n",
|
||
"[5 rows x 41 columns]"
|
||
]
|
||
},
|
||
"execution_count": 102,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# same statistics on the train set\n",
|
||
"\n",
|
||
"train_set_spectacle.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 105,
|
||
"id": "14ff9886-742c-4a60-8824-5d31f7c76aea",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"train_set_spectacle[\"no_campaign_opened\"] = train_set_spectacle[\"nb_campaigns_opened\"]==0"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 108,
|
||
"id": "16285593-a0fa-461c-aeb8-c64ffdf9a0d6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_company</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" <th>no_campaign_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>91.227517</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>62.343470</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>84.608320</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>78.598682</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>90.124799</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>94.158651</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>72.903385</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>73.549517</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_company y_has_purchased no_campaign_opened\n",
|
||
"0 10 0.0 91.227517\n",
|
||
"1 10 1.0 62.343470\n",
|
||
"2 11 0.0 84.608320\n",
|
||
"3 11 1.0 78.598682\n",
|
||
"4 12 0.0 100.000000\n",
|
||
"5 12 1.0 100.000000\n",
|
||
"6 13 0.0 90.124799\n",
|
||
"7 13 1.0 94.158651\n",
|
||
"8 14 0.0 72.903385\n",
|
||
"9 14 1.0 73.549517"
|
||
]
|
||
},
|
||
"execution_count": 108,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"company_lazy_customers = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"no_campaign_opened\"].mean().reset_index()\n",
|
||
"company_lazy_customers[\"no_campaign_opened\"] = 100 * company_lazy_customers[\"no_campaign_opened\"] \n",
|
||
"company_lazy_customers"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 110,
|
||
"id": "d35f00e3-b9b0-42b3-9dce-785c1ad5506c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1000x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"multiple_barplot(company_lazy_customers, x=\"number_company\", y=\"no_campaign_opened\", var_labels=\"y_has_purchased\",\n",
|
||
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
|
||
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de clients n'ayant ouvert aucun mail (%)\", \n",
|
||
" title = \"Part de clients des compagnies de spectacle n'ouvrant aucun mail (train set)\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 111,
|
||
"id": "b391f5b2-2424-4758-8ae5-f0fdacdfae66",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" <th>number_company</th>\n",
|
||
" <th>no_campaign_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10_299341</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0 days 05:47:26.333333333</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10_63788</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>62.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>393.205891</td>\n",
|
||
" <td>281.017639</td>\n",
|
||
" <td>112.188252</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0 days 05:13:51</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>10_759946</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>10_20653</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>1 days 00:45:54</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>10_824705</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>697292</th>\n",
|
||
" <td>14_119950</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>697293</th>\n",
|
||
" <td>14_938</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>697294</th>\n",
|
||
" <td>14_5004707</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2 days 16:42:51</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>697295</th>\n",
|
||
" <td>14_108184</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>697296</th>\n",
|
||
" <td>14_4663981</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>True</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>697297 rows × 42 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 10_299341 0.0 0.0 0.0 0.0 \n",
|
||
"1 10_63788 3.0 2.0 62.0 1.0 \n",
|
||
"2 10_759946 0.0 0.0 0.0 0.0 \n",
|
||
"3 10_20653 0.0 0.0 0.0 0.0 \n",
|
||
"4 10_824705 0.0 0.0 0.0 0.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"697292 14_119950 0.0 0.0 0.0 0.0 \n",
|
||
"697293 14_938 0.0 0.0 0.0 0.0 \n",
|
||
"697294 14_5004707 0.0 0.0 0.0 0.0 \n",
|
||
"697295 14_108184 0.0 0.0 0.0 0.0 \n",
|
||
"697296 14_4663981 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 NaN NaN \n",
|
||
"1 1.0 393.205891 281.017639 \n",
|
||
"2 0.0 NaN NaN \n",
|
||
"3 0.0 NaN NaN \n",
|
||
"4 0.0 NaN NaN \n",
|
||
"... ... ... ... \n",
|
||
"697292 0.0 NaN NaN \n",
|
||
"697293 0.0 NaN NaN \n",
|
||
"697294 0.0 NaN NaN \n",
|
||
"697295 0.0 NaN NaN \n",
|
||
"697296 0.0 NaN NaN \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet ... gender_female \\\n",
|
||
"0 NaN 0.0 ... 0 \n",
|
||
"1 112.188252 3.0 ... 1 \n",
|
||
"2 NaN 0.0 ... 0 \n",
|
||
"3 NaN 0.0 ... 0 \n",
|
||
"4 NaN 0.0 ... 0 \n",
|
||
"... ... ... ... ... \n",
|
||
"697292 NaN 0.0 ... 0 \n",
|
||
"697293 NaN 0.0 ... 0 \n",
|
||
"697294 NaN 0.0 ... 0 \n",
|
||
"697295 NaN 0.0 ... 0 \n",
|
||
"697296 NaN 0.0 ... 0 \n",
|
||
"\n",
|
||
" gender_male gender_other country_fr nb_campaigns \\\n",
|
||
"0 1 0 1.0 12.0 \n",
|
||
"1 0 0 1.0 3.0 \n",
|
||
"2 0 1 NaN 0.0 \n",
|
||
"3 1 0 1.0 11.0 \n",
|
||
"4 0 1 NaN 0.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"697292 1 0 1.0 0.0 \n",
|
||
"697293 1 0 1.0 0.0 \n",
|
||
"697294 1 0 1.0 2.0 \n",
|
||
"697295 0 1 1.0 0.0 \n",
|
||
"697296 0 1 NaN 0.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
|
||
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
|
||
"1 1.0 0 days 05:13:51 1.0 \n",
|
||
"2 0.0 NaN 0.0 \n",
|
||
"3 10.0 1 days 00:45:54 0.0 \n",
|
||
"4 0.0 NaN 0.0 \n",
|
||
"... ... ... ... \n",
|
||
"697292 0.0 NaN 0.0 \n",
|
||
"697293 0.0 NaN 0.0 \n",
|
||
"697294 1.0 2 days 16:42:51 0.0 \n",
|
||
"697295 0.0 NaN 0.0 \n",
|
||
"697296 0.0 NaN 0.0 \n",
|
||
"\n",
|
||
" number_company no_campaign_opened \n",
|
||
"0 10 False \n",
|
||
"1 10 False \n",
|
||
"2 10 True \n",
|
||
"3 10 False \n",
|
||
"4 10 True \n",
|
||
"... ... ... \n",
|
||
"697292 14 True \n",
|
||
"697293 14 True \n",
|
||
"697294 14 False \n",
|
||
"697295 14 True \n",
|
||
"697296 14 True \n",
|
||
"\n",
|
||
"[697297 rows x 42 columns]"
|
||
]
|
||
},
|
||
"execution_count": 111,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# part de mails ouverts de chaque compagnie\n",
|
||
"\n",
|
||
"train_set_spectacle"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 112,
|
||
"id": "dc8cfd36-0eb2-4ef3-877d-626fd0a9ced4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>ratio_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>734772</td>\n",
|
||
" <td>126151.0</td>\n",
|
||
" <td>0.171687</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>342396</td>\n",
|
||
" <td>129833.0</td>\n",
|
||
" <td>0.379190</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>3168123</td>\n",
|
||
" <td>810722.0</td>\n",
|
||
" <td>0.255900</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>3218569</td>\n",
|
||
" <td>793581.0</td>\n",
|
||
" <td>0.246563</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>2427043</td>\n",
|
||
" <td>723846.0</td>\n",
|
||
" <td>0.298242</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
|
||
"0 10 734772 126151.0 0.171687\n",
|
||
"1 11 342396 129833.0 0.379190\n",
|
||
"2 12 3168123 810722.0 0.255900\n",
|
||
"3 13 3218569 793581.0 0.246563\n",
|
||
"4 14 2427043 723846.0 0.298242"
|
||
]
|
||
},
|
||
"execution_count": 112,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# taux d'ouverture des campaigns\n",
|
||
"\n",
|
||
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
|
||
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
|
||
"company_campaigns_stats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 119,
|
||
"id": "30b28426-088a-4153-b2aa-c20f11b2b771",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_company</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>perc_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>61668.0</td>\n",
|
||
" <td>8240.0</td>\n",
|
||
" <td>13.361873</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>4361.0</td>\n",
|
||
" <td>2002.0</td>\n",
|
||
" <td>45.906902</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>37799.0</td>\n",
|
||
" <td>12286.0</td>\n",
|
||
" <td>32.503505</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>8824.0</td>\n",
|
||
" <td>4493.0</td>\n",
|
||
" <td>50.917951</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>505008.0</td>\n",
|
||
" <td>118071.0</td>\n",
|
||
" <td>23.380026</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>45824.0</td>\n",
|
||
" <td>17233.0</td>\n",
|
||
" <td>37.606931</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1176373.0</td>\n",
|
||
" <td>313379.0</td>\n",
|
||
" <td>26.639425</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>129157.0</td>\n",
|
||
" <td>47987.0</td>\n",
|
||
" <td>37.154006</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_company y_has_purchased nb_campaigns nb_campaigns_opened \\\n",
|
||
"0 10 0.0 61668.0 8240.0 \n",
|
||
"1 10 1.0 4361.0 2002.0 \n",
|
||
"2 11 0.0 37799.0 12286.0 \n",
|
||
"3 11 1.0 8824.0 4493.0 \n",
|
||
"4 12 0.0 0.0 0.0 \n",
|
||
"5 12 1.0 0.0 0.0 \n",
|
||
"6 13 0.0 505008.0 118071.0 \n",
|
||
"7 13 1.0 45824.0 17233.0 \n",
|
||
"8 14 0.0 1176373.0 313379.0 \n",
|
||
"9 14 1.0 129157.0 47987.0 \n",
|
||
"\n",
|
||
" perc_campaigns_opened \n",
|
||
"0 13.361873 \n",
|
||
"1 45.906902 \n",
|
||
"2 32.503505 \n",
|
||
"3 50.917951 \n",
|
||
"4 NaN \n",
|
||
"5 NaN \n",
|
||
"6 23.380026 \n",
|
||
"7 37.606931 \n",
|
||
"8 26.639425 \n",
|
||
"9 37.154006 "
|
||
]
|
||
},
|
||
"execution_count": 119,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"company_campaigns_stats = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
|
||
"company_campaigns_stats[\"perc_campaigns_opened\"] = 100* (company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"])\n",
|
||
"company_campaigns_stats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 120,
|
||
"id": "9cebe912-fce1-4f4f-9d87-9649605296c8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_company</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>perc_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>61668.0</td>\n",
|
||
" <td>8240.0</td>\n",
|
||
" <td>13.361873</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>4361.0</td>\n",
|
||
" <td>2002.0</td>\n",
|
||
" <td>45.906902</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>37799.0</td>\n",
|
||
" <td>12286.0</td>\n",
|
||
" <td>32.503505</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>8824.0</td>\n",
|
||
" <td>4493.0</td>\n",
|
||
" <td>50.917951</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>505008.0</td>\n",
|
||
" <td>118071.0</td>\n",
|
||
" <td>23.380026</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>45824.0</td>\n",
|
||
" <td>17233.0</td>\n",
|
||
" <td>37.606931</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1176373.0</td>\n",
|
||
" <td>313379.0</td>\n",
|
||
" <td>26.639425</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>129157.0</td>\n",
|
||
" <td>47987.0</td>\n",
|
||
" <td>37.154006</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_company y_has_purchased nb_campaigns nb_campaigns_opened \\\n",
|
||
"0 10 0.0 61668.0 8240.0 \n",
|
||
"1 10 1.0 4361.0 2002.0 \n",
|
||
"2 11 0.0 37799.0 12286.0 \n",
|
||
"3 11 1.0 8824.0 4493.0 \n",
|
||
"6 13 0.0 505008.0 118071.0 \n",
|
||
"7 13 1.0 45824.0 17233.0 \n",
|
||
"8 14 0.0 1176373.0 313379.0 \n",
|
||
"9 14 1.0 129157.0 47987.0 \n",
|
||
"\n",
|
||
" perc_campaigns_opened \n",
|
||
"0 13.361873 \n",
|
||
"1 45.906902 \n",
|
||
"2 32.503505 \n",
|
||
"3 50.917951 \n",
|
||
"6 23.380026 \n",
|
||
"7 37.606931 \n",
|
||
"8 26.639425 \n",
|
||
"9 37.154006 "
|
||
]
|
||
},
|
||
"execution_count": 120,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"company_campaigns_stats = company_campaigns_stats[company_campaigns_stats[\"number_company\"]!=12]\n",
|
||
"company_campaigns_stats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 123,
|
||
"id": "8418531b-4f30-4d96-8035-f3630c789d6f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1000x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"multiple_barplot(company_campaigns_stats, x=\"number_company\", y=\"perc_campaigns_opened\", var_labels=\"y_has_purchased\",\n",
|
||
" dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
|
||
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de mails ouverts (%)\", \n",
|
||
" title = \"Taux d'ouverture global des mails envoyés par les compagnies de spectacle (train set)\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "783f6fb2-5f26-42a9-a22d-f4ece44bfaf2",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 3. products_purchased_reduced"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "74534ded-8121-43fb-8cf8-af353bed2c77",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de lignes de la table : 764880\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"customer_id 0\n",
|
||
"nb_tickets 0\n",
|
||
"nb_purchases 0\n",
|
||
"total_amount 0\n",
|
||
"nb_suppliers 0\n",
|
||
"vente_internet_max 0\n",
|
||
"purchase_date_min 0\n",
|
||
"purchase_date_max 0\n",
|
||
"time_between_purchase 0\n",
|
||
"nb_tickets_internet 0\n",
|
||
"number_compagny 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# nombre de nan\n",
|
||
"print(\"Nombre de lignes de la table : \",products_purchased_reduced_spectacle.shape[0])\n",
|
||
"products_purchased_reduced_spectacle.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "6db089d5-5517-4aee-a5fd-53f20ae3f0d7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#importation librairies\n",
|
||
"import warnings\n",
|
||
"warnings.simplefilter(\"ignore\")\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import statsmodels\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import seaborn as sns\n",
|
||
"from scipy.stats import shapiro\n",
|
||
"from numpy.random import randn\n",
|
||
"import scipy.stats as st\n",
|
||
"%matplotlib inline\n",
|
||
"\n",
|
||
"#col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"id": "943b8088-9ca2-40a4-b658-2cfae1589fac",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"30.0\n",
|
||
"62.0\n",
|
||
"120.0\n",
|
||
"90.0\n",
|
||
"Moustache inferieure -105.0\n",
|
||
"Moustache superieure 255.0\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"#identification des valeurs aberrantes (méthode de l'intervalle interquartile)\n",
|
||
"#calcul des quartiles de la variable total_amount\n",
|
||
"Q1=np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], 25) # Q1\n",
|
||
"Q2=np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], 50) # Q2\n",
|
||
"Q3=np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], 75) # Q3\n",
|
||
"print(Q1)\n",
|
||
"print(Q2)\n",
|
||
"print(Q3)\n",
|
||
"\n",
|
||
"#intervalle interquartile de la variable total_amount\n",
|
||
"\n",
|
||
"IQ=Q3-Q1\n",
|
||
"print(IQ)\n",
|
||
"\n",
|
||
"#bornes inférieure et supérieure des moustaches de la variable total_amount\n",
|
||
"\n",
|
||
"M_inf=Q1-1.5*IQ\n",
|
||
"M_sup=Q3+1.5*IQ\n",
|
||
"\n",
|
||
"print(\"Moustache inferieure\",M_inf)#moustache inferieur\n",
|
||
"print(\"Moustache superieure\",M_sup)#moustache sup\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a63e6d13-429b-4b01-ad11-27e5eea68cbd",
|
||
"metadata": {},
|
||
"source": [
|
||
"#histogrammes des variables quantitatives\n",
|
||
"col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]\n",
|
||
"for col in col_purchase:\n",
|
||
" plt.figure()\n",
|
||
" sns.histplot(products_purchased_reduced_spectacle[col], kde=True, color='red')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"id": "5a08b5a5-7d56-4543-945a-38f6219d831d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"#répartition du chiffre d'affaires selon les compagnies de spectacle\n",
|
||
"\n",
|
||
"# Filtrer les données pour ne garder que les valeurs strictement positives de total_amount et exclure les valeurs aberrantes (au-delà de la moustache supérieure, 255)\n",
|
||
"filtered_products_purchased_reduced_spectacle = products_purchased_reduced_spectacle[(products_purchased_reduced_spectacle['total_amount'] > 0) & (products_purchased_reduced_spectacle['total_amount'] <= 255)]\n",
|
||
"\n",
|
||
"# Créer le graphique en utilisant les données filtrées\n",
|
||
"sns.boxplot(data=filtered_products_purchased_reduced_spectacle, y=\"total_amount\", x=\"number_compagny\", showfliers=False, showmeans=True)\n",
|
||
"\n",
|
||
"# Titre du graphique\n",
|
||
"plt.title(\"Boite à moustache du chiffre d'affaire selon les compagnies de spectacles\")\n",
|
||
"\n",
|
||
"# Afficher le graphique\n",
|
||
"plt.show()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "20ce4a40-8f0d-40e8-91d3-b923670326cb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#reprise du graphe de la répartition du chiffre d'affaires selon les compagnies de spectacle sur la base de train\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"id": "76e08ece-0b58-4b3a-abca-53e30ccc907b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Statistique F : 317.1792172580724\n",
|
||
"Valeur de p : 3.665389608154993e-273\n",
|
||
"Nombre de degrés de liberté entre les groupes : 4\n",
|
||
"Nombre de degrés de liberté à l'intérieur des groupes : 670581\n",
|
||
"Il y a des différences significatives entre au moins une des entrepries .\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"#test ANOVA pour voir si la différence de chiffre d'affaires entre les compagnies est statistiquement significative\n",
|
||
"\n",
|
||
"from scipy.stats import f_oneway\n",
|
||
"\n",
|
||
"# Créez une liste pour stocker les données de chaque groupe\n",
|
||
"groupes = []\n",
|
||
"\n",
|
||
"# Parcourez chaque modalité de la variable catégorielle et divisez les données en groupes\n",
|
||
"for modalite in filtered_products_purchased_reduced_spectacle['number_compagny'].unique():\n",
|
||
" groupe = filtered_products_purchased_reduced_spectacle[filtered_products_purchased_reduced_spectacle['number_compagny'] == modalite]['total_amount']\n",
|
||
" groupes.append(groupe)\n",
|
||
"\n",
|
||
"# Effectuez le test ANOVA\n",
|
||
"f_statistic, p_value = f_oneway(*groupes)\n",
|
||
"\n",
|
||
"# Nombre total d'observations\n",
|
||
"N = sum(len(groupe) for groupe in groupes)\n",
|
||
"\n",
|
||
"# Nombre de groupes ou de catégories\n",
|
||
"k = len(groupes)\n",
|
||
"\n",
|
||
"# Degrés de liberté entre les groupes\n",
|
||
"df_between = k - 1\n",
|
||
"\n",
|
||
"# Degrés de liberté à l'intérieur des groupes\n",
|
||
"df_within = N - k\n",
|
||
"\n",
|
||
"# Affichez les résultats\n",
|
||
"print(\"Statistique F :\", f_statistic)\n",
|
||
"print(\"Valeur de p :\", p_value)\n",
|
||
"\n",
|
||
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
|
||
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
|
||
"\n",
|
||
"if p_value < 0.05:\n",
|
||
" print(\"Il y a des différences significatives entre au moins une des entrepries .\")\n",
|
||
"else:\n",
|
||
" print(\"Il n'y a pas de différences significatives entre les entreprises .\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"id": "aacf2c34-f7ea-4d6e-935b-c5db01f03bbe",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>number_compagny</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>Taux_ticket_internet</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>492314</td>\n",
|
||
" <td>126262.0</td>\n",
|
||
" <td>25.646640</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>318969</td>\n",
|
||
" <td>16348.0</td>\n",
|
||
" <td>5.125263</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>591028</td>\n",
|
||
" <td>42045.0</td>\n",
|
||
" <td>7.113876</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>7024227</td>\n",
|
||
" <td>1247482.0</td>\n",
|
||
" <td>17.759705</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>335741</td>\n",
|
||
" <td>125638.0</td>\n",
|
||
" <td>37.421107</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" number_compagny nb_tickets nb_tickets_internet Taux_ticket_internet\n",
|
||
"0 10 492314 126262.0 25.646640\n",
|
||
"1 11 318969 16348.0 5.125263\n",
|
||
"2 12 591028 42045.0 7.113876\n",
|
||
"3 13 7024227 1247482.0 17.759705\n",
|
||
"4 14 335741 125638.0 37.421107"
|
||
]
|
||
},
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"#Taux de ticket payé par internet selon les compagnies\n",
|
||
"\n",
|
||
"purchase_spectacle = products_purchased_reduced_spectacle.groupby(\"number_compagny\")[[\"nb_tickets\", \"nb_tickets_internet\"]].sum().reset_index()\n",
|
||
"purchase_spectacle[\"Taux_ticket_internet\"] = purchase_spectacle[\"nb_tickets_internet\"]*100 / purchase_spectacle[\"nb_tickets\"]\n",
|
||
"purchase_spectacle"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"id": "f71bb53d-724b-454d-8743-305d20eec2b0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du barplot\n",
|
||
"plt.bar(purchase_spectacle[\"number_compagny\"], purchase_spectacle[\"Taux_ticket_internet\"])\n",
|
||
"\n",
|
||
"# Ajout de titres et d'étiquettes\n",
|
||
"plt.xlabel('Company')\n",
|
||
"plt.ylabel(\"Taux d'achat de tickets en ligne (%)\")\n",
|
||
"plt.title(\"Taux d'achat des tickets en ligne selon les compagnies de spectacle\")\n",
|
||
"\n",
|
||
"# Affichage du barplot\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"id": "59a95248-0261-4970-9e91-e43d50cf4d69",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Text(0.5, 1.0, 'Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles')"
|
||
]
|
||
},
|
||
"execution_count": 66,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"#répartition du temps écoulé entre le premier et le dernier achat selon le numéro de la compagnie\n",
|
||
"\n",
|
||
"sns.boxplot(data=products_purchased_reduced_spectacle, y=\"time_between_purchase\",x=\"number_compagny\",showfliers=False,showmeans=True)\n",
|
||
"plt.title(\"Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"id": "e2c51e28-6197-48f0-ab6d-9fc7b3b0de74",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Statistique F : 7956.05932109542\n",
|
||
"Valeur de p : 0.0\n",
|
||
"Nombre de degrés de liberté entre les groupes : 4\n",
|
||
"Nombre de degrés de liberté à l'intérieur des groupes : 764875\n",
|
||
"Il y a des différences significatives entre au moins une des entrepries .\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# One-way ANOVA: is the mean time_between_purchase significantly different across companies?\n",
|
||
"\n",
|
||
"from scipy.stats import f_oneway\n",
|
||
"\n",
|
||
"# Build one sample per company in a single groupby pass; sort=False keeps the\n",
|
||
"# first-appearance order that the former unique()-based loop produced.\n",
|
||
"# NaN values are dropped because, by default, f_oneway returns NaN as soon as\n",
|
||
"# one sample contains a missing value.\n",
|
||
"groupes = []\n",
|
||
"for modalite, groupe in products_purchased_reduced_spectacle.groupby('number_compagny', sort=False)['time_between_purchase']:\n",
|
||
"    groupes.append(groupe.dropna())\n",
|
||
"\n",
|
||
"# Run the ANOVA test\n",
|
||
"f_statistic, p_value = f_oneway(*groupes)\n",
|
||
"\n",
|
||
"# Total number of observations actually passed to the test\n",
|
||
"N = sum(len(groupe) for groupe in groupes)\n",
|
||
"\n",
|
||
"# Number of groups (one per number_compagny value)\n",
|
||
"k = len(groupes)\n",
|
||
"\n",
|
||
"# Between-group degrees of freedom\n",
|
||
"df_between = k - 1\n",
|
||
"\n",
|
||
"# Within-group degrees of freedom\n",
|
||
"df_within = N - k\n",
|
||
"\n",
|
||
"# Print the results\n",
|
||
"print(\"Statistique F :\", f_statistic)\n",
|
||
"print(\"Valeur de p :\", p_value)\n",
|
||
"\n",
|
||
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
|
||
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
|
||
"\n",
|
||
"# 5% significance threshold\n",
|
||
"if p_value < 0.05:\n",
|
||
"    print(\"Il y a des différences significatives entre au moins deux des entreprises.\")\n",
|
||
"else:\n",
|
||
"    print(\"Il n'y a pas de différences significatives entre les entreprises .\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"id": "74f06e96-3c25-4eca-8190-25b0a4ab0d75",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"customer_id int64\n",
|
||
"nb_tickets int64\n",
|
||
"nb_purchases int64\n",
|
||
"total_amount float64\n",
|
||
"nb_suppliers int64\n",
|
||
"vente_internet_max int64\n",
|
||
"purchase_date_min float64\n",
|
||
"purchase_date_max float64\n",
|
||
"time_between_purchase float64\n",
|
||
"nb_tickets_internet float64\n",
|
||
"number_compagny int64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Show the dtype of every column of products_purchased_reduced_spectacle\n",
|
||
"products_purchased_reduced_spectacle.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"id": "20a70ec0-38f6-470e-a442-7884a150613a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 800x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Mean number of sales channels (nb_suppliers) per company (number_compagny)\n",
|
||
"plt.figure(figsize=(8, 6))\n",
|
||
"# errorbar=None hides the confidence-interval bars; it replaces ci=None, which is deprecated since seaborn 0.12\n",
|
||
"sns.barplot(x='number_compagny', y='nb_suppliers', data=products_purchased_reduced_spectacle, errorbar=None)\n",
|
||
"plt.title('Nombre moyen de canaux de vente par entreprise')\n",
|
||
"plt.xlabel('number_compagny')\n",
|
||
"plt.ylabel('Nombre moyen de canaux')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b9e84af4-a02b-4f83-81ae-b7a73475d060",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 4. target_information"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "2867eceb-1f72-406c-adc2-adfedcaf60e6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Nombre de lignes de la table : 6240166\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"id 0\n",
|
||
"customer_id 0\n",
|
||
"target_name 0\n",
|
||
"target_type_is_import 0\n",
|
||
"target_type_name 0\n",
|
||
"number_compagny 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Print the row count of target_information_spectacle, then display the number of missing values (NaN) per column\n",
|
||
"print(\"Nombre de lignes de la table : \",target_information_spectacle.shape[0])\n",
|
||
"target_information_spectacle.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"id": "561f361d-7d39-430a-9e27-a32f6c2f7b50",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Not usable. NOTE(review): presumably refers to the target_information table analysed in this section; confirm, and consider moving this note to a markdown cell."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "904cbf32-77b6-49dd-a96c-9e7e5a0175c3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|