2024-03-02 11:37:44 +01:00
{
"cells": [
2024-03-03 09:32:45 +01:00
{
"cell_type": "markdown",
"id": "be628bfc-0bca-48b0-97c9-29063289127e",
"metadata": {},
"source": [
"# Statistiques descriptives : compagnies offrant des spectacles"
]
},
{
"cell_type": "markdown",
"id": "0bf5450b-f44d-430a-aed7-d875dc365048",
"metadata": {},
"source": [
"## Importations et chargement des données"
]
},
2024-03-02 11:37:44 +01:00
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 85,
2024-03-02 11:37:44 +01:00
"id": "aa915888-cede-4eb0-8a26-7df573d29a3e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import s3fs\n",
"import warnings\n",
"from datetime import date, timedelta, datetime\n",
2024-03-03 09:32:45 +01:00
"import numpy as np\n",
2024-03-08 10:30:12 +01:00
"import matplotlib.pyplot as plt\n",
2024-03-11 18:43:56 +01:00
"import re\n",
"import io"
2024-03-02 11:37:44 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 4,
2024-03-02 11:37:44 +01:00
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
"metadata": {},
"outputs": [],
"source": [
"# Load the KPI construction functions into the notebook namespace.\n",
"# The relative path assumes the notebook runs one directory below 0_KPI_functions.py;\n",
"# use '0_KPI_functions.py' instead when running from the same directory as that script.\n",
"# A `with` block is used so the script's file handle is closed once its source has been read.\n",
"with open('../0_KPI_functions.py') as kpi_source_file:\n",
"    exec(kpi_source_file.read())\n"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 8,
2024-03-02 11:37:44 +01:00
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 8,
2024-03-02 11:37:44 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 4,
2024-03-02 11:37:44 +01:00
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
"metadata": {},
"outputs": [],
"source": [
2024-03-08 08:44:28 +01:00
"# test avec company 10\n",
"\n",
2024-03-02 11:37:44 +01:00
"# Names (without extension) of the CSV tables to load for company 10.\n",
"dic_base = [\n",
"    'campaigns_information',\n",
"    'customerplus_cleaned',\n",
"    'products_purchased_reduced',\n",
"    'target_information',\n",
"]\n",
"\n",
"# Read each table from S3 and bind it to a notebook-level variable named after the file,\n",
"# e.g. `target_information` and `products_purchased_reduced`.\n",
"for nom_base in dic_base:\n",
"    FILE_PATH_S3_fanta = 'projet-bdc2324-team1/0_Input/Company_10/' + nom_base + '.csv'\n",
"    with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
"        globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 5,
2024-03-02 11:37:44 +01:00
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
"metadata": {},
"outputs": [],
"source": [
2024-03-03 09:32:45 +01:00
"# fonction permettant d'extraire une table à partir du numéro de la compagnie (directory_path)\n",
2024-03-02 11:37:44 +01:00
"\n",
"def display_databases(directory_path, file_name, datetime_col = None):\n",
"    \"\"\"\n",
"    Load one CSV table of a company from the project's S3 storage into a DataFrame.\n",
"\n",
"    The file read is\n",
"    ``projet-bdc2324-team1/0_Input/Company_<directory_path>/<file_name>.csv``;\n",
"    the resolved path is printed before reading.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str\n",
"        Company number as a string (e.g. ``'10'``); it is concatenated into the path.\n",
"    file_name : str\n",
"        Table name without the ``.csv`` extension.\n",
"    datetime_col : list of str, optional\n",
"        Columns forwarded to ``pd.read_csv(parse_dates=...)``. When None (default),\n",
"        no date parsing is requested.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The content of the CSV file.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Relies on the notebook-level ``fs`` filesystem object and, when ``datetime_col``\n",
"    is given, on ``custom_date_parser`` (presumably defined by the KPI helper script\n",
"    executed earlier -- TODO confirm).\n",
"    \"\"\"\n",
"    file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + directory_path + \"/\" + file_name + \".csv\"\n",
"    print(\"File path : \", file_path)\n",
"\n",
"    # Pass the (deprecated) date_parser argument only when date columns are requested:\n",
"    # supplying it unconditionally makes pandas emit a FutureWarning on every call,\n",
"    # even for tables where nothing is parsed as a date.\n",
"    read_options = {\"sep\": \",\"}\n",
"    if datetime_col is not None:\n",
"        read_options.update(parse_dates=datetime_col, date_parser=custom_date_parser)\n",
"\n",
"    with fs.open(file_path, mode=\"rb\") as file_in:\n",
"        df = pd.read_csv(file_in, **read_options)\n",
"    return df\n"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 6,
2024-03-05 00:36:48 +01:00
"id": "c56decc3-de19-4786-82a4-1386c72a6bfb",
"metadata": {},
"outputs": [
{
2024-03-10 17:41:43 +01:00
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1165098</td>\n",
" <td>618562</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1165100</td>\n",
" <td>618559</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1165101</td>\n",
" <td>618561</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1165102</td>\n",
" <td>618560</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1165103</td>\n",
" <td>618558</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69253</th>\n",
" <td>1698158</td>\n",
" <td>18580</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69254</th>\n",
" <td>1698159</td>\n",
" <td>18569</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69255</th>\n",
" <td>1698160</td>\n",
" <td>2962</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69256</th>\n",
" <td>1698161</td>\n",
" <td>3825</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69257</th>\n",
" <td>1698162</td>\n",
" <td>5731</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>69258 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id target_name target_type_is_import \\\n",
"0 1165098 618562 Newsletter mensuelle False \n",
"1 1165100 618559 Newsletter mensuelle False \n",
"2 1165101 618561 Newsletter mensuelle False \n",
"3 1165102 618560 Newsletter mensuelle False \n",
"4 1165103 618558 Newsletter mensuelle False \n",
"... ... ... ... ... \n",
"69253 1698158 18580 Newsletter mensuelle False \n",
"69254 1698159 18569 Newsletter mensuelle False \n",
"69255 1698160 2962 Newsletter mensuelle False \n",
"69256 1698161 3825 Newsletter mensuelle False \n",
"69257 1698162 5731 Newsletter mensuelle False \n",
"\n",
" target_type_name \n",
"0 manual_static_filter \n",
"1 manual_static_filter \n",
"2 manual_static_filter \n",
"3 manual_static_filter \n",
"4 manual_static_filter \n",
"... ... \n",
"69253 manual_static_filter \n",
"69254 manual_static_filter \n",
"69255 manual_static_filter \n",
"69256 manual_static_filter \n",
"69257 manual_static_filter \n",
"\n",
"[69258 rows x 5 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
2024-03-05 00:36:48 +01:00
}
],
"source": [
"target_information"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 7,
2024-03-05 00:36:48 +01:00
"id": "c825d64b-356c-4b71-aa3c-90e0dd7ca092",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" <th>start_date_time</th>\n",
" <th>end_date_time</th>\n",
" <th>open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1799177</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>2</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>danse</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>aringa rossa</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-09-27 00:00:00+02:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1799178</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>5èmes hurlants</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-11-18 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1799179</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>dom juan</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-12-07 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1799180</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>vanishing point</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-04 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1799181</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>12.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>la cite des congres</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>a o lang pho</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-03 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492309</th>\n",
" <td>3252232</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492310</th>\n",
" <td>3252233</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492311</th>\n",
" <td>3252234</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492312</th>\n",
" <td>3252235</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492313</th>\n",
" <td>3252236</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>492314 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 1799177 36984 409613 2 guichet \n",
"1 1799178 36984 409613 3 guichet \n",
"2 1799179 36984 409613 1 guichet \n",
"3 1799180 36984 409613 1 guichet \n",
"4 1799181 36984 409613 3 guichet \n",
"... ... ... ... ... ... \n",
"492309 3252232 621716 710062 1 guichet \n",
"492310 3252233 621716 710062 1 guichet \n",
"492311 3252234 621716 710062 1 guichet \n",
"492312 3252235 621716 710062 1 guichet \n",
"492313 3252236 621716 710062 1 guichet \n",
"\n",
" purchase_date amount is_full_price name_event_types \\\n",
"0 2016-04-28 17:58:26+02:00 9.0 False danse \n",
"1 2016-04-28 17:58:26+02:00 9.0 False cirque \n",
"2 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"3 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"4 2016-04-28 17:58:26+02:00 12.0 False cirque \n",
"... ... ... ... ... \n",
"492309 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492310 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492311 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492312 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492313 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"\n",
" name_facilities name_categories \\\n",
"0 le grand t abo t gourmand jeune \n",
"1 le grand t abo t gourmand jeune \n",
"2 le grand t abo t gourmand jeune \n",
"3 le grand t abo t gourmand jeune \n",
"4 la cite des congres abo t gourmand jeune \n",
"... ... ... \n",
"492309 cap nort tarif sco co 1 seance scolaire \n",
"492310 cap nort tarif sco co 1 seance scolaire \n",
"492311 cap nort tarif sco co 1 seance scolaire \n",
"492312 cap nort tarif sco co 1 seance scolaire \n",
"492313 cap nort tarif sco co 1 seance scolaire \n",
"\n",
" name_events name_seasons start_date_time \\\n",
"0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n",
"1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n",
"2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n",
"3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n",
"4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n",
"... ... ... ... \n",
"492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"\n",
" end_date_time open \n",
"0 1901-01-01 00:09:21+00:09 True \n",
"1 1901-01-01 00:09:21+00:09 True \n",
"2 1901-01-01 00:09:21+00:09 True \n",
"3 1901-01-01 00:09:21+00:09 True \n",
"4 1901-01-01 00:09:21+00:09 True \n",
"... ... ... \n",
"492309 1901-01-01 00:09:21+00:09 True \n",
"492310 1901-01-01 00:09:21+00:09 True \n",
"492311 1901-01-01 00:09:21+00:09 True \n",
"492312 1901-01-01 00:09:21+00:09 True \n",
"492313 1901-01-01 00:09:21+00:09 True \n",
"\n",
"[492314 rows x 16 columns]"
]
},
2024-03-10 17:41:43 +01:00
"execution_count": 7,
2024-03-05 00:36:48 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 63,
2024-03-02 13:05:51 +01:00
"id": "afd044b8-ac83-4a35-b959-700cae0b3b41",
2024-03-02 12:16:24 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 12:16:24 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 12:16:24 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 13:05:51 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-08 10:30:12 +01:00
"Tables imported for tenant 10\n",
2024-03-05 14:34:43 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-08 10:30:12 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
2024-03-02 12:16:24 +01:00
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 13:05:51 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-08 10:30:12 +01:00
"Tables imported for tenant 11\n",
2024-03-02 13:05:51 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 13:05:51 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-08 10:30:12 +01:00
"Tables imported for tenant 12\n",
2024-03-02 13:05:51 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 13:05:51 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-08 10:30:12 +01:00
"Tables imported for tenant 13\n",
2024-03-02 13:05:51 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 12:16:24 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
2024-03-08 10:30:12 +01:00
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 14\n"
]
2024-03-02 12:16:24 +01:00
}
],
"source": [
2024-03-03 09:32:45 +01:00
"# Build the KPI tables of the 5 performing-arts companies and stack them per table type\n",
"\n",
"# tenant numbers of the performing-arts companies\n",
"nb_compagnie=['10','11','12','13','14']\n",
"\n",
"# Per-tenant frames are collected in lists and concatenated once after the loop:\n",
"# calling pd.concat inside the loop re-copies all accumulated rows at every iteration.\n",
"customerplus_frames = []\n",
"campaigns_frames = []\n",
"tickets_frames = []\n",
"target_frames = []\n",
"\n",
"for directory_path in nb_compagnie:\n",
"    # raw tables of the current tenant\n",
"    df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
"    df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
"    df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
"    df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
"\n",
"    # KPI tables derived from the raw tables\n",
"    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)\n",
"    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
"    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
"\n",
"    # tag every row with its tenant number so that results can be aggregated per company\n",
"    # (the column name \"number_compagny\" is kept as is: it is the key written into the tables)\n",
"    df_tickets_kpi[\"number_compagny\"]=int(directory_path)\n",
"    df_campaigns_kpi[\"number_compagny\"]=int(directory_path)\n",
"    df_customerplus_clean[\"number_compagny\"]=int(directory_path)\n",
"    df_target_information[\"number_compagny\"]=int(directory_path)\n",
"\n",
"    customerplus_frames.append(df_customerplus_clean)\n",
"    campaigns_frames.append(df_campaigns_kpi)\n",
"    tickets_frames.append(df_tickets_kpi)\n",
"    target_frames.append(df_target_information)\n",
"\n",
"    print(f\"Tables imported for tenant {directory_path}\")\n",
"\n",
"# one concatenation per table type; the original row indexes are kept (no ignore_index)\n",
"customerplus_clean_spectacle=pd.concat(customerplus_frames,axis=0)\n",
"campaigns_information_spectacle=pd.concat(campaigns_frames,axis=0)\n",
"products_purchased_reduced_spectacle=pd.concat(tickets_frames,axis=0)\n",
"target_information_spectacle=pd.concat(target_frames,axis=0)"
2024-03-02 13:05:51 +01:00
]
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "code",
"execution_count": 37,
"id": "b5a4a031-9533-4a50-8569-5f4246691a7a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>2</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18031</th>\n",
" <td>2</td>\n",
" <td>319517</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1556</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>2</td>\n",
" <td>2020-01-01 14:06:52+00:00</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>291642</th>\n",
" <td>2</td>\n",
" <td>757541</td>\n",
" <td>303.0</td>\n",
" <td>5.0</td>\n",
" <td>1</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2016-09-08 14:50:00+00:00</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"17 2 139 NaN NaN 0 \n",
"18031 2 319517 NaN NaN 0 \n",
"291642 2 757541 303.0 5.0 1 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"17 875 False NaN 2 False ... \n",
"18031 1556 False NaN 0 True ... \n",
"291642 862 False NaN 1 True ... \n",
"\n",
" purchase_count first_buying_date country gender_label \\\n",
"17 3 NaN NaN other \n",
"18031 2 2020-01-01 14:06:52+00:00 fr female \n",
"291642 3 2016-09-08 14:50:00+00:00 fr male \n",
"\n",
" gender_female gender_male gender_other country_fr has_tags \\\n",
"17 0 0 1 NaN 0 \n",
"18031 1 0 0 1.0 0 \n",
"291642 0 1 0 1.0 1 \n",
"\n",
" number_compagny \n",
"17 10 \n",
"18031 11 \n",
"291642 14 \n",
"\n",
"[3 rows x 29 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==2]"
]
},
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 1,
2024-03-03 09:32:45 +01:00
"id": "b9b6ec1f-36fb-4ee9-a1ed-09ff41878005",
"metadata": {},
"outputs": [
{
2024-03-05 00:36:48 +01:00
"ename": "NameError",
"evalue": "name 'customerplus_clean_spectacle' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcustomerplus_clean_spectacle\u001b[49m[customerplus_clean_spectacle[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcustomer_id\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m==\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
"\u001b[0;31mNameError\u001b[0m: name 'customerplus_clean_spectacle' is not defined"
]
2024-03-03 09:32:45 +01:00
}
],
"source": [
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==1]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "a12c1b7d-6f6f-483e-b215-6336d7a51057",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'street_id', 'structure_id', 'mcp_contact_id',\n",
" 'fidelity', 'tenant_id', 'is_partner', 'deleted_at', 'gender',\n",
" 'is_email_true', 'opt_in', 'last_buying_date', 'max_price',\n",
" 'ticket_sum', 'average_price', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'gender_label',\n",
" 'gender_female', 'gender_male', 'gender_other', 'country_fr',\n",
" 'has_tags', 'number_compagny'],\n",
" dtype='object')"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle.columns"
]
},
2024-03-02 13:05:51 +01:00
{
"cell_type": "code",
2024-03-02 14:05:48 +01:00
"execution_count": 38,
2024-03-02 13:32:54 +01:00
"id": "05b9a396-dcd7-4d3d-8b39-5ca48beba4b0",
2024-03-02 12:16:24 +01:00
"metadata": {},
2024-03-02 14:05:48 +01:00
"outputs": [],
2024-03-02 12:16:24 +01:00
"source": [
2024-03-02 14:05:48 +01:00
"#customerplus_clean_spectacle.isna().sum()\n",
"#campaigns_information_spectacle.isna().sum()\n",
"#products_purchased_reduced_spectacle.isna().sum()\n",
"#target_information_spectacle.isna().sum()"
2024-03-02 12:16:24 +01:00
]
2024-03-03 09:32:45 +01:00
},
{
"cell_type": "markdown",
2024-03-08 10:30:12 +01:00
"id": "81e15508-32ca-46f1-a03d-1febddbbf5b4",
2024-03-03 09:32:45 +01:00
"metadata": {},
"source": [
2024-03-08 10:30:12 +01:00
"### Ajout : importation de la table train_set pour faire les stats desc dessus"
2024-03-03 09:32:45 +01:00
]
},
{
2024-03-08 10:30:12 +01:00
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 119,
2024-03-08 10:30:12 +01:00
"id": "3a1fdd6b-ac43-4e90-9a31-4f522bcc44bb",
2024-03-03 09:32:45 +01:00
"metadata": {},
2024-03-08 10:30:12 +01:00
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3450421856.py:9: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-03-08 10:30:12 +01:00
" train_set_spectacle = pd.read_csv(file_in, sep=\",\")\n"
]
}
],
2024-03-03 09:32:45 +01:00
"source": [
2024-03-08 10:30:12 +01:00
"# Load the train_set table of the performing-arts (\"musique\") companies\n",
"\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"path_train_set_spectacle = \"projet-bdc2324-team1/Generalization/musique/Train_set.csv\"\n",
"\n",
"with fs.open(path_train_set_spectacle, mode=\"rb\") as file_in:\n",
"    # low_memory=False parses each column in one pass, so dtypes are inferred on the whole\n",
"    # column instead of chunk by chunk (chunked inference raised a mixed-types DtypeWarning)\n",
"    train_set_spectacle = pd.read_csv(file_in, sep=\",\", low_memory=False)"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 120,
2024-03-08 10:30:12 +01:00
"id": "3a4c1ff4-2861-4e86-99df-26eea0370dc3",
2024-03-03 09:32:45 +01:00
"metadata": {},
2024-03-08 10:30:12 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_492779</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
2024-03-11 18:43:56 +01:00
" <td>female</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1</td>\n",
" <td>0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>13.0</td>\n",
" <td>4.0</td>\n",
" <td>8 days 04:08:27</td>\n",
2024-03-10 17:41:43 +01:00
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_563424</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>...</td>\n",
" <td>fr</td>\n",
2024-03-11 18:43:56 +01:00
" <td>other</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>1</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>0 days 01:39:58.555555555</td>\n",
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_44369</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>fr</td>\n",
" <td>male</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0</td>\n",
" <td>1</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>14.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>NaN</td>\n",
2024-03-10 17:41:43 +01:00
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_620271</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>NaN</td>\n",
" <td>other</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
2024-03-10 17:41:43 +01:00
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_687644</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-03-11 18:43:56 +01:00
" <td>4.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>NaN</td>\n",
2024-03-10 17:41:43 +01:00
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
2024-03-11 18:43:56 +01:00
"0 10_492779 0.0 0.0 0.0 0.0 \n",
"1 10_563424 0.0 0.0 0.0 0.0 \n",
"2 10_44369 0.0 0.0 0.0 0.0 \n",
"3 10_620271 0.0 0.0 0.0 0.0 \n",
"4 10_687644 0.0 0.0 0.0 0.0 \n",
2024-03-08 10:30:12 +01:00
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
2024-03-11 18:43:56 +01:00
"0 0.0 550.0 550.0 \n",
"1 0.0 550.0 550.0 \n",
"2 0.0 550.0 550.0 \n",
"3 0.0 550.0 550.0 \n",
"4 0.0 550.0 550.0 \n",
2024-03-08 10:30:12 +01:00
"\n",
" time_between_purchase nb_tickets_internet ... country gender_label \\\n",
2024-03-11 18:43:56 +01:00
"0 -1.0 0.0 ... fr female \n",
"1 -1.0 0.0 ... fr other \n",
"2 -1.0 0.0 ... fr male \n",
"3 -1.0 0.0 ... NaN other \n",
"4 -1.0 0.0 ... NaN other \n",
2024-03-08 10:30:12 +01:00
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
2024-03-11 18:43:56 +01:00
"0 1 0 0 1.0 13.0 \n",
"1 0 0 1 1.0 10.0 \n",
"2 0 1 0 1.0 14.0 \n",
"3 0 0 1 NaN 9.0 \n",
"4 0 0 1 NaN 4.0 \n",
2024-03-08 10:30:12 +01:00
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \n",
2024-03-11 18:43:56 +01:00
"0 4.0 8 days 04:08:27 0.0 \n",
"1 9.0 0 days 01:39:58.555555555 0.0 \n",
2024-03-10 17:41:43 +01:00
"2 0.0 NaN 0.0 \n",
2024-03-11 18:43:56 +01:00
"3 0.0 NaN 0.0 \n",
2024-03-10 17:41:43 +01:00
"4 0.0 NaN 0.0 \n",
2024-03-08 10:30:12 +01:00
"\n",
"[5 rows x 40 columns]"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 120,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-03 09:32:45 +01:00
"source": [
2024-03-08 10:30:12 +01:00
"train_set_spectacle.head()"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 121,
2024-03-08 10:30:12 +01:00
"id": "4632384d-2a06-445d-9fdb-b0c91b37ebaf",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-03-08 10:30:12 +01:00
"array([0., 1.])"
2024-03-03 09:32:45 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 121,
2024-03-03 09:32:45 +01:00
"metadata": {},
2024-03-08 10:30:12 +01:00
"output_type": "execute_result"
2024-03-03 09:32:45 +01:00
}
],
"source": [
2024-03-08 10:30:12 +01:00
"# on remplace les valeurs has purchased = NaN par des 0\n",
"train_set_spectacle[\"y_has_purchased\"] = train_set_spectacle[\"y_has_purchased\"].fillna(0)\n",
"train_set_spectacle[\"y_has_purchased\"].unique()"
2024-03-08 08:44:28 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 122,
2024-03-08 10:30:12 +01:00
"id": "5fd56696-b479-46c7-8a59-fb8137db5fb5",
2024-03-08 08:44:28 +01:00
"metadata": {},
2024-03-08 10:30:12 +01:00
"outputs": [
{
"data": {
"text/plain": [
"array([10, 11, 12, 13, 14])"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 122,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-08 08:44:28 +01:00
"source": [
2024-03-08 10:30:12 +01:00
"# Rebuild the company number column: it is the part of customer_id located before the first \"_\"\n",
"\n",
"company_prefix = train_set_spectacle[\"customer_id\"].astype(str).str.split(\"_\").str[0]\n",
"train_set_spectacle[\"number_company\"] = company_prefix.astype(int)\n",
"train_set_spectacle[\"number_company\"].unique()"
2024-03-08 08:44:28 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 123,
2024-03-08 10:30:12 +01:00
"id": "91c6e047-43d2-456c-81f1-087026eef4f0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_492779</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>female</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1</td>\n",
" <td>0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>13.0</td>\n",
" <td>4.0</td>\n",
" <td>8 days 04:08:27</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_563424</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>other</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>1</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>0 days 01:39:58.555555555</td>\n",
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_44369</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>male</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0</td>\n",
" <td>1</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>14.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_620271</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>other</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_687644</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-03-11 18:43:56 +01:00
" <td>4.0</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
2024-03-11 18:43:56 +01:00
"0 10_492779 0.0 0.0 0.0 0.0 \n",
"1 10_563424 0.0 0.0 0.0 0.0 \n",
"2 10_44369 0.0 0.0 0.0 0.0 \n",
"3 10_620271 0.0 0.0 0.0 0.0 \n",
"4 10_687644 0.0 0.0 0.0 0.0 \n",
2024-03-08 10:30:12 +01:00
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
2024-03-11 18:43:56 +01:00
"0 0.0 550.0 550.0 \n",
"1 0.0 550.0 550.0 \n",
"2 0.0 550.0 550.0 \n",
"3 0.0 550.0 550.0 \n",
"4 0.0 550.0 550.0 \n",
2024-03-08 10:30:12 +01:00
"\n",
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
2024-03-11 18:43:56 +01:00
"0 -1.0 0.0 ... female \n",
"1 -1.0 0.0 ... other \n",
"2 -1.0 0.0 ... male \n",
"3 -1.0 0.0 ... other \n",
"4 -1.0 0.0 ... other \n",
2024-03-08 10:30:12 +01:00
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
2024-03-11 18:43:56 +01:00
"0 1 0 0 1.0 13.0 \n",
"1 0 0 1 1.0 10.0 \n",
"2 0 1 0 1.0 14.0 \n",
"3 0 0 1 NaN 9.0 \n",
"4 0 0 1 NaN 4.0 \n",
2024-03-08 10:30:12 +01:00
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
2024-03-11 18:43:56 +01:00
"0 4.0 8 days 04:08:27 0.0 \n",
"1 9.0 0 days 01:39:58.555555555 0.0 \n",
2024-03-08 10:30:12 +01:00
"2 0.0 NaN 0.0 \n",
2024-03-11 18:43:56 +01:00
"3 0.0 NaN 0.0 \n",
2024-03-08 10:30:12 +01:00
"4 0.0 NaN 0.0 \n",
"\n",
" number_company \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"\n",
"[5 rows x 41 columns]"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 123,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set_spectacle.head()"
]
},
{
"cell_type": "markdown",
"id": "fff306c2-1d41-4ef6-867b-ba9a7cf4ee68",
"metadata": {},
"source": [
"## Statistiques descriptives"
]
},
{
"cell_type": "markdown",
"id": "0549bdc4-edd7-4511-916e-26e94b5a30f5",
"metadata": {},
"source": [
"### 0. Détection du client anonyme (outlier) - utile pour la section 3"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 132,
2024-03-08 10:30:12 +01:00
"id": "5b460061-f8b5-4a6b-ba59-539446d8487f",
"metadata": {},
"outputs": [],
"source": [
"def outlier_detection(directory_path = \"1\", coupure = 1):\n",
"    \"\"\"Plot the share of a tenant's total purchase amount held by its largest customers.\n",
"\n",
"    Loads the tenant's `products_purchased_reduced` table, builds the ticket KPIs,\n",
"    sums `total_amount` per `customer_id` and draws a pie chart with one slice for each\n",
"    of the `coupure` largest customers plus a single 'Autre' slice for all the others.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str\n",
"        Tenant number, passed to `display_databases`.\n",
"    coupure : int\n",
"        Number of top customers drawn as individual slices.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None. The figure is displayed with `plt.show()`.\n",
"    \"\"\"\n",
"    df_tickets = display_databases(directory_path, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n",
"    df_tickets_kpi = tickets_kpi_function(df_tickets)\n",
"\n",
"    # For tenant 101 a second purchases file is loaded as well and its KPIs are appended\n",
"    if directory_path == \"101\" :\n",
"        df_tickets_1 = display_databases(directory_path, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])\n",
"        df_tickets_kpi_1 = tickets_kpi_function(df_tickets_1)\n",
"\n",
"        df_tickets_kpi = pd.concat([df_tickets_kpi, df_tickets_kpi_1])\n",
"\n",
"    # Total amount per customer, largest first (indexed by customer_id)\n",
"    df_circulaire = df_tickets_kpi.groupby('customer_id')['total_amount'].sum().sort_values(ascending = False)\n",
"\n",
"    # The `coupure` largest customers keep their own slice, the remainder is summed up\n",
"    top = df_circulaire[:coupure]\n",
"    rest_sum = df_circulaire[coupure:].sum()\n",
"    new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
"\n",
"    # Pie chart of the resulting shares\n",
"    plt.figure(figsize=(3, 3))\n",
"    plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
"    plt.axis('equal')  # equal aspect ratio so that the pie is drawn as a circle\n",
"    plt.title('Répartition des montants totaux')\n",
"    plt.show()\n"
]
},
2024-03-10 12:31:28 +01:00
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 166,
2024-03-10 12:31:28 +01:00
"id": "cccee90c-67d1-4e14-8410-1210a5ef97d9",
"metadata": {},
"outputs": [],
"source": [
"# Helper drawing a grouped bar chart (several bars per x category, one per modality)\n",
"\n",
"def multiple_barplot(data, x, y, var_labels, bar_width=0.35,\n",
"                     figsize=(10, 6), xlabel=None, ylabel=None, title=None, dico_labels = None) :\n",
"    \"\"\"Draw a grouped bar chart: one cluster per value of `x`, one bar per value of `var_labels`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : DataFrame containing the columns named by `x`, `y` and `var_labels`.\n",
"    x : name of the column whose distinct values form the clusters on the x axis.\n",
"    y : name of the column giving the bar heights.\n",
"    var_labels : name of the column whose distinct values form the bars of each cluster.\n",
"    bar_width : width of a single bar.\n",
"    figsize : figure size passed to `plt.subplots`.\n",
"    xlabel, ylabel : axis labels; default to the column names `x` and `y`.\n",
"    title : title of the figure.\n",
"    dico_labels : optional mapping from a `var_labels` value to its legend text;\n",
"        without it the legend shows \"<var_labels>=<value>\".\n",
"\n",
"    Returns\n",
"    -------\n",
"    None. The figure is displayed with `plt.show()`.\n",
"    \"\"\"\n",
"    # without explicit axis labels, fall back on the names of the x and y columns\n",
"    xlabel = x if xlabel is None else xlabel\n",
"    ylabel = y if ylabel is None else ylabel\n",
"\n",
"    fig, ax = plt.subplots(figsize=figsize)\n",
"\n",
"    categories = data[x].unique()\n",
"    labels = data[var_labels].unique()\n",
"    base_positions = np.arange(len(categories))\n",
"\n",
"    # one series of bars per label, each shifted by one bar width from the previous one\n",
"    for rank, label in enumerate(labels):\n",
"        label_data = data[data[var_labels] == label]\n",
"\n",
"        # first y value of each category; NaN (no bar drawn) when the label has no row for it\n",
"        values = []\n",
"        for category in categories:\n",
"            matches = label_data.loc[label_data[x] == category, y].values\n",
"            values.append(matches[0] if len(matches) else np.nan)\n",
"\n",
"        label_printed = f\"{var_labels}={label}\" if dico_labels is None else dico_labels[label]\n",
"\n",
"        ax.bar(base_positions + rank * bar_width, values, bar_width, label=label_printed)\n",
"\n",
"    # labels, legend and ticks\n",
"    ax.set_xlabel(xlabel)\n",
"    ax.set_ylabel(ylabel)\n",
"    ax.set_title(title)\n",
"    # centre each tick under its cluster whatever the number of bars per cluster\n",
"    ax.set_xticks(base_positions + bar_width * (len(labels) - 1) / 2)\n",
"    ax.set_xticklabels(categories)\n",
"    ax.legend()\n",
"\n",
"    plt.show()"
]
},
2024-03-08 10:30:12 +01:00
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 133,
2024-03-08 10:30:12 +01:00
"id": "b6417f09-a6c7-4319-95b3-98c95ec5a3b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-08 10:30:12 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUwAAAEQCAYAAADbIk3TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA5TUlEQVR4nO3dd3xT9f7H8VeStkkX3dDSySxbliggU9kgyLiKskVUcKIX0SvrOlB/XkXFLVdEkesGUUQ2IgKCWPYqs0DL6KB7Jd/fH4FAaEtTaHvS5vN8PPrQJCcn76Snb8745hydUkohhBCiVHqtAwghRFUhhSmEEA6SwhRCCAdJYQohhIOkMIUQwkFSmEII4SApTCGEcJAUphBCOEgKUwghHFRtCnPnzp14eXnxzjvvaB1FCFFNOVVhzp8/H51OZ/txc3MjLCyMe+65h0OHDpX4vIyMDIYOHcqjjz7Ko48+WomJi1q2bBkzZ84s9rGYmBjGjBlju3369GlmzpxJXFxckWlnzpyJTqermJDXSafTlfjeXMXevXuZOXMmx44dq5TXe/nll1m8eHG5za888l9rGa/2lBP59NNPFaA+/fRTtWnTJrV27Vr14osvKk9PT1WzZk2VkpJS7POGDRum7rvvPmWxWCo5cVGTJk1SJX2s27dvV/Hx8bbbW7dutb3fqyUkJKhNmzZVVMzrAqgZM2ZoHUNT33zzjQLU2rVrK+X1vL291ejRo8ttfuWR/1rLeHXnpmFXl6hZs2a0bdsWgK5du2I2m5kxYwaLFy9m7NixRab/+uuvKztiEdnZ2Xh5eV1zmlatWjk8v4iICCIiIm40lhCiPGnd2Fe6tIa5detWu/t//vlnBajZs2fb3b9161Y1YMAAFRAQoIxGo2rZsqX66quvip3nihUr1JgxY1RAQIDy8vJS/fv3V4cPH7abdsWKFerOO+9U4eHhymg0qnr16qkJEyaoc+fO2U03Y8YMBai//vpLDRkyRPn7+6vQ0FA1evRoBRT5OXr0qFJKqejoaNvawtq1a4ud9tIa3KXXuJLZbFavvvqqio2NVR4eHiokJESNHDlSJSQk2E3XpUsX1bRpU/Xnn3+q2267TXl6eqo6deqo2bNnK7PZXOrv4cKFC2r8+PEqMDBQeXt7q169eqkDBw4Uu4Z58OBBNXz4cBUSEqI8PDxUo0aN1Ny5c4vkfuGFF1TDhg2VyWRSfn5+qnnz5mrOnDnXzHHpM1q4cKGaMmWKCg0NVd7e3qp///4qKSlJpaenqwceeEAFBQWpoKAgNWbMGJWRkWE3j5ycHDV16lQVExOj3N3dVe3atdXEiRNVamqq3XTR0dGqX79+6pdfflGtWrVSJpNJxcbGqnnz5tmmubQsXf1zaQuhrMvP7t271T333KNq1KihatasqcaOHavS0tJs0xX3Wl26dFFKKZWVlaWeeuopFRMTo4xGowoICFBt2rRRX375ZYmfZ2n5lVJq3rx5qkWLFrZ5Dho0SO3du9f2eGnL+Ny5c1WnTp1USEiI8vLyUs2aNVOvvvqqys/PL/J5F7fm3KVLF9t7VEqpBx98UBmNRrVt2zbbfWazWXXv3l3VrFlTnT59usT3WxGqRGHOnTtXAeq7776z3bdmzRrl4eGhOnXqpL766iu1fPlyNWbMmCILwKV5RkZGqnHjxqlffvlFffTRR6pmzZoqMjLS7g/n/fffV7Nnz1Y//vijWr9+vfrss8/UTTfdpGJjY+1+4ZcW+OjoaPXMM8+olStXqsWLF6v4+Hg1dOhQBahNmzbZfnJzc5VS9gvJhQsXbNmef/5527SXyq+4wpwwYYIC1COPPKKWL1+uPvjgAxUSEqIiIyPt/ii7dOmigoKCVIMGDdQHH3ygVq5cqSZOnKgA9dlnn13zd2CxWFS3bt2U0WhUL730klqxYoWaMWOGqlu3bpHC3LNnj638FixYoFasWKGeeuoppdfr1cyZM23TzZ49WxkMBjVjxg
y1evVqtXz5cjVnzhy7aYpzqTCjo6PVmDFjbO/Zx8dHdevWTfXo0UM9/fTTasWKFerVV19VBoNBPfroo3bvpVevXsrNzU1NmzZNrVixQr3++uvK29tbtWrVyvZ7ufS7iYiIUE2aNFELFixQv/76qxo2bJgC1Pr165VSSp09e1a9/PLLClDvvvuu7Xd29uzZ61p+YmNj1fTp09XKlSvVG2+8oYxGoxo7dqxtuk2bNilPT0/Vt29f22vt2bNHKWUtEi8vL/XGG2+otWvXqp9++km98sor6p133inx8ywt/6XHhg8frn7++We1YMECVbduXeXn56cOHjyolFKlLuNPPvmkev/999Xy5cvVmjVr1JtvvqmCg4Pt3telz9uRwszJyVEtW7ZUdevWtf2tTp8+Xen1erVixYoS32tFccrC3Lx5syooKFAZGRlq+fLlKjQ0VHXu3FkVFBTYpm3UqJFq1aqV3X1KKdW/f38VFhZmW5O6NM+77rrLbrqNGzcqQL344ovFZrFYLKqgoEAdP35cAWrJkiW2xy4t8NOnTy/yvGvt37l6IbnWPsyrC3Pfvn0KUBMnTrSbbsuWLQpQzz33nO2+Ll26KEBt2bLFbtomTZqoXr16FZvtkl9++UUB6q233rK7/6WXXipSmL169VIRERHqwoULdtM+8sgjymQy2fY59+/fX7Vs2fKar1ucS4U5YMAAu/ufeOIJBajHHnvM7v5BgwapwMBA2+3ly5crQL322mt203311VcKUB999JHtvujoaGUymdTx48dt9+Xk5KjAwED14IMP2u5zdB+gI8vP1bkmTpyoTCaT3b74kvZhNmvWTA0aNOiaGYpTUv7U1FRbOV/pxIkTymg0qnvvvdd2n6P7MM1msyooKFALFixQBoPB7hiEo4WplFKHDh1SNWrUUIMGDVKrVq1Ser1ePf/886W/2QrgVEfJL7n11ltxd3fH19eX3r17ExAQwJIlS3Bzs+5yjY+PZ//+/dx3330AFBYW2n769u1LYmIiBw4csJvnpWkv6dChA9HR0axdu9Z239mzZ3nooYeIjIzEzc0Nd3d3oqOjAdi3b1+RnEOGDCnX930tl3JeeZQdoF27djRu3JjVq1fb3R8aGkq7du3s7mvRogXHjx936HWu/rzuvfdeu9u5ubmsXr2au+66Cy8vryK/g9zcXDZv3mzLuGPHDiZOnMivv/5Kenq6Y2/6ov79+9vdbty4MQD9+vUrcn9KSgqZmZkArFmzBij6mQ0bNgxvb+8in1nLli2Jioqy3TaZTDRs2LDUz+ySsi4/d955p93tFi1akJuby9mzZ0t9rXbt2vHLL78wdepU1q1bR05OjkMZS7Jp0yZycnKKfFaRkZF07969yGdVkr///ps777yToKAgDAYD7u7ujBo1CrPZzMGDB68rW/369fn4449ZvHgx/fv3p1OnTpodpXfKwlywYAFbt25lzZo1PPjgg+zbt4/hw4fbHj9z5gwATz/9NO7u7nY/EydOBOD8+fN28wwNDS3yOqGhoSQnJwNgsVjo2bMn33//PVOmTGH16tX8+eeftj/64hbIsLCw8nnDDriUs7jXrF27tu3xS4KCgopMZzQaS/3DSk5Oxs3Nrcjzr/78kpOTKSws5J133inyO+jbty9w+Xfw7LPP8vrrr7N582b69OlDUFAQt99+O9u2bSvlXVsFBgba3fbw8Ljm/bm5uXbvJSQkxG46nU5n97u/5Ho/M7i+5efq1zMajSVOe7W3336bZ555hsWLF9OtWzcCAwMZNGjQNYffXUtZl6/inDhxgk6dOnHq1CneeustNmzYwNatW3n33XcBx95XSfr160etWrXIzc1l8uTJGAyG657XjXDKo+SNGze2HSXv1q0bZrOZTz75hG+//ZahQ4cSHBwMWP8QBw8eXOw8YmNj7W4nJSUVmSYpKYn69esDsHv3bnbs2MH8+fMZPXq0bZr4+PgSc1bmOMlLf1yJiYlFjp6fPn3a9pmUx+
sUFhaSnJxs9wd99ecXEBCAwWBg5MiRTJo0qdh51alTBwA3NzcmT57M5MmTSUtLY9WqVTz33HP06tWLhISEUkcX3Oh
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# outlier à enlever (dépend des stats desc !)\n",
"outlier_detection(directory_path=\"10\") # mettre 2 si on veut le 1er client non anonyme"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 145,
2024-03-08 10:30:12 +01:00
"id": "f08c082e-f76f-41f3-9530-3e6700eb74d9",
"metadata": {},
2024-03-11 18:43:56 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 10\n",
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUwAAAEQCAYAAADbIk3TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA5TUlEQVR4nO3dd3xT9f7H8VeStkkX3dDSySxbliggU9kgyLiKskVUcKIX0SvrOlB/XkXFLVdEkesGUUQ2IgKCWPYqs0DL6KB7Jd/fH4FAaEtTaHvS5vN8PPrQJCcn76Snb8745hydUkohhBCiVHqtAwghRFUhhSmEEA6SwhRCCAdJYQohhIOkMIUQwkFSmEII4SApTCGEcJAUphBCOEgKUwghHFRtCnPnzp14eXnxzjvvaB1FCFFNOVVhzp8/H51OZ/txc3MjLCyMe+65h0OHDpX4vIyMDIYOHcqjjz7Ko48+WomJi1q2bBkzZ84s9rGYmBjGjBlju3369GlmzpxJXFxckWlnzpyJTqermJDXSafTlfjeXMXevXuZOXMmx44dq5TXe/nll1m8eHG5za888l9rGa/2lBP59NNPFaA+/fRTtWnTJrV27Vr14osvKk9PT1WzZk2VkpJS7POGDRum7rvvPmWxWCo5cVGTJk1SJX2s27dvV/Hx8bbbW7dutb3fqyUkJKhNmzZVVMzrAqgZM2ZoHUNT33zzjQLU2rVrK+X1vL291ejRo8ttfuWR/1rLeHXnpmFXl6hZs2a0bdsWgK5du2I2m5kxYwaLFy9m7NixRab/+uuvKztiEdnZ2Xh5eV1zmlatWjk8v4iICCIiIm40lhCiPGnd2Fe6tIa5detWu/t//vlnBajZs2fb3b9161Y1YMAAFRAQoIxGo2rZsqX66quvip3nihUr1JgxY1RAQIDy8vJS/fv3V4cPH7abdsWKFerOO+9U4eHhymg0qnr16qkJEyaoc+fO2U03Y8YMBai//vpLDRkyRPn7+6vQ0FA1evRoBRT5OXr0qFJKqejoaNvawtq1a4ud9tIa3KXXuJLZbFavvvqqio2NVR4eHiokJESNHDlSJSQk2E3XpUsX1bRpU/Xnn3+q2267TXl6eqo6deqo2bNnK7PZXOrv4cKFC2r8+PEqMDBQeXt7q169eqkDBw4Uu4Z58OBBNXz4cBUSEqI8PDxUo0aN1Ny5c4vkfuGFF1TDhg2VyWRSfn5+qnnz5mrOnDnXzHHpM1q4cKGaMmWKCg0NVd7e3qp///4qKSlJpaenqwceeEAFBQWpoKAgNWbMGJWRkWE3j5ycHDV16lQVExOj3N3dVe3atdXEiRNVamqq3XTR0dGqX79+6pdfflGtWrVSJpNJxcbGqnnz5tmmubQsXf1zaQuhrMvP7t271T333KNq1KihatasqcaOHavS0tJs0xX3Wl26dFFKKZWVlaWeeuopFRMTo4xGowoICFBt2rRRX375ZYmfZ2n5lVJq3rx5qkWLFrZ5Dho0SO3du9f2eGnL+Ny5c1WnTp1USEiI8vLyUs2aNVOvvvqqys/PL/J5F7fm3KVLF9t7VEqpBx98UBmNRrVt2zbbfWazWXXv3l3VrFlTnT59usT3WxGqRGHOnTtXAeq7776z3bdmzRrl4eGhOnXqpL766iu1fPlyNWbMmCILwKV5RkZGqnHjxqlffvlFffTRR6pmzZoqMjLS7g/n/fffV7Nnz1Y//vijWr9+vfrss8/UTTfdpGJjY+1+4ZcW+OjoaPXMM8+olStXqsWLF6v4+Hg1dOhQBahNmzbZfnJzc5VS9gvJhQsXbNmef/5527SXyq+4wpwwYYIC1COPPKKWL1+uPvjgAxUSEqIiIyPt/ii7dOmigoKCVIMGDdQHH3ygVq5cqSZOnKgA9dlnn13zd2CxWFS3bt2U0WhUL730klqxYoWaMWOGqlu3bpHC3LNnj638FixYoFasWKGeeuoppdfr1cyZM23TzZ49WxkMBjVjxg
y1evVqtXz5cjVnzhy7aYpzqTCjo6PVmDFjbO/Zx8dHdevWTfXo0UM9/fTTasWKFerVV19VBoNBPfroo3bvpVevXsrNzU1NmzZNrVixQr3++uvK29tbtWrVyvZ7ufS7iYiIUE2aNFELFixQv/76qxo2bJgC1Pr165VSSp09e1a9/PLLClDvvvuu7Xd29uzZ61p+YmNj1fTp09XKlSvVG2+8oYxGoxo7dqxtuk2bNilPT0/Vt29f22vt2bNHKWUtEi8vL/XGG2+otWvXqp9++km98sor6p133inx8ywt/6XHhg8frn7++We1YMECVbduXeXn56cOHjyolFKlLuNPPvmkev/999Xy5cvVmjVr1JtvvqmCg4Pt3telz9uRwszJyVEtW7ZUdevWtf2tTp8+Xen1erVixYoS32tFccrC3Lx5syooKFAZGRlq+fLlKjQ0VHXu3FkVFBTYpm3UqJFq1aqV3X1KKdW/f38VFhZmW5O6NM+77rrLbrqNGzcqQL344ovFZrFYLKqgoEAdP35cAWrJkiW2xy4t8NOnTy/yvGvt37l6IbnWPsyrC3Pfvn0KUBMnTrSbbsuWLQpQzz33nO2+Ll26KEBt2bLFbtomTZqoXr16FZvtkl9++UUB6q233rK7/6WXXipSmL169VIRERHqwoULdtM+8sgjymQy2fY59+/fX7Vs2fKar1ucS4U5YMAAu/ufeOIJBajHHnvM7v5BgwapwMBA2+3ly5crQL322mt203311VcKUB999JHtvujoaGUymdTx48dt9+Xk5KjAwED14IMP2u5zdB+gI8vP1bkmTpyoTCaT3b74kvZhNmvWTA0aNOiaGYpTUv7U1FRbOV/pxIkTymg0qnvvvdd2n6P7MM1msyooKFALFixQBoPB7hiEo4WplFKHDh1SNWrUUIMGDVKrVq1Ser1ePf/886W/2QrgVEfJL7n11ltxd3fH19eX3r17ExAQwJIlS3Bzs+5yjY+PZ//+/dx3330AFBYW2n769u1LYmIiBw4csJvnpWkv6dChA9HR0axdu9Z239mzZ3nooYeIjIzEzc0Nd3d3oqOjAdi3b1+RnEOGDCnX930tl3JeeZQdoF27djRu3JjVq1fb3R8aGkq7du3s7mvRogXHjx936HWu/rzuvfdeu9u5ubmsXr2au+66Cy8vryK/g9zcXDZv3mzLuGPHDiZOnMivv/5Kenq6Y2/6ov79+9vdbty4MQD9+vUrcn9KSgqZmZkArFmzBij6mQ0bNgxvb+8in1nLli2Jioqy3TaZTDRs2LDUz+ySsi4/d955p93tFi1akJuby9mzZ0t9rXbt2vHLL78wdepU1q1bR05OjkMZS7Jp0yZycnKKfFaRkZF07969yGdVkr///ps777yToKAgDAYD7u7ujBo1CrPZzMGDB68rW/369fn4449ZvHgx/fv3p1OnTpodpXfKwlywYAFbt25lzZo1PPjgg+zbt4/hw4fbHj9z5gwATz/9NO7u7nY/EydOBOD8+fN28wwNDS3yOqGhoSQnJwNgsVjo2bMn33//PVOmTGH16tX8+eeftj/64hbIsLCw8nnDDriUs7jXrF27tu3xS4KCgopMZzQaS/3DSk5Oxs3Nrcjzr/78kpOTKSws5J133inyO+jbty9w+Xfw7LPP8vrrr7N582b69OlDUFAQt99+O9u2bSvlXVsFBgba3fbw8Ljm/bm5uXbvJSQkxG46nU5n97u/5Ho/M7i+5efq1zMajSVOe7W3336bZ555hsWLF9OtWzcCAwMZNGjQNYffXUtZl6/inDhxgk6dOnHq1CneeustNmzYwNatW3n33XcBx95XSfr160etWrXIzc1l8uTJGAyG657XjXDKo+SNGze2HSXv1q0bZrOZTz75hG+//ZahQ4cSHBwMWP8QBw8eXOw8YmNj7W4nJSUVmSYpKYn69esDsHv3bnbs2MH8+fMZPXq0bZr4+PgSc1bmOMlLf1yJiYlFjp6fPn3a9pmUx+
sUFhaSnJxs9wd99ecXEBCAwWBg5MiRTJo0qdh51alTBwA3NzcmT57M5MmTSUtLY9WqVTz33HP06tWLhISEUkcX3Oh
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 11\n",
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEQCAYAAAD7zhIuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA1AUlEQVR4nO3dd3QU9f7G8fe2ZJMQSCGQEEIglFADKCAoXem9CShI8yJSvDaKSFWRci0o+FNUEEEF9IIoIE2aXgVpUpTee29JSN39/v5YE1jSk01md/N5nZOTk9nJzLO7kyfTdkanlFIIIYTG9FoHEEIIkDISQjgJKSMhhFOQMhJCOAUpIyGEU5AyEkI4BSkjIYRTkDISQjgFKSMhhFMosDLat28f3t7ezJo1q6BmKYRwITkqo/nz56PT6VK/jEYjISEh9OrVi6NHj2b4e9HR0XTv3p0RI0YwYsSIPIfOi59++olJkyal+1jZsmXp379/6s8XLlxg0qRJ7NmzJ824kyZNQqfT5U/IXNLpdBk+t8LiwIEDTJo0iVOnThXI/N5++22WL1/usOk5In9my7hTUznwxRdfKEB98cUXauvWrWrTpk3qrbfeUl5eXqpEiRLqxo0b6f5ejx491NNPP62sVmtOZpcvhg0bpjJ62rt371bHjh1L/XnHjh2pz/dBZ8+eVVu3bs2vmLkCqIkTJ2odQ1PfffedAtSmTZsKZH4+Pj6qX79+DpueI/Jntow7M2NuCqx69erUqVMHgKZNm2KxWJg4cSLLly9nwIABacb/9ttvc1mVjnP37l28vb0zHad27drZnl7p0qUpXbp0XmMJIVLkpLlS1ox27NhhN3zVqlUKUFOnTrUbvmPHDtWhQwfl7++vPD09Va1atdSSJUvSnea6detU//79lb+/v/L29lbt27dXx48ftxt33bp1qmPHjio0NFR5enqq8uXLq8GDB6urV6/ajTdx4kQFqF27dqlu3bopPz8/FRwcrPr166eANF8nT55USikVHh6e+l9u06ZN6Y6bsuaRMo/7WSwWNX36dBUZGak8PDxUUFCQ6tu3rzp79qzdeE2aNFHVqlVT27dvVw0bNlReXl6qXLlyaurUqcpisWT5Pty+fVs9++yzKiAgQPn4+KhWrVqpw4cPp7tmdOTIEdW7d28VFBSkPDw8VOXKldXs2bPT5H7zzTdVpUqVlNlsVsWKFVM1atRQM2fOzDRHymv09ddfq1GjRqng4GDl4+Oj2rdvry5duqTu3Lmj/vWvf6nAwEAVGBio+vfvr6Kjo+2mERcXp8aMGaPKli2rTCaTKlWqlBo6dKi6efOm3Xjh4eGqXbt2avXq1ap27drKbDaryMhINXfu3NRxUpalB79S1mxzuvz89ddfqlevXqpo0aKqRIkSasCAAerWrVup46U3ryZNmiillIqNjVWvvPKKKlu2rPL09FT+/v7q4YcfVt98802Gr2dW+ZVSau7cuSoqKip1mp07d1YHDhxIfTyrZXz27NmqUaNGKigoSHl7e6vq1aur6dOnq8TExDSvd3prfE2aNEl9jkop9dxzzylPT0+1c+fO1GEWi0U1b95clShRQl24cCHD5/sgh5TR7NmzFaCWLl2aOmzjxo3Kw8NDNWrUSC1ZskStWbNG9e/fP82LmzLNsLAwNXDgQLV69Wr16aefqhIlSqiwsDC7hfLjjz9WU6dOVT/++KPasmWL+vLLL1XNmjVVZGSk3YuZsjCFh4er0aNHq/Xr16vly5erY8eOqe7duytAbd26NfUrPj5eKWX/Bty+fTs127hx41LHTSmW9Mpo8ODBClDDhw9Xa9asUZ988okKCgpSYWFhdgt8kyZNVGBgoKpYsaL65JNP1Pr169XQoUMVoL788stM3wOr1aqaNWumPD091ZQpU9S6devUxIkTVURERJoy+vvvv1OLZcGCBWrdunXqlVdeUXq9Xk2aNCl1vKlTpyqDwaAmTpyoNmzYoNasWa
NmzpxpN056UsooPDxc9e/fP/U5FylSRDVr1ky1aNFCvfrqq2rdunVq+vTpymAwqBEjRtg9l1atWimj0ajGjx+v1q1bp9555x3l4+Ojateunfq+pLw3pUuXVlWrVlULFixQa9euVT169FCA2rJli1JKqStXrqi3335bAeqjjz5Kfc+uXLmSq+UnMjJSTZgwQa1fv1699957ytPTUw0YMCB1vK1btyovLy/Vtm3b1Hn9/fffSinbH6m3t7d677331KZNm9TKlSvVtGnT1KxZszJ8PbPKn/JY79691apVq9SCBQtURESEKlasmDpy5IhSSmW5jL/00kvq448/VmvWrFEbN25U77//vipevLjd80p5vbNTRnFxcapWrVoqIiIi9W91woQJSq/Xq3Xr1mX4XNOTqzLatm2bSkpKUtHR0WrNmjUqODhYNW7cWCUlJaWOW7lyZVW7dm27YUop1b59exUSEpK6BpAyzS5dutiN99tvvylAvfXWW+lmsVqtKikpSZ0+fVoB6ocffkh9LGVhmjBhQprfy2x7+sE3ILN9Rg+W0cGDBxWghg4dajfeH3/8oQA1duzY1GFNmjRRgPrjjz/sxq1atapq1apVutlSrF69WgHqgw8+sBs+ZcqUNGXUqlUrVbp0aXX79m27cYcPH67MZnPqPr727durWrVqZTrf9KSUUYcOHeyGv/jiiwpQL7zwgt3wzp07q4CAgNSf16xZowA1Y8YMu/GWLFmiAPXpp5+mDgsPD1dms1mdPn06dVhcXJwKCAhQzz33XOqw7O5zyc7y82CuoUOHKrPZbLfvM6N9RtWrV1edO3fONEN6Msp/8+bN1OK735kzZ5Snp6d66qmnUodld5+RxWJRSUlJasGCBcpgMNjt881uGSml1NGjR1XRokVV586d1c8//6z0er0aN25c1k/2Abk6tF+/fn1MJhO+vr60bt0af39/fvjhB4xG2y6oY8eOcejQIZ5++mkAkpOTU7/atm3LxYsXOXz4sN00U8ZN8eijjxIeHs6mTZtSh125coUhQ4YQFhaG0WjEZDIRHh4OwMGDB9Pk7NatW26eXq6k5Lz/aBxAvXr1qFKlChs2bLAbHhwcTL169eyGRUVFcfr06WzN58HX66mnnrL7OT4+ng0bNtClSxe8vb3TvAfx8fFs27YtNePevXsZOnQoa9eu5c6dO9l70v9o37693c9VqlQBoF27dmmG37hxg5iYGAA2btwIpH3NevTogY+PT5rXrFatWpQpUyb1Z7PZTKVKlbJ8zVLkdPnp2LGj3c9RUVHEx8dz5cqVLOdVr149Vq9ezZgxY9i8eTNxcXHZypiRrVu3EhcXl+a1CgsLo3nz5mleq4z8+eefdOzYkcDAQAwGAyaTiWeeeQaLxcKRI0dyla1ChQp89tlnLF++nPbt29OoUaNcHc3LVRktWLCAHTt2sHHjRp577jkOHjxI7969Ux+/fPkyAK+++iomk8nua+jQoQBcu3bNbprBwcFp5hMcHMz169cBsFqttGzZkmXLljFq1Cg2bNjA9u3bU/+g0nuzQ0JCcvP0ciUlZ3rzLFWqVOrjKQIDA9OM5+npmeVCe/36dYxGY5rff/D1u379OsnJycyaNSvNe9C2bVvg3nvw2muv8c4777Bt2zbatGlDYGAgjz/+ODt37sziWdsEBATY/ezh4ZHp8Pj4eLvnEhQUZDeeTqeze+9T5PY1g9wtPw/Oz9PTM8NxH/Thhx8yevRoli9fTrNmzQgICKBz586ZngKTmZwuX+k5c+YMjRo14vz583zwwQf8+uuv7Nixg48++gjI3vPKSLt27ShZsiTx8fG8/PLLGAyGHE8jV0fTqlSpkno0rVmzZlgsFj7//HP++9//0r17d4oXLw7YFvKuXbumO43IyEi7ny9dupRmnEuXLlGhQgUA/vrrL/bu3cv8+fPp169f6jjHjh3LMGdBngeUsuBevHgxzVG2CxcupL4mjphPcnIy169ft/tjefD18/
f3x2Aw0LdvX4YNG5butMqVKweA0Wjk5Zdf5uWXX+bWrVv8/PPPjB07llatWnH27Nksj0Lm9blcvXrVrpCUUly6dIm
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 12\n",
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"/tmp/ipykernel_436/3170175140.py:10: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAAEQCAYAAAATTqcFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA6IklEQVR4nO3dd3hTZf8G8DurSffei132EGRaNpRREJkyKlMQEAVBRARaFERwgeJPVIqAIAKCRfYGRfbehTLaAl10j7TNeH5/5G00dEOSJ+P7ua5e75vk5Jw76ent4cmTcwSMMQZCCCFGIeQdgBBCrAmVLiGEGBGVLiGEGBGVLiGEGBGVLiGEGBGVLiGEGBGVLiGEGBGVLiGEGBGVLiGEGBGV7jOuXr0KOzs7fPvtt7yjEEIskEWW7tq1ayEQCLQ/YrEYvr6+eP3113H37t1yn5ebm4vBgwdj2rRpmDZtmhETl7Znzx5ERUWV+ViNGjUwZswY7e0nT54gKioKly9fLrVsVFQUBAKBYUI+J4FAUO5rsxY3b95EVFQUHj58aJTtffrpp4iJidHb+vSRv6J93KIxC/Tzzz8zAOznn39mp06dYkePHmWLFi1itra2zMvLi2VkZJT5vCFDhrCRI0cytVpt5MSlTZ06lZX367l48SKLi4vT3j537pz29T4rMTGRnTp1ylAxnwsAFhkZyTsGV1u3bmUA2NGjR42yPXt7ezZ69Gi9rU8f+Svaxy2ZmGPfG1zjxo3RqlUrAEDnzp2hUqkQGRmJmJgYjB07ttTyW7ZsMXbEUgoKCmBnZ1fhMi1atKjy+gICAhAQEPCisQgh+sK79Q2h5Ej33LlzOvfv3r2bAWBLlizRuf/cuXOsX79+zNXVlUmlUta8eXO2efPmMtd54MABNmbMGObq6srs7OxYeHg4u3fvns6yBw4cYP3792f+/v5MKpWy2rVrs4kTJ7K0tDSd5SIjIxkAduHCBTZo0CDm4uLCfHx82OjRoxmAUj8PHjxgjDEWHBysPWo5evRomcuWHEmWbOO/VCoVW7p0KQsJCWE2NjbM09OTRUREsMTERJ3lOnXqxBo1asTOnj3LXnnlFWZra8tq1qzJlixZwlQqVaW/h+zsbDZhwgTm5ubG7O3tWVhYGIuNjS3zSPfOnTts+PDhzNPTk9nY2LD69euzlStXlsr9ySefsHr16jGZTMacnZ1ZkyZN2PLlyyvMUfIebdy4kc2ePZv5+Pgwe3t7Fh4ezpKTk1lOTg578803mbu7O3N3d2djxoxhubm5OuuQy+Vszpw5rEaNGkwikTA/Pz82ZcoUlpmZqbNccHAw69u3L9u7dy9r0aIFk8lkLCQkhEVHR2uXKdmXnv0p+ZdKdfef69evs9dff505OTkxLy8vNnbsWJaVlaVdrqxtderUiTHGWH5+Pps5cyarUaMGk0qlzNXVlbVs2ZL9+uuv5b6fleVnjLHo6GjWtGlT7ToHDBjAbt68qX28sn185cqVLDQ0lHl6ejI7OzvWuHFjtnTpUlZcXFzq/S7rCL5Tp07a18gYY5MmTWJSqZSdP39ee59KpWJdu3ZlXl5e7MmTJ+W+Xn2zqtJduXIlA8C2bdumve/IkSPMxsaGhYaGss2bN7N9+/axMWPGlNqJStYZGBjIxo0bx/bu3ct+/PFH5uXlxQIDA3X++L7//nu2ZMkS9ueff7Ljx4+zdevWsWbNmrGQkBCdnabkjyY4OJh98MEH7ODBgywmJobFxcWxwYMHMwDs1KlT2p/CwkLGmO6Olp2drc02b9487bIlBVpW6U6cOJEBYG+//Tbbt28fW7VqFfP09GSBgYE6f9idOnVi7u7urG7dumzVqlXs4MGDbMqUKQwAW7duXYW/A7Vazbp06cKkUilbvHgxO3DgAIuMjGS1atUqVbo3btzQFuj69evZgQMH2MyZM5lQKGRRUVHa5ZYsWcJEIhGLjIxkhw8fZvv27WPLly/XWaYsJaUbHB
zMxowZo33NDg4OrEuXLqxHjx5s1qxZ7MCBA2zp0qVMJBKxadOm6byWsLAwJhaL2fz589mBAwfYF198wezt7VmLFi20v5eS301AQABr2LAhW79+Pdu/fz8bMmQIA8COHz/OGGMsNTWVffrppwwA++6777S/s9TU1Ofaf0JCQtiCBQvYwYMH2VdffcWkUikbO3asdrlTp04xW1tb1qdPH+22bty4wRjTlJGdnR376quv2NGjR9muXbvYZ599xr799tty38/K8pc8Nnz4cLZ79262fv16VqtWLebs7Mzu3LnDGGOV7uMzZsxg33//Pdu3bx87cuQI+/rrr5mHh4fO6yp5v6tSunK5nDVv3pzVqlVL+7e6YMECJhQK2YEDB8p9rYZg0aV7+vRpplAoWG5uLtu3bx/z8fFhHTt2ZAqFQrts/fr1WYsWLXTuY4yx8PBw5uvrqz2iK1nna6+9prPcP//8wwCwRYsWlZlFrVYzhULB4uPjGQC2Y8cO7WMlfzQLFiwo9byKxrue3dEqGtN9tnRv3brFALApU6boLHfmzBkGgM2dO1d7X6dOnRgAdubMGZ1lGzZsyMLCwsrMVmLv3r0MAFuxYoXO/YsXLy5VumFhYSwgIIBlZ2frLPv2228zmUymHYMPDw9nzZs3r3C7ZSkp3X79+uncP336dAaAvfPOOzr3DxgwgLm5uWlv79u3jwFgy5Yt01lu8+bNDAD78ccftfcFBwczmUzG4uPjtffJ5XLm5ubGJk2apL2vqmOiVdl/ns01ZcoUJpPJdD6bKG9Mt3HjxmzAgAEVZihLefkzMzO1Bf9fCQkJTCqVshEjRmjvq+qYrkqlYgqFgq1fv56JRCKdz2SqWrqMMXb37l3m5OTEBgwYwA4dOsSEQiGbN29e5S9Wzyxy9kKJtm3bQiKRwNHREb169YKrqyt27NgBsVgzlB0XF4fbt29j5MiRAAClUqn96dOnD5KSkhAbG6uzzpJlS7Rv3x7BwcE4evSo9r7U1FS89dZbCAwMhFgshkQiQXBwMADg1q1bpXIOGjRIr6+7IiU5/zv7AQBat26NBg0a4PDhwzr3+/j4oHXr1jr3NW3aFPHx8VXazrPv14gRI3RuFxYW4vDhw3jttddgZ2dX6ndQWFiI06dPazNeuXIFU6ZMwf79+5GTk1O1F/0/4eHhOrcbNGgAAOjbt2+p+zMyMpCXlwcAOHLkCIDS79mQIUNgb29f6j1r3rw5goKCtLdlMhnq1atX6XtWorr7T//+/XVuN23aFIWFhUhNTa10W61bt8bevXsxZ84cHDt2DHK5vEoZy3Pq1CnI5fJS71VgYCC6du1a6r0qz6VLl9C/f3+4u7tDJBJBIpHgjTfegEqlwp07d54rW506dfDTTz8hJiYG4eHhCA0N5TJ7wqJLd/369Th37hyOHDmCSZMm4datWxg+fLj28ZSUFADArFmzIJFIdH6mTJkCAHj69KnOOn18fEptx8fHB+np6QAAtVqNnj17Yvv27Zg9ezYOHz6Ms2fPaoujrJ3a19dXPy+4CkpylrVNPz8/7eMl3N3dSy0nlUor/eNMT0+HWCwu9fxn37/09HQolUp8++23pX4Hffr0AfDv7+DDDz/EF198gdOnT6N3795wd3dHt27dcP78+UpetYabm5vObRsbmwrvLyws1Hktnp6eOssJBAKd332J533PgOfbf57dnlQqLXfZZ33zzTf44IMPEBMTgy5dusDNzQ0DBgyocGplRaq7f5UlISEBoaGhePz4MVasWIG///4b586dw3fffQegaq+rPH379oW3tzcKCwvx3nvvQSQSPfe6npdFz15o0KCBdvZCly5doFKpsHr1avz+++8YPHgwPDw8AGj+mAcOHFjmOkJCQnRuJycnl1omOTkZderUAQBcv34dV65cwdq1azF69GjtMnFxceXmNOY82pI/0KSkpFKzGp48eaJ9T/SxHaVSifT0dJ1SePb9c3V1hUgkQkREBKZOnVrmum
rWrAkAEIvFeO+99/Dee+8hKysLhw4dwty5cxEWFobExMRKZ3286GtJS0vTKV7GGJKTk/Hyyy/rbVvPs/+8CHt7eyx
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 13\n",
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATEAAAEQCAYAAADYlUP7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4m0lEQVR4nO3dd3hT5d8G8Ds7TffegzIKFArIlF0EyigCArKnKArCDxERBxQURBQVBGUoMgR8EQcCMoqAKLL3HmUVaUtL926TPO8fsdHQlbRpn5zk+7muXNr05OROcnpzVs4jYowxEEKIQIl5ByCEkOqgEiOECBqVGCFE0KjECCGCRiVGCBE0KjFCiKBRiRFCBI1KjBAiaFRihBBB415iFy9ehEqlwvLly3lHIYQIkFlKbP369RCJRPqbVCqFr68vhg0bhlu3bpX7uOzsbAwePBhTp07F1KlTzRGlynbv3o158+aV+buQkBCMGzdO/3NCQgLmzZuH8+fPl5p23rx5EIlENROyikQiUbmvzVZcvXoV8+bNw71792rl+T744ANs377dbPMzR/6KlnFBY2awbt06BoCtW7eOHTt2jB06dIgtWLCA2dnZMS8vL5aWllbm44YMGcJGjhzJtFqtOWJUy5QpU1h5b8fZs2dZXFyc/udTp07pX++THjx4wI4dO1ZTMasEAIuJieEdg6tt27YxAOzQoUO18nz29vZs7NixZpufOfJXtIwLmdSchdikSRO0atUKANC1a1doNBrExMRg+/btGD9+fKnpv//+e3M+fZXk5eVBpVJVOE2LFi2Mnl9AQAACAgKqG4sQYixzNGHJmtipU6cM7v/1118ZALZo0SKD+0+dOsX69evHXF1dmUKhYM2bN2dbt24tc56xsbFs3LhxzNXVlalUKhYdHc1u375tMG1sbCx79tlnmb+/P1MoFKxu3brspZdeYikpKQbTxcTEMADszJkzbNCgQczFxYX5+PiwsWPHMgClbnfv3mWMMRYcHKz/V/XQoUNlTluyplPyHP+l0WjY4sWLWVhYGJPL5czT05ONHj2aPXjwwGC6Ll26sPDwcHby5EnWsWNHZmdnx+rUqcMWLVrENBpNpZ9DZmYmmzhxInNzc2P29vYsKiqK3bhxo8w1sZs3b7Lhw4czT09PJpfLWcOGDdmKFStK5X7//fdZgwYNmFKpZM7Ozqxp06Zs6dKlFeYoeY82b97MZs2axXx8fJi9vT2Ljo5mSUlJLCsri7344ovM3d2dubu7s3HjxrHs7GyDeeTn57PZs2ezkJAQJpPJmJ+fH5s8eTJLT083mC44OJj17duX7dmzh7Vo0YIplUoWFhbG1q5dq5+mZFl68layJm3q8nP58mU2bNgw5uTkxLy8vNj48eNZRkaGfrqynqtLly6MMcZyc3PZ66+/zkJCQphCoWCurq6sZcuWbMuWLeW+n5XlZ4yxtWvXsoiICP08BwwYwK5evar/fWXL+IoVK1inTp2Yp6cnU6lUrEmTJmzx4sWsqKio1Ptd1hpmly5d9K+RMcYmTZrEFAoFO336tP4+jUbDunXrxry8vFhCQkK5r9dUNVpiK1asYADYjz/+qL/v4MGDTC6Xs06dOrGtW7eyvXv3snHjxpX6UErmGRgYyCZMmMD27NnD1qxZw7y8vFhgYKDBwrxy5Uq2aNEitmPHDnb48GG2YcMG1qxZMxYWFmbwIZQshMHBwezNN99k+/fvZ9u3b2dxcXFs8ODBDAA7duyY/lZQUMAYM/zgMjMz9dneffdd/bQlhVRWib300ksMAHv11VfZ3r172apVq5inpycLDAw0+EPp0qULc3d3Z/Xr12erVq1i+/fvZ5MnT2YA2IYNGyr8DLRaLYuMjGQKhYItXLiQxcbGspiYGBYaGlqqxK5cuaIvpI0bN7LY2Fj2+uuvM7FYzObNm6efbtGiRUwikbCYmBh24MABtnfvXrZ06VKDacpSUmLBwcFs3Lhx+tfs4O
DAIiMjWY8ePdjMmTNZbGwsW7x4MZNIJGzq1KkGryUqKopJpVI2Z84cFhsby5YsWcLs7e1ZixYt9J9LyWcTEBDAGjduzDZu3Mj27dvHhgwZwgCww4cPM8YYS05OZh988AEDwL744gv9Z5acnFyl5ScsLIzNnTuX7d+/n3366adMoVCw8ePH66c7duwYs7OzY3369NE/15UrVxhjuj9ulUrFPv30U3bo0CG2a9cu9uGHH7Lly5eX+35Wlr/kd8OHD2e//vor27hxIwsNDWXOzs7s5s2bjDFW6TL+2muvsZUrV7K9e/eygwcPss8++4x5eHgYvK6S99uYEsvPz2fNmzdnoaGh+r/VuXPnMrFYzGJjY8t9rVVh1hI7fvw4Ky4uZtnZ2Wzv3r3Mx8eHde7cmRUXF+unbdiwIWvRooXBfYwxFh0dzXx9ffVrHCXzHDhwoMF0f/31FwPAFixYUGYWrVbLiouL2f379xkA9ssvv+h/V7IQzp07t9TjKtpf8OQHV9E+sSdL7Nq1awwAmzx5ssF0J06cYADY22+/rb+vS5cuDAA7ceKEwbSNGzdmUVFRZWYrsWfPHgaALVu2zOD+hQsXliqxqKgoFhAQwDIzMw2mffXVV5lSqdTvw4yOjmbNmzev8HnLUlJi/fr1M7h/+vTpDACbNm2awf0DBgxgbm5u+p/37t3LALCPPvrIYLqtW7cyAGzNmjX6+4KDg5lSqWT379/X35efn8/c3NzYpEmT9PcZu0/JmOXnyVyTJ09mSqXSYN9uefvEmjRpwgYMGFBhhrKUlz89PV1fmP8VHx/PFAoFGzFihP4+Y/eJaTQaVlxczDZu3MgkEonBPm1jS4wxxm7dusWcnJzYgAED2G+//cbEYjF79913K3+xJjLrKRbt2rWDTCaDo6MjevXqBVdXV/zyyy+QSnW73uLi4nD9+nWMHDkSAKBWq/W3Pn36IDExETdu3DCYZ8m0Jdq3b4/g4GAcOnRIf19ycjJefvllBAYGQiqVQiaTITg4GABw7dq1UjkHDRpkzpddoZKc/z26CQBt2rRBo0aNcODAAYP7fXx80KZNG4P7IiIicP/+faOe58n3a8SIEQY/FxQU4MCBAxg4cCBUKlWpz6CgoADHjx/XZ7xw4QImT56Mffv2ISsry7gX/Y/o6GiDnxs1agQA6Nu3b6n709LSkJOTAwA4ePAggNLv2ZAhQ2Bvb1/qPWvevDmCgoL0PyuVSjRo0KDS96yEqcvPs88+a/BzREQECgoKkJycXOlztWnTBnv27MHs2bPx+++/Iz8/36iM5Tl27Bjy8/NLvVeBgYHo1q1bqfeqPOfOncOzzz4Ld3d3SCQSyGQyjBkzBhqNBjdv3qxStnr16uGrr77C9u3bER0djU6dOtXI0VGzltjGjRtx6tQpHDx4EJMmTcK1a9cwfPhw/e8fPXoEAJg5cyZkMpnBbfLkyQCAx48fG8zTx8en1PP4+PggNTUVAKDVatGzZ0/89NNPmDVrFg4cOICTJ0/q/xDLWkh8fX3N84KNUJKzrOf08/PT/76Eu7t7qekUCkWlC3tqaiqkUmmpxz/5/qWmpkKtVmP58uWlPoM+ffoA+PczeOutt7BkyRIcP34cvXv3hru7O5555hmcPn26klet4+bmZvCzXC6v8P6CggKD1+Lp6WkwnUgkMvjsS1T1PQOqtvw8+XwKhaLcaZ/0+eef480338T27dsRGRkJNzc3DBgwoMJTkSpi6vJVlvj4eHTq1AkPHz7EsmXL8Oeff+LUqVP44osvABj3usrTt29feHt7o6CgADNmzIBEIqnyvMpj1qOTjRo10h+djIyMhEajwddff40ffvgBgwcPhoeHBwDdH8dzzz1X5jzCwsIMfk5KSio1TVJSEurVqwcAuHz5Mi5cuID169dj7Nix+mni4uLKzVmb53GVLPCJiYmljlomJCTo3xNzPI9arUZqaqrBH9mT75+rqyskEglGjx6NKVOmlDmvOnXqAACkUi
lmzJiBGTNmICMjA7/99hvefvttREVF4cGDB5Ue1a3ua0lJSTEoMsYYkpKS0Lp1a7M9V1WWn+qwt7fH/PnzMX/+fDx
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"outlier for tenant 14\n",
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_436/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"/tmp/ipykernel_436/3170175140.py:10: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEQCAYAAAD7zhIuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA2uklEQVR4nO3dd3hTdf/G8XeSpk1boLu0QCmUUfbQAgIie5ehgICAICoiKI84EJX5KK6fIgI+CIoiiIgDUUT2EmUVZClb2XvTls7k+/sjNhDa0pX2JOnndV29oCcnJ3dOT++elXN0SimFEEJoTK91ACGEACkjIYSTkDISQjgFKSMhhFOQMhJCOAUpIyGEU5AyEkI4BSkjIYRTkDISQjiFIiujPXv24OPjw7Rp04rqJYUQLiRPZTRnzhx0Op3ty8PDg/DwcPr06cPhw4ezfV58fDw9e/bk2Wef5dlnny1w6IL45ZdfmDBhQpaPVahQgUGDBtm+P3PmDBMmTGDXrl2Zxp0wYQI6na5wQuaTTqfL9r0VF/v27WPChAkcO3asSF7vzTffZPHixQ6bniPy320Zd2oqDz7//HMFqM8//1xt3rxZrVu3Tr3xxhvK29tbhYaGqitXrmT5vF69eql+/fopi8WSl5crFMOHD1fZve0//vhDHTlyxPZ9XFyc7f3e6eTJk2rz5s2FFTNfADV+/HitY2jq22+/VYBat25dkbyer6+vGjhwoMOm54j8d1vGnZlHfgqsVq1axMTEANCiRQvMZjPjx49n8eLFPPbYY5nG/+abb/JZlY5z8+ZNfHx87jpO/fr1cz29cuXKUa5cuYLGEkJkyEtzZawZxcXF2Q1funSpAtRbb71lNzwuLk516dJFBQQEKC8vL1WvXj21cOHCLKe5cuVKNWjQIBUQEKB8fHxUbGys+vvvv+3GXblyperatasqW7as8vLyUpUqVVJDhgxRFy9etBtv/PjxClA7duxQPXr0UP7+/iosLEwNHDhQAZm+jh49qpRSKjIy0vZXbt26dVmOm7HmkfEatzObzeqdd95R0dHRytPTU4WEhKgBAwaokydP2o3XvHlzVbNmTbVt2zZ1//33K29vb1WxYkX11ltvKbPZnOPP4fr16+qJJ55QgYGBytfXV7Vv314dPHgwyzWjQ4cOqb59+6qQkBDl6empqlWrpqZPn54p9+uvv66qVq2qTCaT8vPzU7Vr11ZTpky5a46MeTR//nw1atQoFRYWpnx9fVVsbKw6d+6cunHjhnryySdVUFCQCgoKUoMGDVLx8fF200hKSlKjR49WFSpUUEajUZUpU0YNGzZMXb161W68yMhI1blzZ7Vs2TJVv359ZTKZVHR0tJo9e7ZtnIxl6c6vjDXbvC4/f/75p+rTp48qVaqUCg0NVY899pi6du2abbysXqt58+ZKKaUSExPVCy+8oCpUqKC8vLxUQECAuvfee9VXX32V7fzMKb9SSs2ePVvVqVPHNs3u3burffv22R7PaRmfPn26atasmQoJCVE+Pj6qVq1a6p133lGpqamZ5ndWa3zNmze3vUellHrqqaeUl5eX2r59u22Y2WxWrVq1UqGhoerMmTPZvt87OaSMpk+frgD1/fff24atXbtWeXp6qmbNmqmFCxeq5cuXq0GDBmWauRnTjIiIUIMHD1bLli1Ts2bNUqGhoSoiIsJuoZwxY4Z666231E8//aQ2bNigvvjiC1W3bl0VHR1tNzMzFqbIyEj18ssvq1WrVqnFixerI0eOqJ49eypAbd682faVnJyslLL/AVy/ft2WbcyYMbZxM4olqzIaMmSIAtQzzzyjli9frj7++GMVEhKiIiIi7Bb45s2bq6CgIFWlShX18ccfq1WrVqlhw4YpQH3xxRd3/RlYLBbVsmVL5eXlpSZNmqRWrlypxo8fr6KiojKV0V9//WUrlrlz56qVK1eqF154Qen1ejVhwgTbeG+99ZYyGAxq/Pjxas2aNWr58u
VqypQpduNkJaOMIiMj1aBBg2zvuUSJEqply5aqbdu26sUXX1QrV65U77zzjjIYDOrZZ5+1ey/t27dXHh4eauzYsWrlypXqvffeU76+vqp+/fq2n0vGz6ZcuXKqRo0aau7cuWrFihWqV69eClAbNmxQSil14cIF9eabbypAffTRR7af2YULF/K1/ERHR6tx48apVatWqcmTJysvLy/12GOP2cbbvHmz8vb2Vp06dbK91l9//aWUsv6S+vj4qMmTJ6t169apn3/+Wb399ttq2rRp2c7PnPJnPNa3b1+1dOlSNXfuXBUVFaX8/PzUoUOHlFIqx2V85MiRasaMGWr58uVq7dq16oMPPlDBwcF27ytjfuemjJKSklS9evVUVFSU7Xd13LhxSq/Xq5UrV2b7XrOSrzLasmWLSktLU/Hx8Wr58uUqLCxMPfDAAyotLc02brVq1VT9+vXthimlVGxsrAoPD7etAWRM88EHH7Qb7/fff1eAeuONN7LMYrFYVFpamjp+/LgC1I8//mh7LGNhGjduXKbn3W17+s4fwN32Gd1ZRvv371eAGjZsmN14W7duVYB69dVXbcOaN2+uALV161a7cWvUqKHat2+fZbYMy5YtU4D68MMP7YZPmjQpUxm1b99elStXTl2/ft1u3GeeeUaZTCbbPr7Y2FhVr169u75uVjLKqEuXLnbDn3vuOQWoESNG2A3v3r27CgwMtH2/fPlyBah3333XbryFCxcqQM2aNcs2LDIyUplMJnX8+HHbsKSkJBUYGKieeuop27Dc7nPJzfJzZ65hw4Ypk8lkt+8zu31GtWrVUt27d79rhqxkl//q1au24rvdiRMnlJeXl3rkkUdsw3K7z8hsNqu0tDQ1d+5cZTAY7Pb55raMlFLq8OHDqlSpUqp79+5q9erVSq/XqzFjxuT8Zu+Qr0P79913H0ajkZIlS9KhQwcCAgL48ccf8fCw7oI6cuQIBw4coF+/fgCkp6fbvjp16sTZs2c5ePCg3TQzxs3QpEkTIiMjWbdunW3YhQsXGDp0KBEREXh4eGA0GomMjARg//79mXL26NEjP28vXzJy3n40DqBhw4ZUr16dNWvW2A0PCwujYcOGdsPq1KnD8ePHc/U6d86vRx55xO775ORk1qxZw4MPPoiPj0+mn0FycjJbtmyxZdy9ezfDhg1jxYoV3LhxI3dv+l+xsbF231evXh2Azp07Zxp+5coVEhISAFi7di2QeZ716tULX1/fTPOsXr16lC9f3va9yWSiatWqOc6zDHldfrp27Wr3fZ06dUhOTubChQs5vlbDhg1ZtmwZo0ePZv369SQlJeUqY3Y2b95MUlJSpnkVERFBq1atMs2r7OzcuZOuXbsSFBSEwWDAaDTy6KOPYjabOXToUL6yVa5cmU8++YTFixcTGxtLs2bN8nU0L19lNHfuXOLi4li7di1PPfUU+/fvp2/fvrbHz58/D8CLL76I0Wi0+xo2bBgAly5dsptmWFhYptcJCwvj8uXLAFgsFtq1a8eiRYsYNWoUa9asYdu2bbZfqKx+2OHh4fl5e/mSkTOr1yxTpozt8QxBQUGZxvPy8spxob18+TIeHh6Znn/n/Lt8+TLp6elMmzYt08+gU6dOwK2fwSuvvMJ7773Hli1b6NixI0FBQbRu3Zrt27fn8K6tAgMD7b739PS86/Dk5GS79xISEmI3nk6ns/vZZ8jvPIP8LT93vp6Xl1e2495p6tSpvPzyyyxevJiWLVsSGBhI9+7d73oKzN3kdfnKyokTJ2jWrBmnT5/mww8/ZOPGjcTFxfHRRx8BuXtf2encuTOlS5cmOTmZ559/HoPBkOdp5OtoWvXq1W1H01q2bInZbObTTz/lu+++o2fPngQHBwPWhfyhhx7KchrR0dF23587dy7TOOfOnaNy5coA/Pnnn+zevZs5c+YwcOBA2zhHjhzJNmdRngeUseCePXs201G2M2fO2OaJI14nPT2dy5cv2/2y3Dn/AgICMB
gMDBgwgOHDh2c5rYoVKwLg4eHB888/z/PPP8+1a9dYvXo1r776Ku3bt+fkyZM5HoUs6Hu5ePGiXSEppTh37hwNGjR
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# boucle pour identifier les outliers de chaque compagnie (et le client principal non anonyme)\n",
"\n",
"# nb_compagnie=['10','11','12','13','14']\n",
"for company_number in nb_compagnie :\n",
" print(f\"outlier for tenant {company_number}\")\n",
" outlier_detection(directory_path=company_number, coupure = 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbe1af6a-79e9-45c7-a810-c6df3bf647f7",
"metadata": {},
"outputs": [],
"source": [
"# print(products_purchased_reduced_spectacle.loc[products_purchased_reduced_spectacle[\"number_compagny\"]==10][\"total_amount\"].describe())\n",
"\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==10) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==19521)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20e2b8a2-f31c-42a4-8ea5-7ad67ab66915",
"metadata": {},
"outputs": [],
"source": [
"# company 11 \n",
"# étrange : pas de vente sur internet et un seul supplier ; plus de 9 000 achats\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==11) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==36)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5dbce57c-d091-4ce2-92f9-1201deb2462e",
"metadata": {},
"outputs": [],
"source": [
"# company 12\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==12) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==1706757)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a243b57-19da-4e29-a53d-bb8d03e2ab77",
"metadata": {},
"outputs": [],
"source": [
"# company 13\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==13) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==8422)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d9b01bc-9584-4882-bd06-7de8acb8a88f",
"metadata": {},
"outputs": [],
"source": [
"# company 14\n",
"# a-t-on vraiment un outlier ? Ce client a acheté près de 3 000 tickets en 96 achats\n",
"products_purchased_reduced_spectacle.loc[(products_purchased_reduced_spectacle[\"number_compagny\"]==14) & \n",
"(products_purchased_reduced_spectacle[\"customer_id\"]==6354)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "033c1e00-52bd-4651-b893-57bda531760e",
"metadata": {},
"outputs": [],
"source": [
"# vérifications dans la table customerplus (outlier incertain pour les compagnies 11 et 14)\n",
"\n",
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==36) &\n",
"(customerplus_clean_spectacle[\"number_compagny\"]==11)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "28ac8cda-32fa-4fb7-a75b-e1cc24871c39",
"metadata": {},
"outputs": [],
"source": [
2024-03-08 08:44:28 +01:00
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==6354) &\n",
"(customerplus_clean_spectacle[\"number_compagny\"]==14)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3faea297-2cc5-4704-af85-77d95f600cc1",
"metadata": {},
"outputs": [],
"source": [
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==8422) &\n",
"(customerplus_clean_spectacle[\"number_compagny\"]==13)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b165ea79-347b-46fb-8217-635d9e888c65",
"metadata": {},
"outputs": [],
"source": [
"customerplus_clean_spectacle.loc[(customerplus_clean_spectacle[\"customer_id\"]==19521) &\n",
"(customerplus_clean_spectacle[\"number_compagny\"]==10)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "282b0a96-5e78-48aa-9c2c-7d00d3907add",
"metadata": {},
"outputs": [],
"source": [
2024-03-11 18:43:56 +01:00
"customerplus_clean_spectacle.columns"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "ad47a812-a744-49c5-8079-0919b49ef24c",
"metadata": {},
"outputs": [],
"source": [
"# on enlève les outliers des tables\n",
"\n",
"outliers_musique_dico = {10 : 19521, 11 : 36, 12 : 1706757, 13 : 8422}\n",
"\n",
"# outlier_music_list = list(outliers_musique_dico.values())\n"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "9717dfd5-c39c-41eb-858d-5baf3ab71554",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 19521\n",
"11 36\n",
"12 1706757\n",
"13 8422\n"
]
}
],
"source": [
"for tenant_number, customer_id in outliers_musique_dico.items() :\n",
"\n",
" print(tenant_number, customer_id)\n",
" \n",
" customerplus_clean_spectacle = customerplus_clean_spectacle[(customerplus_clean_spectacle['number_compagny']!= tenant_number) |\n",
" (customerplus_clean_spectacle['customer_id']!= customer_id) ]\n",
"\n",
" campaigns_information_spectacle = campaigns_information_spectacle[(campaigns_information_spectacle['number_compagny']!= tenant_number) |\n",
" (campaigns_information_spectacle['customer_id']!= customer_id) ]\n",
"\n",
" products_purchased_reduced_spectacle = products_purchased_reduced_spectacle[(products_purchased_reduced_spectacle['number_compagny']!= tenant_number) |\n",
" (products_purchased_reduced_spectacle['customer_id']!= customer_id) ]\n",
"\n",
" target_information_spectacle = target_information_spectacle[(target_information_spectacle['number_compagny']!= tenant_number) |\n",
" (target_information_spectacle['customer_id']!= customer_id) ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb7f4c95-817b-4145-9319-11d2f62b24d9",
"metadata": {},
"outputs": [],
"source": [
"# on vérifie que les outliers ne sont pas dans le train set (suppose que customer_id y est au format compagnie_id, ex. 10_19521 : à confirmer)"
]
},
{
"cell_type": "code",
"execution_count": 147,
"id": "b50e1de8-28fe-42bd-bd81-dde7e36b64fb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['10_19521', '11_36', '12_1706757', '13_8422']"
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outliers_train_set_musique = [str(tenant_id) + \"_\" + str(customer_id) for tenant_id, customer_id in outliers_musique_dico.items()]\n",
"outliers_train_set_musique"
]
},
{
"cell_type": "code",
"execution_count": 161,
"id": "1753d45d-beac-48a4-9bc4-f84925320a89",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [customer_id, nb_tickets, nb_purchases, total_amount, nb_suppliers, vente_internet_max, purchase_date_min, purchase_date_max, time_between_purchase, nb_tickets_internet, street_id, structure_id, mcp_contact_id, fidelity, tenant_id, is_partner, deleted_at, gender, is_email_true, opt_in, last_buying_date, max_price, ticket_sum, average_price, average_purchase_delay, average_price_basket, average_ticket_basket, total_price, purchase_count, first_buying_date, country, gender_label, gender_female, gender_male, gender_other, country_fr, nb_campaigns, nb_campaigns_opened, time_to_open, y_has_purchased, number_company]\n",
"Index: []\n",
"\n",
"[0 rows x 41 columns]"
]
},
"execution_count": 161,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set_spectacle[train_set_spectacle[\"customer_id\"].isin(outliers_train_set_musique)] # OK"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "markdown",
"id": "42f8171c-e80d-4faa-b278-21fcbe3b242c",
"metadata": {},
"source": [
"### 1. customerplus_clean"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 66,
2024-03-03 09:32:45 +01:00
"id": "47f98721-53dd-4f8f-85ac-88043ee8d967",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
2024-03-11 18:43:56 +01:00
" <th>total_price</th>\n",
2024-03-03 09:32:45 +01:00
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.0</td>\n",
2024-03-03 09:32:45 +01:00
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.0</td>\n",
2024-03-03 09:32:45 +01:00
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>NaN</td>\n",
2024-03-03 09:32:45 +01:00
" <td>14</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>NaN</td>\n",
2024-03-03 09:32:45 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>NaN</td>\n",
2024-03-03 09:32:45 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>NaN</td>\n",
2024-03-03 09:32:45 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>NaN</td>\n",
2024-03-03 09:32:45 +01:00
" <td>8</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>18441</td>\n",
" <td>11139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>NaN</td>\n",
2024-03-03 09:32:45 +01:00
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9231</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>NaN</td>\n",
2024-03-03 09:32:45 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9870</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>NaN</td>\n",
2024-03-03 09:32:45 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-03-11 18:43:56 +01:00
"<p>10 rows × 28 columns</p>\n",
2024-03-03 09:32:45 +01:00
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity tenant_id \\\n",
"0 821538 139 NaN NaN 0 875 \n",
"1 809126 1063 NaN NaN 0 875 \n",
"2 11005 1063 NaN NaN 0 875 \n",
"3 17663 12731 NaN NaN 0 875 \n",
"4 38100 12395 NaN NaN 0 875 \n",
"5 307036 139 NaN NaN 0 875 \n",
"6 2946 1063 NaN NaN 0 875 \n",
"7 18441 11139 NaN NaN 0 875 \n",
"8 9231 139 NaN NaN 0 875 \n",
"9 9870 139 NaN NaN 0 875 \n",
"\n",
2024-03-11 18:43:56 +01:00
" is_partner deleted_at gender is_email_true ... total_price \\\n",
"0 False NaN 2 True ... 0.0 \n",
"1 False NaN 2 True ... 0.0 \n",
"2 False NaN 2 False ... NaN \n",
"3 False NaN 0 False ... NaN \n",
"4 False NaN 0 True ... NaN \n",
"5 False NaN 2 True ... NaN \n",
"6 False NaN 2 False ... NaN \n",
"7 False NaN 2 False ... NaN \n",
"8 False NaN 0 True ... NaN \n",
"9 False NaN 2 True ... NaN \n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-11 18:43:56 +01:00
" purchase_count first_buying_date country gender_label gender_female \\\n",
"0 0 NaN NaN other 0 \n",
"1 0 NaN fr other 0 \n",
"2 14 NaN fr other 0 \n",
"3 1 NaN fr female 1 \n",
"4 1 NaN fr female 1 \n",
"5 1 NaN NaN other 0 \n",
"6 8 NaN fr other 0 \n",
"7 3 NaN fr other 0 \n",
"8 1 NaN NaN female 1 \n",
"9 1 NaN NaN other 0 \n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-11 18:43:56 +01:00
" gender_male gender_other country_fr number_compagny \n",
"0 0 1 NaN 10 \n",
"1 0 1 1.0 10 \n",
"2 0 1 1.0 10 \n",
"3 0 0 1.0 10 \n",
"4 0 0 1.0 10 \n",
"5 0 1 NaN 10 \n",
"6 0 1 1.0 10 \n",
"7 0 1 1.0 10 \n",
"8 0 0 NaN 10 \n",
"9 0 1 NaN 10 \n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-11 18:43:56 +01:00
"[10 rows x 28 columns]"
2024-03-03 09:32:45 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 66,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# aperçu des 10 premières lignes de la table\n",
"customerplus_clean_spectacle.head(10)"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 67,
2024-03-03 09:32:45 +01:00
"id": "738e063b-f84e-4a00-b35d-6d1d657e3c09",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"Nombre de lignes de la table : 1523684\n"
2024-03-03 09:32:45 +01:00
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"street_id 0\n",
2024-03-11 18:43:56 +01:00
"structure_id 1460622\n",
"mcp_contact_id 729163\n",
2024-03-03 09:32:45 +01:00
"fidelity 0\n",
"tenant_id 0\n",
"is_partner 0\n",
2024-03-11 18:43:56 +01:00
"deleted_at 1523684\n",
2024-03-03 09:32:45 +01:00
"gender 0\n",
"is_email_true 0\n",
"opt_in 0\n",
"last_buying_date 762879\n",
"max_price 762879\n",
"ticket_sum 0\n",
"average_price 667328\n",
"average_purchase_delay 762915\n",
"average_price_basket 762915\n",
"average_ticket_basket 762915\n",
"total_price 95551\n",
"purchase_count 0\n",
"first_buying_date 762879\n",
2024-03-11 18:43:56 +01:00
"country 429485\n",
2024-03-03 09:32:45 +01:00
"gender_label 0\n",
"gender_female 0\n",
"gender_male 0\n",
"gender_other 0\n",
2024-03-11 18:43:56 +01:00
"country_fr 429485\n",
2024-03-03 09:32:45 +01:00
"number_compagny 0\n",
"dtype: int64"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 67,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de valeurs manquantes (NaN) par colonne\n",
"print(\"Nombre de lignes de la table : \",customerplus_clean_spectacle.shape[0])\n",
"customerplus_clean_spectacle.isna().sum()"
]
},
2024-03-11 18:43:56 +01:00
{
"cell_type": "markdown",
"id": "b44054b3-d850-4bc9-bc73-feb9979908bc",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"#### Nombre de clients de la compagnie"
]
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 70,
"id": "884a33d0-c275-4ab4-ab1f-8b53e563fb95",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
2024-03-11 18:43:56 +01:00
"name": "stdout",
"output_type": "stream",
"text": [
" number_compagny already_purchased customer_id\n",
"0 10 True 45263\n",
"1 11 True 35312\n",
"2 12 True 216104\n",
"3 13 True 388730\n",
"4 14 True 101642\n",
" number_compagny already_purchased customer_id\n",
"0 10 False 53530\n",
"1 11 False 35994\n",
"2 12 False 26620\n",
"3 13 False 379005\n",
"4 14 False 241484\n"
]
2024-03-03 09:32:45 +01:00
}
],
"source": [
2024-03-11 18:43:56 +01:00
"# nouveau barplot pour les clients : on regarde la taille totale de la base et on distingue les clients ayant déjà acheté de ceux n'ayant jamais acheté\n",
"\n",
"# indicatrice : le client a déjà effectué au moins un achat (purchase_count > 0)\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"]>0\n",
"\n",
"nb_customers_purchasing_spectacle = customerplus_clean_spectacle[customerplus_clean_spectacle[\"already_purchased\"]].groupby([\"number_compagny\",\"already_purchased\"])[\"customer_id\"].count().reset_index()\n",
"nb_customers_no_purchase_spectacle = customerplus_clean_spectacle[~customerplus_clean_spectacle[\"already_purchased\"]].groupby([\"number_compagny\",\"already_purchased\"])[\"customer_id\"].count().reset_index()\n",
"\n",
"print(nb_customers_purchasing_spectacle)\n",
"print(nb_customers_no_purchase_spectacle)"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 72,
"id": "41c9fb5a-708b-4f85-9918-00337151f155",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-11 18:43:56 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkoAAAHGCAYAAACLuaSnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABtyklEQVR4nO3deVxO6f8/8NdN+75QdxGFMqgIw9CMQsnOYCzNWAZjZyKDxqAsmRj7OmaQYSxjLJ8ZY98aZElkCWOrMFNClJLW6/eHX+frru7qzp07eT0fj/vxcK5znXO9z7mX3q5znevIhBACRERERFRAJU0HQERERFReMVEiIiIiUoKJEhEREZESTJSIiIiIlGCiRERERKQEEyUiIiIiJZgoERERESnBRImIiIhICSZKREREREowUSKiN/Ltt9/C2toad+7c0XQoRERqVyETpdDQUMhkMujp6SEuLq7Aek9PTzg7O2sgMmDQoEEwMjLSSNvFkclkCAwMfKttenp6wtPT863HsXfvXrW3YW9vj0GDBhVb7/jx45DJZDh+/Lha288vODgYu3fvLnH7y5YtQ506daCjowOZTIZnz55h0KBBsLe3V9rGvn37sHz5cuzZswe1a9dW7wG8A97We0lvrqTfz7dBE7+1mvDff/8hMDAQUVFRZdpObGwsZDIZQkNDy2T/FTJRypORkYHvvvtO02GQik6fPo2hQ4eWaRt79+5FUFCQWve5a9cuTJs2Ta37fBPKEqXGjRvj9OnTaNy4sVQWFRWFcePGoXXr1jh69ChOnz4NY2NjTJs2Dbt27Sp0//fv38eXX36Jbdu24cMPPyyrwyBSi/L2/Xwf/PfffwgKCirzRKmsaWk6gLLUvn17bN68GRMnTkTDhg01Hc4bE0Lg5cuX0NfX13QoZeqjjz7SdAil4ubmpukQSsTExKTAOY6OjgYAfPXVV2jWrJlUXlQvkZ2dHRISEsomyHLkxYsXMDAw0HQY9Ibele8nlT8Vukdp0qRJsLS0xOTJk4ut+/LlSwQEBMDBwQE6OjqoVq0aRo8ejWfPninUs7e3R+fOnbFnzx64ublBX18f9erVw549ewC8uuxXr149GBoaolmzZjh//nyh7UVHR6Nt27YwNDRE1apVMWbMGLx48UKhjkwmw5gxY7B69WrUq1cPurq62LBhAwDg1q1b8PX1hZWVFXR1dVGvXj2sWLGiROclJSUFX331FSwtLWFkZIT27dvj5s2bhdZ9k3Zyc3OxbNkyNGrUCPr6+jAzM8NHH32EP/74o8jtCuuWTkhIwPDhw1G9enXo6OjAwcEBQUFByM7Olurkdb/+8MMPWLhwIRwcHGBkZIQWLVrgzJkzUr1BgwZJxyCTyaRXbGwsAGD79u1o3rw5TE1NYWBggFq1amHw4MHFHm9hXfs3btxA+/btYWBggCpVqmDEiBF4/vx5odsfPnwYbdu2hYmJCQwMDODu7o4jR44o1AkMDIRMJkN0dDT69esHU1NTWFtbY/DgwUhOTlY4h2lpadiwYYN0fHmXOPNfLvL09MQXX3wBAGjevDlkMpl0HIVdehNCYOXKldL7am5ujl69euHu3bsK9S5evIjOnTtLnx1bW1t06tQJDx48KPI85l0aP3HiBD766CPo6+ujWrVqmDZtGnJychTqJiUlYdSoUahWrRp0dHRQq1YtTJ06FRkZGVKdorrl83/W8s7vhQsX0KtXL5ibm5fqkuL58+fRtWtXWFhYQE9PD25ubvjtt98U6rx48QITJ06Eg4MD9PT0YGFhgaZNm2LLli3F7v/ff//FsGHDYGdnBx0dHdja2qJXr154+PChVOfevXv44osvFL67CxYsQG5uboFzM3/+fISEhMDe3h76+vrw9PTEzZs3kZWVhSlTpsDW1hampqb49NNPkZiYqBBL3m/irl274OrqCj09PdSqVQtLly5VqPfy5Uv4+/ujUaNGMDU1hYWFBVq0aIH//e9/BY7v2b
NnGDJkCCwsLGBkZIROnTrh7t27St+v4r4PeXHm/36mpKRI70He776fnx/S0tIU6pX2N+Ft/dYWF1/ed37Tpk2YMGEC5HI59PX14eHhgYsXLxbYX0k+v0DRn8Pjx49LPc1ffvml9DuU9/6dP38effv2lT5z9vb26NevX6HDZUryeVf3OX1dhe5RMjY2xnfffYevv/4aR48eRZs2bQqtJ4RA9+7dceTIEQQEBOCTTz7B5cuXMWPGDJw+fRqnT5+Grq6uVP/SpUsICAjA1KlTYWpqiqCgIPTo0QMBAQE4cuQIgoODIZPJMHnyZHTu3BkxMTEKvUBZWVno2LEjhg8fjilTpiA8PByzZ89GXFwc/vzzT4XYdu/ejRMnTmD69OmQy+WwsrLCtWvX0LJlS9SoUQMLFiyAXC7HgQMHMG7cODx+/BgzZsxQek7yjjU8PBzTp0/Hhx9+iFOnTqFDhw4F6r5JO8CrP7KbNm3CkCFDMHPmTOjo6ODChQtSQlJSCQkJaNasGSpVqoTp06ejdu3aOH36NGbPno3Y2FisX79eof6KFSvwwQcfYPHixQCAadOmoWPHjoiJiYGpqSmmTZuGtLQ0/P777zh9+rS0nY2NDU6fPo0+ffqgT58+CAwMlMa5HT16VKWYAeDhw4fw8PCAtrY2Vq5cCWtra/z6668YM2ZMgbqbNm3CgAED0K1bN2zYsAHa2tr48ccf4ePjgwMHDqBt27YK9Xv27Ik+ffpgyJAhuHLlCgICAgAA69atA/Dq8mWbNm3QunVr6XKDiYlJoXGuXLkSW7ZswezZs7F+/Xp88MEHqFq1qtLjGj58OEJDQzFu3DiEhIQgKSkJM2fORMuWLXHp0iVYW1sjLS0N3t7ecHBwwIoVK2BtbY2EhAQcO3ZMaaL4uoSEBPTt2xdTpkzBzJkz8ddff2H27Nl4+vQpli9fDuDVH97WrVvjzp07CAoKgqurK06cOIG5c+ciKioKf/31V7HtKNOjRw/07dsXI0aMKPBHszjHjh1D+/bt0bx5c6xevRqmpqbYunUr+vTpgxcvXkh/rCdMmICNGzdi9uzZcHNzQ1paGq5evYonT54Uuf9///0XH374IbKysvDtt9/C1dUVT548wYEDB/D06VNYW1vj0aNHaNmyJTIzMzFr1izY29tjz549mDhxIu7cuYOVK1cq7HPFihVwdXXFihUr8OzZM/j7+6NLly5o3rw5tLW1sW7dOsTFxWHixIkYOnRogf/sREVFwc/PD4GBgZDL5fj111/x9ddfIzMzExMnTgTwaihEUlISJk6ciGrVqiEzMxOHDx9Gjx49sH79egwYMADAq/9gdenSBefPn0dgYKB0qbh9+/ZKz0lx34fCvHjxAh4eHnjw4IF0HqOjozF9+nRcuXIFhw8fhkwmK/Vvwtv6rVUlvm+//RaNGzfGzz//jOTkZAQGBsLT0xMXL15ErVq1AJT881vc57Bx48ZYv349vvzyS3z33Xfo1KkTAKB69eoAXiXpdevWRd++fWFhYYH4+HisWrUKH374Ia5du4YqVaqUqB1ra+tCz8ub/v3K/2ZWOOvXrxcAREREhMjIyBC1atUSTZs2Fbm5uUIIITw8PESDBg2k+vv37xcAxLx58xT2s23bNgFArFmzRiqrWbOm0NfXFw8ePJDKoqKiBABhY2Mj0tLSpPLdu3cLAOKPP/6QygYOHCgAiCVLlii0NWfOHAFAnDx5UioDIExNTUVSUpJCXR8fH1G9enWRnJysUD5mzBihp6dXoP7r9u3bV2T7M2bMUEs7f//9twAgpk6dqrSOEK/eCw8PD4Wy/HEMHz5cGBkZibi4OIV6P/zwgwAgoqOjhRBCxMTECADCxcVFZGdnS/XOnTsnAIgtW7ZIZaNHjxaFffzz9vns2bMi4y5MzZo1xcCBA6XlyZMnC5lMJqKiohTqeXt7CwDi2LFjQggh0tLShIWFhejSpYtCvZycHNGwYUPRrFkzqWzGjBmFflZHjRol9P
T0pM+4EEIYGhoqxJPn2LFjCu0Lofided3AgQNFzZo1peXTp08LAGLBggUK9e7fvy/09fXFpEmThBBCnD9/XgAQu3f
2024-03-03 09:32:45 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
2024-03-11 18:43:56 +01:00
"plt.bar(nb_customers_purchasing_spectacle[\"number_compagny\"], nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"clients ayant acheté\")\n",
"plt.bar(nb_customers_no_purchase_spectacle[\"number_compagny\"], nb_customers_no_purchase_spectacle[\"customer_id\"]/1000, \n",
" bottom = nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"clients ciblés par un mail\")\n",
"\n",
2024-03-03 09:32:45 +01:00
"\n",
"# Ajout de titres et d'étiquettes\n",
2024-03-11 18:43:56 +01:00
"plt.xlabel('Compagnie')\n",
"plt.ylabel(\"Nombre de clients (en milliers)\")\n",
"plt.title(\"Nombre de clients identifiés pour les compagnies de spectacle\")\n",
"plt.legend()\n",
2024-03-03 09:32:45 +01:00
"\n",
"# Affichage du barplot\n",
2024-03-11 18:43:56 +01:00
"plt.show()\n"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 104,
"id": "983190f7-8bb1-4416-95f9-1dcf66a2e72e",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
2024-03-11 18:43:56 +01:00
"data": {
"text/plain": [
"<matplotlib.legend.Legend at 0x7fab10ba2ad0>"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkoAAAHGCAYAAACLuaSnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABtyklEQVR4nO3deVxO6f8/8NdN+75QdxGFMqgIw9CMQsnOYCzNWAZjZyKDxqAsmRj7OmaQYSxjLJ8ZY98aZElkCWOrMFNClJLW6/eHX+frru7qzp07eT0fj/vxcK5znXO9z7mX3q5znevIhBACRERERFRAJU0HQERERFReMVEiIiIiUoKJEhEREZESTJSIiIiIlGCiRERERKQEEyUiIiIiJZgoERERESnBRImIiIhICSZKREREREowUSKiN/Ltt9/C2toad+7c0XQoRERqVyETpdDQUMhkMujp6SEuLq7Aek9PTzg7O2sgMmDQoEEwMjLSSNvFkclkCAwMfKttenp6wtPT863HsXfvXrW3YW9vj0GDBhVb7/jx45DJZDh+/Lha288vODgYu3fvLnH7y5YtQ506daCjowOZTIZnz55h0KBBsLe3V9rGvn37sHz5cuzZswe1a9dW7wG8A97We0lvrqTfz7dBE7+1mvDff/8hMDAQUVFRZdpObGwsZDIZQkNDy2T/FTJRypORkYHvvvtO02GQik6fPo2hQ4eWaRt79+5FUFCQWve5a9cuTJs2Ta37fBPKEqXGjRvj9OnTaNy4sVQWFRWFcePGoXXr1jh69ChOnz4NY2NjTJs2Dbt27Sp0//fv38eXX36Jbdu24cMPPyyrwyBSi/L2/Xwf/PfffwgKCirzRKmsaWk6gLLUvn17bN68GRMnTkTDhg01Hc4bE0Lg5cuX0NfX13QoZeqjjz7SdAil4ubmpukQSsTExKTAOY6OjgYAfPXVV2jWrJlUXlQvkZ2dHRISEsomyHLkxYsXMDAw0HQY9Ibele8nlT8Vukdp0qRJsLS0xOTJk4ut+/LlSwQEBMDBwQE6OjqoVq0aRo8ejWfPninUs7e3R+fOnbFnzx64ublBX18f9erVw549ewC8uuxXr149GBoaolmzZjh//nyh7UVHR6Nt27YwNDRE1apVMWbMGLx48UKhjkwmw5gxY7B69WrUq1cPurq62LBhAwDg1q1b8PX1hZWVFXR1dVGvXj2sWLGiROclJSUFX331FSwtLWFkZIT27dvj5s2bhdZ9k3Zyc3OxbNkyNGrUCPr6+jAzM8NHH32EP/74o8jtCuuWTkhIwPDhw1G9enXo6OjAwcEBQUFByM7Olurkdb/+8MMPWLhwIRwcHGBkZIQWLVrgzJkzUr1BgwZJxyCTyaRXbGwsAGD79u1o3rw5TE1NYWBggFq1amHw4MHFHm9hXfs3btxA+/btYWBggCpVqmDEiBF4/vx5odsfPnwYbdu2hYmJCQwMDODu7o4jR44o1AkMDIRMJkN0dDT69esHU1NTWFtbY/DgwUhOTlY4h2lpadiwYYN0fHmXOPNfLvL09MQXX3wBAGjevDlkMpl0HIVdehNCYOXKldL7am5ujl69euHu3bsK9S5evIjOnTtLnx1bW1t06tQJDx48KPI85l0aP3HiBD766CPo6+ujWrVqmDZtGnJychTqJiUlYdSoUahWrRp0dHRQq1YtTJ06FRkZGVKdorrl83/W8s7vhQsX0KtXL5ibm5fqkuL58+fRtWtXWFhYQE9PD25ubvjtt98U6rx48QITJ06Eg4MD9PT0YGFhgaZNm2LLli3F7v/ff//FsGHDYGdnBx0dHdja2qJXr154+PChVOfevXv44osvFL67CxYsQG5uboFzM3/+fISEhMDe3h76+vrw9PTEzZs3kZWVhSlTpsDW1hampqb49NNPkZiYqBBL3m/irl274OrqCj09PdSqVQtLly5VqPfy5Uv4+/ujUaNGMDU1hYWFBVq0aIH//e9/BY7v2b
NnGDJkCCwsLGBkZIROnTrh7t27St+v4r4PeXHm/36mpKRI70He776fnx/S0tIU6pX2N+Ft/dYWF1/ed37Tpk2YMGEC5HI59PX14eHhgYsXLxbYX0k+v0DRn8Pjx49LPc1ffvml9DuU9/6dP38effv2lT5z9vb26NevX6HDZUryeVf3OX1dhe5RMjY2xnfffYevv/4aR48eRZs2bQqtJ4RA9+7dceTIEQQEBOCTTz7B5cuXMWPGDJw+fRqnT5+Grq6uVP/SpUsICAjA1KlTYWpqiqCgIPTo0QMBAQE4cuQIgoODIZPJMHnyZHTu3BkxMTEKvUBZWVno2LEjhg8fjilTpiA8PByzZ89GXFwc/vzzT4XYdu/ejRMnTmD69OmQy+WwsrLCtWvX0LJlS9SoUQMLFiyAXC7HgQMHMG7cODx+/BgzZsxQek7yjjU8PBzTp0/Hhx9+iFOnTqFDhw4F6r5JO8CrP7KbNm3CkCFDMHPmTOjo6ODChQtSQlJSCQkJaNasGSpVqoTp06ejdu3aOH36NGbPno3Y2FisX79eof6KFSvwwQcfYPHixQCAadOmoWPHjoiJiYGpqSmmTZuGtLQ0/P777zh9+rS0nY2NDU6fPo0+ffqgT58+CAwMlMa5HT16VKWYAeDhw4fw8PCAtrY2Vq5cCWtra/z6668YM2ZMgbqbNm3CgAED0K1bN2zYsAHa2tr48ccf4ePjgwMHDqBt27YK9Xv27Ik+ffpgyJAhuHLlCgICAgAA69atA/Dq8mWbNm3QunVr6XKDiYlJoXGuXLkSW7ZswezZs7F+/Xp88MEHqFq1qtLjGj58OEJDQzFu3DiEhIQgKSkJM2fORMuWLXHp0iVYW1sjLS0N3t7ecHBwwIoVK2BtbY2EhAQcO3ZMaaL4uoSEBPTt2xdTpkzBzJkz8ddff2H27Nl4+vQpli9fDuDVH97WrVvjzp07CAoKgqurK06cOIG5c+ciKioKf/31V7HtKNOjRw/07dsXI0aMKPBHszjHjh1D+/bt0bx5c6xevRqmpqbYunUr+vTpgxcvXkh/rCdMmICNGzdi9uzZcHNzQ1paGq5evYonT54Uuf9///0XH374IbKysvDtt9/C1dUVT548wYEDB/D06VNYW1vj0aNHaNmyJTIzMzFr1izY29tjz549mDhxIu7cuYOVK1cq7HPFihVwdXXFihUr8OzZM/j7+6NLly5o3rw5tLW1sW7dOsTFxWHixIkYOnRogf/sREVFwc/PD4GBgZDL5fj111/x9ddfIzMzExMnTgTwaihEUlISJk6ciGrVqiEzMxOHDx9Gjx49sH79egwYMADAq/9gdenSBefPn0dgYKB0qbh9+/ZKz0lx34fCvHjxAh4eHnjw4IF0HqOjozF9+nRcuXIFhw8fhkwmK/Vvwtv6rVUlvm+//RaNGzfGzz//jOTkZAQGBsLT0xMXL15ErVq1AJT881vc57Bx48ZYv349vvzyS3z33Xfo1KkTAKB69eoAXiXpdevWRd++fWFhYYH4+HisWrUKH374Ia5du4YqVaqUqB1ra+tCz8ub/v3K/2ZWOOvXrxcAREREhMjIyBC1atUSTZs2Fbm5uUIIITw8PESDBg2k+vv37xcAxLx58xT2s23bNgFArFmzRiqrWbOm0NfXFw8ePJDKoqKiBABhY2Mj0tLSpPLdu3cLAOKPP/6QygYOHCgAiCVLlii0NWfOHAFAnDx5UioDIExNTUVSUpJCXR8fH1G9enWRnJysUD5mzBihp6dXoP7r9u3bV2T7M2bMUEs7f//9twAgpk6dqrSOEK/eCw8PD4Wy/HEMHz5cGBkZibi4OIV6P/zwgwAgoqOjhRBCxMTECADCxcVFZGdnS/XOnTsnAIgtW7ZIZaNHjxaFffzz9vns2bMi4y5MzZo1xcCBA6XlyZMnC5lMJqKiohTqeXt7CwDi2LFjQggh0tLShIWFhejSpYtCvZycHNGwYUPRrFkzqWzGjBmFflZHjRol9P
T0pM+4EEIYGhoqxJPn2LFjCu0Lofided3AgQNFzZo1peXTp08LAGLBggUK9e7fvy/09fXFpEmThBBCnD9/XgAQu3f
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
2024-03-08 10:30:12 +01:00
}
],
"source": [
2024-03-11 18:43:56 +01:00
"# Création du barplot\n",
"plt.bar(nb_customers_purchasing_spectacle[\"number_compagny\"], nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"clients ayant acheté\")\n",
"plt.bar(nb_customers_no_purchase_spectacle[\"number_compagny\"], nb_customers_no_purchase_spectacle[\"customer_id\"]/1000, \n",
" bottom = nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"clients ciblés par un mail\")\n",
2024-03-08 10:30:12 +01:00
"\n",
"\n",
2024-03-11 18:43:56 +01:00
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Compagnie')\n",
"plt.ylabel(\"Nombre de clients (en milliers)\")\n",
"plt.title(\"Nombre de clients identifiés pour les compagnies de spectacle\")\n",
"plt.legend()\n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-11 18:43:56 +01:00
"# Export local du barplot (désactivé)\n",
"# plt.savefig(\"nbre_clients_musique.png\")"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 112,
"id": "a41dfb3e-12b6-4a7b-9282-698d9476b17b",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-11 18:43:56 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkoAAAHGCAYAAACLuaSnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABtyklEQVR4nO3deVxO6f8/8NdN+75QdxGFMqgIw9CMQsnOYCzNWAZjZyKDxqAsmRj7OmaQYSxjLJ8ZY98aZElkCWOrMFNClJLW6/eHX+frru7qzp07eT0fj/vxcK5znXO9z7mX3q5znevIhBACRERERFRAJU0HQERERFReMVEiIiIiUoKJEhEREZESTJSIiIiIlGCiRERERKQEEyUiIiIiJZgoERERESnBRImIiIhICSZKREREREowUSKiN/Ltt9/C2toad+7c0XQoRERqVyETpdDQUMhkMujp6SEuLq7Aek9PTzg7O2sgMmDQoEEwMjLSSNvFkclkCAwMfKttenp6wtPT863HsXfvXrW3YW9vj0GDBhVb7/jx45DJZDh+/Lha288vODgYu3fvLnH7y5YtQ506daCjowOZTIZnz55h0KBBsLe3V9rGvn37sHz5cuzZswe1a9dW7wG8A97We0lvrqTfz7dBE7+1mvDff/8hMDAQUVFRZdpObGwsZDIZQkNDy2T/FTJRypORkYHvvvtO02GQik6fPo2hQ4eWaRt79+5FUFCQWve5a9cuTJs2Ta37fBPKEqXGjRvj9OnTaNy4sVQWFRWFcePGoXXr1jh69ChOnz4NY2NjTJs2Dbt27Sp0//fv38eXX36Jbdu24cMPPyyrwyBSi/L2/Xwf/PfffwgKCirzRKmsaWk6gLLUvn17bN68GRMnTkTDhg01Hc4bE0Lg5cuX0NfX13QoZeqjjz7SdAil4ubmpukQSsTExKTAOY6OjgYAfPXVV2jWrJlUXlQvkZ2dHRISEsomyHLkxYsXMDAw0HQY9Ibele8nlT8Vukdp0qRJsLS0xOTJk4ut+/LlSwQEBMDBwQE6OjqoVq0aRo8ejWfPninUs7e3R+fOnbFnzx64ublBX18f9erVw549ewC8uuxXr149GBoaolmzZjh//nyh7UVHR6Nt27YwNDRE1apVMWbMGLx48UKhjkwmw5gxY7B69WrUq1cPurq62LBhAwDg1q1b8PX1hZWVFXR1dVGvXj2sWLGiROclJSUFX331FSwtLWFkZIT27dvj5s2bhdZ9k3Zyc3OxbNkyNGrUCPr6+jAzM8NHH32EP/74o8jtCuuWTkhIwPDhw1G9enXo6OjAwcEBQUFByM7Olurkdb/+8MMPWLhwIRwcHGBkZIQWLVrgzJkzUr1BgwZJxyCTyaRXbGwsAGD79u1o3rw5TE1NYWBggFq1amHw4MHFHm9hXfs3btxA+/btYWBggCpVqmDEiBF4/vx5odsfPnwYbdu2hYmJCQwMDODu7o4jR44o1AkMDIRMJkN0dDT69esHU1NTWFtbY/DgwUhOTlY4h2lpadiwYYN0fHmXOPNfLvL09MQXX3wBAGjevDlkMpl0HIVdehNCYOXKldL7am5ujl69euHu3bsK9S5evIjOnTtLnx1bW1t06tQJDx48KPI85l0aP3HiBD766CPo6+ujWrVqmDZtGnJychTqJiUlYdSoUahWrRp0dHRQq1YtTJ06FRkZGVKdorrl83/W8s7vhQsX0KtXL5ibm5fqkuL58+fRtWtXWFhYQE9PD25ubvjtt98U6rx48QITJ06Eg4MD9PT0YGFhgaZNm2LLli3F7v/ff//FsGHDYGdnBx0dHdja2qJXr154+PChVOfevXv44osvFL67CxYsQG5uboFzM3/+fISEhMDe3h76+vrw9PTEzZs3kZWVhSlTpsDW1hampqb49NNPkZiYqBBL3m/irl274OrqCj09PdSqVQtLly5VqPfy5Uv4+/ujUaNGMDU1hYWFBVq0aIH//e9/BY7v2b
NnGDJkCCwsLGBkZIROnTrh7t27St+v4r4PeXHm/36mpKRI70He776fnx/S0tIU6pX2N+Ft/dYWF1/ed37Tpk2YMGEC5HI59PX14eHhgYsXLxbYX0k+v0DRn8Pjx49LPc1ffvml9DuU9/6dP38effv2lT5z9vb26NevX6HDZUryeVf3OX1dhe5RMjY2xnfffYevv/4aR48eRZs2bQqtJ4RA9+7dceTIEQQEBOCTTz7B5cuXMWPGDJw+fRqnT5+Grq6uVP/SpUsICAjA1KlTYWpqiqCgIPTo0QMBAQE4cuQIgoODIZPJMHnyZHTu3BkxMTEKvUBZWVno2LEjhg8fjilTpiA8PByzZ89GXFwc/vzzT4XYdu/ejRMnTmD69OmQy+WwsrLCtWvX0LJlS9SoUQMLFiyAXC7HgQMHMG7cODx+/BgzZsxQek7yjjU8PBzTp0/Hhx9+iFOnTqFDhw4F6r5JO8CrP7KbNm3CkCFDMHPmTOjo6ODChQtSQlJSCQkJaNasGSpVqoTp06ejdu3aOH36NGbPno3Y2FisX79eof6KFSvwwQcfYPHixQCAadOmoWPHjoiJiYGpqSmmTZuGtLQ0/P777zh9+rS0nY2NDU6fPo0+ffqgT58+CAwMlMa5HT16VKWYAeDhw4fw8PCAtrY2Vq5cCWtra/z6668YM2ZMgbqbNm3CgAED0K1bN2zYsAHa2tr48ccf4ePjgwMHDqBt27YK9Xv27Ik+ffpgyJAhuHLlCgICAgAA69atA/Dq8mWbNm3QunVr6XKDiYlJoXGuXLkSW7ZswezZs7F+/Xp88MEHqFq1qtLjGj58OEJDQzFu3DiEhIQgKSkJM2fORMuWLXHp0iVYW1sjLS0N3t7ecHBwwIoVK2BtbY2EhAQcO3ZMaaL4uoSEBPTt2xdTpkzBzJkz8ddff2H27Nl4+vQpli9fDuDVH97WrVvjzp07CAoKgqurK06cOIG5c+ciKioKf/31V7HtKNOjRw/07dsXI0aMKPBHszjHjh1D+/bt0bx5c6xevRqmpqbYunUr+vTpgxcvXkh/rCdMmICNGzdi9uzZcHNzQ1paGq5evYonT54Uuf9///0XH374IbKysvDtt9/C1dUVT548wYEDB/D06VNYW1vj0aNHaNmyJTIzMzFr1izY29tjz549mDhxIu7cuYOVK1cq7HPFihVwdXXFihUr8OzZM/j7+6NLly5o3rw5tLW1sW7dOsTFxWHixIkYOnRogf/sREVFwc/PD4GBgZDL5fj111/x9ddfIzMzExMnTgTwaihEUlISJk6ciGrVqiEzMxOHDx9Gjx49sH79egwYMADAq/9gdenSBefPn0dgYKB0qbh9+/ZKz0lx34fCvHjxAh4eHnjw4IF0HqOjozF9+nRcuXIFhw8fhkwmK/Vvwtv6rVUlvm+//RaNGzfGzz//jOTkZAQGBsLT0xMXL15ErVq1AJT881vc57Bx48ZYv349vvzyS3z33Xfo1KkTAKB69eoAXiXpdevWRd++fWFhYYH4+HisWrUKH374Ia5du4YqVaqUqB1ra+tCz8ub/v3K/2ZWOOvXrxcAREREhMjIyBC1atUSTZs2Fbm5uUIIITw8PESDBg2k+vv37xcAxLx58xT2s23bNgFArFmzRiqrWbOm0NfXFw8ePJDKoqKiBABhY2Mj0tLSpPLdu3cLAOKPP/6QygYOHCgAiCVLlii0NWfOHAFAnDx5UioDIExNTUVSUpJCXR8fH1G9enWRnJysUD5mzBihp6dXoP7r9u3bV2T7M2bMUEs7f//9twAgpk6dqrSOEK/eCw8PD4Wy/HEMHz5cGBkZibi4OIV6P/zwgwAgoqOjhRBCxMTECADCxcVFZGdnS/XOnTsnAIgtW7ZIZaNHjxaFffzz9vns2bMi4y5MzZo1xcCBA6XlyZMnC5lMJqKiohTqeXt7CwDi2LFjQggh0tLShIWFhejSpYtCvZycHNGwYUPRrFkzqWzGjBmFflZHjRol9P
T0pM+4EEIYGhoqxJPn2LFjCu0Lofided3AgQNFzZo1peXTp08LAGLBggUK9e7fvy/09fXFpEmThBBCnD9/XgAQu3f
2024-03-08 10:30:12 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-11 18:43:56 +01:00
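"# syntaxe à retenir pour exporter des images vers le stockage S3\n",
"# NOTE (à vérifier) : le chemin et le nom du fichier mentionnent « music » alors que le graphique porte sur les compagnies de spectacle\n",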
"\n",
"\n",
"FILE_PATH = \"projet-bdc2324-team1/graphics/music/\"\n",
"FILE_NAME = \"number_customers_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
2024-03-08 10:30:12 +01:00
"# Création du barplot\n",
2024-03-11 18:43:56 +01:00
"plt.bar(nb_customers_purchasing_spectacle[\"number_compagny\"], nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"clients ayant acheté\")\n",
2024-03-08 10:30:12 +01:00
"plt.bar(nb_customers_no_purchase_spectacle[\"number_compagny\"], nb_customers_no_purchase_spectacle[\"customer_id\"]/1000, \n",
2024-03-11 18:43:56 +01:00
" bottom = nb_customers_purchasing_spectacle[\"customer_id\"]/1000, label = \"clients ciblés par un mail\")\n",
2024-03-08 10:30:12 +01:00
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
2024-03-11 18:43:56 +01:00
"plt.xlabel('Compagnie')\n",
2024-03-08 10:30:12 +01:00
"plt.ylabel(\"Nombre de clients (en milliers)\")\n",
2024-03-11 11:40:29 +01:00
"plt.title(\"Nombre de clients identifiés pour les compagnies de spectacle\")\n",
2024-03-08 10:30:12 +01:00
"plt.legend()\n",
"\n",
2024-03-11 18:43:56 +01:00
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "85b6c7a9-d970-4071-8633-45bc1f50e157",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"#### Prix maximal payé par un client (utilité à confirmer)"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
"execution_count": 152,
"id": "fd11c547-7128-4ef6-ad7b-4b7c2a30cd9e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
2024-03-03 09:32:45 +01:00
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>max_price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>13823.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>5000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>3180.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>456.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny max_price\n",
"0 10 13823.0\n",
"1 11 108.0\n",
"2 12 5000.0\n",
"3 13 3180.0\n",
"4 14 456.0"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# prix maximal payé par un client pour chaque compagnie - très variable : de 108 à 13823\n",
"\n",
"company_max_price = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"max_price\"].max().reset_index()\n",
"company_max_price"
]
},
{
"cell_type": "code",
"execution_count": 153,
"id": "b8f8f162-4153-4cfe-bfaa-d981d414510d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAAHGCAYAAAC7NbWGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABc0UlEQVR4nO3dd1gUV/828HulCQgrRcBVBCyxgT0iGINGURNKNBoLilhiLyFifRJjSWKPGOUxamKJJWJMwMdYUGxEI1hQYkMTE6yIGMFFLIDLef/wx7yuC8joIgven+vaS/fMmZnvDLvLzZmyCiGEABERERGVWKWyLoCIiIiovGGAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiekH/+c9/4OjoiL///rusSyGiV4wBSk/Wrl0LhUIhPYyNjVGzZk0MGjQIN27cKNEyBg4cCFdX19IttAwV7KPLly+/8nVfvnwZCoUCa9eufaH5Dx48CIVCgYMHD+q1rrKwc+dOzJgxo8zWX/A6OHHiRJnVoA+7du1CREQEtm/fjjp16pR1OVSKFArFK3nPuLq6YuDAgaW+nrL24MEDzJgx45V8npbmz44BSs/WrFmD+Ph4xMbGYujQodi0aRPatWuH+/fvP3feadOmITo6+hVUWTb8/PwQHx+P6tWrl3Upr7WdO3di5syZZV1GuXbt2jUMGjQImzdvxptvvlnW5VApi4+Px0cffVTWZVQYDx48wMyZM8v9H6TGZV1ARePu7o5WrVoBADp06ACNRoMvvvgCW7duRb9+/Qqd58GDB7CwsKjwf8VWq1YN1apVK+syiCQF7z25nJ2dkZaWVgoVvbwX3SYqWps2bcq6BDJAHIEqZQVvvCtXrgB4cpiuSpUqOHPmDDp37gwrKyt07NhRmvb0IbzIyEgoFApERERoLXP69OkwMjJCbGxsset2dXWFv78/tm/fjubNm8Pc3BwNGzbE9u3bATw5lNKwYUNYWlqidevWOodUTpw4gT59+sDV1RXm5uZwdXVF3759pW0BACEE3nvvPdjZ2eHq1atS+4MHD9C4cWM0bNhQGn0r7BBe+/bt4e7ujvj4eHh7e0vrWbNmDQBgx44daNGiBSwsLODh4YGYmBitGi9duoRBgwahXr16sLCwQI0aNRAQEIAzZ84Uu2+Kc+HCBXTt2hUWFhawt7fHiBEjcO/evUL77t27Fx07doS1tTUsLCzQtm1b7Nu3r9jl3759G6amppg2bVqh61YoFFiyZInUlpaWhuHDh6NmzZowNTWFm5sbZs6cicePH0t9Cg5RLly4EIsWLYKbmxuqVKkCLy8vJCQkSP0GDhyI//73vwCgdci54GcihMCyZcvQrFkzmJubw8bGBj179sQ///xTon13+PBhdOzYEVZWVrCwsIC3tzd27NhRaN/MzEwMGjQItra2sLS0REBAgM56Tp06BX9/fzg4OMDMzAwqlQp+fn64fv261KekNRe81n777Td4e3vDwsICgwcPRrdu3eDi4oL8/HydGj09PdGiRQvZ6yrMjBkzoFAocOrUKXzwwQewtraGUqlE//79cfv2ba2+mzdvRufOnVG9enXpfTtlyhSdkeziPk+KcuHCBfTt2xeOjo4wMzNDrVq1MGDAAOTk5Eh9zp49i/fffx82NjaoXLkymjVrhh9++EFrOQWHtX/88UdMnjwZ1atXR5UqVRAQEIBbt27h3r17GDZsGOzt7WFvb49BgwYhOztbaxkKhQJjxozBihUr8MYbb8DMzAyNGjVCZGSkVr/bt29j1KhRaNSoEapUqQIHBwe88847OHTokM72Xb9+HT179oSVlRWqVq2Kfv364fjx4zqH8Av23aVLl/Dee++hSpUqcHZ2RlhYmNa+KKjz2cNAJXlfFiUvLw+TJk2Ck5MTLCws8NZbb+HYsWOF9n2Z9ezfvx/t27eHnZ
0dzM3NUatWLfTo0QMPHjwA8P8/N+bPn4+vvvoKtWrVQuXKldGqVatCP8f++usvBAUFSe/Hhg0bSp8nT7t79y7CwsJQu3ZtmJmZwcHBAe+99x4uXLiAy5cvS39Iz5w5U/oMKjh0Keczvbj1FOdl9qkWQXqxZs0aAUAcP35cq/2bb74RAMTKlSuFEEKEhIQIExMT4erqKubMmSP27dsndu/eLU1zcXHRmn/EiBHC1NRUWu6+fftEpUqVxGefffbcmlxcXETNmjWFu7u72LRpk9i5c6fw9PQUJiYm4vPPPxdt27YVUVFRIjo6WrzxxhvC0dFRPHjwQJp/y5Yt4vPPPxfR0dEiLi5OREZGCh8fH1GtWjVx+/Ztqd+///4ratasKTw9PUVubq60Lebm5uL06dM6+yglJUVq8/HxEXZ2dqJ+/fpi1apVYvfu3cLf318AEDNnzhQeHh5S7W3atBFmZmbixo0b0vxxcXEiLCxM/PzzzyIuLk5ER0eLbt26CXNzc3HhwgWpX0pKigAg1qxZU+w+S0tLEw4ODqJGjRpizZo1YufOnaJfv36iVq1aAoA4cOCA1Hf9+vVCoVCIbt26iaioKPHrr78Kf39/YWRkJPbu3Vvserp37y6cnZ2FRqPRap80aZIwNTUV//77rxBCiJs3bwpnZ2fh4uIiVqxYIfbu3Su++OILYWZmJgYOHKizfa6urqJr165i69atYuvWrcLDw0PY2NiIu3fvCiGEuHTpkujZs6cAIOLj46XHo0ePhBBCDB06VJiYmIiwsDARExMjfvzxR9GgQQPh6Ogo0tLSit2mgwcPChMTE9GyZUuxefNmsXXrVtG5c2ehUChEZGSk1K/gdeDs7CwGDx4sdu3aJVauXCkcHByEs7OzyMzMFEIIkZ2dLezs7ESrVq3ETz/9JOLi4sTmzZvFiBEjxPnz56XllbRmHx8fYWtrK5ydncXSpUvFgQMHRFxcnPjf//4nAIjY2Fit7UlOThYAxJIlS2SvqzDTp08XAISLi4uYOHGi2L17t1i0aJGwtLQUzZs3l947QgjxxRdfiPDwcLFjxw5x8OBBsXz5cuHm5iY6dOigtcziPk8Kk5SUJKpUqSJcXV3F8uXLxb59+8SGDRtEr169RFZWlhBCiAsXLggrKytRp04dsW7dOrFjxw7Rt29fAUDMmzdPWtaBAwek7Rk4cKCIiYkRy5cvF1WqVBEdOnQQvr6+YsKECWLPnj1i3rx5wsjISIwdO1arnoLXQaNGjcSmTZvEtm3bRNeuXQUAsWXLFqnfhQsXxMiRI0VkZKQ4ePCg2L59uxgyZIioVKmS1nsyOztb1K1bV9ja2or//ve/Yvfu3eKTTz4Rbm5uOu//kJAQYWpqKho2bCgWLlwo9u7dKz7//HOhUCjEzJkzdeqcPn269Lyk78uihISECIVCISZOnCj27NkjFi1aJGrUqCGsra1FSEiIXtaTkpIiKleuLHx9fcXWrVvFwYMHxcaNG0VwcLD0Hiv43HB2dhZvvfWW+OWXX8SWLVvEm2++KUxMTMSRI0ek5Z07d04olUrh4eEh1q1bJ/bs2SPCwsJEpUqVxIwZM6R+WVlZonHjxsLS0lLMmjVL7N69W/zyyy/i448/Fvv37xePHj0SMTExAoAYMmSI9Bl06dIlIUTJP9Oft57S+tk9jQFKTwp+KSQkJIi8vDxx7949sX37dlGtWjVhZWUlfbiGhIQIAGL16tU6yygsQD169Eg0b95cuLm5ifPnzwtHR0fh4+MjHj9+/NyaXFxchLm5ubh+/brUlpSUJACI6tWri/v370vtW7duFQDEtm3bilze48ePRXZ2trC0tBTffPON1rTDhw8LY2NjERoaKlavXi0AiO+//77QffRsgAIgTpw4IbXduXNHGBkZCXNzc62wVFD707/QCqsxNzdX1KtXT3zyySdSe0kD1OTJk4VCoRBJSUla7b6+vloB6v79+8LW1lYEBARo9dNoNKJp06aidevWxa
5n27ZtAoDYs2ePVu0qlUr06NFDahs+fLioUqWKuHLlitb8CxcuFADEuXPntLbPw8ND67Vx7NgxAUBs2rRJahs9erQ
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart: highest ticket price sold, one bar per company\n",
"fig_max_price, ax_max_price = plt.subplots()\n",
"ax_max_price.bar(company_max_price[\"number_compagny\"], company_max_price[\"max_price\"])\n",
"\n",
"# Axis labels and title (user-facing text unchanged)\n",
"ax_max_price.set(\n",
"    xlabel='Company',\n",
"    ylabel=\"Prix maximal d'un billet vendu\",\n",
"    title=\"Prix maximal de vente observé par compagnie de spectacle\",\n",
")\n",
"\n",
"# Render the figure\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "bff23e5d-d7ed-4092-ae3c-5df503e54a6d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 762879.000000\n",
"mean 0.079068\n",
"std 3.969729\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 3334.000000\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# purchase_count distribution for customers without a recorded first purchase date\n",
"customerplus_clean_spectacle.loc[\n",
"    customerplus_clean_spectacle[\"first_buying_date\"].isna(), \"purchase_count\"\n",
"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "89466dbd-14d2-4ede-9ca0-b9c32b764e25",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 7.608090e+05\n",
"mean 3.863940e+00\n",
"std 1.685825e+03\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 1.469325e+06\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# purchase_count distribution for customers with a recorded first purchase date\n",
"customerplus_clean_spectacle.loc[\n",
"    customerplus_clean_spectacle[\"first_buying_date\"].notna(), \"purchase_count\"\n",
"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "5f9feae4-35f4-43b6-adeb-f75773900a2d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343121</th>\n",
" <td>4667645</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534181.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343122</th>\n",
" <td>4667649</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534177.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343123</th>\n",
" <td>4667660</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534165.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343124</th>\n",
" <td>4667679</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534132.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343125</th>\n",
" <td>4667686</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1567949.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1523688 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 821538 139 NaN NaN 0 \n",
"1 809126 1063 NaN NaN 0 \n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"343121 4667645 122 NaN 1534181.0 0 \n",
"343122 4667649 122 NaN 1534177.0 0 \n",
"343123 4667660 122 NaN 1534165.0 0 \n",
"343124 4667679 122 NaN 1534132.0 0 \n",
"343125 4667686 122 NaN 1567949.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 875 False NaN 2 True ... \n",
"1 875 False NaN 2 True ... \n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"... ... ... ... ... ... ... \n",
"343121 862 False NaN 2 True ... \n",
"343122 862 False NaN 2 True ... \n",
"343123 862 False NaN 0 True ... \n",
"343124 862 False NaN 2 True ... \n",
"343125 862 False NaN 0 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"0 NaN NaN other 0 0 \n",
"1 NaN fr other 0 0 \n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"... ... ... ... ... ... \n",
"343121 NaN NaN other 0 0 \n",
"343122 NaN NaN other 0 0 \n",
"343123 NaN NaN female 1 0 \n",
"343124 NaN NaN other 0 0 \n",
"343125 NaN NaN female 1 0 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"0 1 NaN 0 10 False \n",
"1 1 1.0 0 10 False \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"343121 1 NaN 0 14 False \n",
"343122 1 NaN 0 14 False \n",
"343123 0 NaN 0 14 False \n",
"343124 1 NaN 0 14 False \n",
"343125 0 NaN 0 14 False \n",
"\n",
"[1523688 rows x 30 columns]"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Flag customers that have a recorded first purchase date.\n",
"# .notna() replaces the non-idiomatic `.isna()==False` comparison (same boolean result).\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"first_buying_date\"].notna()\n",
"customerplus_clean_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "cec4f1eb-cec8-409d-8b2c-1e01f1bf81ff",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338933</th>\n",
" <td>3625705</td>\n",
" <td>648752</td>\n",
" <td>NaN</td>\n",
" <td>1253864.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338954</th>\n",
" <td>3627626</td>\n",
" <td>636890</td>\n",
" <td>NaN</td>\n",
" <td>1253887.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338959</th>\n",
" <td>3628124</td>\n",
" <td>653042</td>\n",
" <td>NaN</td>\n",
" <td>1253899.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338986</th>\n",
" <td>3631189</td>\n",
" <td>648423</td>\n",
" <td>NaN</td>\n",
" <td>1253928.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339039</th>\n",
" <td>3635380</td>\n",
" <td>659417</td>\n",
" <td>NaN</td>\n",
" <td>1253975.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26246 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"5 307036 139 NaN NaN 0 \n",
"6 2946 1063 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"338933 3625705 648752 NaN 1253864.0 0 \n",
"338954 3627626 636890 NaN 1253887.0 0 \n",
"338959 3628124 653042 NaN 1253899.0 0 \n",
"338986 3631189 648423 NaN 1253928.0 0 \n",
"339039 3635380 659417 NaN 1253975.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"5 875 False NaN 2 True ... \n",
"6 875 False NaN 2 False ... \n",
"... ... ... ... ... ... ... \n",
"338933 862 False NaN 0 True ... \n",
"338954 862 False NaN 0 True ... \n",
"338959 862 False NaN 0 True ... \n",
"338986 862 False NaN 0 True ... \n",
"339039 862 False NaN 1 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"5 NaN NaN other 0 0 \n",
"6 NaN fr other 0 0 \n",
"... ... ... ... ... ... \n",
"338933 NaN fr female 1 0 \n",
"338954 NaN fr female 1 0 \n",
"338959 NaN fr female 1 0 \n",
"338986 NaN fr female 1 0 \n",
"339039 NaN fr male 0 1 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"5 1 NaN 0 10 False \n",
"6 1 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"338933 0 1.0 0 14 False \n",
"338954 0 1.0 0 14 False \n",
"338959 0 1.0 0 14 False \n",
"338986 0 1.0 0 14 False \n",
"339039 0 1.0 0 14 False \n",
"\n",
"[26246 rows x 30 columns]"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Caution: some customers have no first purchase date although they count several purchases.\n",
"# A NaN first_buying_date therefore does not always mean the customer never bought;\n",
"# the date may simply not be filled in.\n",
"\n",
"customerplus_clean_spectacle.loc[\n",
"    ~customerplus_clean_spectacle[\"already_purchased\"]\n",
"    & customerplus_clean_spectacle[\"purchase_count\"].gt(0)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "b5904039-a967-47d5-ba13-1b805bcd76ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [customer_id, street_id, structure_id, mcp_contact_id, fidelity, tenant_id, is_partner, deleted_at, gender, is_email_true, opt_in, last_buying_date, max_price, ticket_sum, average_price, average_purchase_delay, average_price_basket, average_ticket_basket, total_price, purchase_count, first_buying_date, country, gender_label, gender_female, gender_male, gender_other, country_fr, has_tags, number_compagny, already_purchased]\n",
"Index: []\n",
"\n",
"[0 rows x 30 columns]"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Conversely, a customer with purchase_count == 0 should never be flagged already_purchased:\n",
"# this selection is expected to be empty.\n",
"customerplus_clean_spectacle.loc[\n",
"    customerplus_clean_spectacle[\"already_purchased\"]\n",
"    & customerplus_clean_spectacle[\"purchase_count\"].eq(0)\n",
"]"
]
},
2024-03-11 18:43:56 +01:00
{
"cell_type": "markdown",
"id": "703d9986-4497-404f-881a-45ca44b25beb",
"metadata": {},
"source": [
"#### Différence de consentement aux campagnes de mails (opt-in)"
]
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 113,
2024-03-03 09:32:45 +01:00
"id": "e940bfcf-29cc-4d4c-ae5e-e2a8cecf28af",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"number_compagny already_purchased\n",
"10 False 0.234840\n",
2024-03-11 18:43:56 +01:00
" True 0.236242\n",
2024-03-03 09:32:45 +01:00
"11 False 0.141746\n",
" True 0.002804\n",
"12 False 0.485950\n",
2024-03-11 18:43:56 +01:00
" True 0.244780\n",
2024-03-03 09:32:45 +01:00
"13 False 0.084057\n",
" True 0.177213\n",
"14 False 0.885553\n",
" True 0.308859\n",
"Name: opt_in, dtype: float64"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 113,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Differences in consent to e-mail campaigns (opt-in)\n",
"\n",
"# opt_in values found among customers with no recorded first purchase date\n",
"# (author's note: some of them still agreed to be targeted).\n",
"# display() is needed: a bare expression that is not the last line of a cell shows nothing.\n",
"display(customerplus_clean_spectacle[customerplus_clean_spectacle[\"first_buying_date\"].isna()][\"opt_in\"].unique())\n",
"\n",
"# Consent rates per company and purchase status.\n",
"# NOTE: already_purchased is redefined here from purchase_count (> 0), not from first_buying_date.\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"] > 0\n",
"customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"])[\"opt_in\"].mean()"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 168,
2024-03-03 09:32:45 +01:00
"id": "a5e79beb-9ba0-4c89-b084-e27ff0d65dcc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" <td>0.234840</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>True</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.236242</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>False</td>\n",
" <td>0.141746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>True</td>\n",
" <td>0.002804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>False</td>\n",
" <td>0.485950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>True</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.244780</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>False</td>\n",
" <td>0.084057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>True</td>\n",
" <td>0.177213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" <td>0.885553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" <td>0.308859</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny already_purchased opt_in\n",
"0 10 False 0.234840\n",
2024-03-11 18:43:56 +01:00
"1 10 True 0.236242\n",
2024-03-03 09:32:45 +01:00
"2 11 False 0.141746\n",
"3 11 True 0.002804\n",
"4 12 False 0.485950\n",
2024-03-11 18:43:56 +01:00
"5 12 True 0.244780\n",
2024-03-03 09:32:45 +01:00
"6 13 False 0.084057\n",
"7 13 True 0.177213\n",
"8 14 False 0.885553\n",
"9 14 True 0.308859"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 168,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean opt-in rate per (company, purchase status), as a flat table for plotting\n",
"df_graph = customerplus_clean_spectacle.groupby(\n",
"    [\"number_compagny\", \"already_purchased\"], as_index=False\n",
")[\"opt_in\"].mean()\n",
"df_graph"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 169,
2024-03-03 09:32:45 +01:00
"id": "5be56c41-7697-481a-84ea-f77a2041484b",
"metadata": {},
"outputs": [
{
"data": {
2024-03-11 18:43:56 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIhCAYAAABwnkrAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABjoklEQVR4nO3dZ3hU1f728XtID2kQTENIgoQmXRQN0ptSFBBpUoKiIHCQdigiVboKHEUpKoSjUvSIDRGFUASp0gVERUJACCBgKIEAyX5e+M88zk4CMzDJhPD9XNdcOmu3395ZKTdr7zUWwzAMAQAAAACsCrm6AAAAAADIbwhKAAAAAGBCUAIAAAAAE4ISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgAlBCQAAAABMCEpALoiPj5fFYrG+3N3dde+996p79+76448/nHqsiRMn6vPPP7+tfSQmJspisSg+Pt4pNd1t3nnnnQJ57ZzRt+5kY8aMkcVisWmrV6+e6tWrZ9NmsVg0ZsyYvCssl7jqPDJ/XiYmJub5seGYgtLXAXsRlIBcNH/+fG3atEkrV67U888/r0WLFql27dq6dOmS045xt/8xmx8QlAqmHj16aNOmTTddb9OmTerRo0ceVAS4Fn0ddxt3VxcAFGQVK1ZUjRo1JEn169dXenq6Xn31VX3++ed65plnbmvfly9flo+PjzPKBJCNe++9V/fee+9N13v44YfzoBrA9ejruNswogTkocxfMkeOHJEkjR07VjVr1lTRokUVEBCg6tWr6/3335dhGDbbRUVFqUWLFlq6dKmqVasmb29vjR07VhaLRZcuXdKCBQust/mZbwsyO378uNq1ayd/f38FBgaqffv2Sk5OznbdH3/8UU888YSKFi0qb29vVatWTR9//LFd55qWlqZx48apfPny8vb2VnBwsOrXr6+NGzda17ly5YqGDx+u6OhoeXp6qnjx4urTp4/++uuvbM9/xYoVql69unx8fFSuXDnNmzfPZr3U1FQNHjxY0dHR8vb2VtGiRVWjRg0tWrTI4fPKvB1ozZo1evHFF1WsWDEFBwerTZs2On78uE1t+/bt07p166xfg6ioKOvy8+fPW2vKPMf+/ftnGVW0WCzq27ev5s+fr7Jly8rHx0c1atTQ5s2bZRiGXnvtNUVHR8vPz08NGjTQb7/9luWar1q1Sg0bNlRAQIB8fX1Vq1YtJSQk2KyTeTvZvn371LFjRwUGBio0NFTPPvusUlJSbOpxtG/Z259zun0nKipKcXFxNzxG5m2ir732mqZMmaKoqCj5+PioXr16+uWXX3Tt2jUNGzZMERERCgwMVOvWrXXq1CmbfSxZskRNmjRReHi4fHx8VL58eQ0bNizL1yS7W++yYz4fe/uO9Pf3yaBBgxQWFiZfX1/VqVNH27dvt+taSNKsWbNUpUoV+fn5yd/fX+XKldPLL79ss05ycrJ69uype++9V56enoqOjtbYsWN1/fr1m+7/p59+0pNPPqkiRYrI29tbVatW1YIFC2zWWbt2rSwWixYtWqQRI0YoIiJCAQEBatSokQ4ePHjTY+TEnv58+vRpvfDCCypRooS8vLx0zz33qFatWlq1atVN9//zzz+rY8eOCg0NlZeXl0qWLKmuXbsqLS3tls5/4cKFGjp0qMLDw+Xn56eWLVvq5MmTunDhgl544QUVK1ZMxYoVU/fu3XXx4kWbfWR+/8+ZM0dlypSRl5eXKlSooMWLF2c53969e6tChQry8/NTSEiIGjRooPXr12c5v2PHjqlt27by9/dXUFCQnnnmGW3bti3LbdZxcXHy8/PTb7/9pmbNmsnPz08lSpTQoEGDbK5FZp3m7117+5c9fRXIbxhRAvJQ5h+399xzj6S//+jr2bOnSpYsKUnavHmz/vWvf+mPP/7QqFGjbLbdsWOHDhw4oFdeeUXR0dEqXLiwWrVqpQYNGqh+/foaOXKkJCkgIC
DH41++fFmNGjXS8ePHNWnSJJUpU0Zff/212rdvn2XdNWvW6LHHHlPNmjU1e/ZsBQYGavHixWrfvr1SU1Nv+Efc9evX9fjjj2v9+vXq37+/GjRooOvXr2vz5s1KSkpSbGysDMNQq1atlJCQoOHDh6t27dras2ePRo8erU2bNmnTpk3y8vKy7nP37t0aNGiQhg0bptDQUL333nt67rnnVLp0adWpU0eSNHDgQH3wwQcaP368qlWrpkuXLumnn37SmTNnbvm8evTooebNm2vhwoU6evSo/v3vf6tz585avXq1JOmzzz5T27ZtFRgYqHfeeUeSrHWnpqaqbt26OnbsmF5++WVVrlxZ+/bt06hRo7R3716tWrXK5g/xZcuWaefOnZo8ebIsFouGDh2q5s2bq1u3bvr99981c+ZMpaSkaODAgXrqqae0a9cu6/YffvihunbtqieffFILFiyQh4eH5syZo6ZNm+rbb79Vw4YNbc7rqaeeUvv27fXcc89p7969Gj58uCRZw+emTZsc6luSY/35dr399tuqXLmy3n77bf31118aNGiQWrZsqZo1a8rDw0Pz5s3TkSNHNHjwYPXo0UNffvmlddtff/1VzZo1U//+/VW4cGH9/PPPmjJlirZu3Wr9ujrDzfqOJHXv3l1LlizRkCFD1KBBA+3fv1+tW7fW+fPnb7r/xYsXq3fv3vrXv/6l119/XYUKFdJvv/2m/fv3W9dJTk7WQw89pEKFCmnUqFG67777tGnTJo0fP16JiYmaP39+jvs/ePCgYmNjFRISojfffFPBwcH68MMPFRcXp5MnT2rIkCE267/88suqVauW3nvvPZ0/f15Dhw5Vy5YtdeDAAbm5uTl07eztz126dNGOHTs0YcIElSlTRn/99Zd27Nhh8z2fnd27d+vRRx9VsWLFNG7cOMXExOjEiRP68ssvdfXqVXl5ed3S+devX1/x8fFKTEzU4MGD1bFjR7m7u6tKlSpatGiRdu7cqZdffln+/v568803bbb/8ssvtWbNGo0bN06FCxfWO++8Y92+bdu2kqSzZ89KkkaPHq2wsDBdvHhRn332merVq6eEhATrP2ZcunRJ9evX19mzZzVlyhSVLl1aK1asyPZnvSRdu3ZNTzzxhJ577jkNGjRI33//vV599VUFBgbe8HvX3v5lT18F8iUDgNPNnz/fkGRs3rzZuHbtmnHhwgVj2bJlxj333GP4+/sbycnJWbZJT083rl27ZowbN84IDg42MjIyrMsiIyMNNzc34+DBg1m2K1y4sNGtWze76po1a5Yhyfjiiy9s2p9//nlDkjF//nxrW7ly5Yxq1aoZ165ds1m3RYsWRnh4uJGenp7jcf773/8akox33303x3VWrFhhSDKmTp1q075kyRJDkjF37lxrW2RkpOHt7W0cOXLE2nb58mWjaNGiRs+ePa1tFStWNFq1apXjMR05r8yvYe/evW3Wmzp1qiHJOHHihLXt/vvvN+rWrZvlWJMmTTIKFSpkbNu2zab9f//7nyHJWL58ubVNkhEWFmZcvHjR2vb5558bkoyqVava9IcZM2YYkow9e/YYhmEYly5dMooWLWq0bNnS5jjp6elGlSpVjIceesjaNnr06Gyve+/evQ1vb2+b4zjSt8xu1J8lGaNHj86yTWRk5E2Pd/jwYUOSUaVKFZs+mHlNnnjiCZv1+/fvb0gyUlJSst1fRkaGce3aNWPdunWGJGP37t3WZZnX6p/q1q2b5WttPh97+86+ffsMScbQoUNt1lu0aJEh6abXom/fvkZQUNAN1+nZs6fh5+dn871jGIbx+uuvG5KMffv25XgeHTp0MLy8vIykpCSbbR9//HHD19fX+OuvvwzDMIw1a9YYkoxmzZrZrPfxxx8bkoxNmzbdsMbM63X48GHDMBzrz35+fkb//v1vuP/sNGjQwAgKCjJOnTqV4zqOnr+53sy+169fP5v2Vq1aGUWLFrVpk2T4+PjY/G64fv26Ua5cOa
N06dI51nj9+nXj2rVrRsOGDY3WrVtb299++21DkvHNN9/YrN+zZ88sP+u7detmSDI+/vhjm3WbNWtmlC1bNkud/+w
2024-03-03 09:32:45 +01:00
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Grouped barplot: opt-in rate (%) per company, one bar series per purchase status\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"categories = df_graph[\"number_compagny\"].unique()\n",
"bar_width = 0.35\n",
"bar_positions = np.arange(len(categories))\n",
"\n",
"# One bar series per purchase status, in order of first appearance in df_graph\n",
"for status in df_graph[\"already_purchased\"].unique():\n",
"    status_rows = df_graph[df_graph['already_purchased'] == status]\n",
"    # Opt-in share (in %) of this group for each company, in x-axis order\n",
"    values = [\n",
"        status_rows.loc[status_rows['number_compagny'] == category, 'opt_in'].iloc[0] * 100\n",
"        for category in categories\n",
"    ]\n",
"\n",
"    if status:\n",
"        legend_text = \"client ayant déjà acheté\"\n",
"    else:\n",
"        legend_text = \"client n'ayant jamais acheté\"\n",
"    ax.bar(bar_positions, values, bar_width, label=legend_text)\n",
"\n",
"    # Shift right by one bar width so the next series sits beside this one\n",
"    bar_positions = bar_positions + bar_width\n",
"\n",
"# Labels, title, ticks centred on each company's group of bars, legend\n",
"ax.set(\n",
"    xlabel='Compagnie',\n",
"    ylabel='Part de consentement (%)',\n",
"    title='Part de consentement au mailing selon les compagnies',\n",
")\n",
"ax.set_xticks(np.arange(len(categories)) + bar_width / 2)\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Render the figure\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 117,
"id": "af4d0d9c-0233-4af4-8fdf-83aa71c3ce9e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Save the consent bar plot to MinIO (S3-compatible storage)\n",
"# NOTE(review): the file name says music while this notebook covers show (spectacle) companies - confirm it is intended\n",
"\n",
"FILE_NAME = \"consent_customers_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"# Save through the Figure object built in the plotting cell: that cell already called plt.show(),\n",
"# so plt.savefig() would act on a fresh empty figure (this cell's stored output was a 640x480 figure with 0 Axes)\n",
"# and the uploaded PNG would be blank.\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" fig.savefig(file_out)"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 170,
2024-03-08 10:30:12 +01:00
"id": "91b743c4-5473-41e1-b97e-cf06904f0fa8",
"metadata": {
"scrolled": true
},
2024-03-03 09:32:45 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-08 10:30:12 +01:00
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>opt_in</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>55.896356</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-08 10:30:12 +01:00
" <td>10</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>50.795672</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-08 10:30:12 +01:00
" <td>11</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>4.856590</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-08 10:30:12 +01:00
" <td>11</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.046125</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-08 10:30:12 +01:00
" <td>12</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>37.098498</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.021608</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>32.457022</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>19.461217</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
2024-03-03 09:32:45 +01:00
" <td>14</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>69.470107</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>26.682793</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased opt_in\n",
2024-03-11 18:43:56 +01:00
"0 10 0.0 55.896356\n",
"1 10 1.0 50.795672\n",
"2 11 0.0 4.856590\n",
"3 11 1.0 0.046125\n",
"4 12 0.0 37.098498\n",
"5 12 1.0 0.021608\n",
"6 13 0.0 32.457022\n",
"7 13 1.0 19.461217\n",
"8 14 0.0 69.470107\n",
"9 14 1.0 26.682793"
2024-03-03 09:32:45 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 170,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-08 10:30:12 +01:00
"# Rebuild the consent chart data on the train set: mean opt_in per company and purchase status\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-08 10:30:12 +01:00
"df_graph = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"opt_in\"].mean().reset_index()\n",
2024-03-10 12:31:28 +01:00
"# Express the mean as a percentage\n",
"df_graph[\"opt_in\"] = 100 * df_graph[\"opt_in\"]\n",
2024-03-08 10:30:12 +01:00
"df_graph"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 163,
2024-03-08 10:30:12 +01:00
"id": "728e0021-4f95-4601-bb01-032db2cf6571",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
2024-03-08 10:30:12 +01:00
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-11 18:43:56 +01:00
"0.43006504592722195\n",
"0.2889608343987336\n"
2024-03-08 10:30:12 +01:00
]
2024-03-03 09:32:45 +01:00
}
],
"source": [
2024-03-08 10:30:12 +01:00
"# pourquoi une telle différence sur la variable opt in ??\n",
"print(train_set_spectacle[\"opt_in\"].mean())\n",
"print(customerplus_clean_spectacle[\"opt_in\"].mean())"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "274b4bc5-277f-476a-8bc1-c1764b1df2de",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8473746548562269\n",
"0.7573747808905485\n"
]
}
],
"source": []
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 164,
2024-03-08 10:30:12 +01:00
"id": "e1d837e1-c445-424b-867a-48b1e790f703",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"genre = homme : \n",
2024-03-11 18:43:56 +01:00
"0.3754292890099192\n",
"0.3103924435775397\n",
2024-03-08 10:30:12 +01:00
"email vérifié : \n",
2024-03-11 18:43:56 +01:00
"0.9966249488521722\n",
"0.936015604285403\n",
2024-03-08 10:30:12 +01:00
"nationalité française : \n",
2024-03-11 18:43:56 +01:00
"0.7882316165225254\n",
"0.7573741156773128\n",
2024-03-08 10:30:12 +01:00
"nbre d'achats : \n",
2024-03-11 18:43:56 +01:00
"1.7069010765735895\n",
"0.9938799646120849\n"
2024-03-08 10:30:12 +01:00
]
}
],
"source": [
"# For the other variables the two tables look similar\n",
"# (for each variable: train_set_spectacle mean first, then customerplus_clean_spectacle mean)\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-08 10:30:12 +01:00
"print(\"genre = homme : \")\n",
"print(train_set_spectacle[\"gender_male\"].mean())\n",
"print(customerplus_clean_spectacle[\"gender_male\"].mean())\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-08 10:30:12 +01:00
"print(\"email vérifié : \")\n",
"print(train_set_spectacle[\"is_email_true\"].mean())\n",
"print(customerplus_clean_spectacle[\"is_email_true\"].mean())\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-08 10:30:12 +01:00
"print(\"nationalité française : \")\n",
"print(train_set_spectacle[\"country_fr\"].mean())\n",
"print(customerplus_clean_spectacle[\"country_fr\"].mean())\n",
"\n",
"# ...except for the purchase count - TODO verify\n",
"print(\"nbre d'achats : \")\n",
"print(train_set_spectacle[\"purchase_count\"].mean())\n",
"print(customerplus_clean_spectacle[\"purchase_count\"].mean())"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 171,
2024-03-10 12:31:28 +01:00
"id": "43deeeb5-8092-42fc-b80b-59d2c58093de",
"metadata": {},
"outputs": [
{
"data": {
2024-03-11 18:43:56 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIiCAYAAAD2CjhuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABp9UlEQVR4nO3deXxMZ///8ffIvoeQTUOiYqu9boqbJNROq6qq1NaNoq2illtbUcTSFlVFtSq6WOquqlvR2lu7IiiqaOwiitolJOf3R3+Zb2eSMEOSSeL1fDzm8TDXOXPO55y5Msnbdc41JsMwDAEAAAAAzIo4ugAAAAAAyG8ISgAAAABghaAEAAAAAFYISgAAAABghaAEAAAAAFYISgAAAABghaAEAAAAAFYISgAAAABghaAEAAAAAFYISigQ4uPjZTKZzA9nZ2c98MAD6tGjh06ePJmj+4qLi9OiRYvuaRtHjhyRyWRSfHx8jtR0v5k6dWqhPHc50bcKstjYWJlMJou26OhoRUdHW7SZTCbFxsbmXWG5xFHHkfF5eeTIkTzfN+zjqD5y+PBhubm5adOmTea2OXPmaNKkSbm2z/DwcHXv3j3Xtn8vNm7cqNjYWP3111+ZljVs2FD9+vXL85qQPxCUUKDMmjVLmzZt0ooVK/Tiiy9q7ty5atCgga5evZpj+7jf/5jNDwhKhdMLL7xg8YdZdjZt2qQXXnghDyoCHMtRfX3gwIFq0qSJ6tata27L7aD07bff6q233sq17d+LjRs3asSIEVkGpZEjR2rq1Kk6cOBA3hcGh3N2dAGAPSpXrqxatWpJkmJiYpSWlqaRI0dq0aJF6ty58z1t+/r16/Lw8MiJMgFk4YEHHtADDzxwx/UeeeSRPKgGcDxH9PX9+/dr0aJFWr58+V1vIy0tTbdu3ZKbm5vNr6lRo8Zd78+RoqKiVL58eb3//vuaMWOGo8tBHmNECQVaxi+Zo0ePSpJGjBihOnXqqFixYvL19VXNmjU1c+ZMGYZh8brw8HC1bt1aCxcuVI0aNeTu7q4RI0bIZDLp6tWrmj17tvkyP+vLgqydOnVKHTp0kI+Pj/z8/PT0008rKSkpy3V/+eUXPfbYYypWrJjc3d1Vo0YNff311zYda0pKit555x1VrFhR7u7uCggIUExMjDZu3Ghe58aNGxo6dKgiIiLk6uqqkiVLqk+fPpn+lyzj+JcvX66aNWvKw8NDFSpU0GeffWax3rVr1zRw4EBFRETI3d1dxYoVU61atTR37ly7jyvjcqA1a9bo5ZdfVvHixRUQEKB27drp1KlTFrXt3btX69atM78H4eHh5uWXLl0y15RxjP369cs0qmgymdS3b1/NmjVL5cuXl4eHh2rVqqXNmzfLMAy9++67ioiIkLe3txo1aqRDhw5lOucrV65U48aN5evrK09PT9WvX1+rVq2yWCfjcrK9e/fqmWeekZ+fn4KCgvTcc8/p4sWLFvXY27ds7c/ZXb5jy6UuGZeJvvvuuxo3bpzCw8Pl4eGh6Oho/f7777p586aGDBmi0NBQ+fn56YknnlBycrLFNubPn6+mTZsqJCREHh4eqlixooYMGZLpPcnq0rusWB+PrX1H+vvnZMCAAQoODpanp6caNmyo7du323zZz7Rp01StWjV5e3vLx8dHFSpU0H/+8x+LdZKSktSzZ0898MADcnV1VUREhEaMGKFbt27dcfu//vqrHn/8cRUtWlTu7u6qXr26Zs+ebbHO2rVrZTKZNHfuXA0bNkyhoaHy9fXVo48+ek//q21Lfz579qxeeuklhYWFyc3NTSVKlFD9+vW1cuXKO27/t99+0zPPPKOgoCC5ubmpVKlS6tq1q1JSUu7q+OfMmaPBgwcrJCRE3t7eatOmjc6cOaPLly/rpZdeUvHixVW8eHH16NFDV65csdhGxs//xx9/rHLlysnNzU2VKlXSvHnzMh1v7969ValSJXl7eyswMFCNGjXSzz//nO
n4Tpw4ofbt28vHx0f+/v7q3Lmztm3bluky6+7du8vb21uHDh1Sy5Yt5e3trbCwMA0YMMDiXGTUaf2za2v/sqWvZmXatGkKDg5WkyZNzG3R0dH6/vvvdfToUYvL3KX/+4wYP368Ro0apYiICLm5uWnNmjW6ceOGBgwYoOrVq8vPz0/FihVT3bp19d1332Xar/XP4L32c1v76p36fWxsrN544w1JUkREhPnY165da16nS5cumjNnji5fvnzHulC4MKKEAi3jj9sSJUpI+vsDvWfPnipVqpQkafPmzXrllVd08uRJvf322xav3bFjh/bv368333xTERER8vLyUtu2bdWoUSPFxMSYLxHw9fXNdv/Xr1/Xo48+qlOnTmnMmDEqV66cvv/+ez399NOZ1l2zZo2aN2+uOnXqaPr06fLz89O8efP09NNP69q1a7f9I+7WrVtq0aKFfv75Z/Xr10+NGjXSrVu3tHnzZh07dkz16tWTYRhq27atVq1apaFDh6pBgwbavXu3hg8frk2bNmnTpk0W//u3a9cuDRgwQEOGDFFQUJA+/fRTPf/88ypbtqwaNmwoSerfv7+++OILjRo1SjVq1NDVq1f166+/6ty5c3d9XC+88IJatWqlOXPm6Pjx43rjjTf07LPPavXq1ZL+vjyjffv28vPz09SpUyXJXPe1a9cUFRWlEydO6D//+Y+qVq2qvXv36u2339aePXu0cuVKiz/ElyxZop07d2rs2LEymUwaPHiwWrVqpW7duumPP/7QlClTdPHiRfXv319PPvmkEhISzK//8ssv1bVrVz3++OOaPXu2XFxc9PHHH6tZs2b64Ycf1LhxY4vjevLJJ/X000/r+eef1549ezR06FBJMofPTZs22dW3JPv687366KOPVLVqVX300Uf666+/NGDAALVp00Z16tSRi4uLPvvsMx09elQDBw7UCy+8oMWLF5tfe/DgQbVs2VL9+vWTl5eXfvvtN40bN05bt241v6854U59R5J69Oih+fPna9CgQWrUqJH27dunJ554QpcuXbrj9ufNm6fevXvrlVde0XvvvaciRYro0KFD2rdvn3mdpKQk1a5dW0WKFNHbb7+tBx98UJs2bdKoUaN05MgRzZo1K9vtHzhwQPXq1VNgYKAmT56sgIAAffnll+revbvOnDmjQYMGWaz/n//8R/Xr19enn36qS5cuafDgwWrTpo32798vJycnu86drf25S5cu2rFjh0aPHq1y5crpr7/+0o4dOyx+5rOya9cu/fvf/1bx4sX1zjvvKDIyUqdPn9bixYuVmpoqNze3uzr+mJgYxcfH68iRIxo4cKCeeeYZOTs7q1q1apo7d6527typ//znP/Lx8dHkyZMtXr948WKtWbNG77zzjry8vDR16lTz69u3by9JOn/+vCRp+PDhCg4O1pUrV/Ttt98qOjpaq1atMv9nxtWrVxUTE6Pz589r3LhxKlu2rJYvX57lZ70k3bx5U4899pief/55DRgwQD/99JNGjhwpPz+/2/7s2tq/bOmr2fn+++/VsGFDFSnyf/9XPnXqVL300ks6fPiwvv322yxfN3nyZJUrV07vvfeefH19FRkZqZSUFJ0/f14DBw5UyZIllZqaqpUrV6pdu3aaNWuWunbtesd67raf29JXben3L7zwgs6fP68PP/xQCxcuVEhIiCSpUqVK5u1ER0dr8ODBWrt2rdq0aXPHY0IhYgAFwKxZswxJxubNm42bN28aly9fNpYsWWKUKFHC8PHxMZKSkjK9Ji0tzbh586bxzjvvGAEBAUZ6erp5WenSpQ0nJyfjwIEDmV7n5eVldOvWzaa6pk2bZkgyvvvuO4v2F1980ZBkzJo1y9xWoUIFo0aNGsbNmzct1m3durUREhJipKWlZbufzz//3JBkfPLJJ9mus3z5ckOSMX78eIv2+fPnG5KMGTNmmNtKly5tuLu7G0ePHjW3Xb9+3ShWrJjRs2dPc1vlypWNtm3bZrtPe4
4r4z3s3bu3xXrjx483JBmnT582tz300ENGVFRUpn2NGTPGKFKkiLFt2zaL9v/+97+GJGPp0qXmNklGcHCwceXKFXP
2024-03-10 12:31:28 +01:00
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# with the generic function\n",
"multiple_barplot(df_graph, x=\"number_company\", y=\"opt_in\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de consentement (%)\", \n",
" title = \"Part de consentement au mailing selon les compagnies (train set)\")"
]
},
2024-03-11 18:43:56 +01:00
{
"cell_type": "code",
"execution_count": 172,
"id": "360047fc-70a4-4876-b0f1-c0af5cc93e17",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# save in the s3\n",
"\n",
"FILE_NAME = \"consent_customers_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "5fcff5cb-923b-44d7-b345-0bee89d30ea2",
"metadata": {},
"source": [
"#### Étude du genre"
]
},
2024-03-10 12:31:28 +01:00
{
"cell_type": "code",
"execution_count": 79,
"id": "32960530-cb46-4eeb-a6d2-1dcf5fb640d8",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_compagny</th>\n",
2024-03-08 10:30:12 +01:00
" <th>gender_male</th>\n",
2024-03-10 12:31:28 +01:00
" <th>gender_female</th>\n",
2024-03-08 10:30:12 +01:00
" <th>gender_other</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.181580</td>\n",
" <td>0.343837</td>\n",
" <td>0.474583</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>0.179520</td>\n",
" <td>0.314443</td>\n",
" <td>0.506037</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
" <td>0.346380</td>\n",
" <td>0.454036</td>\n",
" <td>0.199584</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>13</td>\n",
" <td>0.318108</td>\n",
" <td>0.503092</td>\n",
" <td>0.178800</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.331954</td>\n",
" <td>0.316181</td>\n",
" <td>0.351865</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_compagny gender_male gender_female gender_other\n",
"0 10 0.181580 0.343837 0.474583\n",
"1 11 0.179520 0.314443 0.506037\n",
"2 12 0.346380 0.454036 0.199584\n",
"3 13 0.318108 0.503092 0.178800\n",
"4 14 0.331954 0.316181 0.351865"
2024-03-08 10:30:12 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 79,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# genre \n",
"\n",
"company_genders = customerplus_clean_spectacle.groupby(\"number_compagny\")[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
"company_genders"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 80,
"id": "1b4a49d7-7bfe-4e80-aa7e-c9c6d4bc46e2",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 12:31:28 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHFCAYAAAAOmtghAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYJ0lEQVR4nO3dd1RU1/428GdoQxNQkaJS7QUVQSMYNUQFS9TYO9Z7LTEWNCrXKFgxJlFMwZKIaEIMMZZEQ1RiISqWiGiM2BuoIIoKVpCZ/f7hy/wyDugcGBgcn89as5azZ59zvmdP4fFUmRBCgIiIiMhAGOm7ACIiIiJdYrghIiIig8JwQ0RERAaF4YaIiIgMCsMNERERGRSGGyIiIjIoDDdERERkUBhuiIiIyKAw3BAREZFBYbjRgyNHjqBnz55wdXWFXC6Ho6Mj/Pz8MHXqVH2XJplMJkN4eLi+y9DwYl379u2DTCbDvn37ynS5UVFRiImJKdNlAOW3Pv929epVyGQyfPbZZ+W2THo9xMTEQCaT4erVq2W6HH187vUlKSkJ4eHhuH//fpkup7zeu/LGcFPOfvvtN/j7+yM3NxdLlizBrl27sHz5crRu3RpxcXH6Ls9gNW/eHIcOHULz5s3LdDnlFW6IKpKuXbvi0KFDcHZ21ncpBiMpKQlz584t83BjqEz0XcCbZsmSJfDw8MDOnTthYvJ/wz9gwAAsWbJEj5UZNhsbG7Rq1UrfZRAZpGrVqqFatWr6LoNIhVtuyll2djbs7e3Vgk0hIyPNtyMuLg5+fn6wsrKCtbU1goKCkJKSonr9wIEDMDU1xbRp09SmK9zUuGbNGlXbhQsXMGjQIDg4OEAul6NBgwb4+uuvtao7NzcX//nPf1C1alVYW1ujU6dOOH/+fJF9tVmOUqnEggULUK9ePVhYWMDOzg5NmjTB8uXLX1nL/fv3MXXqVHh6ekIul8PBwQFdunTB2bNni52muM3Zx44dQ/fu3VGlShWYm5vD29sbP/30k1qfwrHcu3cvxo0bB3t7e1StWhW9evXCzZs3Vf3c3d1x+vRpJCYmQiaTQSaTwd3dvdTre/bsWXTq1AmWlpawt7fH2LFj8eDBgyL7/vHHH2jfvj1sbGxgaWmJ1q1bY/fu3a9cBiBtXJcuXQoPDw9YW1vDz88Phw8fVnv92LFjGDBgANzd3WFhYQF3d3cMHDgQ165d05jX4cOH0bp1a5ibm6N69eoIDQ3FN998o7GpvLhdoO7u7hg+fLhaW2ZmJsaMGYOaNWvCzMwMHh4emDt3LgoKCrQaix9++AF+fn6wtraGtbU1mjVrpvZdAoDo6Gg0bdoU5ubmqFKlCnr27IkzZ86o9Rk+fDisra1x9uxZBAUFwcrKCs7Ozli8eLFq3d9++21YWVmhbt26WLdundr0hZ+9hIQEjBgxAlWqVIGVlRW6deuGy5cvq/VNSEhAjx49ULNmTZibm6N27doYM2YM7ty5o7F+v/zyC5o0aQK5XA5PT08sX74c4eHhkMlkav1kMhkmTJiA7777Dg0aNIClpSWaNm2K7du3F1nni7s2SvN5LI/PvTbfy8JxSUlJQa9evWBjYwNbW1sMGTIEt2/f1pjnq36zCx05cgTdunVD1apVYW5ujlq1amHy5MmqZX700UcAAA8PD9XvSeHvV1xcHAIDA+Hs7AwLCws0aNAAM2fOxKNHjyQt52VK895VCILK1ejRowUA8eGHH4rDhw+L/Pz8YvsuXLhQyGQyMXLkSLF9+3axefNm4efnJ6ysrMTp06dV/RYvXiwAiF9++UUIIcQ///wjLC0txZAhQ1R9Tp8+LWxtbYWXl5dYv3692LVrl5g6daowMjIS4eHhL61ZqVSKgIAAIZfLxcKFC8WuXbtEWFiY8PT0FABEWFiY5OVEREQIY2NjERYWJnbv3i127NghIiMjX1lLbm6uaNSokbCyshLz5s0TO3fuFJs2bRKTJk0Se/bsUfV7sa
69e/cKAGLv3r2qtj179ggzMzPRpk0bERcXJ3bs2CGGDx8uAIi1a9eq+q1du1YAEJ6enuLDDz8UO3fuFN9++62oXLmyCAgIUPU7fvy48PT0FN7e3uLQoUPi0KFD4vjx46Va38zMTOHg4CBq1Kgh1q5dK+Lj48XgwYOFq6urxvp89913QiaTiffff19s3rxZbNu2Tbz33nvC2NhY/PHHH6Ue1ytXrggAwt3dXXTq1Els3bpVbN26VXh5eYnKlSuL+/fvq+a3ceNGMWfOHLFlyxaRmJgofvzxR9GuXTtRrVo1cfv2bVW/06dPC0tLS9GwYUOxYcMG8csvv4igoCDV+l25cqXY97SQm5ubGDZsmOp5RkaGcHFxEW5ubmLVqlXijz/+EPPnzxdyuVwMHz78peMghBCzZ88WAESvXr3Exo0bxa5du8TSpUvF7NmzVX0WLVokAIiBAweK3377Taxfv154enoKW1tbcf78eVW/YcOGCTMzM9GgQQOxfPlykZCQIEaMGCEAiNDQUFG3bl2xZs0asXPnTvHee+8JAOLYsWOq6Qs/ey4uLmLkyJHi999/F6tXrxYODg7CxcVF3Lt3T9V3xYoVIiIiQvz6668iMTFRrFu3TjRt2lTUq1dP7Xfm999/F0ZGRuKdd94RW7ZsERs3bhRvvfWWcHd3Fy/+SSh8v1u2bCl++uknER8fL9555x1hYmIiLl26pFHnv9+v0nwey+tzr833MiwsTAAQbm5u4qOPPhI7d+4US5cuFVZWVsLb21ttbLX9zd6xY4cwNTUVTZo0ETExMWLPnj0iOjpaDBgwQAghRHp6uvjwww8FALF582bV70lOTo4QQoj58+eLZcuWid9++03s27dPrFy5Unh4eKj9HmmznLJ47yoKhptydufOHfH2228LAAKAMDU1Ff7+/iIiIkI8ePBA1S8tLU2YmJiIDz/8UG36Bw8eCCcnJ9GvXz9Vm1KpFF26dBF2dnbin3/+EQ0bNhT169cXDx8+VPUJCgoSNWvWVH05Ck2YMEGYm5uLu3fvFlvz77//LgCI5cuXq7UvXLhQ4w+Otst57733RLNmzV4xWprmzZsnAIiEhISX9tMm3NSvX194e3uLZ8+eqU373nvvCWdnZ6FQKIQQ//flHz9+vFq/JUuWCAAiIyND1daoUSPRrl07jXpKur4zZswQMplMnDhxQq29Y8eOauvz6NEjUaVKFdGtWze1fgqFQjRt2lS0bNnypcvRZlwLw42Xl5coKChQtR89elQAEBs2bCh22oKCAvHw4UNhZWWl9jnq37+/sLCwEJmZmWp969evX+JwM2bMGGFtbS2uXbum1u+zzz4TANT+yLzo8uXLwtjYWAwePLjYPvfu3RMWFhaiS5cuau1paWlCLpeLQYMGqdqGDRsmAIhNmzap2p49eyaqVasmAKjCrxBCZGdnC2NjYxESEqJqK/zs9ezZU21ZBw8eFADEggULiqxRqVSKZ8+eiWvXrqn9x0cIIVq0aCFcXFxEXl6equ3BgweiatWqRYYbR0dHkZubq2rLzMwURkZGIiIiQqPOwvertJ/H8vrca/O9LAw3U6ZMUWuPjY0VAMT3338vhJD2m12rVi1Rq1Yt8eTJk2KX++mnn2p8B4pS+F4nJiYKAOLkyZOSlqPr966i4G6pcla1alXs378ff/31FxYvXowePXrg/PnzCA0NhZeXl2oT8s6dO1FQUIDg4GAUFBSoHubm5mjXrp3a7hWZTIb169ejUqVK8PX1xZUrV/DTTz/BysoKAPD06VPs3r0bPXv2hKWlpdr8unTpgqdPn2rsVvi3vXv3AgAGDx6s1j5o0CC151KW07JlS5w8eRLjx4/Hzp07kZubq9X4/f7776hbty46dOigVf/iXLx4EWfPnlWt04u1ZmRk4Ny5c2rTdO/eXe15kyZNAKDIXS0vKun67t27F40aNULTpk3V2l8c+6SkJNy9exfDhg1TWxelUolOnTrhr7/+KnKTdSEp49
q1a1cYGxurnhc1Dg8fPsSMGTNQu3ZtmJiYwMTEBNbW1nj06JHarpu9e/eiffv2cHR0VLUZGxujf//+r6yjONu3b0d
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_male\"], label = \"Homme\")\n",
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_female\"], \n",
" bottom = company_genders[\"gender_male\"], label = \"Femme\")\n",
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients de chaque sexe\")\n",
"plt.title(\"Sexe des clients de chaque compagnie de spectacle\")\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 174,
2024-03-10 12:31:28 +01:00
"id": "c7348c95-e506-4002-90d9-d3b6768af985",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
2024-03-08 10:30:12 +01:00
" <th>gender_male</th>\n",
2024-03-10 12:31:28 +01:00
" <th>gender_female</th>\n",
2024-03-08 10:30:12 +01:00
" <th>gender_other</th>\n",
2024-03-10 12:31:28 +01:00
" <th>share_of_women</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.140862</td>\n",
" <td>0.288775</td>\n",
" <td>0.570363</td>\n",
" <td>67.213639</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.284532</td>\n",
" <td>0.714831</td>\n",
" <td>0.000637</td>\n",
" <td>71.528662</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.289900</td>\n",
" <td>0.512669</td>\n",
" <td>0.197431</td>\n",
" <td>63.878535</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.321033</td>\n",
" <td>0.609779</td>\n",
" <td>0.069188</td>\n",
" <td>65.510406</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.357546</td>\n",
" <td>0.470654</td>\n",
" <td>0.171799</td>\n",
" <td>56.828519</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.396824</td>\n",
" <td>0.494058</td>\n",
" <td>0.109118</td>\n",
" <td>55.457191</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>6</th>\n",
" <td>13</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.363198</td>\n",
" <td>0.492956</td>\n",
" <td>0.143846</td>\n",
" <td>57.577983</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.379703</td>\n",
" <td>0.516605</td>\n",
" <td>0.103693</td>\n",
" <td>57.637000</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>8</th>\n",
" <td>14</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.447676</td>\n",
" <td>0.443646</td>\n",
" <td>0.108678</td>\n",
" <td>49.773906</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>9</th>\n",
2024-03-08 10:30:12 +01:00
" <td>14</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.487695</td>\n",
" <td>0.471498</td>\n",
" <td>0.040808</td>\n",
" <td>49.155702</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased gender_male gender_female gender_other \\\n",
2024-03-11 18:43:56 +01:00
"0 10 0.0 0.140862 0.288775 0.570363 \n",
"1 10 1.0 0.284532 0.714831 0.000637 \n",
"2 11 0.0 0.289900 0.512669 0.197431 \n",
"3 11 1.0 0.321033 0.609779 0.069188 \n",
"4 12 0.0 0.357546 0.470654 0.171799 \n",
"5 12 1.0 0.396824 0.494058 0.109118 \n",
"6 13 0.0 0.363198 0.492956 0.143846 \n",
"7 13 1.0 0.379703 0.516605 0.103693 \n",
"8 14 0.0 0.447676 0.443646 0.108678 \n",
"9 14 1.0 0.487695 0.471498 0.040808 \n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
" share_of_women \n",
2024-03-11 18:43:56 +01:00
"0 67.213639 \n",
"1 71.528662 \n",
"2 63.878535 \n",
"3 65.510406 \n",
"4 56.828519 \n",
"5 55.457191 \n",
"6 57.577983 \n",
"7 57.637000 \n",
"8 49.773906 \n",
"9 49.155702 "
2024-03-08 10:30:12 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 174,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"company_genders = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
"company_genders[\"share_of_women\"] = 100 * (company_genders[\"gender_female\"]/(1-company_genders[\"gender_other\"]))\n",
"company_genders"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 175,
2024-03-10 12:31:28 +01:00
"id": "b36e5a8f-45dc-4b74-8137-80b7e916aa84",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-11 18:43:56 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIiCAYAAAD2CjhuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABkHUlEQVR4nO3dfXzO9f////thZ3bOxs40NhoRQ8lZsUnO6USlotC5UDlLeVdfE+aspKh0IpSkepO3EJaTlZyfDElCc1JZE3LO2J6/P/rt+DheG46DbcfM7Xq5HJeL1/N19ni9jueO7e71ej0PmzHGCAAAAABgV8rdBQAAAABAcUNQAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFgQlAAAAALAgKAEAAACABUEJAAAAACwISgAAAABgQVDCNWXKlCmy2Wz2l6enp6677jo9+uij+uOPPwp0X8nJyZo9e/YVbWP37t2y2WyaMmVKgdSUu8127dopJCRENptNffr0KbBtw1FhvH/OSkpKks1mK/L9wjXu7CNWy5Ytk81m07Jly9xdSqFbsWKFkpKS9M8//xTqfnJ/5+zevbtAt/vDDz/Ix8dHe/bssbe9++67hdqPbDabkpKSCm37V2L+/Pn51nb27FlVqVJF48aNK/KaUDIQlHBNmjx5slauXKmUlBQ9+eST+vzzz9WkSROdOHGiwPZREEGpMPTt21erV6/Wxx9/rJUrV6pv377uLgm4ZkVGRmrlypVq166du0u5pqxYsUJDhgwp9KBUGIwx6tOnj5588klVqlTJ3l7YQWnlypV64oknCm37V2L+/PkaMmRInnYvLy/9v//3//Taa6/p4MGDbqgMVzuCEq5JNWvWVMOGDdWsWTMNHjxYAwcOVHp6eoEEm1OnTl15gYXop59+Uv369XX33XerYcOGDr9oARQtHx8fNWzYUOXLl3d3KbhKLFiwQBs2bNCzzz572ds4e/aszp0759I6DRs21HXXXXfZ+3SXhx56SDabTe+//767S8FViKAE6N9fAJLstzEMGTJEDRo0UEhIiIKCgnTTTTdp0qRJMsY4rBcTE6P27dtr1qxZqlu3rkqXLq0hQ4bIZrPpxIkTmjp1qv02v8TExIvW8Oeff6pTp04KDAxUcHCwHnjgAWVkZOS77Lp163TnnXcqJCREpUuXVt26dfXll19edPu5t9Xs3LlT3377rb2u3FtCjh49qgEDBig2Nlbe3t6qUKGC+vTpk+cqm81mU+/evTV58mRVq1ZNvr6+qlevnlatWiVjjMaMGaPY2FgFBATo9ttv186dOx3WT0xMVM2aNbVy5Uo1btxYvr6+iomJ0eTJkyVJ8+bN00033SQ/Pz/VqlVLCxYsyHMsO3bsUOfOnRUWFiYfHx9Vr15d77zzjsMyOTk5GjZsmL3GMmXKKD4+Xm+99dZFz5Oz6zlTw4UsX75czZs3V2BgoPz8/NS4cWPNmzfPYZncW3aWLl2qZ555RuXKlVNoaKg6duyoP//806n95OeLL75Qo0aN5O/vr4CAALVq1UobN250WOa3337Tgw8+qKioKPn4+Cg8PFzNmzdXWlraJbe/evVqdejQQaGhoSpdurSqVKmS5/ZOV45/yZIlevLJJxUaGqqgoCB17dpVJ06cUEZGhjp16qQyZcooMjJSAwYM0NmzZ+3r597SNnr0aA0fPlwVK1ZU6dKlVa9ePS1evNhhXzt37tSjjz6quLg4+fn5qUKFCurQoYO2bNmS5/i2bt2qli1bys/PT+XLl1evXr00b968PLes5fbztWvXqkmTJvLz81PlypU1cuRI5eTk5KnTeiWgMPu4JP3yyy9q3bq1/Pz8VK5cOfXo0UPHjh3Ld9nvvvtOzZs3V1BQkPz8/HTrrbfmOYf5caa+3FtEN27cqI4dOyooKEjBwcF6+OGHdeDAgTzbdKb/Shfvh0lJSXrhhRckSbGxsfbPwtz374svvlDLli0VGRkpX1
9fVa9eXS+99FK+dxw4098L8pxK0nvvvadbbrlF1apVs7fFxMRo69atSk1NtR9PTEyMpP/77P/000/Vv39/VahQQT4+Ptq5c6cOHDignj17qkaNGgoICFBYWJhuv/12/fDDD3n2a7317ko/o5z9nLnUe969e3f7z8b5t9bn/m7z9vbWAw88oA8++CDP73DgkgxwDZk8ebKRZNauXevQ/tZbbxlJ5oMPPjDGGNO9e3czadIkk5KSYlJSUszQoUONr6+vGTJkiMN6lSpVMpGRkaZy5crm448/NkuXLjVr1qwxK1euNL6+vqZt27Zm5cqVZuXKlWbr1q0XrOvkyZOmevXqJjg42IwfP94sXLjQPPfcc6ZixYpGkpk8ebJ92SVLlhhvb2/TpEkT88UXX5gFCxaY7t2751nO6siRI2blypUmIiLC3Hrrrfa6Tp8+bU6cOGHq1KljypUrZ8aOHWu+++4789Zbb5ng4GBz++23m5ycHPt2JJlKlSqZxo0bm1mzZpmvv/7aVK1a1YSEhJi+ffuau+66y8ydO9d89tlnJjw83MTHxzusn5CQYEJDQ021atXMpEmTzMKFC0379u2NJDNkyBBTq1Yt8/nnn5v58+ebhg0bGh8fH/PHH3/Y19+6dasJDg42tWrVMp988olZtGiR6d+/vylVqpRJSkqyLzdixAjj4eFhBg8ebBYvXmwWLFhgxo0b57BMfpxZz9ka0tPT87wvy5YtM15eXubmm282X3zxhZk9e7Zp2bKlsdlsZsaMGfblcvtq5cqVzbPPPmsWLlxoPvroI1O2bFnTrFmzix6DMcYMHjzYWD/ihw8fbmw2m3nsscfM3LlzzaxZs0yjRo2Mv7+/Q/+sVq2auf76682nn35qUlNTzcyZM03//v3N0qVLL7rPBQsWGC8vLxMfH2+mTJlilixZYj7++GPz4IMPXvbxx8bGmv79+5tFixaZUaNGGQ8PD/PQQw+Zm266yQwbNsykpKSYF1980Ugyb7zxRp5zHx0dbW677TYzc+ZM89VXX5lbbrnFeHl5mRUrVtiXTU1NNf379zf//e9/TWpqqvn666/N3XffbXx9fc0vv/xiX+7PP/80oaGhpmLFimbKlClm/vz55pFHHjExMTFGksP5ye3ncXFxZuLEiSYlJcX07NnTSDJTp07NU+f5faSw+3hGRoYJCwszFSpUMJMnTzbz5883Xbp0sX/enH8cn376qbHZbObuu+82s2bNMt98841p37698fDwMN99991F9+NMfbn9tFKlSuaFF14wCxcuNGPHjjX+/v6mbt26Jisry76ss/33Uv1w37595tlnnzWSzKxZs+yfhUeOHDHGGDN06FDz5ptvmnnz5plly5aZiRMnmtjY2Dw/d87099x+nJ6eXiDn9MyZM8bX19cMHDjQoX3Dhg2mcuXKpm7duvbj2bBhgzHGmKVLlxpJpkKFCua+++4zc+bMMXPnzjUHDx40v/zyi3nmmWfMjBkzzLJly8zcuXPN448/bkqVKpXn512SGTx4cJ5ju9zPKGc+Z5x5z3fu3Gnuu+8+I8l+7Lm/23J98cUXRpLZvHnzJesCzkdQwjUl94N91apV5uzZs+bYsWNm7ty5pnz58iYwMNBkZGTkWSc7O9ucPXvWvPbaayY0NNThj/5KlSoZDw8Ps3379jzr+fv7m27dujlV13vvvWckmf/9738O7U8++WSeP6JuuOEGU7duXXP27FmHZdu3b28iIyNNdnb2RfdVqVIl065dO4e2ESNGmFKlSuUJkP/973+NJDN//nx7myQTERFhjh8/bm+bPXu2kWTq1KnjcH7GjRuX55dTQkKCkWTWrVtnbzt48KDx8PAwvr6+DqEoLS3NSDJvv/22va1Vq1bmuuuus/9Rk6t3796mdOnS5tChQ/bzUadOnYuei/w4s56zNeT3R3DDhg1NWFiYOXbsmL3t3LlzpmbNmua6666zn7/cvtqzZ0+HfYwePdpIMvv3779ojdagtH
fvXuPp6WmeffZZh+WOHTtmIiIiTKdOnYwxxvz9999Gkhk3btxFt5+fKlWqmCpVqphTp05dcBlXj99a7913320kmbF
2024-03-08 10:30:12 +01:00
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# Share of women per company and purchase status (train set), drawn with the generic multiple_barplot helper\n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"multiple_barplot(company_genders, x=\"number_company\", y=\"share_of_women\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de femmes (%)\", \n",
" title = \"Part de femmes selon les compagnies de spectacle (train set)\")"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 176,
"id": "17992ceb-b68b-4035-8d48-279b645bc425",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# save in the s3\n",
"\n",
"FILE_NAME = \"gender_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "9504e6b6-d97c-4aa9-a56a-f9f97264be05",
"metadata": {},
"source": [
"#### Étude du pays d'origine"
]
},
{
"cell_type": "code",
"execution_count": 177,
2024-03-10 12:31:28 +01:00
"id": "ed6374e5-f36c-4f8e-9dba-602715b726f1",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
2024-03-10 12:31:28 +01:00
" <th>country_fr</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.996136</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.994838</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.002119</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.831794</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.993978</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_compagny country_fr\n",
"0 10 0.996136\n",
"1 11 0.994838\n",
"2 12 0.002119\n",
2024-03-11 18:43:56 +01:00
"3 13 0.831794\n",
2024-03-10 12:31:28 +01:00
"4 14 0.993978"
2024-03-08 10:30:12 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 177,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# pays d'origine (France VS reste du monde)\n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"company_country_fr = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"country_fr\"].mean().reset_index()\n",
"company_country_fr"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 178,
2024-03-10 12:31:28 +01:00
"id": "8d95cdd9-2ab3-4c9a-8442-bb9b98e0dd18",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 12:31:28 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHGCAYAAACIDqqPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABINElEQVR4nO3deVxU9f7H8fcAAoKAggliCph7LrmUe7jhkqm3zCXNLe1qWl63NDOXvC7pLTMrtXJBy7pmmql5UzIzS819yy1LwQUXRMUVFc7vDx/Mz3FAZ2Bw9PR6Ph7zeDjf8z3nfM53zgxvzzJjMQzDEAAAgEl4uLsAAAAAVyLcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcIFddvHhRZcqUUZs2bZSenu7ucgAAfwOEm1vExsbKYrHI19dX8fHxdtPr1aun8uXLZ2vZX3zxhSZPnpzpNIvFolGjRmVrua6WMQaHDx+2tnXt2lWRkZE2/caNG6fFixffdXkvvfSSQkND9fnnn8vDI/u7W2RkpLp27Zrt+XPL7XUdPnxYFotFsbGxubreO+1PrnSvtud2FotFr7zyyj1dJ+5/P/30kywWi3766adcXY+79nt32LNnj0aNGmXzmZ8b7tVrl4Fwk4nU1FS9+eabLl3mnf4YrV+/Xj169HDp+lxp+PDh+uabb2zaHAk3H330kXbu3Klvv/1WPj4+uVjh/aNw4cJav369mjdvnqvruVfhBrifVKlSRevXr1eVKlXcXYpp7NmzR2+99Vauh5t7jXCTiaZNm+qLL77Qjh077sn6atSooYcffvierCs7HnnkEVWuXNnp+fr06aPff/9d+fPnd31R9ykfHx/VqFFDDz30kLtLAUwnMDBQNWrUUGBgoLtLwX2OcJOJwYMHKyQkREOGDLlr348++khPPvmkChUqJH9/f1WoUEETJ07U9evXrX3q1aun7777TvHx8bJYLNZHhsxOS+3evVutWrVSgQIF5Ovrq8cee0xz5syx6ZNxmO/LL7/UsGHDFB4ersDAQDVq1Ej79++36RsXF6dWrVrp4Ycflq+vr0qUKKGePXsqKSnprtt4+2kpi8WiS5cuac6cOdZtqVevnnX6iRMn1LNnTz388MPy9vZWVFSU3nrrLd24ceOu67p+/boGDx6ssLAw+fn5qU6dOtq4cWOmfR1dz7Rp01SpUiXly5dPAQEBKlOmjN5444271pKamqrRo0erbNmy8vX1VUhIiOrXr69169ZlOU9Wh7P/+OMPdejQQYUKFZKPj4/Kli2rjz76yKaPo6/n3fan7G7v8ePH1bZtWwUEBCgoKEjt2rXTiRMnMu27efNmtWzZUsHBwfL19VXlypX11Vdf3XUdknPj+tlnn6ls2bLy8/NTpUqVtGzZMpvpBw8eVLdu3VSyZEn5+fmpSJEiatGihXbt2mW3rH379qlp06by8/NTwYIF1atXLy1dutTuUHlWp0Dr1atns59LUkpKigYNGqSoqCh5e3urSJEi6tevny5duuTQWHz//fdq2LChgoKC5Ofnp7Jly2r8+PE2fZYsWaKaNWvKz89PAQEBiomJ0fr16236jBo1ShaLRTt37lSbNm0UFBSk4OBgDRgwQDdu3ND+/fvVtGlTBQQEKDIyUhMnTrSZP2Pf+/zzzzVgwACFhYUpb968io6O1rZt22z6bt68We3bt1dkZKTy5s2ryMhIPf/885meyv/ll19Us2ZN+fr6qkiRIho+fLhmzJhhd+o7MjJSTz/9tL7//ntVqVJFefPmVZkyZTRr1qxM67z91EZO9sd7td/f7X2ZcUlAXFycunXrpuDgYPn7+6tFixb666+/7Jb3ww8/qGHDhgoMDJSfn59q166tVatW2fXbt2+fnn/+eYWGhsrHx0fFihVT586dlZqaqtjYWLVp00aSVL9+fetnScbnlzN/N+60njvJyZjeiVeOl2
BCAQEBevPNN/Wvf/1LP/74oxo0aJBl3z///FMdOnSwfrjt2LFDY8eO1b59+6xvzKlTp+qf//yn/vzzT7vTO5nZv3+/atWqpUKFCmnKlCkKCQnR559/rq5du+rkyZMaPHiwTf833nhDtWvX1owZM5SSkqIhQ4aoRYsW2rt3rzw9Pa111qxZUz169FBQUJAOHz6sSZMmqU6dOtq1a5fy5Mnj8PisX79eDRo0UP369TV8+HBJsv5P6sSJE3riiSfk4eGhESNG6JFHHtH69es1ZswYHT58WLNnz77jsl966SXNnTtXgwYNUkxMjHbv3q1nn31WFy5csOnn6Hr++9//qnfv3nr11Vf1zjvvyMPDQwcPHtSePXvuWMeNGzfUrFkzrV27Vv369VODBg1048YNbdiwQQkJCapVq5bD47Vnzx7VqlVLxYoV07vvvquwsDCtWLFCffv2VVJSkkaOHGnT/26v5532p+xu75UrV9SoUSMdP35c48ePV6lSpfTdd9+pXbt2dn1Xr16tpk2bqnr16po+fbqCgoL03//+V+3atdPly5fveG2UM+P63XffadOmTRo9erTy5cuniRMn6plnntH+/ftVvHhxSTf/MIWEhOjtt9/WQw89pOTkZM2ZM0fVq1fXtm3bVLp0aUnSyZMnFR0drTx58mjq1KkKDQ3VvHnzcnRdz+XLlxUdHa2jR4/qjTfeUMWKFfX7779rxIgR2rVrl3744Qeb0Hm7mTNn6qWXXlJ0dLSmT5+uQoUK6cCBA9q9e7e1zxdffKGOHTuqcePG+vLLL5WamqqJEyeqXr16WrVqlerUqWOzzLZt2+qFF15Qz549FRcXZ/2P1g8//KDevXtr0KBB+uKLLzRkyBCVKFFCzz77rM38b7zxhqpUqaIZM2bo/PnzGjVqlOrVq6dt27ZZx/zw4cMqXbq02rdvr+DgYCUmJmratGl6/PHHtWfPHhUsWFCStHPnTsXExKhUqVKaM2eO/Pz8NH36dH3++eeZjseOHTs0cOBAvf766woNDdWMGTPUvXt3lShRQk8++WSW45iT/fFe7ffOvC+7d++umJgYffHFFzpy5IjefPNN1atXTzt37rQeBf/888/VuXNntWrVSnPmzFGePHn08ccfq0mTJlqxYoUaNmxoHdM6deqoYMGCGj16tEqWLKnExEQtWbJE165dU/PmzTVu3Di98cYb+uijj6yn+x555BFJjv/duNt6srosISdjelcGrGbPnm1IMjZt2mSkpqYaxYsXN6pVq2akp6cbhmEY0dHRxqOPPprl/Glpacb169eNuXPnGp6enkZycrJ1WvPmzY2IiIhM55NkjBw50vq8ffv2ho+Pj5GQkGDTr1mzZoafn59x7tw5wzAMY/Xq1YYk46mnnrLp99VXXxmSjPXr12e6vvT0dOP69etGfHy8Icn49ttv7cbg0KFD1rYuXbrY1e7v72906dLFbtk9e/Y08uXLZ8THx9u0v/POO4Yk4/fff8+0JsMwjL179xqSjP79+9u0z5s3z5Bksz5H1/PKK68Y+fPnz3KdWZk7d64hyfj000/v2C8iIsKmrkOHDhmSjNmzZ1vbmjRpYjz88MPG+fPnbeZ95ZVXDF9fX+t+4szrmdX+lN3tnTZtmt2+YBiG8dJLL9ltT5kyZYzKlSsb169ft+n79NNPG4ULFzbS0tKyXI+j4yrJCA0NNVJSUqxtJ06cMDw8PIzx48dnOd+NGzeMa9euGSVLlrTZj4YMGWJYLBZj+/btNv1jYmIMScbq1autbbe/phmio6ON6Oho6/Px48cbHh4exqZNm2z6ff3114YkY/ny5VnWeeHCBSMwMNCoU6eO9fPldmlpaUZ4eLhRoUIFmzG9cOGCUahQIaNWrVrWtpEjRxqSjHfffddmGY899pghyVi0aJG17fr168ZDDz1kPPvss9a2jH2vSpUqNvUcPnzYyJMnj9GjR48st+XGjRvGxYsXDX9/f+P999+3trdp08bw9/c3Tp
8+bbNN5cqVs/uMiYiIMHx9fW3ez1euXDGCg4ONnj172tV56+uVk/3xXu33jrwvMz57n3nmGZv2X3/91ZBkjBkzxjA
2024-03-08 10:30:12 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
2024-03-10 12:31:28 +01:00
"plt.bar(company_country_fr[\"number_compagny\"], company_country_fr[\"country_fr\"])\n",
2024-03-08 10:30:12 +01:00
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
2024-03-10 12:31:28 +01:00
"plt.ylabel(\"Part de clients français\")\n",
"plt.title(\"Nationalité des clients de chaque compagnie de spectacle\")\n",
2024-03-08 10:30:12 +01:00
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 179,
2024-03-10 12:31:28 +01:00
"id": "b459f81f-6d30-44fa-ad65-e85acbf12fd2",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
2024-03-10 12:31:28 +01:00
" <th>country_fr</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>99.833259</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>99.935317</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>99.486493</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>99.808521</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.155933</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0.079799</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>82.894264</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>94.744832</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>99.238475</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>99.032154</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased country_fr\n",
2024-03-11 18:43:56 +01:00
"0 10 0.0 99.833259\n",
"1 10 1.0 99.935317\n",
"2 11 0.0 99.486493\n",
"3 11 1.0 99.808521\n",
"4 12 0.0 0.155933\n",
"5 12 1.0 0.079799\n",
"6 13 0.0 82.894264\n",
"7 13 1.0 94.744832\n",
"8 14 0.0 99.238475\n",
"9 14 1.0 99.032154"
2024-03-08 10:30:12 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 179,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# graphique sur le train set\n",
"\n",
"company_country_fr = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"country_fr\"]].mean().reset_index()\n",
"company_country_fr[\"country_fr\"] = 100 * company_country_fr[\"country_fr\"]\n",
"company_country_fr"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 180,
2024-03-10 12:31:28 +01:00
"id": "4a037b48-1d65-4ed3-a012-7d6f5a312533",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-11 18:43:56 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAIiCAYAAADCc/lyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABpw0lEQVR4nO3dd3QUZf/+8WuTkF5IQipCEqRLF6lCQKRXQRBBqgVERAREEJUgHRWQ/qBIEKU8KiIiVQSk9wAiImAoUh56C5BAMr8//GW/LElgBxKyhPfrnJzD3nPPzGdmJ7NcmZl7LYZhGAIAAAAA2M0puwsAAAAAgIcNQQoAAAAATCJIAQAAAIBJBCkAAAAAMIkgBQAAAAAmEaQAAAAAwCSCFAAAAACYRJACAAAAAJMIUgAAAABgEkEKj5zY2FhZLBbrj4uLix577DF16tRJx44dy9R1DRs2TPPnz7+vZRw6dEgWi0WxsbGZUpO9IiMj1bFjxwdex6xZszR27NhMXeahQ4fUsGFDBQQEyGKxqGfPnpm6/AftQbwXMTExslgsWbZ8ZI5Vq1bJYrFo1apV2V2K9dx66NCh7C4lyy1atEgxMTFZvp6s+j386quvFBQUpMuXL1vbMuPzKiPZ9Tlmr4w+d86fP6/cuXNn2X7Bw48ghUfW9OnTtWHDBi1fvlyvvvqqZs+erWrVqikhISHT1pGVH0wPWlhYmDZs2KCGDRtm6XqyIki9/fbb2rRpk7788ktt2LBBb7/9dqYu/0F7UO8FHF+5cuW0YcMGlStXLrtLeaQsWrRIgwYNyu4y7snVq1f13nvv6d1335WPj4+1PSs/rxz9nJXR546/v7/efvttvfPOO0pKSnrwhcHhuWR3AUB2KVGihMqXLy9JqlmzppKTkzV48GDNnz9fbdu2va9lX7t2TR4eHplRpsNwc3NTpUqVsruMe/L777+rQoUKatas2R373bhxw3qV0pE9zO8FMpevry/HAkyZMWOGzp49q1deeeWel3Ht2jW5u7vbfbXsYT5nde3aVUOGDNF3332nNm3aZHc5cDBckQL+v9ST/OHDhyVJgwYNUsWKFRUQECBfX1+VK1dO06ZNk2EYNvNFRkaqUaNGmjdvnsqWLSt3d3cNGjRIFotFCQkJmjFjhvU2who1atyxhuPHj6tVq1by8fGRn5+fXnjhBZ08eTLdvlu3blWTJk0UEBAgd3d3lS1bVv/973/t2tbExER99NFHKlasmNzd3RUYGKiaNWtq/fr1Gc6T0a0Z+/fvV5s2bRQcHCw3NzcVK1ZMEydOtOmTevvR7NmzNWDAAIWHh8vX11fPPvus9u3bZ+1Xo0YN/fzzzzp8+LDN7ZepJk+erNKlS8vb21s+Pj4qWrSo3nvvvQxrTl3vgQMHtHjxYuvyDh06ZJ02c+ZM9e7dW3nz5pWbm5sOHDig06dPq1u3bipevLi8vb0VHBysZ555RmvWrEl3n3zyyScaPXq0oqKi5O3trcqVK2vjxo1p6tm0aZMaN26swMBAubu76/HHH7e5zfDAgQPq1KmTChUqJE9PT+XNm1eNGzfW7t277/penD59Wq+99pry5csnNzc3BQUFqWrVqvrll18y3D+pfv75Z5UpU0Zubm6KiorSJ598km4/wzA0adIklSlTRh4eHvL399fzzz+vv//+26bfjh071KhRI+sxER4eroYNG+qff/65ay1LlixRrVq15OfnJ09PTxUrVkzDhw+36bNgwQJVrlxZnp6e8vHxUe3atbVhwwabPqm3RO3atUstW7aUn5+fAgIC1KtXL928eVP79u1TvXr15OPjo8jISI0aNcpm/tTj4+uvv1avXr0UGhoqDw8PRUdHa8eOHTZ9t27dqtatWysyMlIeHh6KjIzUiy++aD2X3Grt2rWqXLmy3N3dlTdvXn3wwQf64osv0twSl3peWbJkicqVKycPDw8VLVpUX375Zbp13n5rnz3nh6tXr6pPnz6Kio
qSu7u7AgICVL58ec2ePfuO75Ekbdy4UVWrVpW7u7vCw8PVv39/3bhxI92+c+fOVeXKleXl5SVvb2/VrVs3zT5Mjz31dezYUd7e3tqzZ49q1aolLy8vBQUFqXv37rp69arN8uw9fqU7H4cdO3a0nuNuPU+lvn8TJ05U9erVFRwcLC8vL5UsWVKjRo1Kd//Yc7xn5j6V/j2PNm7cWLlz57a23enzKvWWzWXLlqlz584KCgqSp6enEhMT7+uclfo7umfPHr344ovy8/NTSEiIOnfurIsXL951O+w5z9jznt/tcyckJES1a9fWlClT7Nq/eLQ49p9dgQfowIEDkqSgoCBJ/574u3Tpovz580v69z8Ob775po4dO6YPP/zQZt7t27dr7969ev/99xUVFSUvLy81a9ZMzzzzjGrWrKkPPvhA0r9/Pc7ItWvX9Oyzz+r48eMaPny4ChcurJ9//lkvvPBCmr4rV65UvXr1VLFiRU2ZMkV+fn6aM2eOXnjhBV29etXm2abb3bx5U/Xr19eaNWvUs2dPPfPMM7p586Y2btyoI0eOqEqVKnbvsz/++ENVqlRR/vz59emnnyo0NFRLly5Vjx49dObMGQ0cONCm/3vvvaeqVavqiy++0KVLl/Tuu++qcePG2rt3r5ydnTVp0iS99tprOnjwoH744QebeefMmaNu3brpzTff1CeffCInJycdOHBAf/zxR4b1pd729Nxzz+nxxx+3BoSwsDDrf3r69++vypUra8qUKXJyclJwcLBOnz4tSRo4cKBCQ0N15coV/fDDD6pRo4ZWrFiRJhBPnDhRRYsWtd4a8sEHH6hBgwaKj4+Xn5+fJGnp0qVq3LixihUrptGjRyt//vw6dOiQli1bZl3O8ePHFRgYqBEjRigoKEjnzp3TjBkzVLFiRe3YsUNFihTJcFvbtWun7du3a+jQoSpcuLAuXLig7du36+zZsxm/gZJWrFihpk2bqnLlypozZ46Sk5M1atQo/e9//0vTt0uXLoqNjVWPHj00cuRInTt3Th999JGqVKminTt3KiQkRAkJCapdu7aioqI0ceJEhYSE6OTJk1q5cqXN8xjpmTZtml599VVFR0drypQpCg4O1l9//aXff//d2mfWrFlq27at6tSpo9mzZysxMVGjRo2yvjdPP/20zTJbtWqll156SV26dNHy5cut/6H95Zdf1K1bN/Xp00ezZs3Su+++q4IFC6p58+Y287/33nsqV66cvvjiC128eFExMTGqUaOGduzYoQIFCkj691xRpEgRtW7dWgEBATpx4oQmT56sp556Sn/88Yfy5MkjSdq1a5dq166twoULa8aMGfL09NSUKVP09ddfp7s/du7cqd69e6tfv34KCQnRF198oZdfflkFCxZU9erVM9yP9p4fevXqpZkzZ2rIkCEqW7asEhIS9Pvvv9/1mPnjjz9Uq1YtRUZGKjY2Vp6enpo0aZJmzZqVpu+wYcP0/vvvq1OnTnr//feVlJSkjz/+WNWqVdPmzZtVvHjxDNdjb303btxQgwYN1KVLF/Xr10/r16/XkCFDdPjwYf3000/WfvYcv9Ldj8MPPvhACQkJ+u6772wCfFhYmCTp4MGDatOmjaKiouTq6qqdO3dq6NCh+vPPP22CsD3He3ruZ5/+888/2r17t15//XWb9g0bNtz186pz585q2LChZs6cqYSEBOXKleu+zlmpWrRooRdeeEEvv/yydu/erf79+0tSmj8a3Mre84w97/mdPndS1ahRQ/3799eFCxdsAiggA3jETJ8+3ZBkbNy40bhx44Zx+fJlY+HChUZQUJDh4+NjnDx5Ms08ycnJxo0bN4yPPvrICAwMNFJSUqzTIiIiDGdnZ2Pfvn1p5vPy8jI6dOhgV12TJ082JBk//vijTfurr75qSDKmT59ubStatKhRtmxZ48aNGzZ9GzVqZISFhRnJyckZruerr74yJBmff/75HeuJiIiwqT0+Pj5NHXXr1jUee+wx4+
LFizbzdu/e3XB3dzfOnTtnGIZhrFy50pBkNGjQwKbff//7X0OSsWHDBmtbw4YNjYiIiDT1dO/e3cidO/cda77TtjR
2024-03-08 10:30:12 +01:00
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-11 11:40:29 +01:00
"# generic function to generate the barplot ON THE TRAIN SET - nationality\n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"multiple_barplot(company_country_fr, x=\"number_company\", y=\"country_fr\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de clients français (%)\", \n",
" title = \"Part de clients français des compagnies de spectacle (train set)\")"
]
},
2024-03-11 18:43:56 +01:00
{
"cell_type": "code",
"execution_count": 181,
"id": "01897a11-675e-49bf-aee2-44e2dd1f6c36",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# save in the s3\n",
"\n",
"FILE_NAME = \"nationality_fr_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
2024-03-10 12:31:28 +01:00
{
"cell_type": "markdown",
"id": "ecfd112e-270a-4223-b80f-7e95e57d199d",
"metadata": {},
"source": [
"### 2. campaigns_information"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 189,
"id": "b37e7ddf-321a-4ebe-9742-9e760a541d29",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 688953\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_campaigns 0\n",
"nb_campaigns_opened 0\n",
"time_to_open 301495\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",campaigns_information_spectacle.shape[0])\n",
"campaigns_information_spectacle.isna().sum()"
]
},
2024-03-11 18:43:56 +01:00
{
"cell_type": "markdown",
"id": "47c15a1d-bef8-4105-87f3-607958667569",
"metadata": {},
"source": [
"#### Part de clients n'ouvrant jamais les mails"
]
},
2024-03-10 12:31:28 +01:00
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 182,
2024-03-10 12:31:28 +01:00
"id": "de1ecaac-25bb-4853-b8ab-3ef2ca6917ed",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>customer_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
2024-03-08 10:30:12 +01:00
" <th>number_compagny</th>\n",
2024-03-10 12:31:28 +01:00
" <th>no_campaign_opened</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2024-03-10 12:31:28 +01:00
" <td>29</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
2024-03-08 10:30:12 +01:00
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>True</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>37</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>39</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:16:38</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>41</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:12:29</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 12:31:28 +01:00
" <td>44</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254699</th>\n",
" <td>6837769</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 23:42:15</td>\n",
2024-03-08 10:30:12 +01:00
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254700</th>\n",
" <td>6875038</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254701</th>\n",
" <td>6875066</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254702</th>\n",
" <td>6875099</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254703</th>\n",
" <td>6875143</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:17:01</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-03-10 12:31:28 +01:00
"<p>688953 rows × 6 columns</p>\n",
2024-03-03 09:32:45 +01:00
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" customer_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
"0 29 4 0.0 NaT \n",
"1 37 3 0.0 NaT \n",
"2 39 4 1.0 0 days 05:16:38 \n",
"3 41 4 1.0 0 days 01:12:29 \n",
"4 44 4 0.0 NaT \n",
"... ... ... ... ... \n",
"254699 6837769 1 1.0 0 days 23:42:15 \n",
"254700 6875038 1 0.0 NaT \n",
"254701 6875066 1 0.0 NaT \n",
"254702 6875099 1 0.0 NaT \n",
"254703 6875143 1 1.0 0 days 01:17:01 \n",
"\n",
" number_compagny no_campaign_opened \n",
"0 10 True \n",
"1 10 True \n",
"2 10 False \n",
"3 10 False \n",
"4 10 True \n",
"... ... ... \n",
"254699 14 False \n",
"254700 14 True \n",
"254701 14 True \n",
"254702 14 True \n",
"254703 14 False \n",
"\n",
"[688953 rows x 6 columns]"
2024-03-03 09:32:45 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 182,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# part de clients n'ouvrant jamais les mails par compagnie\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"campaigns_information_spectacle[\"no_campaign_opened\"] = pd.isna(campaigns_information_spectacle[\"time_to_open\"])\n",
"campaigns_information_spectacle"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 183,
2024-03-10 12:31:28 +01:00
"id": "b5a0060f-a9dd-435b-844f-b24674b8bc27",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 12:31:28 +01:00
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.605656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.294001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.475719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.353820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.428148</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny no_campaign_opened\n",
"0 10 0.605656\n",
"1 11 0.294001\n",
"2 12 0.475719\n",
"3 13 0.353820\n",
"4 14 0.428148"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 183,
2024-03-10 12:31:28 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_lazy_customers = campaigns_information_spectacle.groupby(\"number_compagny\")[\"no_campaign_opened\"].mean().reset_index()\n",
"company_lazy_customers"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 184,
2024-03-10 12:31:28 +01:00
"id": "788c90e0-f13a-4804-ace7-e5159fddd7fd",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAo0AAAHFCAYAAACXTsPRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABcSElEQVR4nO3dd1gUV9sG8HvpHQWlKQJGQRSwR9EodoJEsEUNKoqaiMauMXYFNagxSkxiS1Q0lhBrjDEFu4ldQE00dgEVREHFCric7w8/9s2yy+6CyADev+vaS/bMzDnPzM7OPp6ZMyMTQggQEREREWmgJ3UARERERFT2MWkkIiIiIq2YNBIRERGRVkwaiYiIiEgrJo1EREREpBWTRiIiIiLSikkjEREREWnFpJGIiIiItGLSSERERERaFSlpjImJgUwmU7wMDAxQvXp1hIWF4datWyUa2GeffYYdO3a8Uh03btyATCZDTExMicSkK1dXVwwcOLDU49i4cSOio6NfaxuFmTVrFlxdXSVpu7iePn2KWbNm4cCBA1KHUip2796NWbNmSR3GG0kmkylt+wMHDkAmk1W4fa/gsY/KplmzZkEmk0kdBgBg4MCB5e63o7iWLl1aKvlImzZt0KZNm9dSd7F6GtesWYOjR48iLi4OH374ITZt2oRWrVrhyZMnJRZYSSSNZYWjoyOOHj2KwMDA19qOlEljefT06VNERERUuB/uwuzevRsRERFSh/FGOnr0KIYMGSJ1GEQAgCFDhuDo0aNSh/HGKa2k8XUyKM5CXl5eaNKkCQCgbdu2kMvlmD17Nnbs2IG+ffu+UkDPnj2DqanpK9VR1hgbG6N58+ZSh0FvqKdPn8LMzEzqMN5oFeX7L5fL8eLFCxgbG0sdCr2C6tWro3r16lKHQeVQiVzTmH9ATEpKAgBERESgWbNmsLGxgZWVFRo1aoRVq1ZBCKG0nKurK9577z1s27YNDRs2hImJCSIiIiCTyfDkyROsXbtWcSpcW1fr7du30atXL1haWsLa2hq9e/dGWlqa2nlPnTqFoKAg2NjYwMTEBA0bNsSPP/6o07pmZ2cjMjISnp6eMDExga2tLdq2bYsjR44Uukxhp6cvX76MkJAQ2NnZwdjYGJ6envjmm2+U5sk/jbVp0yZMnToVTk5OsLKyQocOHXDx4kXFfG3atMEvv/yCpKQkpUsI8i1btgz169eHhYUFLC0tUadOHUyZMkXjuubHvXDhQixatAhubm6wsLCAr68vjh07pnVbffPNN2jdujXs7Oxgbm4Ob29vLFiwALm5uYp5Zs+eDQMDA6SkpKgsP2jQINja2uL58+cAgNjYWHTq1AmOjo4wNTWFp6cnJk2apNLDPXDgQFhYWODKlSvo3LkzLCws4OzsjPHjxyM7O1uxblWrVgUAxT4nk8m0nlpLTk5Gv379lD6zL774Anl5eYp5Cjv1WHA/iI6Ohkwmw5UrV1Ta+fTTT2FkZIR79+4pyvbs2YP27dvDysoKZmZmaNmyJfbu3au0XP5pp/j4ePTs2ROVK1fGW2+9hYEDByr2rf/uHzdu3Ch0XePi4hAcHIzq1avDxMQEtWrVwtChQ5Viyt/e6k4vqTsFlpeXh6+++goNGjSAqakpKlWqhObNm2Pnzp2KeQqeys1X8NRn/uUy+/fvx7Bhw1ClShXY2tqie/fuuH37dqHr9d+4LSws8O+//8Lf3x/m5uZwdHTEvHnzAADHjh3DO++8A3Nzc7i7u2Pt2rVKy9+9exfDhw9H3bp1YWFhATs7O7Rr1w6HDx9WaauwddImfx3j4uIQFhYGGxsbmJubo0uXLrh27ZrK/KtXr0b9+vVhYmICGxsbdOvWDRcuXFCap7BTVwU/x/z9dcGCBZgzZw7c3NxgbGyM/fv3F2kdsrKyMGHCBLi5ucHIyAjVqlXDmDFjVL63mzdvRrNmzWBtbQ0zMzPUrFkTgwYN0lq/LvtUXl4eFixYgDp16sDY2B
h2dnYIDQ3FzZs3VbaNl5cXjh49ihYtWsDU1BSurq5Ys2YNAOCXX35Bo0aNYGZmBm9vb/z2229Ky+fv8wkJCejevTusrKxgbW2Nfv364e7du0rz6no8A4Bvv/0W7u7uMDY2Rt26dbFx48ZCPy9djteFnZ6OjY2Fr68vzM3NYWFhAX9/fyQkJCjNc+3aNfTp0wdOTk4wNjaGvb092rdvj8TExMI/pP8XExMDDw8PxbFz3bp1aufLycnBnDlzFJ9X1apVERYWprIN1dElvvzcY/v27fDx8YGJiQlq1qyJJUuWqNSn6/6rbT90dXXFP//8g4MHDyqOv/mf3/PnzzF+/Hg0aNAA1tbWsLGxga+vL3766SeVeHTZ30t6m/5XsXoaC8r/0cv/Eb5x4waGDh2KGjVqAHh58B05ciRu3bqFGTNmKC0bHx+PCxcuYNq0aXBzc4O5uTm6du2Kdu3aoW3btpg+fToAwMrKqtD2nz17hg4dOuD27duIioqCu7s7fvnlF/Tu3Vtl3v379+Pdd99Fs2bNsHz5clhbW+OHH35A79698fTpU41Jw4sXLxAQEIDDhw9jzJgxaNeuHV68eIFjx44hOTkZLVq00HmbnT9/Hi1atECNGjXwxRdfwMHBAb///jtGjRqFe/fuYebMmUrzT5kyBS1btsR3332HrKwsfPrpp+jSpQsuXLgAfX19LF26FB999BGuXr2K7du3Ky37ww8/YPjw4Rg5ciQWLlwIPT09XLlyBefPn9cp1m+++QZ16tRRnPqePn06OnfujOvXr8Pa2hrAy4NQwR/Fq1evIiQkRPFlO3PmDObOnYt///0Xq1evBgAMHToUc+fOxYoVKzBnzhzFspmZmfjhhx8wYsQImJiYAHiZZHfu3BljxoyBubk5/v33X8yfPx8nTpzAvn37lNrOzc1FUFAQBg8ejPHjx+PQoUOYPXs2rK2tMWPGDDg6OuK3337Du+++i8GDBytOHebvw+rcvXsXLVq0QE5ODmbPng1XV1fs2rULEyZMwNWrV7F06VKdtme+fv364dNPP0VMTIzSusvlcqxfvx5dunRBlSpVAADr169HaGgogoODsXbtWhgaGmLFihXw9/fH77//jvbt2yvV3b17d/Tp0wfh4eF48uQJvLy88OTJE2zZskXptJSjo2Oh8V29ehW+vr4YMmQIrK2tcePGDSxatAjvvPMOzp07B0NDwyKtL/AyMVm/fj0GDx6MyMhIGBkZIT4+XmPyqs2QIUMQGBiIjRs3IiUlBZ988gn69eunsk+ok5ubi+7duyM8PByffPIJNm7ciMmTJyMrKwtbt27Fp59+iurVq+Orr77CwIED4eXlhcaNGwN4uY8CwMyZM+Hg4IDHjx9j+/btaNOmDfbu3Vui1xQNHjwYHTt2VKzjtGnT0KZNG5w9exaVKlUCAERFRWHKlCn44IMPEBUVhYyMDMyaNQu+vr44efIkateuXay2lyxZAnd3dyxcuBBWVlZFqufp06fw8/PDzZs3MWXKFPj4+OCff/7BjBkzcO7cOezZswcymQxHjx5F79690bt3b8yaNQsmJiZISkrS6TPUZZ8aNmwYVq5ciREjRuC9997DjRs3MH36dBw4cADx8fGK7xkApKWlISwsDBMnTlR89oMGDUJKSgq2bNmCKVOmwNraGpGRkejatSuuXbsGJycnpZi6deuGXr16ITw8HP/88w+mT5+O8+fP4/jx44rvja7Hs5UrV2Lo0KHo0aMHFi9ejIcPHyIiIkLxH+CCdDleq/PZZ59h2rRpCAsLw7Rp05CTk4PPP/8crVq1wokTJ1C3bl0AQOfOnSGXy7FgwQLUqFED9+7dw5EjR/DgwQONn1NMTAzCwsIQHByML774Ag8fPsSsWbOQnZ0NPb3/9V/l5eUhODgYhw8fxsSJE9GiRQskJSVh5syZaNOmDU6dOqXxbKSu8SUmJmLMmDGYNWsWHBwcsGHDBowePRo5OTmYMGECAN33X0D7frh9+3
b07NkT1tbWit+K/B777OxsZGZmYsKECahWrRpycnKwZ88edO/eHWvWrEFoaKgi7uIcQ191myoRRbBmzRoBQBw7dkz
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_lazy_customers[\"number_compagny\"], company_lazy_customers[\"no_campaign_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients n'ayant ouvert aucun mail\")\n",
"plt.title(\"Part de clients n'ayant ouvert aucun mail pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
2024-03-03 09:32:45 +01:00
"plt.show()"
]
},
2024-03-11 18:43:56 +01:00
{
"cell_type": "markdown",
"id": "33233fb9-707d-44c0-80e2-a131756110a1",
"metadata": {},
"source": [
"#### Taux d'ouverture des campagnes de mails"
]
},
2024-03-08 10:30:12 +01:00
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 185,
2024-03-10 12:31:28 +01:00
"id": "c48015c2-6451-4089-93b7-6d55d3b2e553",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_compagny</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>ratio_campaigns_opened</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>734772</td>\n",
" <td>126151.0</td>\n",
" <td>0.171687</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>342396</td>\n",
" <td>129833.0</td>\n",
" <td>0.379190</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
" <td>3168123</td>\n",
" <td>810722.0</td>\n",
" <td>0.255900</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>13</td>\n",
" <td>3218569</td>\n",
" <td>793581.0</td>\n",
" <td>0.246563</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 12:31:28 +01:00
" <td>14</td>\n",
" <td>2427043</td>\n",
" <td>723846.0</td>\n",
" <td>0.298242</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
"0 10 734772 126151.0 0.171687\n",
"1 11 342396 129833.0 0.379190\n",
"2 12 3168123 810722.0 0.255900\n",
"3 13 3218569 793581.0 0.246563\n",
"4 14 2427043 723846.0 0.298242"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 185,
2024-03-10 12:31:28 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# taux d'ouverture des campaigns\n",
"\n",
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
"company_campaigns_stats"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 186,
2024-03-10 12:31:28 +01:00
"id": "d06ab865-4832-4fe9-918b-e5ff72bebee4",
"metadata": {},
"outputs": [
{
2024-03-11 18:43:56 +01:00
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApYAAAHFCAYAAABIALnOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABO10lEQVR4nO3dd3gU5f7+8XuBZBMgCSSQhpCAAkoJiCACSgClSS+CoFL1SLHQDs2DBKT7FYEDgkq34UEBPSC9KdKCVAUVkBJKQFpCDZA8vz882V82jYADuyHv13XtdTHPzM585tmZ2TtTFpsxxggAAAD4m3K5ugAAAADcHwiWAAAAsATBEgAAAJYgWAIAAMASBEsAAABYgmAJAAAASxAsAQAAYAmCJQAAACxBsAQAAIAlshwsbTZbll7r1q27i+X+fYcPH5bNZtPs2bMdbVFRUQoPD3dZTVbbu3evoqKidPjwYVeXkmXr1q3LFtsPXKdWrVqqVauWq8u4bekdc2bPni2bzZat9lErsJ9nH+Hh4erUqZOry5D0V/6IiopydRl33YkTJxQVFaWdO3fe1eWkd0yyUp6sTrhp0yan4XfeeUdr167VmjVrnNrLlCljTWW4Y3v37tWwYcNUq1at+yowA9lRSEiINm3apAcffNDVpQBZtnDhQvn6+rq6jBzlxIkTGjZsmMLDw1WxYkVXl3PHshwsn3jiCafhwoULK1euXGna4To3btyQzWa76/PPkyfLmw2Q49nt9hx1nLxy5Yry5s3r6jLwNz366KOuLgHZlKX3WE6ZMkU1a9ZUYGCg8uXLp/Lly2vcuHG6ceOG03QZnWJPfamrW7du8vLy0k8//eRoS0pK0tNPP62goCCdPHky03pOnDihNm3ayMfHR35+fmrbtq1iY2OztC7Xrl3ToEGDVLx4cXl6eqpIkSLq2bOnLly44DRdRqfoU67jrl27ZLPZNGPGjDTTLV26VDabTd9++62jbf/+/Wrfvr0CAwNlt9v1yCOPaMqUKU7vS76k9Mknn6hv374qUqSI7Ha7pk+frueee06SVLt2bcctCsmnvLPa9xnN/8CBA5KkVatW6emnn5avr6/y5s2rGjVqaPXq1bfo1b/8+uuvatCggfLmzatChQqpW7duunjxYrrTZmU5f/75p/7xj3+oaNGistvtKly4sGrUqKFVq1ZlqZZ27dopKChIdrtdxYoVU4cOHZSQkOCYd48ePVSmTBnlz59fgYGBqlOnjn744Qen+SRfWnj33Xc1duxYhYeHy9vbW7Vq1dLvv/+uGzduaODAgQoNDZWfn59atGih06dPO80jPDxcjRs31sKFCxURESEvLy+VKFFCkyZNcpru2rVr6tu3rypWrCg/Pz/5+/urWrVq+uabb9Ks34ULF9S1a1f5+/srf/78atSokf744480221UVJRsNpt++eUXtWvXTn5+fgoKClKXLl0UFxfnNE9jjD744ANVrFhR3t7eKliwoFq3bq0//vjDabodO3aocePGju04NDRUjRo10rFjxzL9TIwxGjdunMLCwuTl5aVKlSpp6dKl6U4bHx+vfv36Oe2nvXr10uXLlzNdhvTXNl+uXDlt2rRJ1atXl7e3t8LDwzVr1ixJ0pIlS1SpUiXlzZtX5cuX17Jly5zef+DAAXXu3FklS5ZU3rx5VaRIETVp0kR79uxxmi6rl53utL+S1+OHH37QE088IW9vbxUpUkRDhgxRYmKi07Tnzp1Tjx49VKRIEXl6eqpEiRJ66623HNv7rerNaLvZvn27WrdurYIFC97Rmdlt27apadOm8vf3l5eXlx599FH95z//cZrmypUrjs/ay8tL/v7+qly5sr744otbzv/48eOOY4Snp6dCQ0PVunVrnTp1yjHN0aNH9eKLLzodd9977z0lJSWl6ZucsJ+n912R1f1t/vz5qlq1qvz8/JQ3b16VKFFCXbp0ueXnFB8fr1deeUUBAQHKnz+/GjRooN
9//z3dabPyPZmRW9WX/P336aefqk+fPgoODpa3t7ciIyO1Y8eONPPLyvYrZb4drlu3TlWqVJEkde7c2fHdnfz5bdu2Tc8//7xjmwsPD1e7du105MiR21pOZv5On6Zk6amngwcPqn379o6NbteuXRo5cqR+/fVXzZw587bnN2HCBG3ZskVt2rTRTz/9pAIFCmjYsGFat26dli1bppCQkAzfe/XqVT3zzDM6ceKERo8erVKlSmnJkiVq27ZtmmmjoqKcdj5jjJo3b67Vq1dr0KBBeuqpp7R7924NHTpUmzZt0qZNm2S327O8HhUqVNCjjz6qWbNmqWvXrk7jZs+ercDAQD377LOS/rqMXb16dRUrVkzvvfeegoODtXz5cr3xxhs6c+aMhg4d6vT+QYMGqVq1apo2bZpy5cqlypUr6/z58xo8eLCmTJmiSpUqSdIdX4ZLPf/AwEB9+umn6tChg5o1a6Y5c+bIw8NDH374oerXr6/ly5fr6aefznB+p06dUmRkpDw8PPTBBx8oKChIn332mV577bU002Z1OS+99JK2b9+ukSNHqlSpUrpw4YK2b9+us2fPZrpuu3bt0pNPPqlChQpp+PDhKlmypE6ePKlvv/1W169fl91u17lz5yRJQ4cOVXBwsC5duqSFCxeqVq1aWr16dZp7/qZMmaKIiAhNmTJFFy5cUN++fdWkSRNVrVpVHh4emjlzpo4cOaJ+/frp5ZdfdvqDQpJ27typXr16KSoqSsHBwfrss8/05ptv6vr16+rXr58kKSEhQefOnVO/fv1UpEgRXb9+XatWrVLLli01a9YsdejQQdJff4Q1adJE27ZtU1RUlCpVqqRNmzapQYMGGfZJq1at1LZtW3Xt2lV79uzRoEGDJMlp/3311Vc1e/ZsvfHGGxo7dqzOnTun4cOHq3r16tq1a5eCgoJ0+fJl1a1bV8WLF9eUKVMUFBSk2NhYrV27NsM/IpINGzZMw4YNU9euXdW6dWvFxMTolVdeUWJiokqXLu2Y7sqVK4qMjNSxY8c0ePBgRURE6JdfftHbb7+tPXv2aNWqVbc8gx8bG6vOnTurf//+euCBB/Tvf/9bXbp0UUxMjL766isNHjxYfn5+Gj58uJo3b64//vhDoaGhkv76wzUgIEBjxoxR4cKFde7cOc2ZM0dVq1bVjh07nGq9lb/TX8nr8fzzz2vgwIEaPny4lixZohEjRuj8+fOaPHmypL+CSu3atXXw4EENGzZMERER+uGHHzR69Gjt3LlTS5YsyXK9qbVs2VLPP/+8unXrlqVQn9LatWvVoEEDVa1aVdOmTZOfn5/mzZuntm3b6sqVK45w06dPH33yyScaMWKEHn30UV2+fFk///zzLffz48ePq0qVKrpx44ZjOzl79qyWL1+u8+fPKygoSH/++aeqV6+u69ev65133lF4eLgWL16sfv366eDBg/rggw+c5pkT9vPUsrq/bdq0SW3btlXbtm0VFRUlLy8vHTlyJM1tc6klf+9u3LhRb7/9tqpUqaIff/xRDRs2TDPt7X5PpnQ79Q0ePFiVKlXS9OnTFRcXp6ioKNWqVUs7duxQiRIlJGV9+73VdlipUiXNmjVLnTt31r/+9S81atRIkvTAAw9I+uuPmtKlS+v555+Xv7+/Tp48qalTp6pKlSrau3evChUqlKXlBAUFpdsvf6dP0/sw70jHjh1Nvnz5MhyfmJhobty4YebOnWty585tzp075xgXFhZmOnbsmOY9kZGRJjIy0qlt//79xtfX1zRv3tysWrXK5MqVy/zrX/+6ZX1Tp041ksw333zj1P7KK68YSWbWrFkZvnfZsmVGkhk3bpxT+5dffmkkmY8++sjRJskMHTo0zTxSr+OkSZOMJPPbb7852s6dO2fsdrvp27evo61+/frmgQceMHFxcU7ze+2114yXl5ejH9euXWskmZo1a6ZZ9vz5840ks3bt2lvWlSx132c0/8uXLxt/f3/TpEkTp/bExERToU
IF8/jjj6eZd0oDBgwwNpvN7Ny506m9bt26TjXfznLy589vevXqlely01OnTh1ToEABc/r06Sy/5+bNm+bGjRvm6ae
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
2024-03-10 12:31:28 +01:00
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Taux d'ouverture (%)\")\n",
"plt.title(\"Taux d'ouverture des campagnes de mails pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 187,
2024-03-10 12:31:28 +01:00
"id": "5c37e063-a717-4a8c-828e-b386b87e8409",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkoAAAHFCAYAAAANLdYJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABbXUlEQVR4nO3dd1gUV/828HulLB0EgQVdARWxYSUSNQloFHuPPSoajQY1sT2WqBEsoEYRyyMao4IFjcYSW1RsmAQL9l4fFI0iiQUUpJ/3D1/m5wJDE1jU+3Nde13OmTMz351dZm/PzOwqhBACRERERJRDOW0XQERERFRWMSgRERERyWBQIiIiIpLBoEREREQkg0GJiIiISAaDEhEREZEMBiUiIiIiGQxKRERERDIYlIiIiIhklKmgFBISAoVCgdOnT8v2uXv3LhQKBUJCQkqvsBIUFhaGoKCgUtteUlISfH19cfTo0VLbZnHz9vaGo6OjtssoUQ8fPoSvry/Onz+v7VIK7H3728zO09MTnp6eGm0KhQK+vr6lVsP78PdbkOO8tuT2Gpc0X19fKBSKUq/j6tWr8PX1xd27d0t0O0WVV325fQY8ffoUvXv3ho2NDRQKBbp06VJstegW25pKiZ2dHY4fP46qVatqu5RiERYWhsuXL2P06NGlsr2kpCT4+fkBQKkfEKjgHj58CD8/Pzg6OqJ+/fraLocALFu2TNsl8O/3A1Ea77WrV6/Cz88Pnp6eZfI/nnnVN23aNHz33XcabTNnzsT27duxevVqVK1aFZaWlsVWyzsXlJRKJT7++GNtl6EVGRkZSE9Ph1Kp1HYpVEKyXuOSXj/fQ4VXq1YtbZdAeB0WjYyMtF1GieJ7LW+5DZRcvnwZVatWRb9+/Yp9e2Xq1FtByA3v37p1C3379oWNjQ2USiVq1qyJ//73vxp9jh49CoVCgY0bN2LKlCmwt7eHmZkZWrZsiRs3bhRo+8W5HU9PT+zZswf37t2DQqGQHm8+z3nz5mHWrFlwcnKCUqnEkSNHAACnT59Gp06dYGlpCQMDAzRo0ACbN2/Od99ZW1sDAPz8/KTteXt7AwBu376NQYMGwdnZGUZGRqhYsSI6duyIS5cuaawna+g8+5Bo1vPOOi1w69YtmJmZoUePHhr9Dh8+DB0dHUybNi3f/R0SEgIXFxdpX69duzbfZbJkZmZi3rx5qFGjBpRKJWxsbDBgwAA8ePBAo5+jo6O0D9705vD3P//8A319/Vxrvn79OhQKBRYvXiy1xcbGYtiwYahUqRL09fXh5OQEPz8/jRCU12v80UcfAQAGDRokvU5Zp3jkhuWzD0eXxHsoy8OHD9GzZ0+YmprC3NwcvXr1QmxsbK59C7KdpKQkjB8/Hk5OTjAwMIClpSXc3NywcePGPOvIei8ePnwYQ4cOhZWVFczMzDBgwAAkJiYiNjYWPXv2hIWFBezs7DB+/HikpaVprMPPzw/u7u6wtLSEmZkZGjZsiFWrViH774UX5HRIUZ8HkP97Jr+/39wU9pi3evVq1KtXT6q9a9euuHbtmkYfb29vmJiY4Pr162jdujWMjY1hZ2eHOXPmAABOnDiBTz75BMbGxqhevTpCQ0Nzre3Zs2cYNGgQLC0tYWxsjI4dO+J///ufRh9PT0/UqVMHx44dQ9OmTWFkZITBgwcDABISEqR9ra+vj4oVK2L06NFITEzMd18LITBv3jw4ODjAwMAADRs2xO+//55r37fZDgDs27cPn3/+OczNzWFkZISaNWsiICAgz2Vye6+lpqZi1qxZ0vHM2toagwYNwj///KPRz9HRER06dMC+ffvQsGFDGBoaokaNGli9erXUJyQkRDouN2/eXHovZX2unjt3Dh06dJA+5+zt7dG+ffscx87swsPD0blzZ1SqVAkGBgaoVq0ahg0bhn///TdH3+vXr6NPnz6wtbWFUqlE5cqVMWDAAKSkpORb35vHuqzj3MGDB3Ht2jWpb9
bnUHBwMOrVqwcTExOYmpqiRo0a+P777/N8HjmIMmTNmjUCgIiKipLtEx0dLQCINWvWSG1XrlwR5ubmwtXVVaxdu1YcOHBAjBs3TpQrV074+vpK/Y4cOSIACEdHR9GvXz+xZ88esXHjRlG5cmXh7Ows0tPT86yvuLdz5coV0axZM6FSqcTx48elx5vPs2LFiqJ58+bi119/FQcOHBDR0dHi8OHDQl9fX3z66afil19+Efv27RPe3t459kt2ycnJYt++fQKA+Oqrr6Tt3b59WwghREREhBg3bpz49ddfRUREhNi+fbvo0qWLMDQ0FNevX8/xOkVHR2usP+t5HzlyRGrbtGmTACAWLVokhBDi0aNHwtbWVnh4eOS7v7O207lzZ7Fr1y6xfv16Ua1aNaFWq4WDg0OeywohxNdffy0AiJEjR4p9+/aJ5cuXC2tra6FWq8U///wj9XNwcBADBw7MsbyHh4fw8PCQprt27SrUarXIyMjQ6DdhwgShr68v/v33X+k5ZtW4YsUKcfDgQTFz5kyhVCqFt7e3tJzca3zhwgXpuU+dOlV6ne7fv59rXVkGDhyosV9K4j0khBBJSUmiZs2awtzcXCxZskTs379ffPvtt6Jy5co5li/odoYNGyaMjIxEYGCgOHLkiNi9e7eYM2eOWLJkSZ61ZO0nJycnMW7cOHHgwAExd+5coaOjI/r06SMaNmwoZs2aJcLDw8XEiRMFALFgwQKNdXh7e4tVq1aJ8PBwER4eLmbOnCkMDQ2Fn5+fRr/c9jsAMX369Ld+HgV5z+T395ubwhzz/P39BQDRp08fsWfPHrF27VpRpUoVYW5uLm7evCn1GzhwoNDX1xc1a9YUixYtEuHh4WLQoEECgJg8ebKoXr26WLVqldi/f7/o0KGDACBOnz6d4zVTq9Vi8ODB4vfffxc//fSTsLGxEWq1Wjx79kxjn1taWgq1Wi2WLFkijhw5IiIiIkRiYqKoX7++qFChgggMDBQHDx4UixYtEubm5qJFixYiMzMzz/09ffp0aT9mbb9ixYpCpVJpvMZvu52ff/5ZKBQK4enpKcLCwsTBgwfFsmXLhI+PT45a3pT9vZaRkSHatGkjjI2NhZ+fnwgPDxc///yzqFixoqhVq5ZISkqS+jo4OIhKlSqJWrVqibVr14r9+/eLHj16CAAiIiJCCCFEXFyc9Hr/97//ld5LcXFx4uXLl8LKykq4ubmJzZs3i4iICPHLL7+I4cOHi6tXr+b5fIODg0VAQIDYuXOniIiIEKGhoaJevXrCxcVFpKamSv3Onz8vTExMhKOjo1i+fLk4dOiQWL9+vejZs6dISEjIsz4hNI91ycnJ4vjx46JBgwaiSpUqUt/4+HixceNGAUCMGjVKHDhwQBw8eFAsX75cfPvtt3k+j+zei6DUunVrUalSJREfH6/Rd+TIkcLAwEA8ffpUCPF/B4127dpp9Nu8ebMAIIUUOSWxnfbt2+f6oZ/1PKtWrarxBhNCiBo1aogGDRqItLQ0jfYOHToIOzu7HB/kb/rnn39yHNzlpKeni9TUVOHs7CzGjBkjtRcmKAkhxDfffCP09fXF8ePHRYsWLYSNjY14+PBhntvOyMgQ9vb2omHDhhoHo7t37wo9Pb18g9K1a9cEAI0DkhBCnDx5UgAQ33//vdRW0KC0c+dOAUAcOHBAaktPTxf29vaie/fuUtuwYcOEiYmJuHfvnsb65s+fLwCIK1euCCHyfo2joqJkQ0thg1Jxv4eCg4MFAPHbb79ptA8dOjRHzQXdTp06dUSXLl1ktykn6704atQojfYuXboIACIwMFCjvX79+qJhw4ay68vIyBBpaWlixowZwsrKSuO9V5CgVNTnUdD3TGH+foUo+LHo2bNnwtDQMEe/mJgYoVQqRd++faW2gQMHCgBi69atUltaWpqwtrYWAMTZs2el9idPnggdHR0xduxYqS3rNevatavGtv766y8BQMyaNUtq8/DwEADEoUOHNPoGBA
SIcuXK5fis+PXXXwUAsXfvXtl98uzZM2FgYCC7/Tdf47fZzosXL4SZmZn45JNP8gxUBQlKWR/4b+5zIf7vOLFs2TK
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# création d'un barplot permettant de visualiser les 2 indicateurs sur le même graphique\n",
"\n",
"# Création du premier barplot\n",
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"],\n",
" label = \"taux d'ouverture\", alpha = 0.7)\n",
"\n",
"# Création du deuxième barplot à côté du premier\n",
"bar_width = 0.4 # Largeur des barres\n",
"indices2 = company_campaigns_stats[\"number_compagny\"] + bar_width\n",
"plt.bar(indices2, 100 * (1 - company_lazy_customers[\"no_campaign_opened\"]), \n",
" label='Part de clients ouvrant des mails', alpha=0.7, width=bar_width)\n",
"\n",
"# Ajout des étiquettes et de la légende\n",
"plt.xlabel('Compagnie')\n",
"plt.ylabel('Taux (%)')\n",
"plt.title('Lien entre taux d ouverture des mails et nombre de clients actifs')\n",
"plt.legend()\n",
"\n",
"# Affichage du graphique\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 188,
"id": "f1b1e6fe-9006-487a-a8a6-9dd8ce15ace1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# save in the s3\n",
"\n",
"FILE_NAME = \"stats_mail_opening_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "638ab84b-15a5-4e70-b140-f121c68c82f5",
"metadata": {},
"source": [
"#### on refait les mêmes stats sur le train set"
]
},
{
"cell_type": "code",
"execution_count": 189,
2024-03-10 12:31:28 +01:00
"id": "4fdf4134-d32c-42c3-ab4f-36ad4783332c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_492779</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>female</td>\n",
2024-03-10 12:31:28 +01:00
" <td>1</td>\n",
" <td>0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>13.0</td>\n",
" <td>4.0</td>\n",
" <td>8 days 04:08:27</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_563424</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>other</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>1</td>\n",
2024-03-10 12:31:28 +01:00
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>0 days 01:39:58.555555555</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_44369</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>male</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0</td>\n",
" <td>1</td>\n",
2024-03-11 18:43:56 +01:00
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>14.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_620271</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
2024-03-11 18:43:56 +01:00
" <td>other</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-11 18:43:56 +01:00
" <td>10_687644</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>550.0</td>\n",
" <td>550.0</td>\n",
" <td>-1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-03-11 18:43:56 +01:00
" <td>4.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
2024-03-11 18:43:56 +01:00
"0 10_492779 0.0 0.0 0.0 0.0 \n",
"1 10_563424 0.0 0.0 0.0 0.0 \n",
"2 10_44369 0.0 0.0 0.0 0.0 \n",
"3 10_620271 0.0 0.0 0.0 0.0 \n",
"4 10_687644 0.0 0.0 0.0 0.0 \n",
2024-03-10 12:31:28 +01:00
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
2024-03-11 18:43:56 +01:00
"0 0.0 550.0 550.0 \n",
"1 0.0 550.0 550.0 \n",
"2 0.0 550.0 550.0 \n",
"3 0.0 550.0 550.0 \n",
"4 0.0 550.0 550.0 \n",
2024-03-10 12:31:28 +01:00
"\n",
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
2024-03-11 18:43:56 +01:00
"0 -1.0 0.0 ... female \n",
"1 -1.0 0.0 ... other \n",
"2 -1.0 0.0 ... male \n",
"3 -1.0 0.0 ... other \n",
"4 -1.0 0.0 ... other \n",
2024-03-10 12:31:28 +01:00
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
2024-03-11 18:43:56 +01:00
"0 1 0 0 1.0 13.0 \n",
"1 0 0 1 1.0 10.0 \n",
"2 0 1 0 1.0 14.0 \n",
"3 0 0 1 NaN 9.0 \n",
"4 0 0 1 NaN 4.0 \n",
2024-03-10 12:31:28 +01:00
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
2024-03-11 18:43:56 +01:00
"0 4.0 8 days 04:08:27 0.0 \n",
"1 9.0 0 days 01:39:58.555555555 0.0 \n",
2024-03-10 12:31:28 +01:00
"2 0.0 NaN 0.0 \n",
2024-03-11 18:43:56 +01:00
"3 0.0 NaN 0.0 \n",
2024-03-10 12:31:28 +01:00
"4 0.0 NaN 0.0 \n",
"\n",
" number_company \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"\n",
"[5 rows x 41 columns]"
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 189,
2024-03-10 12:31:28 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# same statistics on the train set\n",
"\n",
"train_set_spectacle.head()"
]
},
2024-03-11 18:43:56 +01:00
{
"cell_type": "markdown",
"id": "924300e5-d6a9-4686-a938-f5f99afda70c",
"metadata": {},
"source": [
"#### Part de clients n'ouvrant aucun mail"
]
},
2024-03-10 12:31:28 +01:00
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 190,
2024-03-10 12:31:28 +01:00
"id": "14ff9886-742c-4a60-8824-5d31f7c76aea",
"metadata": {},
"outputs": [],
"source": [
"train_set_spectacle[\"no_campaign_opened\"] = train_set_spectacle[\"nb_campaigns_opened\"]==0"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 191,
2024-03-10 12:31:28 +01:00
"id": "16285593-a0fa-461c-aeb8-c64ffdf9a0d6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>73.553379</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>35.582432</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>42.609537</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>32.887454</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>68.335897</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>52.833256</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>44.334881</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>28.807320</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased no_campaign_opened\n",
2024-03-11 18:43:56 +01:00
"0 10 0.0 73.553379\n",
"1 10 1.0 35.582432\n",
"2 11 0.0 42.609537\n",
"3 11 1.0 32.887454\n",
2024-03-10 12:31:28 +01:00
"4 12 0.0 100.000000\n",
"5 12 1.0 100.000000\n",
2024-03-11 18:43:56 +01:00
"6 13 0.0 68.335897\n",
"7 13 1.0 52.833256\n",
"8 14 0.0 44.334881\n",
"9 14 1.0 28.807320"
2024-03-10 12:31:28 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 191,
2024-03-10 12:31:28 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_lazy_customers = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"no_campaign_opened\"].mean().reset_index()\n",
"company_lazy_customers[\"no_campaign_opened\"] = 100 * company_lazy_customers[\"no_campaign_opened\"] \n",
"company_lazy_customers"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 195,
2024-03-10 12:31:28 +01:00
"id": "d35f00e3-b9b0-42b3-9dce-785c1ad5506c",
"metadata": {},
"outputs": [
{
"data": {
2024-03-11 18:43:56 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAIhCAYAAABE54vcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABwXElEQVR4nO3deXhM5///8deIrCSRRFYlofadWooSat+3LkqraJWiqqjSVoXaKapKS9VStXzUUtVWqa1VW+yKWkPUUvu+hOT8/vDLfI0kzGFGBs/HdeW6MvfZXufMmZm8c59zj8UwDEMAAAAAALtlSO8AAAAAAPCooZACAAAAAJMopAAAAADAJAopAAAAADCJQgoAAAAATKKQAgAAAACTKKQAAAAAwCQKKQAAAAAwiUIKAAAAAEyikEKqJk+eLIvFYv3JmDGjnnrqKbVu3VpHjhxx6LYGDhyo+fPnP9A6Dh48KIvFosmTJzskk72ioqLUqlWrh55j+vTpGjVqlFO3IaXfcYV5d56L6clisSgmJia9Yzjd0aNHFRMToy1btjh1O+n5Okz+LHgSPaz3WdhK7XxPPg8PHjxo1zr69eunggULKikpSZJ05coVxcTEaMWKFY4PLGnFihWyWCxOW/+DGjt2bKrvH3v27JGHh4c2bdr08EM9JiikcFeTJk3SmjVrtGTJErVt21YzZsxQxYoVdfnyZYdtwxGFlKsIDw/XmjVrVLduXaduhw943GnevHnq3bt3esd4ohw9elR9+/Z1eiGF9MH7bPp40M/Ro0ePaujQoerXr58yZLj1Z+6VK1fUt29fpxU6JUuW1Jo1a1SyZEmnrP9BpVVI5c2bVy1atNB777338EM9JjKmdwC4tsKFC6tUqVKSpCpVqigxMVGffvqp5s+frxYtWjzQuq9evSpvb29HxHQZnp6eevbZZ9M7Bp5AJUqUSO8IwEN39epVeXl5PbG9Zo+jB/0c/fzzz5UlSxY1adLkvtdx5coV+fj42D2/n5/fI/vZ36lTJ5UqVUqrV69W+fLl0zvOI4ceKZiS/EZx6NAhSVLfvn1VtmxZBQYGys/PTyVLltTEiRNlGIbNclFRUapXr57mzp2rEiVKyMvLS3379pXFYtHly5c1ZcoU62WElStXvmuGo0eP6qWXXpKvr6/8/f318ssv6/jx46nOu2HDBjVo0ECBgYHy8vJSiRIl9L///c+ufb1+/br69eunAgUKyMvLS0FBQapSpYpWr16d5jJpXYKzd+9eNW/eXCEhIfL09FSBAgX05Zdf2syTfGnAjBkz9NFHHykiIkJ+fn6qVq2adu/ebZ2vcuXK+vnnn3Xo0CGbyy+TjRs3TsWKFVPmzJnl6+ur/Pnz68MPP7zn/jr6uF65ckXdu3dXzpw55eXlpcDAQJUqVUozZsy4Z5YjR47orbfeUvbs2eXh4aGIiAi98MIL+u+//6zzxMfH69VXX7U5pp999pn1Ug7p/56PYcOGaciQIYqKipK3t7cqV66sPXv26MaNG+rZs6ciIiLk7++vxo0b68SJEzZZks/defPmqWjRovLy8lKuXLk0evRom/muXbumbt26qXjx4vL391dgYKDKlSunH3/8McX+nTt3Tm+88YYCAwOVOXNm1a1bVwcOHEhxSVxMTIwsFot27NihV155Rf7+/goNDVWbNm10/vz5FDnvvLTvwoUL1ufAw8ND2bJlU5cuXVL0KM+ePVtly5aVv7+/fHx8lCtXLrVp0+aez9OFCxfUtm1bBQUFKXPmzKpVq5b27NmT6rz2vAbScq98ya+dadOmqWvXrgoLC5O3t7eio6O1efPmFOuz933hbufhihUrVLp0aUlS69atra/D5Odvw4YNatasmfWci4qK0iuvvGJ977R3O3fzIMfUYrGoU6dO+u6771SgQAH5+PioWLFiWrhw4T2XTUpK0tChQ5U/f355enoqJCRELVu21L///mszX1qXm1auXNn6Pn/y5El5eH
ik2pv6zz//yGKxWF9ryZd3LV68WG3atFFwcLB8fHx0/fp17du3T61bt1aePHnk4+OjbNmyqX79+tq+fbvNOh31PpuaWbNmqUaNGgoPD5e3t7cKFCignj17pni93b7/t2vVqpWioqJs2u71OXS3yz4f5P0kNZUrV1bhwoW1Zs0alS9f3npeT5o0SZL0888/q2TJkvLx8VGRIkW0aNEim+XtfY4e5FLWhIQETZw4Uc2bN7f2Rh08eFDBwcGSZP27w2KxWM/N5OOyadMmvfDCCwoICNDTTz8tyf7XcWqX9rVq1UqZM2fWvn37VKdOHWXOnFnZs2dXt27ddP369Xvuy7Jly1S5cmUFBQXJ29tbOXLkUNOmTXXlyhWb/e3fv7/1tRgcHKzWrVvr5MmT1nmioqK0Y8cOrVy50rrvt59nzzzzjAoUKKCvvvrK1LHGLfRIwZR9+/ZJkvVN6eDBg2rXrp1y5MghSVq7dq3eeecdHTlyRJ988onNsps2bdKuXbv08ccfK2fOnMqUKZMaNWqk559/XlWqVLF+kPr5+aW5/atXr6patWo6evSoBg0apLx58+rnn3/Wyy+/nGLe5cuXq1atWipbtqy++uor+fv7a+bMmXr55Zd15cqVu95PcvPmTdWuXVt//vmnunTpoueff143b97U2rVrFR8fb+q/Njt37lT58uWVI0cOffbZZwoLC9Nvv/2mzp0769SpU+rTp4/N/B9++KEqVKigb775RhcuXNAHH3yg+vXra9euXXJzc9PYsWP11ltvaf/+/Zo3b57NsjNnzlSHDh30zjvvaPjw4cqQIYP27dunnTt33jWjM45r165d9d1336l///4qUaKELl++rL///lunT5++a5YjR46odOnSunHjhj788EMVLVpUp0+f1m+//aazZ88qNDRUJ0+eVPny5ZWQkKBPP/1UUVFRWrhwobp37679+/dr7NixNuv88ssvVbRoUX355Zc6d+6cunXrpvr166ts2bJyd3fXt99+q0OHDql79+568803tWDBApvlt2zZoi5duigmJkZhYWH6/vvv9e677yohIUHdu3eXdOsPnjNnzqh79+7Kli2bEhIS9Pvvv6tJkyaaNGmSWrZsKenWH6H169fXhg0bFBMTY70kpFatWmkek6ZNm+rll1/WG2+8oe3bt6tXr16SpG+//TbNZa5cuaLo6Gj9+++/1uO4Y8cOffLJJ9q+fbt+//13WSwWrVmzRi+//LJefvllxcTEyMvLS4cOHdKyZcvu+jwZhqFGjRpp9erV+uSTT1S6dGn99ddfql27dop5zb4Gbmcm34cffqiSJUvqm2++0fnz5xUTE6PKlStr8+bNypUrlyT7z997nYclS5bUpEmT1Lp1a3388cfWy5CeeuopSbfeG/Ply6dmzZopMDBQx44d07hx41S6dGnt3LlTWbNmtWs7oaGhqR6XBzmmyX7++WfFxsaqX79+ypw5s4YOHarGjRtr9+7d1uPVqlWrFO+Vb7/9tsaPH69OnTqpXr16OnjwoHr37q0VK1Zo06ZN1n2zR3BwsOrVq6cpU6aob9++1j9+pVuXlnt4eKS4+qFNmzaqW7euvvvuO12+fFnu7u46evSogoKCNHjwYAUHB+vMmTOaMmWKypYtq82bNytfvnw263iQ99m07N27V3Xq1FGXLl2UKVMm/fPPPxoyZIjWr19/z9dTahz5OXS7+3k/SXb8+HG1bt1aPXr00FNPPaUvvvhCbdq00eHDh/XDDz/oww8/lL+/v/r166dGjRrpwIEDioiIkCTTz9H9WLdunU6fPq0qVapY28LDw7Vo0SLVqlVLb7zxht58801J//d3TLImTZqoWbNmat++vbX4tfd1nJYbN26oQYMGeuONN9StWzf98ccf+vTTT+Xv75/ib6TbHTx4UHXr1lXFihX17bffKkuWLDpy5IgWLVqkhIQE+fj4KCkpSQ0bNtSff/6pHj16qHz58jp06JD69OmjypUra8OGDf
L29ta8efP0wgsvyN/f3/rZ6OnpabO9ypUra/bs2TIMg95dswwgFZMmTTIkGWvXrjVu3LhhXLx40Vi4cKERHBxs+Pr
2024-03-10 12:31:28 +01:00
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-11 18:43:56 +01:00
"# graphic for non opening mails customers for music companies (train set)\n",
"\n",
2024-03-10 12:31:28 +01:00
"multiple_barplot(company_lazy_customers, x=\"number_company\", y=\"no_campaign_opened\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
2024-03-11 18:43:56 +01:00
" xlabel = \"Compagnie\", ylabel = \"Part de clients n'ayant ouvert aucun mail (%)\", \n",
2024-03-10 12:31:28 +01:00
" title = \"Part de clients des compagnies de spectacle n'ouvrant aucun mail (train set)\")"
]
},
2024-03-11 18:43:56 +01:00
{
"cell_type": "code",
"execution_count": 196,
"id": "1a6e969e-10c1-4593-a16f-82c9f83a517e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# save in the s3\n",
"\n",
"FILE_NAME = \"no_mail_opened_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
{
"cell_type": "markdown",
"id": "f3407307-7cc1-4f57-a3ae-7c83773b4b81",
"metadata": {},
"source": [
"#### Part globale de mails ouverts pour chaque compagnie"
]
},
2024-03-10 12:31:28 +01:00
{
"cell_type": "code",
"execution_count": 111,
"id": "b391f5b2-2424-4758-8ae5-f0fdacdfae66",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_299341</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 05:47:26.333333333</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_63788</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>62.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>393.205891</td>\n",
" <td>281.017639</td>\n",
" <td>112.188252</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:13:51</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_759946</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_20653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>10.0</td>\n",
" <td>1 days 00:45:54</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_824705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697292</th>\n",
" <td>14_119950</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697293</th>\n",
" <td>14_938</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697294</th>\n",
" <td>14_5004707</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>2 days 16:42:51</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697295</th>\n",
" <td>14_108184</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697296</th>\n",
" <td>14_4663981</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>NaN</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>True</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-03-10 12:31:28 +01:00
"<p>697297 rows × 42 columns</p>\n",
2024-03-08 10:30:12 +01:00
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_299341 0.0 0.0 0.0 0.0 \n",
"1 10_63788 3.0 2.0 62.0 1.0 \n",
"2 10_759946 0.0 0.0 0.0 0.0 \n",
"3 10_20653 0.0 0.0 0.0 0.0 \n",
"4 10_824705 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"697292 14_119950 0.0 0.0 0.0 0.0 \n",
"697293 14_938 0.0 0.0 0.0 0.0 \n",
"697294 14_5004707 0.0 0.0 0.0 0.0 \n",
"697295 14_108184 0.0 0.0 0.0 0.0 \n",
"697296 14_4663981 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 NaN NaN \n",
"1 1.0 393.205891 281.017639 \n",
"2 0.0 NaN NaN \n",
"3 0.0 NaN NaN \n",
"4 0.0 NaN NaN \n",
"... ... ... ... \n",
"697292 0.0 NaN NaN \n",
"697293 0.0 NaN NaN \n",
"697294 0.0 NaN NaN \n",
"697295 0.0 NaN NaN \n",
"697296 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_female \\\n",
"0 NaN 0.0 ... 0 \n",
"1 112.188252 3.0 ... 1 \n",
"2 NaN 0.0 ... 0 \n",
"3 NaN 0.0 ... 0 \n",
"4 NaN 0.0 ... 0 \n",
"... ... ... ... ... \n",
"697292 NaN 0.0 ... 0 \n",
"697293 NaN 0.0 ... 0 \n",
"697294 NaN 0.0 ... 0 \n",
"697295 NaN 0.0 ... 0 \n",
"697296 NaN 0.0 ... 0 \n",
"\n",
" gender_male gender_other country_fr nb_campaigns \\\n",
"0 1 0 1.0 12.0 \n",
"1 0 0 1.0 3.0 \n",
"2 0 1 NaN 0.0 \n",
"3 1 0 1.0 11.0 \n",
"4 0 1 NaN 0.0 \n",
"... ... ... ... ... \n",
"697292 1 0 1.0 0.0 \n",
"697293 1 0 1.0 0.0 \n",
"697294 1 0 1.0 2.0 \n",
"697295 0 1 1.0 0.0 \n",
"697296 0 1 NaN 0.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
"1 1.0 0 days 05:13:51 1.0 \n",
"2 0.0 NaN 0.0 \n",
"3 10.0 1 days 00:45:54 0.0 \n",
"4 0.0 NaN 0.0 \n",
"... ... ... ... \n",
"697292 0.0 NaN 0.0 \n",
"697293 0.0 NaN 0.0 \n",
"697294 1.0 2 days 16:42:51 0.0 \n",
"697295 0.0 NaN 0.0 \n",
"697296 0.0 NaN 0.0 \n",
"\n",
" number_company no_campaign_opened \n",
"0 10 False \n",
"1 10 False \n",
"2 10 True \n",
"3 10 False \n",
"4 10 True \n",
"... ... ... \n",
"697292 14 True \n",
"697293 14 True \n",
"697294 14 False \n",
"697295 14 True \n",
"697296 14 True \n",
"\n",
"[697297 rows x 42 columns]"
2024-03-08 10:30:12 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 111,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# part de mails ouverts de chaque compagnie\n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"train_set_spectacle"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 197,
2024-03-10 12:31:28 +01:00
"id": "dc8cfd36-0eb2-4ef3-877d-626fd0a9ced4",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_compagny</th>\n",
2024-03-03 09:32:45 +01:00
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
2024-03-10 12:31:28 +01:00
" <th>ratio_campaigns_opened</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>734772</td>\n",
" <td>126151.0</td>\n",
" <td>0.171687</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>342396</td>\n",
" <td>129833.0</td>\n",
" <td>0.379190</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
" <td>3168123</td>\n",
" <td>810722.0</td>\n",
" <td>0.255900</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>13</td>\n",
" <td>3218569</td>\n",
" <td>793581.0</td>\n",
" <td>0.246563</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>2427043</td>\n",
" <td>723846.0</td>\n",
" <td>0.298242</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
"0 10 734772 126151.0 0.171687\n",
"1 11 342396 129833.0 0.379190\n",
"2 12 3168123 810722.0 0.255900\n",
"3 13 3218569 793581.0 0.246563\n",
"4 14 2427043 723846.0 0.298242"
2024-03-03 09:32:45 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 197,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# taux d'ouverture des campaigns\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
"company_campaigns_stats"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 198,
2024-03-10 12:31:28 +01:00
"id": "30b28426-088a-4153-b2aa-c20f11b2b771",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>perc_campaigns_opened</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>143960.0</td>\n",
" <td>18472.0</td>\n",
" <td>12.831342</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>10</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>10609.0</td>\n",
" <td>5177.0</td>\n",
" <td>48.798190</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>84676.0</td>\n",
" <td>27658.0</td>\n",
" <td>32.663328</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>20848.0</td>\n",
" <td>10927.0</td>\n",
" <td>52.412701</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>1182992.0</td>\n",
" <td>275366.0</td>\n",
" <td>23.277080</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>107160.0</td>\n",
" <td>41244.0</td>\n",
" <td>38.488242</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
2024-03-03 09:32:45 +01:00
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>822836.0</td>\n",
" <td>219220.0</td>\n",
" <td>26.642004</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>92099.0</td>\n",
" <td>34256.0</td>\n",
" <td>37.194758</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased nb_campaigns nb_campaigns_opened \\\n",
2024-03-11 18:43:56 +01:00
"0 10 0.0 143960.0 18472.0 \n",
"1 10 1.0 10609.0 5177.0 \n",
"2 11 0.0 84676.0 27658.0 \n",
"3 11 1.0 20848.0 10927.0 \n",
2024-03-10 12:31:28 +01:00
"4 12 0.0 0.0 0.0 \n",
"5 12 1.0 0.0 0.0 \n",
2024-03-11 18:43:56 +01:00
"6 13 0.0 1182992.0 275366.0 \n",
"7 13 1.0 107160.0 41244.0 \n",
"8 14 0.0 822836.0 219220.0 \n",
"9 14 1.0 92099.0 34256.0 \n",
2024-03-10 12:31:28 +01:00
"\n",
" perc_campaigns_opened \n",
2024-03-11 18:43:56 +01:00
"0 12.831342 \n",
"1 48.798190 \n",
"2 32.663328 \n",
"3 52.412701 \n",
2024-03-10 12:31:28 +01:00
"4 NaN \n",
"5 NaN \n",
2024-03-11 18:43:56 +01:00
"6 23.277080 \n",
"7 38.488242 \n",
"8 26.642004 \n",
"9 37.194758 "
2024-03-03 09:32:45 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 198,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"company_campaigns_stats = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
"company_campaigns_stats[\"perc_campaigns_opened\"] = 100* (company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"])\n",
"company_campaigns_stats"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 199,
2024-03-10 12:31:28 +01:00
"id": "9cebe912-fce1-4f4f-9d87-9649605296c8",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
2024-03-03 09:32:45 +01:00
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
2024-03-10 12:31:28 +01:00
" <th>perc_campaigns_opened</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>143960.0</td>\n",
" <td>18472.0</td>\n",
" <td>12.831342</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>10</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>10609.0</td>\n",
" <td>5177.0</td>\n",
" <td>48.798190</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>84676.0</td>\n",
" <td>27658.0</td>\n",
" <td>32.663328</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>20848.0</td>\n",
" <td>10927.0</td>\n",
" <td>52.412701</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
2024-03-03 09:32:45 +01:00
" <td>13</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>1182992.0</td>\n",
" <td>275366.0</td>\n",
" <td>23.277080</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>107160.0</td>\n",
" <td>41244.0</td>\n",
" <td>38.488242</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
2024-03-03 09:32:45 +01:00
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>822836.0</td>\n",
" <td>219220.0</td>\n",
" <td>26.642004</td>\n",
2024-03-10 12:31:28 +01:00
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
2024-03-11 18:43:56 +01:00
" <td>92099.0</td>\n",
" <td>34256.0</td>\n",
" <td>37.194758</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased nb_campaigns nb_campaigns_opened \\\n",
2024-03-11 18:43:56 +01:00
"0 10 0.0 143960.0 18472.0 \n",
"1 10 1.0 10609.0 5177.0 \n",
"2 11 0.0 84676.0 27658.0 \n",
"3 11 1.0 20848.0 10927.0 \n",
"6 13 0.0 1182992.0 275366.0 \n",
"7 13 1.0 107160.0 41244.0 \n",
"8 14 0.0 822836.0 219220.0 \n",
"9 14 1.0 92099.0 34256.0 \n",
2024-03-10 12:31:28 +01:00
"\n",
" perc_campaigns_opened \n",
2024-03-11 18:43:56 +01:00
"0 12.831342 \n",
"1 48.798190 \n",
"2 32.663328 \n",
"3 52.412701 \n",
"6 23.277080 \n",
"7 38.488242 \n",
"8 26.642004 \n",
"9 37.194758 "
2024-03-03 09:32:45 +01:00
]
},
2024-03-11 18:43:56 +01:00
"execution_count": 199,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"company_campaigns_stats = company_campaigns_stats[company_campaigns_stats[\"number_company\"]!=12]\n",
2024-03-03 09:32:45 +01:00
"company_campaigns_stats"
]
},
{
"cell_type": "code",
2024-03-11 18:43:56 +01:00
"execution_count": 201,
2024-03-10 12:31:28 +01:00
"id": "8418531b-4f30-4d96-8035-f3630c789d6f",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-11 18:43:56 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIiCAYAAAD2CjhuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABvtklEQVR4nO3dd3yN9///8echWyIEWVaoGDWjRvEhUSNGjapara0oqjFK1Qo1ilZRrS5EKVqt+vTTqj1qVmy1qhqz1N4VJNfvD7+cr3NlOIckR3ncb7fcbs77Wq9rnMt5nuu63sdiGIYhAAAAAIBVFmcXAAAAAACPGoISAAAAAJgQlAAAAADAhKAEAAAAACYEJQAAAAAwISgBAAAAgAlBCQAAAABMCEoAAAAAYEJQAgAAAAATghIA4JG1e/dueXl56cMPP3R2KQCAJwxBKR1ZLBa7/tasWePsUtN05MgRWSwWxcTEWNuio6MVEhLitJrS2759+xQdHa0jR444u5SH0qFDhwfeLyEhIXr++efTtZ6QkBB16NDhgae3WCyKjo5Ot3oeJSntq4fdXo+7q1evqnnz5nr99df1+uuvO7ucdLNmzZp/xf8FeLTeo4/z+fFef/31l6Kjo7Vz584MXU5Kn3XSw6VLl5Q7d27Nnz/f2rZ48eIM3XcRERGKiIjIsPk/jLQ+b7Vt21ZNmzbN9Joc4eLsAh4nmzZtsnn9zjvvaPXq1Vq1apVN+9NPP52ZZSEF+/bt04gRIxQREfFYBUA8uoYOHao33njD2WX8q3Tu3FmVKlXSu+++6+xS8IT6/vvvlT17dmeX8UT566+/NGLECIWEhKhcuXLOLsdhI0aMUHBwsFq2bGltW7x4sT766KMMC0sff/xxhsw3PaT1eSs6OlrFixfXqlWr9NxzzzmnwPsgKKWjZ5991uZ1njx5lCVLlmTtcJ7bt2/LYrFk+PxdXHhrwdZTTz3l7BL+db755htnl/DAbty4IS8vL2eXgYcUFhbm7BLwL3LhwgV9+umn+uCDDx74s4ZhGLp586Y8PT3tnubf+gX8U089pXr16undd999ZIMSt95lso8++kg1atSQv7+/smXLptKlS2v8+PG6ffu2zXipXe43X17t3r27PDw8tG3bNmtbYmKiatWqpYCAAJ06dSrNev766y+1aNFCPj4+8vX1VcuWLXX69Gm71uXmzZsaNGiQChUqJDc3N+XNm1c9e/bUpUuXbMZL7XaBe9dx165dslgsmj59erLxfv75Z1ksFv3www/WtkOHDqlNmzby9/eXu7u7SpQooY8++shmuqTbW2bPnq1+/fopb968cnd31xdffKGXXnpJklSzZk3rLZFJl9/t3fapzf+PP/6QJK1YsUK1atVS9uzZ5eXlpWrVqmnlypX32ap37d27V3Xr1pWXl5fy5Mmjnj176qeffrLrdh1790uS77//XmXKlJGHh4cKFy6sKVOmJJtfv379VK5cOfn6+srPz09VqlTRf//7X7vWJSVXrlzRq6++qly5csnb21v16tXT77//nuK49uzrxMREjRo1SsWKFZOnp6dy5MihMmXKaPLkyWnWkbQP586dq4EDByooKEje3t5q1KiR/v77b129elVdu3ZV7ty5lTt3bnXs2FHXrl2zmYe972l7bpN80PWQ7m7T/v372+z3qKgoXb9+3WY8i8WiXr16afbs2SpRooS8vLxUtmxZ/fjjj9ZxFi1aJIvFkuLxOm3aNFksFu3evdva9sMPP6hKlSry8vKSj4+P6tSpY3OFfd26dbJYLJo3b16y+X355ZeyWCyKjY21tm3dulWNGzeWn5+fPDw8FBYWliw03bhxw7q+Hh4e8vPzU4UKFVJcxr1iYmJksVi0fPlydezYUX5+fsqWLZsaNWqkP//802bc5cuXq0mTJsqXL588PDxUpEgRdevWTefOnbMZLzo6WhaLRdu3b1fz5s2VM2fOBwrGGbneknTy5El17dpV+f
Pnl5ubm4KDg9W8eXP9/fff1nGOHTumV155xeb99v777ysxMdE6TtItSxMmTNC4ceMUEhIiT09PRURE6Pfff9ft27f11ltvKTg4WL6+vnrhhRd05swZm1qSbv1Nz/PPpUuX1LlzZ/n5+cnb21sNGzbUn3/+mez/oKT9tXfvXrVu3Vq+vr4KCAhQp06ddPny5WR1mv8/sPe9tmDBAlWuXFm+vr7y8vJS4cKF1alTp/vup/Q+P6bmfvUlnR/nzJmjvn37KjAwUJ6engoPD9eOHTuSzc+e41dK+zhcs2aNKlasKEnq2LGj9f/npP23detWtWrVynrMhYSEqHXr1jp69KhDy0nLw2zTmJgY3blzx+ZqUocOHazT3/sYRtKtaEnn5E8++UQlSpSQu7u7Zs2aJenu1anKlSvLz89P2bNnV/ny5TV9+nQZhmGzXPPnk6T36HvvvaeJEyeqUKFC8vb2VpUqVbR58+b7roe955n77fOYmJg0P29Jd2+/W7FihQ4fPnz/DewEfO2dyQ4fPqw2bdpYT7C7du3S6NGjdeDAAc2YMcPh+U2aNEm//vqrWrRooW3btilHjhwaMWKE1qxZoyVLligoKCjVaf/55x/Vrl1bf/31l8aOHauiRYvqp59+snmDJ4mOjrb5j8YwDDVt2lQrV67UoEGDVL16de3evVvDhw/Xpk2btGnTJrm7u9u9HmXLllVYWJhmzpypzp072wyLiYmRv7+/GjRoIOnuZdyqVauqQIECev/99xUYGKilS5eqd+/eOnfunIYPH24z/aBBg1SlShV98sknypIliypUqKCLFy/q7bff1kcffaTy5ctLevBv/M3z9/f315w5c9SuXTs1adJEs2bNkqurqz799FNFRkZq6dKlqlWrVqrzO3XqlMLDw5UtWzZNmzZN/v7+mjdvnnr16nXfWhzdLzt37lRUVJSio6MVGBior776Sm+88YZu3bql/v37S5Li4+N14cIF9e/fX3nz5tWtW7e0YsUKNWvWTDNnzlS7du0c2l5JNW7cuFHDhg1TxYoVtWHDBtWvXz/ZuPbu6/Hjxys6OlpDhgxRjRo1dPv2bR04cCDVcGj29ttvq2bNmoqJidGRI0fUv39/tW7dWi4uLipbtqzmzZunHTt26O2335aPj4/Nh7n0fE8/6HrcuHFD4eHhOnHihN5++22VKVNGe/fu1bBhw7Rnzx6tWLHC5tvNn376SbGxsRo5cqS8vb01fvx4vfDCCzp48KAKFy6s559/Xv7+/po5c2ayYzUmJkbly5dXmTJlJElz587Vyy+/rLp162revHmKj4/X+PHjFRERoZUrV+o///mPqlevrrCwMH300Udq3bq1zfymTp2qihUrWj8crV69WvXq1VPlypX1ySefyNfXV/Pnz1fLli1148YN64fWvn37avbs2Ro1apTCwsJ0/fp1/fbbbzp//rxd27pz586qU6eO5s6dq+PHj2vIkCGKiIjQ7t27lSNHDkl3922VKlXUpUsX+fr66siRI5o4caL+85//aM+ePXJ1dbWZZ7NmzdSqVSt179492Yfm+8no9T558qQqVqyo27dvW4+R8+fPa+nSpbp48aICAgJ09uxZVa1aVbdu3dI777yjkJAQ/fjjj+rfv78OHz6c7Paejz76SGXKlNFHH32kS5cuqV+/fmrUqJEqV64sV1dXzZgxQ0ePHlX//v3VpUsXmy+7pPQ9/yQmJqpRo0baunWroqOjVb58eW3atEn16tVLdZu8+OKLatmypTp37qw9e/Zo0KBBkpTm+9be99qmTZvUsmVLtWzZUtHR0fLw8NDRo0eT3YpvlhHnx5Q4Ut/bb7+t8uXL64svvtDly5cVHR2tiIgI7dixQ4ULF5Zk//F7v+OwfPnymjlzpjp27KghQ4aoYcOGkqR8+fJJuhsAihUrplatWsnPz0+nTp3StGnTVLFiRe3bt0+5c+e2azkBAQEpbpeH2abS3XNrWFiY9Rwi3b3t+vr16/r222
9tvkC69/PZokWLtG7dOg0bNkyBgYHy9/e3rm+3bt1UoEABSdLmzZv1+uuv6+TJkxo2bFiatUh336PFixfXpEmTrLU
2024-03-03 09:32:45 +01:00
"text/plain": [
2024-03-10 12:31:28 +01:00
"<Figure size 1000x600 with 1 Axes>"
2024-03-03 09:32:45 +01:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-11 18:43:56 +01:00
"# graphic - overall rate of opened mails (train set for music companies)\n",
"\n",
2024-03-10 12:31:28 +01:00
"multiple_barplot(company_campaigns_stats, x=\"number_company\", y=\"perc_campaigns_opened\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
2024-03-11 18:43:56 +01:00
" xlabel = \"Compagnie\", ylabel = \"Part de mails ouverts (%)\", \n",
2024-03-10 12:31:28 +01:00
" title = \"Taux d'ouverture global des mails envoyés par les compagnies de spectacle (train set)\")"
2024-03-03 09:32:45 +01:00
]
},
2024-03-11 18:43:56 +01:00
{
"cell_type": "code",
"execution_count": 202,
"id": "1c32cd86-e08d-4b8a-90f1-27ad0df0ffeb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 640x480 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# save in the s3\n",
"\n",
"FILE_NAME = \"overall_mail_opening_train_set_music.png\"\n",
"FILE_PATH_OUT_S3 = FILE_PATH + FILE_NAME\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:\n",
" plt.savefig(file_out)"
]
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "markdown",
"id": "783f6fb2-5f26-42a9-a22d-f4ece44bfaf2",
2024-03-11 18:43:56 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
2024-03-03 09:32:45 +01:00
"source": [
"### 3. products_purchased_reduced"
]
},
2024-03-05 03:15:03 +01:00
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 16,
2024-03-03 09:32:45 +01:00
"id": "74534ded-8121-43fb-8cf8-af353bed2c77",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 764880\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 0\n",
"purchase_date_max 0\n",
"time_between_purchase 0\n",
"nb_tickets_internet 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
2024-03-10 17:41:43 +01:00
"execution_count": 16,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",products_purchased_reduced_spectacle.shape[0])\n",
"products_purchased_reduced_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 21,
2024-03-05 03:15:03 +01:00
"id": "6db089d5-5517-4aee-a5fd-53f20ae3f0d7",
2024-03-05 00:36:48 +01:00
"metadata": {},
"outputs": [],
2024-03-05 03:15:03 +01:00
"source": [
"#importation librairies\n",
"import warnings\n",
"warnings.simplefilter(\"ignore\")\n",
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy.stats import shapiro\n",
"from numpy.random import randn\n",
"import scipy.stats as st\n",
2024-03-05 03:25:59 +01:00
"%matplotlib inline\n",
"\n",
"#col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 39,
"id": "943b8088-9ca2-40a4-b658-2cfae1589fac",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"30.0\n",
"62.0\n",
"120.0\n",
"90.0\n",
"Moustache inferieure -105.0\n",
"Moustache superieure 255.0\n"
]
}
],
"source": [
"#identification des valeur manquantes\n",
"#calcule des quartile de la variable valeur(taille de la population)\n",
"Q1=np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], 25) # Q1\n",
"Q2=np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], 50) # Q2\n",
"Q3=np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], 75) # Q3\n",
"print(Q1)\n",
"print(Q2)\n",
"print(Q3)\n",
"\n",
"#intervale interquartile de la variable Valeur\n",
"\n",
"IQ=Q3-Q1\n",
"print(IQ)\n",
"\n",
"#la valeur minimale des moustache de la variable Valeur\n",
"\n",
"M_inf=Q1-1.5*IQ\n",
"M_sup=Q3+1.5*IQ\n",
"\n",
"print(\"Moustache inferieure\",M_inf)#moustache inferieur\n",
"print(\"Moustache superieure\",M_sup)#moustache sup\n"
]
},
2024-03-10 19:08:50 +01:00
{
"cell_type": "code",
"execution_count": 62,
"id": "c3adb0cd-8292-4c6f-9d4e-8352a6967022",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id int64\n",
"nb_tickets int64\n",
"nb_purchases int64\n",
"total_amount float64\n",
"nb_suppliers int64\n",
"vente_internet_max int64\n",
"purchase_date_min float64\n",
"purchase_date_max float64\n",
"time_between_purchase float64\n",
"nb_tickets_internet float64\n",
"number_compagny int64\n",
"dtype: object"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced_spectacle.dtypes"
]
},
2024-03-10 17:41:43 +01:00
{
"cell_type": "markdown",
"id": "a63e6d13-429b-4b01-ad11-27e5eea68cbd",
2024-03-05 03:25:59 +01:00
"metadata": {},
"source": [
"#histogrames des variable quantitatives\n",
"col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]\n",
"for col in col_purchase:\n",
" plt.figure()\n",
" sns.histplot(products_purchased_reduced_spectacle[col], kde=True, color='red')"
2024-03-05 03:15:03 +01:00
]
},
{
"cell_type": "code",
2024-03-10 21:00:29 +01:00
"execution_count": 127,
2024-03-10 17:41:43 +01:00
"id": "5a08b5a5-7d56-4543-945a-38f6219d831d",
2024-03-05 03:15:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 17:41:43 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAn8AAAHGCAYAAAAFY+3bAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABPdUlEQVR4nO3deZyNdeP/8feZ7cxqmMEsmhn7zqgUoZA9S8gt6S6kRaSEW8mNUZZSSTepW1mTVPeNmzZGlrIVoZJSSoaYRmMZ6xgzn98f/eZ8ndnNnHFm5no9H4/z4FzXdT6fz7We93yu5diMMUYAAACwBA93NwAAAADXDuEPAADAQgh/AAAAFkL4AwAAsBDCHwAAgIUQ/gAAACyE8AcAAGAhhD8AAAALIfwBAABYiOXC3549exQcHKx//etf7m4KAADANXdV4W/hwoWy2WxOr0qVKqlNmzb68MMPC92IqlWrauDAgY73R48eVVxcnPbs2VPoMnPTpEkTrVixQuPGjdO2bdtcXn5JNHXqVK1cubLY67HZbHrssceKvZ686o+Li7umdQ4cOFCBgYEFmjan9n322Wdq2rSpAgICZLPZHOvpvffeU4MGDeTn5yebzVYs+0J+Bg4cqKpVqzoNs9lsWrhwYaHK2717t1q3bq3g4GDZbDbNnDlTUu7LoCB+++23IrXJXTKPpb/99ts1r7tNmzZq06bNNa8XV8ed20hWcXFxstls7m7GNbF06VLHsak45XR8vZa8CvOhBQsWqG7dujLGKDExUbNnz1b37t21atUqde/e/arLW7FihcqVK+d4f/ToUU2aNElVq1ZVkyZNCtPEPN1+++1688031a9fP+3cuVOVKlVyeR0lydSpU9WnTx/17NnT3U2xtG3btum6665zvDfGqG/fvqpdu7ZWrVqlgIAA1alTR8ePH9d9992nzp07a86cObLb7apdu7YbW+4aDzzwgM6dO6dly5apQoUKqlq1aq7LoKAiIiK0bds21ahRoxhbDlx7Xbt21bZt2xQREeHupljK0qVLtXfvXo0YMcLdTSlWhQp/DRs2VNOmTR3vO3furAoVKujdd98tVPi7/vrrC9OMIunXr5/69et3zeuFdTVv3tzp/dGjR3XixAn16tVL7dq1cwzfsmWL0tLS9Pe//12tW7fOs8zz58/L39+/WNrranv37tVDDz2kLl26OIb9/vvvOS6DgrLb7dmWa05K03ICJKlSpUplvmMC7uOSa/58fX3l4+Mjb29vp+EnTpzQ0KFDVaVKFfn4+Kh69eoaN26cUlNTnaa78rTvxo0bddNNN0mSBg0a5Di9fOXpsp07d6pHjx4KCQmRr6+vrr/+er3//vsFauukSZPUrFkzhYSEqFy5crrhhhs0b948GWPy/WzmKb4ff/xRnTp1UkBAgCIiIvT8889LkrZv365WrVopICBAtWvX1qJFi7KVsXfvXt15552qUKGCfH191aRJk2zT5dbdv3HjRtlsNm3cuNExbPfu3erWrZsqV64su92uyMhIde3aVUeOHJH012m6c+fOadGiRY5lmXnK5/jx4xo6dKjq16+vwMBAVa5cWbfffru++OKLbO1OTU3Vs88+q3r16snX11ehoaFq27attm7dmm3at99+W/Xq1ZO/v79iY2NzvCTg559/Vv/+/R3trlevnl577bU8l3+mlJQUPfTQQwoNDVVgYKA6d+6sn376Kdt0uXWrX80pjE8//VTt2rVTcHCw/P39Va9ePU2bNi3bdAcOHNAdd9yhwMBARUVFadSoUdm28yu347i4OEcv4FNPPSWbzebYD1q1aiVJuvvuu53WV+b2991336ljx44KCgpyBKZLly5p8uTJqlu3rux2uypVqqRBgwbp+PHjBZrPhQsXqk6dOo51sXjx4gJ97sCBAxo0aJBq1aolf39/ValSRd27d9d3333nVLbNZtPly5f1+uuvO+3TOS2DgpYr5XzaN3P97tq1S3369F
GFChUcPYPGGM2ZM0dNmjSRn5+fKlSooD59+ujXX3/Nd16PHz+uhx9+WFFRUY5l3LJlS61bt85punXr1qldu3YqV66c/P391bJlS3322WcFWp7z589XbGysfH19FRISol69eumHH35wmiZzOyjINldQBd1+1q9frzZt2ig0NFR+fn6Kjo7WXXfdpfPnz+dbx9KlS3XLLbcoMDBQgYGBatKkiebNm1fo+S/scThze4yPj9egQYMUEhKigIAAde/ePdt2EB8frzvvvFPXXXedfH19VbNmTT3yyCP6888/s83f//73PzVu3Fh2u13Vq1fXq6++muOxJvPymPyOk7l9DxRk+yrotpqTjz76SE2aNJHdble1atX00ksv5Thdce9Lbdq0UcOGDfXFF1+oefPm8vPzU5UqVTR+/Hilp6c7lXc1x7+8tsM2bdroo48+0qFDh5wub8t0NfmhINt7YZdpft/7BWKuwoIFC4wks337dpOWlmYuXbpkDh8+bB5//HHj4eFhPv30U8e0Fy5cMI0bNzYBAQHmpZdeMmvXrjXjx483Xl5e5o477nAqNyYmxgwYMMAYY8zp06cd9fzzn/8027ZtM9u2bTOHDx82xhizfv164+PjY2699Vbz3nvvmU8//dQMHDjQSDILFizIdx4GDhxo5s2bZ+Lj4018fLx57rnnjJ+fn5k0aVK+nx0wYIDx8fEx9erVM6+++qqJj483gwYNMpLM2LFjTe3atc28efPMmjVrTLdu3Ywks3PnTsfnf/zxRxMUFGRq1KhhFi9ebD766CNzzz33GEnmhRdeyLacDx486FT/hg0bjCSzYcMGY4wxZ8+eNaGhoaZp06bm/fffN5s2bTLvvfeeGTJkiNm3b58xxpht27YZPz8/c8cddziW5ffff+9oz6OPPmqWLVtmNm7caD788EMzePBg4+Hh4ajDGGPS0tJM27ZtjZeXlxk9erT5+OOPzapVq8wzzzxj3n33Xcd0kkzVqlXNzTffbN5//33z8ccfmzZt2hgvLy/zyy+/OKb7/vvvTXBwsGnUqJFZvHixWbt2rRk1apTx8PAwcXFxea6DjIwM07ZtW2O3282UKVPM2rVrzcSJE0316tWNJDNx4kSn9RUTE5OtjIkTJ5qCbPpvvfWWsdlspk2bNmbp0qVm3bp1Zs6cOWbo0KFOdWRuEy+99JJZt26dmTBhgrHZbNm2qSvbd/jwYbN8+XIjyQwfPtxs27bN7Nq1yxw4cMC89tprRpKZOnWq0/oaMGCA8fb2NlWrVjXTpk0zn332mVmzZo1JT083nTt3NgEBAWbSpEkmPj7evPXWW6ZKlSqmfv365vz583nOZ+b2duedd5rVq1ebJUuWmJo1a5qoqKgcl9+VNm3aZEaNGmX+85//mE2bNpkVK1aYnj17Gj8/P/Pjjz8aY4xJSkoy27ZtM5JMnz59nPbpnJZBQcs1xpiDBw9m2/cz129MTIx56qmnTHx8vFm5cqUxxpiHHnrIeHt7m1GjRplPP/3ULF261NStW9eEhYWZxMTEPOe1U6dOplKlSmbu3Llm48aNZuXKlWbChAlm2bJljmnefvttY7PZTM+ePc3y5cvN6tWrTbdu3Yynp6dZt25dtmV+5T4+depUI8ncc8895qOPPjKLFy821atXN8HBweann35yTHc121xOWrdubVq3bu14X9Dt5+DBg8bX19d06NDBrFy50mzcuNG888475r777jMnT57Ms87x48cbSaZ3797mgw8+MGvXrjUzZsww48ePL9L8F+Y4nLnso6KizAMPPGA++eQTM3fuXFO5cmUTFRXlNC+vv/66mTZtmlm1apXZtGmTWbRokYmNjTV16tQxly5dckz3ySefGA8PD9OmTRuzYsUK88EHH5hmzZqZqlWrZjvWFPQ4mdM2UtDtqyDbak7WrVtnPD09TatWrczy5cvNBx98YG666SYTHR2dbT6Ke19q3bq1CQ0NNZGRkeZf//qXWbNmjXn88ceNJD
Ns2DDHdFdz/MtvO/z+++9Ny5YtTXh4uOM4tW3bNsfnC5ofCrK95/T9VJBlWpDv/YIoVPjL+rLb7WbOnDlO077xxht
2024-03-05 03:15:03 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2024-03-05 00:36:48 +01:00
"source": [
2024-03-10 21:00:29 +01:00
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
2024-03-05 00:36:48 +01:00
"\n",
2024-03-10 17:41:43 +01:00
"# Filtrer les données pour inclure uniquement les valeurs positives de total_amount et exclusion des valeur aberrantes\n",
"filtered_products_purchased_reduced_spectacle = products_purchased_reduced_spectacle[(products_purchased_reduced_spectacle['total_amount'] > 0) & (products_purchased_reduced_spectacle['total_amount'] <= 255)]\n",
"\n",
"# Créer le graphique en utilisant les données filtrées\n",
2024-03-10 21:00:29 +01:00
"sns.boxplot(data=filtered_data, y=\"total_amount\", x=\"number_compagny\", showfliers=False, showmeans=True)\n",
2024-03-10 17:41:43 +01:00
"\n",
"# Titre du graphique\n",
"plt.title(\"Boite à moustache du chiffre d'affaire selon les compagnies de spectacles\")\n",
"\n",
"# Afficher le graphique\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
2024-03-10 19:08:50 +01:00
"execution_count": 87,
2024-03-05 15:37:29 +01:00
"id": "76e08ece-0b58-4b3a-abca-53e30ccc907b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"Statistique F : 317.1792172580724\n",
"Valeur de p : 3.665389608154993e-273\n",
2024-03-05 15:37:29 +01:00
"Nombre de degrés de liberté entre les groupes : 4\n",
2024-03-10 17:41:43 +01:00
"Nombre de degrés de liberté à l'intérieur des groupes : 670581\n",
"Il y a des différences significatives entre au moins une des entrepries .\n"
2024-03-05 15:37:29 +01:00
]
}
],
"source": [
"#test d'anova pour voir si la difference de chiffre d'affaire est statistiquement significative\n",
"\n",
"from scipy.stats import f_oneway\n",
"\n",
"# Créez une liste pour stocker les données de chaque groupe\n",
"groupes = []\n",
"\n",
"# Parcourez chaque modalité de la variable catégorielle et divisez les données en groupes\n",
2024-03-10 17:41:43 +01:00
"for modalite in filtered_products_purchased_reduced_spectacle['number_compagny'].unique():\n",
" groupe = filtered_products_purchased_reduced_spectacle[filtered_products_purchased_reduced_spectacle['number_compagny'] == modalite]['total_amount']\n",
2024-03-05 15:37:29 +01:00
" groupes.append(groupe)\n",
"\n",
"# Effectuez le test ANOVA\n",
"f_statistic, p_value = f_oneway(*groupes)\n",
"\n",
"# Nombre total d'observations\n",
"N = sum(len(groupe) for groupe in groupes)\n",
"\n",
"# Nombre de groupes ou de catégories\n",
"k = len(groupes)\n",
"\n",
"# Degrés de liberté entre les groupes\n",
"df_between = k - 1\n",
"\n",
"# Degrés de liberté à l'intérieur des groupes\n",
"df_within = N - k\n",
"\n",
"# Affichez les résultats\n",
"print(\"Statistique F :\", f_statistic)\n",
"print(\"Valeur de p :\", p_value)\n",
"\n",
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
"\n",
"if p_value < 0.05:\n",
" print(\"Il y a des différences significatives entre au moins une des entrepries .\")\n",
"else:\n",
" print(\"Il n'y a pas de différences significatives entre les entreprises .\")"
]
},
2024-03-05 03:15:03 +01:00
{
"cell_type": "code",
2024-03-10 21:00:29 +01:00
"execution_count": 129,
"id": "9ec6e1c5-f3bc-4041-b32e-b62762246eb7",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmcAAAHFCAYAAAC3jl5pAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABSW0lEQVR4nO3deZyNdf/H8fdhZs6shhlm08wYsi8jKZkUY20wFQmpEJG03LaU240hTCmlCClrFPULt+JOZIlQYxlRkmyjGGowgxizXL8/3HNuxyxmda7h9Xw8zoPzvb7nOp/rOue6znu+13WdYzEMwxAAAABMoYyjCwAAAMD/EM4AAABMhHAGAABgIoQzAAAAEyGcAQAAmAjhDAAAwEQIZwAAACZCOAMAADARwtl1xMfHy9vbW++++66jSwEAALeAEg1n8+bNk8VisbtVqlRJLVq00Jdfflno+VapUkW9e/e23T9+/LhiYmIUHx9f9KKv0bBhQy1btkwjR47U1q1bi33+ZjRx4kQtX768xJ/HYrHo+eefL/Hnyev5Y2Jibuhz9u7dW56envnqm1N933zzjRo3biwPDw9ZLBbb67RkyRLVrVtXbm5uslgsJbItXE/v3r1VpUoVuzaLxaJ58+YVan67du1S8+bN5e3tLYvFoilTpkjKfR3kx5EjR4pUk6Nk7UuPHDlyw5+7RYsWatGixQ19zunTpxf5NSqO/Vhx1CFl35azXs+SEhMTI4vFor/++qvY5vnxxx/btsGiuPbz+0bIafvJaX9V3IqSTW7IyNncuXO1detWbdmyRbNmzVLZsmUVHR2tL774olDzW7ZsmUaNGmW7f/z4cY0dO7bEPpBatmypDz74QN27d9eff/5ZIs9hJjcqnCFvW7du1dNPP227bxiGunbtKmdnZ61YsUJbt25V8+bN9eeff+rJJ59UtWrV9NVXX2nr1q2qUaOGAysvHn369NGJEye0ePFibd26Vd27d891HeRXYGCgtm7dqg4dOpRg5Siqmy2c3QyKK5yZxahRo7Rs2bISfY6iZBOn4i8nu3r16qlx48a2+w888IAqVKigTz75RNHR0QWe3x133FGc5eVL9+7d1b179xv+vLh13XPPPXb3jx8/rtOnT6tTp05q1aqVrf27775TWlqannjiiesGlb///lvu7u4lUm9x27t3r/r166eoqChb2x9//JHjOsgvq9Wabb3mpDStJwAFV61aNUeXkCeHnHPm6uoqFxcXOTs727WfPn1aAwcOVOXKleXi4qKqVatq5MiRSk1Ntet39bDohg0bdNddd0mSnnrqKdvh06uHkLdv364HH3xQPj4+cnV11R133KFPP/00X7WOHTtWTZo0kY+Pj8qVK6dGjRpp9uzZys/vxWcdwvrll1/Url07eXh4KDAwUK+99pokadu2bWrWrJk8PDxUo0YNzZ8/P9s89u7dq4ceekgVKlSQq6urGjZsmK1fboc8NmzYIIvFog0bNtjadu3apY4dO8rPz09Wq1VBQUHq0KGDfv/9d0lXht8vXLig+fPn29Zl1iGNP//8UwMHDlSdOnXk6ekpPz8/tWzZUps2bcpWd2pqqsaNG6fatWvL1dVVvr6+ioyM1JYtW7L1/eijj1S7dm25u7srPDw8x0PeBw4cUI8ePWx1165dW++9916e6z9LSkqK+vXrJ19fX3l6euqBBx7Qr7/+mq1fbsPcWYcI8uOrr75Sq1at5O3tLXd3d9WuXVuxsbHZ+v32229q3769PD09FRwcrKFDh2Z7n1/9Po6JidFtt90mSXr55ZdlsVhs20GzZs0kSd26dbN7vbLef3v27FHbtm3l5eVlCzSXL1/W+PHjVatWLVmtVlWqVElPPfVUvkeG582bp5o1a9peiwULFuTrcb/99pueeuopVa9eXe7u7qpcubKio6O1Z88eu3lbLBalp6drxowZdtt0Tusgv/OVcj6smfX67ty5U126dFGFChVsO27DMD
R9+nQ1bNhQbm5uqlChgrp06aJDhw5dd1n//PNP9e/fX8HBwbZ1fO+992rt2rV2/dauXatWrVqpXLlycnd317333qtvvvkmX+tzzpw5Cg8Pl6urq3x8fNSpUyft27fPrk/W+yA/77n8Ksr759ChQ+revbuCgoJktVrl7++vVq1a2UYXqlSpop9++kkbN260vfZZr/OlS5c0dOhQNWzYUN7e3vLx8VHTpk3173//2+458tqP5bY9X7sfzauO3OR3X5OTJUuWqG3btgoMDJSbm5tq166tV155RRcuXMjW9/vvv1d0dLR8fX3l6uqqatWqadCgQdn6nTx5Uo899pi8vb3l7++vPn36KDk52a7Pe++9p/vvv19+fn7y8PBQ/fr1NWnSJKWlpdn6tGjRQitXrtTRo0ftTlPKS1pamoYPH66AgAC5u7urWbNm+uGHH7L1y+/rkZf8ro+r5bS/z+/23qJFC9WrV09xcXG677775O7urqpVq+q1115TZmampPxlk7zckJGzjIwMpaenyzAMnTx5Um+88YYuXLigHj162PpcunRJkZGROnjwoMaOHasGDRpo06ZNio2NVXx8vFauXJnjvBs1aqS5c+fqqaee0r/+9S/b4Yqsnfj69ev1wAMPqEmTJpo5c6a8vb21ePFidevWTX///fd1j30fOXJEzzzzjEJCQiRdCVQvvPCC/vjjD40ePfq6y56WlqbOnTtrwIABeumll/Txxx9rxIgRSklJ0eeff66XX35Zt912m6ZOnarevXurXr16uvPOOyVJ+/fvV0REhPz8/PTuu+/K19dXCxcuVO/evXXy5EkNHz78us9/tQsXLqhNmzYKCwvTe++9J39/fyUmJmr9+vU6d+6cpCuH0lq2bKnIyEjboeNy5cpJuhKeJWnMmDEKCAjQ+fPntWzZMrVo0ULffPONbeeXnp6uqKgobdq0SYMGDVLLli2Vnp6ubdu2KSEhQREREbaaVq5cqbi4OI0bN06enp6aNGmSOnXqpP3796tq1aqSpJ9//lkREREKCQnR5MmTFRAQoNWrV+vFF1/UX3/9pTFjxuS6zIZh6OGHH9aWLVs0evRo3XXXXfruu+/sRmOKy+zZs9WvXz81b95cM2fOlJ+fn3799Vft3bvXrl9aWpoefPBB9e3bV0OHDtW3336rV199Vd7e3rm+p55++mmFh4erc+fOeuGFF9SjRw9ZrVaVK1dOd999t5577jlNnDhRkZGRttdLuvIh+uCDD+qZZ57RK6+8ovT0dGVmZuqhhx7Spk2bNHz4cEVEROjo0aMaM2aMWrRooe3bt8vNzS3X5Zw3b56eeuopPfTQQ5o8ebKSk5MVExOj1NRUlSlj//fetX/EHD9+XL6+vnrttddUqVIlnT59WvPnz1eTJk20a9cu1axZUx06dNDWrVvVtGlTdenSRUOHDpV0ZZvOaR3kd77X07lzZ3Xv3l0DBgywfSA+88wzmjdvnl588UW9/vrrOn36tMaNG6eIiAjt3r1b/v7+uc7vySef1M6dOzVhwgTVqFFDZ8+e1c6dO5WUlGTrs3DhQvXs2VMPPfSQ5s+fL2dnZ73//vtq166dVq9enefoYGxsrP75z3/qscceU2xsrJKSkhQTE6OmTZsqLi5O1atXt/UtzHsuN0V9/7Rv314ZGRmaNGmSQkJC9Ndff2nLli06e/aspCunrXTp0kXe3t6aPn26JNle59TUVJ0+fVrDhg1T5cqVdfnyZa1du1adO3fW3Llz1bNnT0l578fyK686clKQfU3v3r2zffYcOHBA7du316BBg+Th4aFffvlFr7/+un744QetW7fO1m/16tWKjo5W7dq19dZbbykkJERHjhzR119/ne15HnnkEXXr1k19+/bVnj17NGLECElXQn2WgwcPqkePHgoLC5OLi4t2796tCRMm6JdffrH1mz59uvr376+DBw/m+1Bgv379tGDBAg0bNkxt2rTR3r171blzZ9tnTXEpyPq4noJs74
mJiXr88cc1dOhQjRkzRsuWLdOIESMUFBSknj17XjebXJdRgubOnWtIynazWq3G9OnT7frOnDnTkGR8+umndu2vv/6
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#repartition Chiffre d'affaire selon y_has_purchased\n",
"\n",
"# Filtrer les données pour inclure uniquement les valeurs positives de total_amount et exclusion des valeur aberrantes\n",
"train_set_spectacle_filtered = train_set_spectacle[(train_set_spectacle['total_amount'] > 0) & (train_set_spectacle['total_amount'] <= 255)]\n",
"\n",
"# Créer le graphique en utilisant les données filtrées\n",
"sns.boxplot(data=train_set_spectacle_filtered, y=\"total_amount\", x=\"y_has_purchased\", showfliers=False, showmeans=True)\n",
"\n",
"# Titre du graphique\n",
"plt.title(\"Boite à moustache du chiffre d'affaire selon le statut d'achat du client\")\n",
"\n",
"# Afficher le graphique\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
2024-03-10 19:08:50 +01:00
"id": "6b55de4b-913e-4bc1-b4f2-cc0b1824d0e2",
"metadata": {},
"outputs": [],
"source": [
"#graphe sur le taux de ticket acheté"
]
},
{
"cell_type": "code",
"execution_count": 89,
2024-03-05 03:15:03 +01:00
"id": "aacf2c34-f7ea-4d6e-935b-c5db01f03bbe",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>Taux_ticket_internet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>492314</td>\n",
" <td>126262.0</td>\n",
" <td>25.646640</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>318969</td>\n",
" <td>16348.0</td>\n",
" <td>5.125263</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>591028</td>\n",
" <td>42045.0</td>\n",
" <td>7.113876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>7024227</td>\n",
" <td>1247482.0</td>\n",
" <td>17.759705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>335741</td>\n",
" <td>125638.0</td>\n",
" <td>37.421107</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_tickets nb_tickets_internet Taux_ticket_internet\n",
"0 10 492314 126262.0 25.646640\n",
"1 11 318969 16348.0 5.125263\n",
"2 12 591028 42045.0 7.113876\n",
"3 13 7024227 1247482.0 17.759705\n",
"4 14 335741 125638.0 37.421107"
]
},
2024-03-10 19:08:50 +01:00
"execution_count": 89,
2024-03-05 03:15:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Taux de ticket payé par internet selon les compagnies\n",
"\n",
"purchase_spectacle = products_purchased_reduced_spectacle.groupby(\"number_compagny\")[[\"nb_tickets\", \"nb_tickets_internet\"]].sum().reset_index()\n",
"purchase_spectacle[\"Taux_ticket_internet\"] = purchase_spectacle[\"nb_tickets_internet\"]*100 / purchase_spectacle[\"nb_tickets\"]\n",
"purchase_spectacle"
]
},
{
"cell_type": "code",
2024-03-10 19:08:50 +01:00
"execution_count": 90,
2024-03-05 03:15:03 +01:00
"id": "f71bb53d-724b-454d-8743-305d20eec2b0",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlcAAAHFCAYAAADffdxRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABULUlEQVR4nO3dd1QU5/4G8GelLH0VUIoiRUFERY0olqhgQbHFbjQxEKMxlnhtMZaoYMPeYtTotccWr+VaKTYSu0YQYzdBUSOioIAIqPD+/vDHXpeiuzjLsvp8ztlzmHdmZ555mR2+TFuZEEKAiIiIiCRRRtcBiIiIiN4nLK6IiIiIJMTiioiIiEhCLK6IiIiIJMTiioiIiEhCLK6IiIiIJMTiioiIiEhCLK6IiIiIJMTiioiIiEhCWimuZDKZWq+jR49qY/GSuXXrFmQyGdauXatsCwkJgYuLi9aWGRwcDAsLC0nnuXTpUpV1KC6ZTIaQkJB3no+6/vnnH4SEhCA2NrbAuJCQEMhkMo3n6efnh5o1a0qQDrh8+TJCQkJw69YtSeZXkoKDgwtsxy4uLggODtZJHl0orA9KSkl/lqh4dLmN5Ofn5wc/Pz9dxygRM2bMwK5du7S+HG3u8wy1MdOTJ0+qDE+dOhVHjhzB4cOHVdq9vLy0sXjKZ+nSpbC1tdW7P5z//PMPQkND4eLigjp16qiM69+/P9q2baubYP/v8uXLCA0NhZ+fX6nZAb+LnTt3wsrKStcxiEqNiRMn4l//+peuY3xwZsyYge7du6Nz5866jlJsWimuGjZsqDJcvnx5lClTpkA7UXFVqlQJlSpV0nWM90rdunV1HYGoVKlSpYquI5Ce0tk1Vz/99BOaNWuGChUqwNzcHLVq1cLs2bPx4sULlemKOmyX/xDpN998AxMTE/zxxx/KttzcXLRs2RJ2dna4f//+G/P8888/6NmzJywtLaFQKNCrVy8kJiZKui4AEB4ejpYtW0KhUMDMzAzVq1dHWFhYgelu3ryJdu3awcLCAk5OThg1ahSys7NVpgkNDYWvry+sra1hZWWFjz76CKtWrcLr38Xt4uKCS5cuITo6Wnk69m1HWdLS0jBgwADY2NjAwsICbdu2xfXr1wud9saNG+jTpw8qVKgAuVyO6tWr46efflKZJjc3F9OmTUO1atVgamqKsmXLwtvbG4sWLSoyw9GjR1G/fn0AwJdffqnMnncqpajTgps2bUKjRo1gYWEBCwsL1KlTB6tWrXrj+u7cuRNmZmbo378/Xr58CQA4d+4cOnXqBGtra5iYmKBu3br49ddfle9Zu3YtevToAQDw9/dX5ss7/RoTE4MOHToo+8XR0RHt27fH3bt335gFAA4ePIiWLVvCysoKZmZmaNKkCQ4dOqQyTd76X7p0Cb1794ZCoYCdnR369euH1NTUty6jMIV91i5duoSAgACYmZmhfPnyGDJkCPbt21fgtH7e6dazZ8+iadOmMDMzg5ubG2bOnInc3FyVeaalpWH06NFwdXWFsbExKlasiOHDhyMjI+OtGdXpVyEEli5dijp16sDU1BTlypVD9+7d8ffff791/llZWRg3bpxKtiFDhuDJkycF+qpDhw4IDw/HRx99BFNTU3h6emL16tVvXUZREhMTMXDgQFSqVAnGxsZwdXVFaGiocpvMs2zZMtSuXRsWFhawtLSEp6cnxo8f/9b5Z2dnY8qUKahevTpMTExgY2MDf39/nDhxotjrv3fvXtStWxempqaoXr069u7dC+DV56N69eowNzdHgwYNcO7cOZX3513+cOnSJbRs2RLm5uYoX748hg4dimfPnqlMq+7+VQiBGTNmwNnZGSYmJvDx8UFUVFSBvxVHjx6FTCbD5s2bMWHCBDg6OsLKygqtWrXCtWvXCuTMv79Ud/sq7j5ACIHZs2cr1+Ojjz7CgQMHCp1W258lmUyGoUOH4ueff4aHhwfkcjm8vLywZcuWAvNTd/t923
Yok8mQkZGBdevWKfereb+/hw8fYvDgwfDy8oKFhQUqVKiAFi1a4Pfffy+QR53tXeo+VSFKQFBQkDA3N1dpGzFihFi2bJkIDw8Xhw8fFgsWLBC2trbiyy+/VJnO2dlZBAUFFZhn8+bNRfPmzZXDmZmZok6dOsLNzU08fvxYCCHEpEmTRJkyZURkZOQb8z179kxUr15dKBQK8eOPP4qIiAgxbNgwUblyZQFArFmz5o3vV3dd/v3vfwuZTCb8/PzEpk2bxMGDB8XSpUvF4MGDVfrK2NhYVK9eXcydO1ccPHhQTJo0SchkMhEaGqoyv+DgYLFq1SoRFRUloqKixNSpU4WpqanKdOfPnxdubm6ibt264uTJk+LkyZPi/PnzRa5Lbm6u8Pf3F3K5XEyfPl1ERkaKyZMnCzc3NwFATJ48WTntpUuXhEKhELVq1RLr168XkZGRYtSoUaJMmTIiJCREOV1YWJgwMDAQkydPFocOHRLh4eFi4cKFKtPkl5qaKtasWSMAiB9++EGZ/c6dO0IIISZPnizyb74TJ04UAETXrl3Ftm3bRGRkpJg/f76YOHGicprmzZuLGjVqKIfnz58vDAwMxNSpU5Vthw8fFsbGxqJp06Zi69atIjw8XAQHB6tsC0lJSWLGjBkCgPjpp5+U+ZKSksTTp0+FjY2N8PHxEb/++quIjo4WW7duFd988424fPlykesshBAbNmwQMplMdO7cWezYsUPs2bNHdOjQQRgYGIiDBw8qp8tb/2rVqolJkyaJqKgoMX/+fCGXywtsd4UJCgoSzs7OKm35P2v//POPsLGxEZUrVxZr164V+/fvF3379hUuLi4CgDhy5IhKv9rY2Ah3d3exfPlyERUVJQYPHiwAiHXr1imny8jIEHXq1BG2trZi/vz54uDBg2LRokVCoVCIFi1aiNzc3CIzq9uvAwYMEEZGRmLUqFEiPDxcbNq0SXh6ego7OzuRmJhYZB/k5uaKNm3aCENDQzFx4kQRGRkp5s6dK8zNzUXdunVFVlaWSl9VqlRJeHl5ifXr14uIiAjRo0cPAUBER0e/tf/zf5bu378vnJychLOzs/j555/FwYMHxdSpU4VcLhfBwcHK6TZv3iwAiG+//VZERkaKgwcPiuXLl4thw4a9cXkvXrwQ/v7+wtDQUIwePVrs379f7N69W4wfP15s3ry52Otfs2ZNsXnzZrF//37h6+srjIyMxKRJk0STJk3Ejh07xM6dO4WHh4ews7MTz549U+l7Y2NjUblyZeV+JiQkRBgaGooOHTqoZFd3/zpu3DgBQHz99dciPDxcrFy5UlSuXFk4ODio/K04cuSIACBcXFzEZ599Jvbt2yc2b94sKleuLNzd3cXLly9Vcub/nKizfb3LPiDvs/3VV1+JAwcOiBUrVoiKFSsKe3t7lfUoic8SAOHk5CS8vLzE5s2bxe7du0Xbtm0FALFt2zbldOpuv+pshydPnhSmpqaiXbt2yv3qpUuXhBBCXL16VQwaNEhs2bJFHD16VOzdu1d89dVXokyZMir7I3WWI0TBfd679Gl+OiuuXpeTkyNevHgh1q9fLwwMDERKSopynLrFlRBC3LhxQ1hZWYnOnTuLgwcPijJlyogffvjhrfmWLVsmAIj//ve/Ku0DBgxQq7hSZ13S09OFlZWV+Pjjj9/4CwoKChIAxK+//qrS3q5dO1GtWrW3LnfKlCnCxsZGZRk1atQo0FdFOXDggAAgFi1apNI+ffr0An8Q2rRpIypVqiRSU1NVph06dKgwMTFRrnuHDh1EnTp11Fr+686ePVtk/+cvrv7++29hYGAgPvvsszfOM6+4ysnJEUOHDhXGxsbil19+UZnG09NT1K1bV7x48UKlvUOHDsLBwUHk5OQIIYTYtm1bgSJDCCHOnTsnAIhdu3ZpsLavPtjW1taiY8eOKu05OTmidu3aokGDBsq2vPWfPXu2yrSDBw8WJiYmb90JqFNcfffdd0Imkyl3bHnatGlTaH
EFQJw+fVplWi8vL9GmTRvlcFhYmChTpow4e/asynT/+c9/BACxf//+IjOr068nT54UAMS8efNU2u/cuSNMTU3FmDF
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(purchase_spectacle[\"number_compagny\"], purchase_spectacle[\"Taux_ticket_internet\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Taux d'achat de tickets en ligne (%)\")\n",
"plt.title(\"Taux d'achat des tickets en ligne selon les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
2024-03-05 00:36:48 +01:00
]
},
2024-03-10 19:08:50 +01:00
{
"cell_type": "code",
2024-03-10 21:00:29 +01:00
"execution_count": 133,
2024-03-10 19:08:50 +01:00
"id": "86fa4d7f-9b5f-4487-beb8-eb23771f724c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
2024-03-10 19:49:34 +01:00
" <th>y_has_purchased</th>\n",
2024-03-10 19:08:50 +01:00
" <th>nb_tickets</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>Taux_ticket_internet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>9957.0</td>\n",
" <td>5450.0</td>\n",
" <td>54.735362</td>\n",
2024-03-10 19:08:50 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 19:49:34 +01:00
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>7941.0</td>\n",
" <td>3424.0</td>\n",
" <td>43.117995</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 19:08:50 +01:00
" <td>11</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>10361.0</td>\n",
2024-03-10 19:08:50 +01:00
" <td>5.0</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.048258</td>\n",
2024-03-10 19:08:50 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 19:49:34 +01:00
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>9638.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 19:08:50 +01:00
" <td>12</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>35600.0</td>\n",
2024-03-10 19:08:50 +01:00
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
2024-03-10 19:49:34 +01:00
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>11520.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
2024-03-10 19:08:50 +01:00
" <td>13</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>131759.0</td>\n",
" <td>105406.0</td>\n",
" <td>79.999089</td>\n",
2024-03-10 19:08:50 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 19:49:34 +01:00
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>1004076.0</td>\n",
" <td>13902.0</td>\n",
" <td>1.384557</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
2024-03-10 19:08:50 +01:00
" <td>14</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>44596.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>16694.0</td>\n",
2024-03-10 19:08:50 +01:00
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 19:49:34 +01:00
" number_company y_has_purchased nb_tickets nb_tickets_internet \\\n",
"0 10 0.0 9957.0 5450.0 \n",
"1 10 1.0 7941.0 3424.0 \n",
"2 11 0.0 10361.0 5.0 \n",
"3 11 1.0 9638.0 0.0 \n",
"4 12 0.0 35600.0 0.0 \n",
"5 12 1.0 11520.0 0.0 \n",
"6 13 0.0 131759.0 105406.0 \n",
"7 13 1.0 1004076.0 13902.0 \n",
"8 14 0.0 44596.0 0.0 \n",
"9 14 1.0 16694.0 0.0 \n",
"\n",
" Taux_ticket_internet \n",
"0 54.735362 \n",
"1 43.117995 \n",
"2 0.048258 \n",
"3 0.000000 \n",
"4 0.000000 \n",
"5 0.000000 \n",
"6 79.999089 \n",
"7 1.384557 \n",
"8 0.000000 \n",
"9 0.000000 "
2024-03-10 19:08:50 +01:00
]
},
2024-03-10 21:00:29 +01:00
"execution_count": 133,
2024-03-10 19:08:50 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 21:00:29 +01:00
"#Taux de ticket payé en ligne selon y_has_purchase par compagnies avec la base de train\n",
2024-03-10 19:08:50 +01:00
"\n",
2024-03-10 19:49:34 +01:00
"purchase_spectacle_train = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"nb_tickets\", \"nb_tickets_internet\"]].sum().reset_index()\n",
2024-03-10 19:08:50 +01:00
"purchase_spectacle_train[\"Taux_ticket_internet\"] = purchase_spectacle_train[\"nb_tickets_internet\"]*100 / purchase_spectacle_train[\"nb_tickets\"]\n",
"purchase_spectacle_train"
]
},
{
"cell_type": "code",
2024-03-10 19:49:34 +01:00
"execution_count": 106,
2024-03-10 19:08:50 +01:00
"id": "d11335b7-e35a-44c7-8ce4-661216978151",
"metadata": {},
2024-03-10 19:49:34 +01:00
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1UAAAIjCAYAAADr8zGuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACHE0lEQVR4nOzdeZyN9f//8edhzIrBMJtlZjB2QsqWxj6WLElIYSgR5WMJ+UiGLKFQfCIqS2VpQSXZskSWxpayhWZsmcg29mHm/fvDb87XMTOcOedoZvK4327ndpvzvt7Xdb2u67yv61yveV/X+1iMMUYAAAAAAIfkyOwAAAAAACA7I6kCAAAAACeQVAEAAACAE0iqAAAAAMAJJFUAAAAA4ASSKgAAAABwAkkVAAAAADiBpAoAAAAAnEBSBQAAAABOIKkC7HTixAn5+/trwIABDi9j9+7d8vb21pQpU1wYGQAAADJTtk6qLBaLXa9169Zldqh3FRcXJ4vFotmzZ7tkeVeuXFF0dHSa2z179mxZLBbFxcVlaJlRUVHKnTu3S+L7888/FR0drV27drlkeRllsVj08ssvZ2iemzdvqkOHDqpfv77efvttm2n2bs/FixfVtm1bvfLKK3rllVcyGvZ9ZbFYFB0dbX2/bt26bHHs3G9169ZV3bp1MzsMpzjS3h9EKefGbdu2ZXYoGRIVFaXQ0NDMDgP/IFdfMzjjQfqu2LRpk6Kjo3X+/Pn7uh5Hr9PuZcOGDfLw8NCRI0esZe+///59bUd3XltkJcuWLUszths3bqhEiRKaPHlyhpeZrZOqzZs327yaNWsmLy+vVOVVq1bN7FD/UVeuXNGIESPSPMk1b95cmzdvVlBQ0D8f2P/3559/asSIEZmWVDliyJAhcnNz09y5c2WxWGym2bs9zz//vB599FG99dZb9zFS16hateoDeewAQFYXFBSkzZs3q3nz5pkdygNl06ZNGjFixH1Pqu4HY4z69u2r7t27KyQkxFp+v5OqzZs364UXXrhvy3fGsmXLNGLEiFTluXLl0htvvKGRI0fqzJkzGVqmm6uCyww1atSweV+oUCHlyJEjVTn+T6FChVSoUKHMDiPbmTBhgtPL+Pzzz10QyT8jb968HEfI9owxunbtmry8vDI7lAcC+/uf4eHhwfkZGbJ8+XLt2LFD8+bNc3gZN27ckMVikZub/alDdm2nzzzzjPr3768PPvhA//3vf+2eL1v3VNnjf//7nx5//HH5+/vLx8dHFStW1Pjx43Xjxg2beqGhoYqKiko1/523//Ts2VOenp7avn27tSw5OVkNGjRQQECATp48edd4/vzzT7Vr10558uSRr6+v2rdvr/j4+DTrbtu2TS1btlSBAgXk6empKlWq3PPCPC4uzpo0jRgxwnoLZMq2pdetvHz5cjVo0EC+vr7y9vZW2bJlNXbs2Luu66efflLBggX1xBNP6PLly5KkgwcPqmPHjvL395eHh4fKli2r//3vf9Z51q1bp0ceeUSS1LVrV2t8d+sePn36tHr16qVy5copd+7c8vf3V/369bVhw4ZUda9fv66RI0eqbNmy8vT0lJ+fn+rVq6dNmzalqvvJJ5+obNmy8vb21kMPPaSlS5emquOK7XHkc0yRmJioUaNGqUyZMvLw8FChQoXUtWtXnT592qZeaGionnjiCS1fvlxVq1aVl5eXypQpo48//tiu9dwpvVs6Zs6cqVKlSsnDw0PlypXTvHnzUt1+lHJryttvv62JEycqLCxMuXPnVs2aNbVly5ZU63Jm/6RlzZo1qlu3rvz8/OTl5aVixYrpqaee0pUrV6x17N2vaTl79qx69eqlwoULy93dXcWLF9fQoUN1/fp1m3opt93Z085ud+nSJeXLl089evRINS0uLk45c+Z0KMm/VxyHDh1S165dFR4eLm9vbxUuXFgtWrTQr7/+alMvOTlZo0aNUunSpeXl5aV8+fKpUqVKevfdd+2OJa
WNjB8/XqNHj1axYsXk6empatWq6YcffrCpm97tbdHR0al6jVP2+fTp01W2bFl5eHhozpw5kqT9+/frmWeeUUBAgDw8PFSsWDF17tw51ed28eJFvfTSSypYsKD8/PzUpk0b/fnnnzZ1Fi5cqMaNGysoKEheXl4qW7asXnvtNet5MMUff/yhDh06KDg4WB4eHgoICFCDBg1S9WovXLhQNWvWlI+Pj3Lnzq3IyEjt3Lkz1TbPnj1bpUuXtp6L5s6de9f9fLuUc8TixYtVqVIleXp6qnjx4nrvvfds6l27dk0DBgxQ5cqV5evrqwIFCqhmzZr6+uuvUy3zbvs7I7Zu3aoWLVrIz89Pnp6eKlGihPr27WtTZ+PGjWrQoIHy5Mkjb29v1apVS999951NnZTvtzVr1qh79+7y8/NT3rx51blzZ12+fFnx8fFq166d8uXLp6CgIL366qs21wIZaZf2Hi+StGfPHjVu3Fje3t4qVKiQevfure+++y7VObZu3bqqUKGCYmJiVKdOHXl7e6t48eJ66623lJycnCrOO3sY7vV9JTl3/O7fv19NmjSRt7e3ChYsqJ49e+rixYtp1l29erUaNGigvHnzytvbW7Vr1061D9NiT3wpx/7OnTvVpk0b5c2bV76+vnruuefSPIfbe3zdrR1GR0dr4MCBkqSwsLBUj5fYe06413ruxtF9KknTpk3TI488otKlS1vLQkNDtWfPHq1fv966PSnn2pRrgE8++UQDBgxQ4cKF5eHhoUOHDmXomuzO66GUY3Tt2rX3PM+mxVXn1KioKOuxcfvjQinXxu7u7mrfvr1mzJghY4xd+1iSZP5FunTpYnx8fGzK+vXrZ6ZNm2aWL19u1qxZYyZNmmQKFixounbtalMvJCTEdOnSJdUyIyIiTEREhPX91atXTeXKlU3x4sXNuXPnjDHGvPHGGyZHjhxm5cqVd43vypUrpmzZssbX19dMmTLFrFixwvTp08cUK1bMSDKzZs2y1l2zZo1xd3c3derUMQsXLjTLly83UVFRqerd6dq1a2b58uVGknn++efN5s2bzebNm82hQ4eMMcbMmjXLSDKxsbHWeT788ENjsVhM3bp1zbx588zq1avN+++/b3r16pXuvl24cKHx8PAwL730krl586Yxxpg9e/YYX19fU7FiRTN37lyzcuVKM2DAAJMjRw4THR1tjDHmwoUL1hhef/11a3zHjh1Ld5v2799vXnrpJbNgwQKzbt06s3TpUvP888+bHDlymLVr11rr3bhxw9SrV8+4ubmZV1991Sxbtsx888035r///a+ZP3++tZ4kExoaah599FHz+eefm2XLlpm6desaNzc3c/jwYWs9V2yPo5+jMcYkJSWZJk2aGB8fHzNixAizatUq8+GHH5rChQubcuXKmStXrljrhoSEmCJFiphy5cqZuXPnmhUrVpinn37aSDLr16+/63pS9snw4cOt79euXWsk2ezfDz74wEgyTz31lFm6dKn57LPPTKlSpUxISIgJCQmx1ouNjbXu4yZNmpglS5aYJUuWmIoVK5r8+fOb8+fPW+s6s3/SEhsbazw9PU2jRo3MkiVLzLp168xnn31mOnXqZD1eM7Jf0zr+K1WqZHx8fMzbb79tVq5caYYNG2bc3NxMs2bNUu1Te9pZWvr162d8fHxs9pUxxgwcONB4enqav//+2+59Ym8c69evNwMGDDBffvmlWb9+vVm8eLFp3bq18fLyMvv377fWGzt2rMmZM6cZPny4+eGHH8zy5cvN5MmTrceEPVLaSNGiRc1jjz1mvvrqK/PFF1+YRx55xOTKlcts2rTJWrdLly427SvF8OHDzZ1fYZJM4cKFTaVKlcy8efPMmjVrzG+//WZ27dplcufObUJDQ8306dPNDz/8YD799FPTrl07k5CQYIz5v3Nj8eLFzSuvvGJWrFhhPvzwQ5M/f35Tr149m/W8+eabZtKkSea7774z69atM9OnTzdhYWGp6pUuXd
qULFnSfPLJJ2b9+vXmq6++MgMGDLA5rkaPHm0sFovp1q2bWbp0qVm0aJGpWbOm8fHxMXv27LHWS4mvVatW5ttvvzW
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"multiple_barplot(purchase_spectacle_train, x=\"number_company\", y=\"Taux_ticket_internet\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Taux de ticket acheté par internet (%)\", \n",
" title = \"Taux de ticket achété en ligne selon y_has_purchased par compagnies de spectacle (train set)\")"
]
},
2024-03-10 21:00:29 +01:00
{
"cell_type": "code",
"execution_count": 140,
"id": "f8444cab-d4c5-4afd-b472-476e702c09cc",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHGCAYAAACYbuRTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABIYElEQVR4nO3dd3QUZd/G8WsT0iAhEEIaxtAEqdIUiGIISgepgqJCEBAERUREeBQpgtHYsOKjKMUGFuBBwUILRQICUkQBERNAIFQhoQWS3O8fnOzrkkIWdxNgvp9z9pzMPffM/HZ2Z/bKlF2bMcYIAADAgjyKuwAAAIDiQhACAACWRRACAACWRRACAACWRRACAACWRRACAACWRRACAACWRRACAACWRRACAACWRRACUOS2bNmikiVL6s033yzuUgBY3FUThGw2W6EeiYmJxV0q8lGxYkXFxcW5bH7vvPOOpk+fnqs9JSVFNpstz3EFmT59umw2m9avX++S+p5//nnNmzfPJfNyB5vNpkceecRl89u/f7/GjRunTZs2FdgvPT1d3bt316OPPqpHH33UZct3lcTExGLbl8TFxalixYpFusxPP/1UkydP/lfzyG9bLOo6pNz7mZzXMyUl5V/POy+u3m9I0sKFCzVu3Lh/PZ/mzZurefPm/3o+zshr+xk3bpxsNptbl3v69GmNGzfusrbbqyYIJSUlOTzatWsnPz+/XO0NGjQo7lJRRPLb+YaHhyspKUnt27cv+qL+4UoPQq62f/9+jR8//pJBqF+/frrlllv0wgsvFE1hKNC1FoSuBQsXLtT48eOLuwyX6d+/v5KSkty6jNOnT2v8+PGXFYRKuL4c92jSpInDcPny5eXh4ZGrHfDx8eF9cQX7/PPPi7sEAEXouuuu03XXXVfcZeTrqjkiVBhvv/22br/9doWEhKhUqVKqU6eOEhISdP78eYd++Z2iufgw4qBBg+Tr66sNGzbY27Kzs3XHHXcoNDRUBw4cyLeWnNMzL730kl588UVVrFhRfn5+at68uX7//XedP39eo0aNUkREhAIDA9WlSxcdOnTIYR7Z2dlKSEjQjTfeKB8fH4WEhKh3797666+/7H2ee+45lShRQnv37s1Vw4MPPqhy5crp7Nmz9rbZs2eradOmKlWqlPz9/dW6dWtt3LjRYbq4uDj5+/vrjz/+ULt27eTv76/IyEg98cQTysjIyPc55zh//rxGjhypsLAwlSxZUrfddpt++umnPPumpqZq4MCBuu666+Tt7a1KlSpp/PjxyszMLHAZFStW1K+//qrly5fbT4vmnFLI79TY9u3bde+99yo0NFQ+Pj66/vrr1bt37wKf04EDB9SwYUPdcMMN2rlzpyQpLS1NI0aMUKVKleTt7a0KFSpo2LBhOnXqlH06m82mU6dOacaMGfb6ct5bp0+ftk/v6+uroKAgNWrUSJ999lmBz/nw4cMaPHiwatasKX9/f4WEhKhFixZauXJlrr4ZGRmaMGGCatSoIV9fX5UrV06xsbFavXp1rr4fffSRatSooZIlS+qmm27SN998k6vPzp071atXL4WEhMjHx0c1atTQ22+/bR+fmJiom2++WZLUt29f+3P+5+H99evX66677lJQUJB8fX1Vv379XKHoctdNYacrTA35mT9/vpo2baqSJUsqICBALVu2zPVfbs4pgF9//VX33nuvAgMDFRoaqgcffFAnTpwo1HIuZozRO++8o3r16snPz09ly5ZV9+7d9eeff15y2sOHD+uhhx5SZGSkfHx8VL58ed16661avHixpAv7vAULFmj37t0OlxjkGD9+vBo3bqygoCCVLl1aDRo00AcffCBjjL1PQdtizmmji09LXXz65FJ15MWZ/czFFi1apE6dOum6666Tr6+vqlatqoEDB+rIkSO5+hZ2v5Genq6HH35YwcHBKleunLp27ar9+/c79Jk9e7ZatWql8PBw+fn5qUaNGho1apTDvi
MuLs6+bf1zXRR0as8Yo4SEBEVFRcnX11cNGjTQt99+m6tfYV+PglzOfjS/U2Ou+jxKSUlR+fLlJV14z+ass8JeinHVHBEqjF27dqlXr172D6jNmzdr0qRJ2r59uz788EOn5zd58mStXbtWPXr00IYNG1SmTBn7obfvvvtO4eHhl5zH22+/rbp16+rtt9/W8ePH9cQTT6hjx45q3LixvLy89OGHH2r37t0aMWKE+vfvr/nz59unffjhh/Xee+/pkUceUYcOHZSSkqIxY8YoMTFRP//8s4KDgzVw4EBNmjRJ//3vfzVx4kT7tMeOHdOsWbP0yCOPyNfXV9KFUzXPPPOM+vbtq2eeeUbnzp3TSy+9pGbNmumnn35SzZo17dOfP39ed911l/r166cnnnhCK1as0HPPPafAwEA9++yzBT7nAQMGaObMmRoxYoRatmyprVu3qmvXrkpPT3fol5qaqltuuUUeHh569tlnVaVKFSUlJWnixIlKSUnRtGnT8l3G3Llz1b17dwUGBuqdd96RdOFIUH42b96s2267TcHBwZowYYJuuOEGHThwQPPnz9e5c+fynHbr1q1q166drrvuOiUlJSk4OFinT59WTEyM/vrrL/3nP/9R3bp19euvv+rZZ5/VL7/8osWLF8tmsykpKUktWrRQbGysxowZI0kqXbq0JGn48OH66KOPNHHiRNWvX1+nTp3S1q1bdfTo0QLX67FjxyRJY8eOVVhYmE6ePKm5c+eqefPmWrJkiT1oZWZmqm3btlq5cqWGDRumFi1aKDMzU2vWrNGePXsUHR1tn+eCBQu0bt06TZgwQf7+/kpISFCXLl20Y8cOVa5cWZL022+/KTo6Wtdff71eeeUVhYWF6fvvv9fQoUN15MgRjR07Vg0aNNC0adPs762c05I5/wUuW7ZMbdq0UePGjfXuu+8qMDBQs2bNUs+ePXX69Gn7Duty101hpitsDXn59NNPdd9996lVq1b67LPPlJGRoYSEBPu6v+222xz6d+vWTT179lS/fv30yy+/aPTo0ZJ0WfuhgQMHavr06Ro6dKhefPFFHTt2TBMmTFB0dLQ2b96s0NDQfKd94IEH9PPPP2vSpEmqVq2ajh8/rp9//tm+Xt555x099NBD2rVrl+bOnZtr+pSUFA0cOFDXX3+9JGnNmjV69NFHtW/fPvt+wNltMS+XqiMvhd3PNG/e3CG4SRc+K5o2bar+/fsrMDBQKSkpevXVV3Xbbbfpl19+kZeXlyTn9hv9+/dX+/bt9emnn2rv3r168skndf/992vp0qX2Pjt37lS7du00bNgwlSpVStu3b9eLL76on376yd5vzJgxOnXqlL788kuHoF3Q58348eM1fvx49evXT927d9fevXs1YMAAZWVlqXr16oVan4VxOfvR/Ljy8yg8PFzfffed2rRpo379+ql///6SZA9Hl2SuUn369DGlSpXKd3xWVpY5f/68mTlzpvH09DTHjh2zj4uKijJ9+vTJNU1MTIyJiYlxaNu5c6cpXbq06dy5s1m8eLHx8PAwzzzzzCXrS05ONpLMTTfdZLKysuztkydPNpLMXXfd5dB/2LBhRpI5ceKEMcaYbdu2GUlm8ODBDv3Wrl1rJJn//Oc/DusiJCTEZGRk2NtefPFF4+HhYZKTk40xxuzZs8eUKFHCPProow7zS09PN2FhYaZHjx4O85NkPv/8c4e+7dq1M9WrVy/weefU/fjjjzu0f/LJJ0aSw3ofOHCg8ff3N7t373bo+/LLLxtJ5tdffy1wWbVq1cr1ehnz/+t+2rRp9rYWLVqYMmXKmEOHDuU7v2nTphlJZt26dWbRokWmdOnSpnv37ubMmTP2PvHx8cbDw8OsW7fOYdovv/zSSDILFy60t5UqVSrP91nt2rVN586dC3xuhZGZmWnOnz9v7rjjDtOlSxd7+8yZM40k8/777xc4vSQTGhpq0tLS7G2pqanGw8PDxMfH29tat25trrvuOvt7M8cjjz
xifH197dvWunXrcq33HDfeeKOpX7++OX/+vEN7hw4dTHh4uH0budx1U5jpClvDsmXLjCSzbNkyY8yFfUlERISpU6e
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"\n",
"\n",
"# Créer le graphique à barres\n",
"sns.barplot(data=purchase_spectacle_train, x=\"y_has_purchased\", y=\"Taux_ticket_internet\",ci=None)\n",
"\n",
"\n",
"# Titre du graphique\n",
"plt.title(\"Taux moyen de tickets achetés selon le statut d'achat du client\")\n",
"\n",
"# Ajouter une étiquette à l'axe des abscisses\n",
"plt.xlabel(\"Statut d'achat du client\")\n",
"\n",
"# Ajouter une étiquette à l'axe des ordonnées\n",
"plt.ylabel(\"Taux de tickets internet\")\n",
"\n",
"# Afficher le graphique\n",
"plt.show()\n"
]
},
2024-03-10 19:49:34 +01:00
{
"cell_type": "code",
"execution_count": 107,
"id": "9ba02de7-3087-4b0c-884a-dc4a6ca92c3b",
"metadata": {},
2024-03-10 19:08:50 +01:00
"outputs": [],
2024-03-10 19:49:34 +01:00
"source": [
"#stat sur la variable temps ecoulé entre le premier et le dernier achat"
]
2024-03-10 19:08:50 +01:00
},
2024-03-05 03:51:39 +01:00
{
"cell_type": "code",
2024-03-10 19:49:34 +01:00
"execution_count": 108,
2024-03-05 03:51:39 +01:00
"id": "59a95248-0261-4970-9e91-e43d50cf4d69",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles')"
]
},
2024-03-10 19:49:34 +01:00
"execution_count": 108,
2024-03-05 03:51:39 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA6UAAAHGCAYAAACM3i2bAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABvz0lEQVR4nO3dd3gUVfv/8c+SnhAWkpCGSei9g9JUQGpoYkNAKYJgRRGw8KACPhRFaV9QEaUJEbAAKiBIVwggVYqIoFQhBCkJoYSQnN8f/rIPSxJIINmhvF/XtRfMmTMz95Sd2Ttn5ozNGGMEAAAAAIAF8lkdAAAAAADgzkVSCgAAAACwDEkpAAAAAMAyJKUAAAAAAMuQlAIAAAAALENSCgAAAACwDEkpAAAAAMAyJKUAAAAAAMuQlAIAAAAALHPHJaVbt26V3W7X//3f/1kdCoBckJqaqvr16+vee+/VhQsXrA4HAAAAOZSjpHTq1Kmy2WxOn8KFC6tBgwaaP3/+dQdRtGhRde3a1TF85MgRDRo0SFu3br3ueWalatWqmjt3rgYMGKC1a9fm+vxvRsOGDdO8efPyfDk2m00vvvhini/nassfNGhQrs3viy++0JgxY3Jtfrg+V54frjRgwADFx8fru+++k7e3t+sCy4G8PKdlZdCgQbLZbC5b3s2ma9euKlq0qNVhOPntt980aNAg7d+/P1v106+52a1/vVauXCmbzaaVK1fm6XJu1uXfiKJFi6pVq1a5Nr9z585p0KBBlmyL/fv3y2azaerUqS5f9p1+vrpVWHmMXOlWPm/kVGxsrAYNGqTTp0/n6XJcdc3JynW1lE6ZMkVr165VbGysJk6cKDc3N7Vu3Vrff//9dQUxd+5cvfXWW47hI0eOaPDgwXn2A+6BBx7Qp59+qvbt2+v48eN5soybiauS0tsNSenNb8GCBZo+fboWLVqkgIAAq8PJUl6f05DRW2+9pblz51odhpPffvtNgwcPtuyCf7OqXr261q5dq+rVq1sdiuXOnTunwYMH3xE/tHHrCQsL09q1a9WyZUurQ7mjxMbGavDgwXmelFrN/XomqlixomrWrOkYbt68uQoVKqSZM2eqdevWOZ5ftWrVrieMG9K+fXu1b9/e5csFkHtatmypv//+2+owct25c+fk6+trdRi5ytXrVKJEiVyd3+24T/JKTrdVgQIFVLt27Vxb/vnz5+Xt7U3LG5DLvLy8cvW7ClwuV54p9fb2lqenpzw8PJzKT548qeeff15FihSRp6enihcvrgEDBig5Odmp3uW3561cuVJ33323JOmpp55y3CZ8+W2ZGzduVJs2bRQQECBvb29Vq1ZNX375ZbZiHTx4sGrVqqWAgAAVKFBA1atX16RJk2SMuea0Xbt2Vf78+fX777+rWbNm8vPzU1hYmN59911J0rp163TvvffKz89PpUuX1rRp0zLMY8eOHXrwwQdVqFAheXt7q2rVqhnqZdV8ntmtClu2bFGrVq0UHBwsLy8vhYeHq2XLljp8+LCkf29pPXv2rKZNm+bYlg0aNJAkHT9+XM8//7zKly+v/PnzKzg4WA888IB+/vnnDHEnJyfrnXfeUbly5eTt7a3AwEA1bNhQsbGxGepOnz5d5cqVk6+vr6pUqZLprd179uxRx44dHXGXK1dOH3744VW3f7rExET16NFDgYGByp8/v5o3b64//vgjQ72sbt/Lzm1CDRo00IIFC3TgwAGn29XTXbx4UUOGDFHZsmXl5eWlwoUL66mnnsrQ8p5+a9f8+fNVrVo1+fj4qFy5co5tMnXqVJUrV05+fn665557tHHjxgzrkD9/fu3cuVONGjWSn5+fChcurBdffFHnzp1zqvvVV1+pVq1astvt8vX1VfHixdWtW7errqckGWP00UcfqWrVqvLx8VGhQoX06KOP6q+//spQd9GiRWrUqJFjGeXKldPw4cOd6nz33XeqU6eOfH195e/vryZNmmS4Vf
5G9o307zHQr18/FStWTJ6enipSpIh69+6ts2fPunSbNGjQQBUrVtSGDRt03333OZbx7rvvKi0tTdK1z2np+3j79u1q2rSp/P391ahRI0nZP85yYvbs2apTp478/PyUP39+NWvWTFu2bLnmdOnnpSVLluipp55SQECA/Pz81Lp16yy3y08//aS6devK19fXsd2zu+/SHweYMmWKypQpIx8fH9WsWVPr1q2TMUbvv/++ihUrpvz58+uBBx7Q3r17nabP7BjL6X7NLP6sXOu6NHXqVD322GOSpIYNGzqOg+u5DW7p0qVq1KiRChQoIF9fX9WrV0/Lli3L1rS///67mjdvLl9fXwUFBenZZ5/VmTNnrns56d/ZzZs369FHH1WhQoUcfxBIP/8tWrRI1atXl4+Pj8qWLavJkyc7zSOr2/Cyc61PPy5//PFHdevWTYULF5avr2+G3xnpLly4oL59+6pq1aqy2+0KCAhQnTp19O2332aom5aWpnHjxjmOl4IFC6p27dr67rvvMtS91jpm53q7f/9+FS5cWNK/v1XSj5GrPb6QlpamIUOGOL4jBQsWVOXKlTV27FinejdyzV29erUaNWokf39/+fr6qm7dulqwYIFTnfT9sGLFCj333HMKCgpSYGCgHn74YR05ciRby8lMds5Xf/31l9q3b6/w8HB5eXkpJCREjRo1ytadKevXr1fr1q0VGBgob29vlShRQr17977u9V++fLnj90mBAgXUuXNnnT17VnFxcWrXrp0KFiyosLAw9evXTykpKY7p02+LHTFihIYOHarIyEh5e3urZs2aGb5ze/fu1VNPPaVSpUrJ19dXRYoUUevWrbV9+/YM67dz5041bdpUvr6+Kly4sF544QUtWLAgw/ctO9eyy+O88ryVneMru8dqZnL7vJWZ7MSXfr7bsmWLHn74YRUoUEB2u11PPvlkptfl7F5vr3YcDho0SK+++qokqVixYo7zQvr+mz17tpo2baqwsDDHb8w33ngjwzX1Wsu5muxs0+PHj6tnz56KiIhw/F6pV6+eli5des35O5gcmDJlipFk1q1bZ1JSUszFixfNoUOHzEsvvWTy5ctnFi1a5Kh7/vx5U7lyZePn52c++OAD8+OPP5q33nrLuLu7mxYtWjjNNyoqynTp0sUYY0xCQoJjOW+++aZZu3atWbt2rTl06JAxxpjly5cbT09Pc99995nZs2ebRYsWma5duxpJZsqUKddch65du5pJkyaZJUuWmCVLlpj//ve/xsfHxwwePPia03bp0sV4enqacuXKmbFjx5olS5aYp556ykgy/fv3N6VLlzaTJk0yixcvNq1atTKSzMaNGx3T//7778bf39+UKFHCfP7552bBggWmQ4cORpJ57733Mmznffv2OS1/xYoVRpJZsWKFMcaYpKQkExgYaGrWrGm+/PJLs2rVKjN79mzz7LPPmt9++80YY8zatWuNj4+PadGihWNb7ty50xHPc889Z2bNmmVWrlxp5s+fb7p3727y5cvnWIYxxqSkpJiGDRsad3d3069fP7Nw4ULz3Xffmf/85z9m5syZjnqSTNGiRc0999xjvvzyS7Nw4ULToEED4+7ubv78809HvZ07dxq73W4qVapkPv/8c/Pjjz+avn37mnz58plBgwZddR+kpaWZhg0bGi8vLzN06FDz448/moEDB5rixYsbSWbgwIFO+ysqKirDPAYOHGiudejv3LnT1KtXz4SGhjq229q1a40xxqSmpprmzZsbPz8/M3jwYLNkyRLz2WefmSJFipjy5cubc+fOOeYTFRVl7rrrLlOxYkUzc+ZMs3DhQlOrVi3j4eFh3n77bVOvXj0zZ84cM3fuXFO6dGkTEhLiNH36MRcZGelY30GDBhl3d3fTqlUrR73Y2Fhjs9lM+/btzcKFC83y5cvNlClTTKdOna66nsYY06NHD+Ph4WH69u1rFi1aZL744gtTtmxZExISYuLi4hz1PvvsM2Oz2UyDBg
3MF198YZYuXWo++ugj8/zzzzvqxMTEGEmmadOmZt68eWb27NmmRo0axtPT0/z888/XtW8uPz8YY8zZs2dN1apVTVB
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-10 19:49:34 +01:00
"#repartition des client selon le temps ecoulés entre le premier et le denier achat par compagnie\n",
2024-03-05 03:51:39 +01:00
"\n",
"sns.boxplot(data=products_purchased_reduced_spectacle, y=\"time_between_purchase\",x=\"number_compagny\",showfliers=False,showmeans=True)\n",
"plt.title(\"Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles\")"
]
},
{
"cell_type": "code",
2024-03-10 19:49:34 +01:00
"execution_count": 109,
2024-03-05 14:34:43 +01:00
"id": "e2c51e28-6197-48f0-ab6d-9fc7b3b0de74",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Statistique F : 7956.05932109542\n",
"Valeur de p : 0.0\n",
"Nombre de degrés de liberté entre les groupes : 4\n",
"Nombre de degrés de liberté à l'intérieur des groupes : 764875\n",
2024-03-05 14:36:03 +01:00
"Il y a des différences significatives entre au moins une des entrepries .\n"
2024-03-05 14:34:43 +01:00
]
}
],
"source": [
2024-03-05 15:37:29 +01:00
"#test d'anova pour voir si la difference de temps entre le premier et le dernier achat est statistiquement significative\n",
"\n",
2024-03-05 14:34:43 +01:00
"from scipy.stats import f_oneway\n",
"\n",
"# Créez une liste pour stocker les données de chaque groupe\n",
"groupes = []\n",
"\n",
"# Parcourez chaque modalité de la variable catégorielle et divisez les données en groupes\n",
"for modalite in products_purchased_reduced_spectacle['number_compagny'].unique():\n",
" groupe = products_purchased_reduced_spectacle[products_purchased_reduced_spectacle['number_compagny'] == modalite]['time_between_purchase']\n",
" groupes.append(groupe)\n",
"\n",
"# Effectuez le test ANOVA\n",
"f_statistic, p_value = f_oneway(*groupes)\n",
"\n",
"# Nombre total d'observations\n",
"N = sum(len(groupe) for groupe in groupes)\n",
"\n",
"# Nombre de groupes ou de catégories\n",
"k = len(groupes)\n",
"\n",
"# Degrés de liberté entre les groupes\n",
"df_between = k - 1\n",
"\n",
"# Degrés de liberté à l'intérieur des groupes\n",
"df_within = N - k\n",
"\n",
"# Affichez les résultats\n",
"print(\"Statistique F :\", f_statistic)\n",
"print(\"Valeur de p :\", p_value)\n",
"\n",
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
"\n",
"if p_value < 0.05:\n",
" print(\"Il y a des différences significatives entre au moins une des entrepries .\")\n",
"else:\n",
" print(\"Il n'y a pas de différences significatives entre les entreprises .\")"
]
2024-03-05 03:51:39 +01:00
},
2024-03-10 19:49:34 +01:00
{
"cell_type": "code",
"execution_count": 111,
"id": "75a003ab-f42a-4b2d-a0a8-284e673e71f7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>time_between_purchase</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>45.791114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>193.080793</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>27.640469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>129.853892</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>16.418446</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>58.548598</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>10.012525</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>93.545373</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>3.879196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>10.745213</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased time_between_purchase\n",
"0 10 0.0 45.791114\n",
"1 10 1.0 193.080793\n",
"2 11 0.0 27.640469\n",
"3 11 1.0 129.853892\n",
"4 12 0.0 16.418446\n",
"5 12 1.0 58.548598\n",
"6 13 0.0 10.012525\n",
"7 13 1.0 93.545373\n",
"8 14 0.0 3.879196\n",
"9 14 1.0 10.745213"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#repartition des client selon le temps ecoulés entre le premier et le denier achat par compagnie\n",
"purchase_train_time= train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"time_between_purchase\"].mean().reset_index()\n",
"purchase_train_time"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "f27921a9-1253-4c02-9bff-8cd3c4a9a5d9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA9AAAAIiCAYAAAAggyBOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACN30lEQVR4nOzdeXhN1/7H8c+RWUQIIgmRpOZZ0JpaxBRDqaqiE0HR1nBNNdxWDW0pHehwaZVKKaXtRRU1lRiK1lg1ljbGSqOmEEQk6/dHfznXkYQTEseJ9+t58jw5a0/fvc/aw/esvde2GGOMAAAAAADATeVxdAAAAAAAADgDEmgAAAAAAOxAAg0AAAAAgB1IoAEAAAAAsAMJNAAAAAAAdiCBBgAAAADADiTQAAAAAADYgQQaAAAAAAA7kEADAAAAAGCHLCXQGzdu1KhRo3Tu3LkcCge5zZw5czRp0qS7ukyLxaJRo0bd1WXeKw4fPiyLxaLo6GhHh2Jj8uTJWYopNDRUUVFRORZPmoYNG6phw4Y5vpx7dfm3Kzo6WhaLRVu3bs22eS5dutRh+21UVJRCQ0MdsuzsEhoaqkcffdTRYdzzYmJiZLFY9M033zg6lCwZNWqULBaLo8PAXXavXM/cq9cWOWHv3r0aNWqUDh8+nKPLSTsWxcTEZOt8f//9d3l4eGjTpk3Wspy+Fr9b12y342a5a/369dW/f//bmm+WE+jRo0eTQMNujkig72eBgYHatGmTWrVq5ehQbGQ1gb5fTJ48WZMnT3Z0GPeEpUuXavTo0Y4OAwDuGZs2bdLzzz/v6DDuK3v37tXo0aNzPIHOKYMHD1bTpk1Vp04da1lOX4svWLBAI0aMyLH534mb5a6vv/66Jk+erAMHDmR5vq7ZEBuQLVJSUnTt2jV5eHg4OpRsk5ycLIvFIlfXu7OreXh4qHbt2tk2v7sdvzMzxujKlSvy8vKye5oKFSpkawyXL1/O0vKB3IK6f3exve+O7DyfI/fbt2+fFi5cqGXLlt32PG7nWjw8PPy2l+dIDRo0UNmyZfXuu+9q6tSpWZrW7hboUaNG6eWXX5YkhYWFyWKxpLv1YN68eapTp468vb2VL18+RUZGaseOHTbziYqKUr58+bR//35FRkbK29tbgYGBeuuttyRJmzdv1sMPPyxvb2+VKVNGn3/+uc30abcOrly5Ul27dpWfn5+8vb3VunVr/fHHHzbj7tixQ48++qj8/f3l4eGhoKAgtWrVSsePH7/pujZs2FCVKlXSpk2bVLduXXl5eSk0NFQzZsyQJC1ZskTVq1dX3rx5Vbly5Qwr6oYNG9S4cWP5+Pgob968qlu3rpYsWWIdfvjwYbm6umrcuHHppl23bp0sFou+/vpra9nBgwf19NNPW9elfPny+s9//mMzXdrtIF9++aVeeeUVBQUFKX/+/GrSpIndv65k53IaNmyoJUuW6MiRI9b6knYLWtrtQBMmTNAbb7yhsLAweXh4aM2aNZKkrVu3qk2bNvLz85Onp6fCw8P11Vdf2bUOGYmLi1OvXr1UvHhxubu7KywsTKNHj9a1a9duOW3arZELFixQlSpV5OnpqQceeEAffPBBhttl1qxZGjRokIoVKyYPDw8dOnRIkrRq1So1btxY+fPnV968eVWvXj398MMPNvNIu01v165devLJJ+Xr6ys/Pz8NHDhQ165d04EDB9S8eXP5+PgoNDRUEyZMsJk+s9ussvK9ZhZ/Rq5evao33nhD5cqVk4eHh4oUKaKuXbvq1KlTNttvz549Wrt2rbUO3M7tsgkJCRo8eLDCwsLk7u6uYsWKqX///kpMTLzltMYYTZgwQSEhIfL09FT16tX1/fff39FyLBaL+vTpo48//ljly5eXh4eHPv/8c+sxas2aNXrxxRdVuHBhFSpUSO3atdOff/5pM4+MbuG2Z5tK/6uX8+fPV3h4uDw9PW/agrty5Uo99thjKl68uDw9PVWqVCn16t
VLf//9d7px9+/fr6eeekpFixaVh4eHSpQooc6dOyspKclmvAsXLtxyHefNm6dmzZopMDBQXl5eKl++vIYNG2azPaOioqz18fpjxc1aAOw5vhtjNHnyZFWrVk1eXl4qWLCg2rdvn+5ckZErV65o+PDhNvWgd+/e6X7FTvseli1bpurVq8vLy0vlypXTZ599dtP5G2NUunRpRUZGpht28eJF+fr6qnfv3reM80a3iuPUqVN66aWXVKFCBeXLl0/+/v5q1KiR1q9fn25eU6ZMUdWqVZUvXz75+PioXLly+ve//52leNL2k08++URlypSRh4eHKlSooLlz59qMl9ktymn70/V14WZ1/8SJE+rZs6eCg4Pl7u6uoKAgtW/fXn/99ZfNfJOTk295nrR3nzl16pR1mWn7bL169bRq1Sqb8ew5B0j/XGNUq1ZNHh4eCgsL0zvvvHPrDf3/0q5f1q9fr9q1a8vLy0vFihXTiBEjlJKSYjPu6NGjVatWLfn5+Sl//vyqXr26pk+fLmOMzXhZPdZkxp7jyu7du/XYY4+pYMGC8vT0VLVq1dJdB6adq+bMmaOhQ4cqMDBQ+fLlU+vWrfXXX3/pwoUL6tmzpwoXLqzChQura9euunjxos087K2XWdlfjh8/rvbt28vHx0cFChTQM888oy1btqQ7J6ddBx86dEgtW7ZUvnz5FBwcrEGDBqU7xmZ0C7e91zO3u//++eef6tChg3x8fOTr66uOHTsqLi4uw3Hv5FrtVvFl5Xpfsn//ulk9jI6O1pNPPilJioiIsJ6L0r6/nDiPZvc2DQgIUNOmTa1lt3stfuXKFQ0aNEjVqlWzXovWqVNH3377bbrl3ngL953mI9l1TLUnd33uuec0Z84cXbhwwa5tbGXsdOzYMdO3b18jycyfP99s2rTJbNq0yZw/f94YY8ybb75pLBaL6datm1m8eLGZP3++qVOnjvH29jZ79uyxzqdLly7G3d3dlC9f3rz//vtm5cqVpmvXrkaSGT58uClTpoyZPn26Wb58uXn00UeNJLN161br9DNmzDCSTHBwsOnWrZv5/vvvzdSpU42/v78JDg42Z8+eNcYYc/HiRVOoUCFTs2ZN89VXX5m1a9eaefPmmRdeeMHs3bv3puvaoEEDU6hQIVO2bNl0sYwePdpUrlzZfPnll2bp0qWmdu3axsPDw5w4ccI6fUxMjHFzczM1atQw8+bNMwsXLjTNmjUzFovFzJ071zre448/bkqUKGGuXbtms/wnn3zSBAUFmeTkZGOMMXv27DG+vr6mcuXKZubMmWbFihVm0KBBJk+ePGbUqFHW6dasWWMkmdDQUPPMM8+YJUuWmC+//NKUKFHClC5dOt1ybpTdy9mzZ4+pV6+eCQgIsNaXTZs2GWOMiY2NNZJMsWLFTEREhPnmm2/MihUrTGxsrFm9erVxd3c3jzzyiJk3b55ZtmyZiYqKMpLMjBkzbroOxhgjyYwcOdL6+eTJkyY4ONiEhISYTz75xKxatcq8/vrrxsPDw0RFRd1yfiEhIaZYsWKmRIkS5rPPPjNLly41zzzzjJFk3n777XTbpVixYqZ9+/Zm0aJFZvHixeb06dNm1qxZxmKxmLZt25r58+eb7777zjz66KPGxcXFrFq1yjqPkSNHGkmmbNmy5vXXXzcrV640Q4YMMZJMnz59TLly5cwHH3xgs9/897//tU6ftl2v305Z/V4zij8jKSkppnnz5sbb29uMHj3arFy50kybNs0UK1bMVKhQwVy6dMkYY8z27dvNAw88YMLDw611YPv27bfc5l26dLF+TkxMNNWqVTOFCxc27733nlm1apV5//33ja+vr2nUqJFJTU296fzStmv37t2tx4xixYqZgIAA06BBg9taTtq2qlKlipkzZ45ZvXq12b17t/UY9cADD5i+ffua5cuXm2nTppmCBQuaiIgIm7gaNGhgs3x7t2naNgoMDDQPPPCA+eyzz8yaNWvMzz//nOk2mDJlih
k3bpxZtGiRWbt2rfn8889N1apVTdmyZc3Vq1et4+3cudPky5fPhIaGmo8//tj88MMP5osvvjAdOnQwCQkJxhiTpXV
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"multiple_barplot(purchase_train_time, x=\"number_company\", y=\"time_between_purchase\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Taux de ticket acheté par internet (%)\", \n",
" title = \"temps moyen entre le premier et le dernier achat selon y_has_purchased par compagnies de spectacle (train set)\")"
]
},
2024-03-05 15:37:29 +01:00
{
"cell_type": "code",
"execution_count": 33,
"id": "74f06e96-3c25-4eca-8190-25b0a4ab0d75",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id int64\n",
"nb_tickets int64\n",
"nb_purchases int64\n",
"total_amount float64\n",
"nb_suppliers int64\n",
"vente_internet_max int64\n",
"purchase_date_min float64\n",
"purchase_date_max float64\n",
"time_between_purchase float64\n",
"nb_tickets_internet float64\n",
"number_compagny int64\n",
"dtype: object"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced_spectacle.dtypes"
]
},
{
"cell_type": "code",
2024-03-10 20:30:08 +01:00
"execution_count": 114,
"id": "aa6655c0-c602-4485-8b38-3117227464e1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19482</td>\n",
" <td>88</td>\n",
" <td>29</td>\n",
" <td>872.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2643.092500</td>\n",
" <td>718.149398</td>\n",
" <td>1924.943102</td>\n",
" <td>8.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19484</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>62.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1745.021736</td>\n",
" <td>1743.045035</td>\n",
" <td>1.976701</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19485</td>\n",
" <td>131</td>\n",
" <td>21</td>\n",
" <td>1878.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2649.044745</td>\n",
" <td>85.240845</td>\n",
" <td>2563.803900</td>\n",
" <td>84.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19486</td>\n",
" <td>10</td>\n",
" <td>4</td>\n",
" <td>96.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1944.077604</td>\n",
" <td>1742.794225</td>\n",
" <td>201.283380</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19487</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>33.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1742.877766</td>\n",
" <td>1742.877766</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99580</th>\n",
" <td>6884747</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.193750</td>\n",
" <td>0.193750</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99581</th>\n",
" <td>6884748</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.186806</td>\n",
" <td>0.186806</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99582</th>\n",
" <td>6884750</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>80.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.136111</td>\n",
" <td>0.136111</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99583</th>\n",
" <td>6884751</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.122917</td>\n",
" <td>0.122917</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99584</th>\n",
" <td>6884753</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.047222</td>\n",
" <td>0.047222</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>764880 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 19482 88 29 872.0 2 \n",
"1 19484 3 2 62.0 1 \n",
"2 19485 131 21 1878.0 2 \n",
"3 19486 10 4 96.0 1 \n",
"4 19487 2 1 33.0 1 \n",
"... ... ... ... ... ... \n",
"99580 6884747 2 1 40.0 1 \n",
"99581 6884748 2 1 40.0 1 \n",
"99582 6884750 4 1 80.0 1 \n",
"99583 6884751 2 1 40.0 1 \n",
"99584 6884753 2 1 40.0 1 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 1 2643.092500 718.149398 \n",
"1 0 1745.021736 1743.045035 \n",
"2 1 2649.044745 85.240845 \n",
"3 0 1944.077604 1742.794225 \n",
"4 0 1742.877766 1742.877766 \n",
"... ... ... ... \n",
"99580 0 0.193750 0.193750 \n",
"99581 0 0.186806 0.186806 \n",
"99582 0 0.136111 0.136111 \n",
"99583 0 0.122917 0.122917 \n",
"99584 0 0.047222 0.047222 \n",
"\n",
" time_between_purchase nb_tickets_internet number_compagny \n",
"0 1924.943102 8.0 10 \n",
"1 1.976701 0.0 10 \n",
"2 2563.803900 84.0 10 \n",
"3 201.283380 0.0 10 \n",
"4 0.000000 0.0 10 \n",
"... ... ... ... \n",
"99580 0.000000 0.0 14 \n",
"99581 0.000000 0.0 14 \n",
"99582 0.000000 0.0 14 \n",
"99583 0.000000 0.0 14 \n",
"99584 0.000000 0.0 14 \n",
"\n",
"[764880 rows x 11 columns]"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced_spectacle"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be04e2f9-60b9-4b44-ab36-06a365b21e32",
"metadata": {},
"outputs": [],
"source": [
"#Stat sur les canaux de vente"
]
},
{
"cell_type": "code",
"execution_count": 118,
2024-03-05 15:37:29 +01:00
"id": "20a70ec0-38f6-470e-a442-7884a150613a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArMAAAIhCAYAAABdSTJTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABSMUlEQVR4nO3deXwNZ///8feRPUgQJKIRQe1ro4uoBrW3aG8tym0pelNK0YXcrQalKV3u9Naquu2tqmrRUkXUVkurtmotXRRRErG0Yg1Jrt8ffjlfR04ih8QxvJ6Px3k8zDXXzHzmzJzkbXLNHJsxxggAAACwoELuLgAAAAC4VoRZAAAAWBZhFgAAAJZFmAUAAIBlEWYBAABgWYRZAAAAWBZhFgAAAJZFmAUAAIBlEWYBAABgWYRZuN2MGTNks9nk6+urAwcOZJvfuHFj1axZ0w2VST179lSRIkXcsm0417hxYzVu3NjdZdw2bDabRo4c6e4yrtuuXbs0cuRI7d+/392lWMKGDRs0cuRI/f333+4uJUcjR46UzWZzdxm4CRBmcdNIS0vTyy+/7O4yANyCdu3apVGjRhFm82jDhg0aNWrUTR1m+/Tpo40bN7q7DNwECLO4abRq1Uoff/yxfvzxR3eXki+MMTp37py7ywBwCzh37pyMMe4uI0c38mfd2bNnJUl33HGH7rvvvhu2Xdy8CLO4abz44osKCgrSsGHDrtr3/PnziomJUUREhLy9vVW2bFkNGDAg21WE8uXL6+GHH9bixYtVr149+fn5qVq1alq8eLGkS0McqlWrpsKFC+uee+7R5s2bnW5v586devDBB1W4cGGVKlVKzzzzjP0HahabzaZnnnlGkyZNUrVq1eTj46OZM2dKkn777Td16dJFpUuXlo+Pj6pVq6b33nsvT+9L1nqnT5+uKlWqyM/PT/Xr19d3330nY4zeeOMNRUREqEiRImratKl+//33bOuYNm2a6tSpI19fX5UoUUKPPvqodu/ebZ//4YcfymazOb3KMXr0aHl5eenw4cP2thUrVujBBx9UQECA/P391bBhQ33zzTcOy2X9CXDnzp164oknFBgYqODgYPXq1UsnT5686n4bYzR+/HiFh4fL19dXd911l77++munfVNTU/X88887nA+DBw/WmTNnrrodSVq6dKkefPBBBQYGyt/fX9WqVVNcXJx9/ubNm9W5c2eVL19efn5+Kl++vJ544olsw2KyhsysWrVKTz/9tEqWLKmgoCD94x//cHj/JGnu3Llq0aKFypQpYz8vhw8fnq3mnIZV9OzZU+XLl7dPv/766ypUqJAWLVqUrZ+/v79++umnXN+D1NRUPfXUUwoKClKRIkXUqlUr/frrr077Xuv5XK9ePTVq1Chbe0ZGhsqWLat//OMf9rYLFy5ozJgxqlq1qnx8fFSqVCk9+eSTOnr0qMOyWZ/xpUuX6q677pKfn5+qVq2qadOm2fvMmDFDjz/+uCSpSZMmstlsstlsmjFjhr1PXs5pZ1avXi2bzaaPPvpIQ4cOVUhIiPz8/BQdHa1t27Y59HX1PFq+fLl69eqlUqVKyd/fX2lpaTnWkdfPQNbPkw8//FDVqlWTv7+/6tSpY/+ZKF367L7wwguSpIiICPv7tXr1aof3fP78+apXr558fX01atQoSVJycrL69u2rO+64Q97e3oqIiNCoUaOUnp5uX//+/ftls9k0fvx4jR07VuXKlZOvr6/q16+f48+RrVu36rHHHlPx4sVVsWJFh3mXW7lypRo3bqygoCD5+fmpXLly6tChg8PP67yeW7AQA7jZ9OnTjSTzww8/mHfeecdIMt988419fnR0tKlRo4Z9OjMz07Rs2dJ4enqaESNGmOXLl5s333zTFC5c2NSrV8+cP3/e3jc8PNzccccdpmbNmmbOnDlmyZIl5t577zVeXl7mlVdeMQ0bNjTz5883CxYsMJUrVzbBwcHm7Nmz9uV79OhhvL29Tb
ly5czYsWPN8uXLzciRI42np6d5+OGHHfZDkilbtqypXbu2+fjjj83KlSvNzz//bHbu3GkCAwNNrVq1zKxZs8zy5cvNc889ZwoVKmRGjhx51fdHkgkPDzdRUVEOtZYoUcIMGTLEtG/f3ixevNjMnj3bBAcHm9q1a5vMzEz78q+99pqRZJ544gnz1VdfmVmzZpkKFSqYwMBA8+uvvxpjjElLSzMhISGma9euDtu+ePGiCQ0NNY8//ri97cMPPzQ2m8088sgjZv78+WbRokXm4YcfNh4eHmbFihX2frGxsUaSqVKlinnllVdMQkKCefvtt42Pj4958sknr7rfWcv37t3bfP3112by5MmmbNmyJiQkxERHR9v7nTlzxtStW9eULFnSvP3222bFihXmnXfeMYGBgaZp06YO74UzU6ZMMTabzTRu3Nh8/PHHZsWKFWbixImmf//+9j7z5s0zr7zyilmwYIFZs2aN+eSTT0x0dLQpVaqUOXr0qL1f1rlcoUIFM3DgQLNs2TIzZcoUU7x4cdOkSROH7b766qvmP//5j/nqq6/M6tWrzaRJk0xERES2ftHR0Q77m6VHjx4mPDzcPp2ZmWnatGljihcvbvbv32+MMWbatGlGkpkyZUqu70FmZqZp0qSJ8fHxsZ/nsbGxpkKFCkaSiY2Ntfe9nvM56/Oddd5lWbJkiZFkvvzyS2OMMRkZGaZVq1amcOHCZtSoUSYhIcFMmTLFlC1b1lSvXt3hM5r1Ga9evbqZNWuWWbZsmXn88ceNJLNmzRpjjDEpKSn2z8F7771nNm7caDZu3GhSUlKMMXk/p51ZtWqVkWTCwsJM+/btzaJFi8xHH31kKlWqZAICAszevXvtfV09j8qWLWv+9a9/ma+//tp89tlnJj093WkNrnwGJJny5cube+65x3z66admyZIlpnHjxsbT09Ne68GDB83AgQONJDN//nz7+3Xy5En7e16mTBlToUIFM23aNLNq1SqzadMmk5SUZMLCwkx4eLj54IMPzIoVK8yrr75qfHx8TM+ePe017Nu3z/6e3X///ebzzz838+bNM3fffbfx8vIyGzZssPfN+jkQHh5uhg0bZhISEszChQsd5l2+Xl9fX9O8eXOzcOFCs3r1ajN79mzTrVs389dff7l8bsE6CLNwu8vDbFpamqlQoYKpX7++/QfwlWF26dKlRpIZP368w3rmzp1rJJnJkyfb28LDw42fn5/5888/7W3bt283kkyZMmXMmTNn7O0LFy50+IVqzKXAIMm88847DtsaO3askWTWrVtnb5NkAgMDzYkTJxz6tmzZ0txxxx32XwRZnnnmGePr65ut/5UkmZCQEHP69OlstdatW9fhF1V8fLyRZHbs2GGMMeavv/4yfn5+pk2bNg7rTExMND4+PqZLly72ttjYWOPt7W2OHDlib8t6T7NCwZkzZ0yJEiVM27ZtHdaXkZFh6tSpY+655x6H9Tk7Tv379ze+vr65hsy//vrL+Pr6mkcffdShff369UaSQ7iLi4szhQoVMj/88IND388++8xIMkuWLMlxO6dOnTIBAQHm/vvvv2rovVx6ero5ffq0KVy4sMO5kXUuXx6EjTFm/PjxRpJJSkpyur7MzExz8eJFs2bNGiPJ/Pjjj/Z5eQ2zxhhz7Ngxc8cdd5h77rnHbN261fj7+5t//vOfV92fr7/+Otfz/PIwez3n87Fjx4y3t7f597//7dDesWNHExwcbC5evGiMMWbOnDlGkvn8888d+v3www9Gkpk4caK9LTw83Pj6+poDBw7Y286dO2dKlChh+vbta2+bN2+ekWRWrVrlsE5XzmlnssLsXXfd5XAO7d+/33h5eZk+ffrkuOzVzqPu3bvnuu0srnwGJJng4GCTmppqb0tOTjaFChUycXFx9rY33njDSDL79u3Ltr3w8HDj4eFhfvnlF4f2vn37miJFijgcC2OMefPNN40ks3PnTmPM/4XZ0NBQc+7cOXu/1NRUU6JECd
OsWTN7W9bPkVdeeSVbHVeG2az93b59u9P3yRjXzi1YB8MMcFPx9vbWmDFjtHnzZn366adO+6xcuVLSpT+fXu7xxx9
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#Repartition du nombre de canaux de vente selon les entreprise\n",
2024-03-10 20:30:08 +01:00
"\n",
"# Filtrer les données pour inclure uniquement les valeurs positives de total_amount et exclusion des valeur aberrantes\n",
"purchase_canaux = products_purchased_reduced_spectacle[(products_purchased_reduced_spectacle['nb_tickets'] > 0) ]\n",
"\n",
2024-03-05 15:37:29 +01:00
"plt.figure(figsize=(8, 6))\n",
2024-03-10 20:30:08 +01:00
"sns.barplot(x='number_compagny', y='nb_suppliers', data=purchase_canaux, ci=None) # ci=None pour ne pas afficher les intervalles de confiance\n",
2024-03-05 15:37:29 +01:00
"plt.title('Nombre moyen de canaux de vente par entreprise')\n",
"plt.xlabel('number_compagny')\n",
"plt.ylabel('Nombre moyen de caneaux ')\n",
"plt.show()"
]
},
2024-03-10 20:30:08 +01:00
{
"cell_type": "code",
"execution_count": 120,
"id": "ee901539-37d1-4dfa-8e78-38e4947c3d35",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 697297.000000\n",
"mean 0.110917\n",
"std 0.319561\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 8.000000\n",
"Name: nb_suppliers, dtype: float64"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set_spectacle[\"nb_suppliers\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 125,
"id": "7389053e-54ae-4167-9afd-aa5d194822ef",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>nb_suppliers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>1.118250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>1.340136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>1.033992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>1.155239</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.153296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.220174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>1.007711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>1.083750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased nb_suppliers\n",
"0 10 0.0 1.118250\n",
"1 10 1.0 1.340136\n",
"2 11 0.0 1.033992\n",
"3 11 1.0 1.155239\n",
"4 12 0.0 0.153296\n",
"5 12 1.0 0.220174\n",
"6 13 0.0 1.007711\n",
"7 13 1.0 1.083750\n",
"8 14 0.0 1.000000\n",
"9 14 1.0 1.000000"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#repartition des client selon le nombre moyen de canaux utilisé pour l'achat de ticket par compagnie sur base de train\n",
"\n",
"#purchase_train_canaux = train_set_spectacle[(train_set_spectacle['nb_tickets'] > 0) ]\n",
"\n",
"purchase_train_canaux_filtered= purchase_train_canaux.groupby([\"number_company\", \"y_has_purchased\"])[\"nb_suppliers\"].mean().reset_index()\n",
"purchase_train_canaux_filtered"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "e4079e46-db8b-4a25-9da6-37b1405c57d9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2QAAAIiCAYAAACnngsNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACE4ElEQVR4nOzdd3gUVfv/8c+G9ATSgBAgJKGjtABSpXcQQSwoSkdBRKpSHqWKUhQE8UFEQZAHEFRABQRC70gVpYgiTQhdWoCEJOf3B7/sl01jFxIWwvt1Xbmu7JkzM/fMnpnde8/MGYsxxggAAAAAcN+5ODsAAAAAAHhUkZABAAAAgJOQkAEAAACAk5CQAQAAAICTkJABAAAAgJOQkAEAAACAk5CQAQAAAICTkJABAAAAgJOQkAEAAACAkzwUCdn06dNlsVjk6empo0ePppheq1YtlSxZ0gmRSe3bt5evr69T1o3U1apVS7Vq1XJ2GI8Mi8WioUOHWl8nHa+ZZejQobJYLDp37twd637wwQdauHBhpsWSnuT75WGzZs0aWSwWfffdd84O5YH3sH4OhIeHq3379s4OA/dR0vn5yJEjzg7Fei5/FMyePVvjx4/P9PW0b99e4eHhGb7c4cOH67HHHlNiYqIk6dq1axo6dKjWrFmT4euS/u/zJ7OWf68mTZqk6dOnpyg/ePCg3N3dtXPnToeX+VAkZEliY2P17rvvOjsMAA8JZyZkAPCgadq0qTZv3qyQkBBnh/JIuV8JWWY4efKkxowZo+HDh8vF5VbacO3aNQ0bNizTEqZy5cpp8+bNKleuXKYs/16llZAVLVpUL7/8snr37u3wMh+qhKxRo0aaPXu2fv31V2eHkiGMMbp+/bqzwwAAJHPz5k3Fx8c7O4xHBvv7/siVK5cqV64sDw8PZ4eCh8SECRPk7++vli1b3vUyrl275lD9HDlyqHLlysqRI8ddr9NZunfvrnXr1mnTpk0OzfdQJWT9+vVTUFCQ+vfvf8e6N27c0MCBAxURESF3d3fly5dPb7zxhi5evGhTLzw8XE899ZQWLVqkyMhIeXl5qUSJElq0aJGkW937JUqUkI+PjypWrKjt27enur69e/eqbt268vHxUa5cudS9e/cUDdBisah79+6aPHmySpQoIQ8PD82YMUOS9Oeff6p169bKnTu3PDw8VKJECf33v/+1a78kLferr75SsWLF5OXlpQoVKmjLli0yxujDDz9URESEfH19VadOHf31118pljFt2jSVKVNGnp6eCgwM1DPPPKP9+/dbp8+cOVMWi0WbN29OMe/w4cPl5uamkydPWstWrFihunXrKkeOHPL29la1atW0cuVKm/mSLlfYu3evXnrpJfn5+Sk4OFgdO3bUpUuX7rjdxhiNGTNGYWFh8vT0VLly5fTzzz+nWvfy5ct66623bNpDr169FBMTc8f1SNLSpUtVt25d+fn5ydvbWyVKlNDIkSOt07dv364XX3xR4eHh8vLyUnh4uF566aUUl9gmXS6yevVqvf7668qZM6eCgoLUsmVLm/0nSXPnzlWDBg0UEhJibZcDBgxIEXNal2gmv3Rh1KhRcnFx0U8//ZSinre3t3777bd098Hly5f16quvKigoSL6+vmrUqJEOHjyY7jyOboskbd26Vc2aNVNQUJA8PT1VqFAh9erVK0W906dPp9tuLBaLYmJiNGPGDFksFlksljteyvrZZ5+pTJky8vX1Vfbs2VW8eHH95z//salz6tQpdenSRfnz55e7u7siIiI0bNgwu75M/v7772revLkCAgLk6empsmXLWs8BSZIu1ZgzZ47eeecd5c2bVzly5FC9evX0xx9/pLv89evXW+dN7uuvv5bFYtG2bdvuGOftbt68ecc4oqKi1Lx5c+XPn1+enp4qXLiwunTpkuKy0rNnz+q1115TaGioPDw8lCtXLlWrVk0rVqywO56kYygqKkodOnRQYGCgfHx81KxZM/
399982ddO6JC/5MZO0z2fOnKm+ffsqX7588vDwsJ4r73T8J/nrr7/UpEkT+fr6KjQ0VH379lVsbKxNnWHDhqlSpUoKDAxUjhw5VK5cOU2dOlXGGJt6q1atUq1atRQUFCQvLy8VKFBAzz77rM3nSlxcnEaMGKHixYtb92eHDh109uxZm2XdvHlT/fr1U548eeTt7a0nn3xSv/zyi137+8iRI7JYLBozZozef/99FShQQJ6enqpQoUKKc/pff/2lDh06qEiRIvL29la+fPnUrFmzFOeWO+1vR8yePVtVqlSRr6+vfH19VbZsWU2dOtWmzp0+36T/u+z0wIEDatiwoXx8fBQSEqJRo0ZJkrZs2aInn3xSPj4+Klq0aIrj1pF2ae/xIkk//PCDSpcuLQ8PDxUsWFATJkxI9VK/pO8BM2fOVIkSJeTt7a0yZcpYv8skjzP5JYv2fGbfy/G7ePFilS1bVh4eHoqIiNBHH32Uaj1jjCZNmqSyZcvKy8tLAQEBeu6551Lsw9TYE1/SLS7r169X5cqV5eXlpXz58mnQoEFKSEiwWZ69x5eUfjusVauWFi9erKNHj1o/i25//+w9J9xpPWm5l30aFxenqVOnqnXr1tbesSNHjihXrlzW2JO2J+lcm9Q+d+7cqeeee04BAQEqVKiQJPu/K6V2yWLSMWrPeTY1GXVODQ8P1969e7V27Vrrtt/+Xat8+fIqUaKEJk+efMeYbJiHwFdffWUkmW3btpkJEyYYSWblypXW6TVr1jSPP/649XViYqJp2LChcXV1NYMGDTLLly83H330kfHx8TGRkZHmxo0b1rphYWEmf/78pmTJkmbOnDlmyZIlplKlSsbNzc0MHjzYVKtWzcyfP98sWLDAFC1a1AQHB5tr165Z52/Xrp1xd3c3BQoUMO+//75Zvny5GTp0qHF1dTVPPfWUzXZIMvny5TOlS5c2s2fPNqtWrTK///672bt3r/Hz8zOlSpUyX3/9tVm+fLnp27evcXFxMUOHDr3j/pFkwsLCTNWqVW1iDQwMNL179zbNmzc3ixYtMrNmzTLBwcGmdOnSJjEx0Tr/Bx98YCSZl156ySxevNh8/fXXpmDBgsbPz88cPHjQGGNMbGysyZMnj3n55Zdt1n3z5k2TN29e8/zzz1vLZs6caSwWi2nRooWZP3+++emnn8xTTz1lsmXLZlasWGGtN2TIECPJFCtWzAwePNhERUWZcePGGQ8PD9OhQ4c7bnfS/J06dTI///yzmTJlismXL5/JkyePqVmzprVeTEyMKVu2rMmZM6cZN26cWbFihZkwYYLx8/MzderUsdkXqfnyyy+NxWIxtWrVMrNnzzYrVqwwkyZNMt26dbPW+fbbb83gwYPNggULzNq1a80333xjatasaXLlymXOnj1rrZfUlgsWLGjefPNNs2zZMvPll1+agIAAU7t2bZv1vvfee+bjjz82ixcvNmvWrDGTJ082ERERKerVrFnTZnuTtGvXzoSFhVlfJyYmmiZNmpiAgABz5MgRY4wx06ZNM5LMl19+me4+SExMNLVr1zYeHh7Wdj5kyBBTsGBBI8kMGTIk3fnt3ZalS5caNzc3U7p0aTN9+nSzatUqM23aNPPiiy9a69jbbjZv3my8vLxMkyZNzObNm83mzZvN3r1704xxzpw5RpJ58803zfLly82KFSvM5MmTTY8ePax1oqOjTWhoqAkLCzOff/65WbFihXnvvfeMh4eHad++vc3yku+XAwcOmOzZs5tChQqZr7/+2ixevNi89NJLRpIZPXq0td7q1auNJBMeHm5efvlls3jxYjNnzhxToEABU6RIERMfH5/uvo6MjDTVqlVLUf7EE0+YJ554It15b+dIHJ999pkZOXKk+fHHH83atWvNjBkzTJkyZUyxYsVMXFyctV7Dhg1Nrly5zJQpU8yaNWvMwoULzeDBg80333xjd1xJx1BoaKjp2LGj9djPnTu3CQ0NNf/++6+1blhYmGnXrl
2KZSQ/ZpK2NV++fOa5554zP/74o1m0aJE5f/68Xcd/0udAiRIlzEcffWRWrFhhBg8ebCwWixk2bJjNutu3b2+mTp1
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"multiple_barplot(purchase_train_canaux_filtered, x=\"number_company\", y=\"nb_suppliers\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Nombre moyen de canaux d'achat\", \n",
" title = \"Nombre moyen de canaux d'acht selon y_has_purchased par compagnies de spectacle (train set)\")"
]
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "markdown",
"id": "b9e84af4-a02b-4f83-81ae-b7a73475d060",
"metadata": {},
"source": [
"### 4. target_information"
]
},
{
"cell_type": "code",
2024-03-05 14:34:43 +01:00
"execution_count": 11,
2024-03-03 09:32:45 +01:00
"id": "2867eceb-1f72-406c-adc2-adfedcaf60e6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 6240166\n"
]
},
{
"data": {
"text/plain": [
"id 0\n",
"customer_id 0\n",
"target_name 0\n",
"target_type_is_import 0\n",
"target_type_name 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
2024-03-05 14:34:43 +01:00
"execution_count": 11,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",target_information_spectacle.shape[0])\n",
"target_information_spectacle.isna().sum()"
]
2024-03-05 00:36:48 +01:00
},
{
"cell_type": "code",
2024-03-05 15:50:46 +01:00
"execution_count": 47,
"id": "561f361d-7d39-430a-9e27-a32f6c2f7b50",
2024-03-05 00:36:48 +01:00
"metadata": {},
2024-03-05 15:50:46 +01:00
"outputs": [],
2024-03-05 00:36:48 +01:00
"source": [
2024-03-05 15:50:46 +01:00
"# pas exploitable"
2024-03-05 00:36:48 +01:00
]
},
2024-03-05 02:43:40 +01:00
{
"cell_type": "code",
"execution_count": null,
"id": "904cbf32-77b6-49dd-a96c-9e7e5a0175c3",
2024-03-05 00:36:48 +01:00
"metadata": {},
"outputs": [],
"source": []
2024-03-02 11:37:44 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}