2024-03-02 11:37:44 +01:00
{
"cells": [
2024-03-03 09:32:45 +01:00
{
"cell_type": "markdown",
"id": "be628bfc-0bca-48b0-97c9-29063289127e",
"metadata": {},
"source": [
"# Statistiques descriptives : compagnies offrant des spectacles"
]
},
{
"cell_type": "markdown",
"id": "0bf5450b-f44d-430a-aed7-d875dc365048",
"metadata": {},
"source": [
"## Importations et chargement des données"
]
},
2024-03-02 11:37:44 +01:00
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 1,
2024-03-02 11:37:44 +01:00
"id": "aa915888-cede-4eb0-8a26-7df573d29a3e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os\n",
"import s3fs\n",
"import warnings\n",
"from datetime import date, timedelta, datetime\n",
2024-03-03 09:32:45 +01:00
"import numpy as np\n",
2024-03-08 10:30:12 +01:00
"import matplotlib.pyplot as plt\n",
"import re"
2024-03-02 11:37:44 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 2,
2024-03-02 11:37:44 +01:00
"id": "17949e81-c30b-4fdf-9872-d7dc2b22ba9e",
"metadata": {},
"outputs": [],
"source": [
"# Load the KPI construction helpers by executing the shared script in this\n",
"# notebook's namespace, so that the functions it defines can be called below.\n",
"# The path is relative to the kernel's working directory; use\n",
"# '0_KPI_functions.py' instead when the kernel runs from the script's own folder.\n",
"KPI_FUNCTIONS_PATH = '../0_KPI_functions.py'\n",
"\n",
"# The context manager closes the file handle, which a bare open(...).read() leaves open.\n",
"# UTF-8 is Python's default source encoding, so it is requested explicitly.\n",
"with open(KPI_FUNCTIONS_PATH, encoding='utf-8') as kpi_script:\n",
"    kpi_source = kpi_script.read()\n",
"\n",
"# Compiling with the real file name makes warnings and tracebacks point at the\n",
"# script and its line numbers instead of an anonymous string source.\n",
"exec(compile(kpi_source, KPI_FUNCTIONS_PATH, 'exec'))"
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 3,
2024-03-02 11:37:44 +01:00
"id": "9c1737a2-bad8-4266-8dec-452085d8cfe7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv',\n",
" 'projet-bdc2324-team1/0_Input/Company_10/target_information.csv']"
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 3,
2024-03-02 11:37:44 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the S3 client: the endpoint host is read from the environment and\n",
"# reached over HTTPS.\n",
"S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
"fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))\n",
"\n",
"# Folder holding the input tables of company 10; listing it as the last\n",
"# expression of the cell displays the files it contains.\n",
"BUCKET = \"projet-bdc2324-team1/0_Input/Company_10\"\n",
"fs.ls(BUCKET)"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 4,
2024-03-02 11:37:44 +01:00
"id": "a35dc2f6-2017-4b21-abd2-2c4c112c96b2",
"metadata": {},
"outputs": [],
"source": [
2024-03-08 08:44:28 +01:00
"# test avec company 10\n",
"\n",
2024-03-02 11:37:44 +01:00
"# Names of the four input tables; each one is stored as a .csv file of the same name.\n",
"dic_base = ['campaigns_information', 'customerplus_cleaned', 'products_purchased_reduced', 'target_information']\n",
"\n",
"for nom_base in dic_base:\n",
"    # Derive the path from BUCKET (set together with the filesystem object) rather\n",
"    # than repeating the hard-coded folder, so both cells always target the same company.\n",
"    FILE_PATH_S3_fanta = f\"{BUCKET}/{nom_base}.csv\"\n",
"    with fs.open(FILE_PATH_S3_fanta, mode=\"rb\") as file_in:\n",
"        # Each table is published as a notebook-level variable named after its file;\n",
"        # the preview cells further down reference target_information and\n",
"        # products_purchased_reduced directly.\n",
"        globals()[nom_base] = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 5,
2024-03-02 11:37:44 +01:00
"id": "40b705eb-fd18-436b-b150-61611a3c6a84",
"metadata": {},
"outputs": [],
"source": [
2024-03-03 09:32:45 +01:00
"# fonction permettant d'extraire une table à partir du numéro de la compagnie (directory_path)\n",
2024-03-02 11:37:44 +01:00
"\n",
"def display_databases(directory_path, file_name, datetime_col = None):\n",
"    \"\"\"\n",
"    Read one input table of a company from S3 storage into a DataFrame.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str or int\n",
"        Company number, used to build the folder name 0_Input/Company_N.\n",
"    file_name : str\n",
"        Name of the table, without the .csv extension.\n",
"    datetime_col : list of str, optional\n",
"        Columns to parse as dates with custom_date_parser (a name this cell\n",
"        expects to find in the notebook namespace). Nothing is parsed when omitted.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The parsed content of the CSV file.\n",
"    \"\"\"\n",
"    # str() keeps string callers working and also accepts the number as an int.\n",
"    file_path = \"projet-bdc2324-team1\" + \"/0_Input/Company_\" + str(directory_path) + \"/\" + file_name + \".csv\"\n",
"    print(\"File path : \", file_path)\n",
"\n",
"    read_options = {\"sep\": \",\"}\n",
"    if datetime_col is not None:\n",
"        # The parser is only handed to pandas when dates are actually requested:\n",
"        # passing it alongside parse_dates=None parses nothing but still emits the\n",
"        # date_parser FutureWarning on every call.\n",
"        # NOTE(review): date_parser itself is deprecated; replacing it with\n",
"        # pd.to_datetime after the read needs a look at custom_date_parser first.\n",
"        read_options[\"parse_dates\"] = datetime_col\n",
"        read_options[\"date_parser\"] = custom_date_parser\n",
"\n",
"    with fs.open(file_path, mode=\"rb\") as file_in:\n",
"        df = pd.read_csv(file_in, **read_options)\n",
"    return df"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 6,
2024-03-05 00:36:48 +01:00
"id": "c56decc3-de19-4786-82a4-1386c72a6bfb",
"metadata": {},
"outputs": [
{
2024-03-10 17:41:43 +01:00
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>customer_id</th>\n",
" <th>target_name</th>\n",
" <th>target_type_is_import</th>\n",
" <th>target_type_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1165098</td>\n",
" <td>618562</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1165100</td>\n",
" <td>618559</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1165101</td>\n",
" <td>618561</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1165102</td>\n",
" <td>618560</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1165103</td>\n",
" <td>618558</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69253</th>\n",
" <td>1698158</td>\n",
" <td>18580</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69254</th>\n",
" <td>1698159</td>\n",
" <td>18569</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69255</th>\n",
" <td>1698160</td>\n",
" <td>2962</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69256</th>\n",
" <td>1698161</td>\n",
" <td>3825</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69257</th>\n",
" <td>1698162</td>\n",
" <td>5731</td>\n",
" <td>Newsletter mensuelle</td>\n",
" <td>False</td>\n",
" <td>manual_static_filter</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>69258 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" id customer_id target_name target_type_is_import \\\n",
"0 1165098 618562 Newsletter mensuelle False \n",
"1 1165100 618559 Newsletter mensuelle False \n",
"2 1165101 618561 Newsletter mensuelle False \n",
"3 1165102 618560 Newsletter mensuelle False \n",
"4 1165103 618558 Newsletter mensuelle False \n",
"... ... ... ... ... \n",
"69253 1698158 18580 Newsletter mensuelle False \n",
"69254 1698159 18569 Newsletter mensuelle False \n",
"69255 1698160 2962 Newsletter mensuelle False \n",
"69256 1698161 3825 Newsletter mensuelle False \n",
"69257 1698162 5731 Newsletter mensuelle False \n",
"\n",
" target_type_name \n",
"0 manual_static_filter \n",
"1 manual_static_filter \n",
"2 manual_static_filter \n",
"3 manual_static_filter \n",
"4 manual_static_filter \n",
"... ... \n",
"69253 manual_static_filter \n",
"69254 manual_static_filter \n",
"69255 manual_static_filter \n",
"69256 manual_static_filter \n",
"69257 manual_static_filter \n",
"\n",
"[69258 rows x 5 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
2024-03-05 00:36:48 +01:00
}
],
"source": [
"# Preview of the target table loaded for company 10 (pandas truncates the display).\n",
"target_information"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 7,
2024-03-05 00:36:48 +01:00
"id": "c825d64b-356c-4b71-aa3c-90e0dd7ca092",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" <th>start_date_time</th>\n",
" <th>end_date_time</th>\n",
" <th>open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1799177</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>2</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>danse</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>aringa rossa</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-09-27 00:00:00+02:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1799178</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>5èmes hurlants</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-11-18 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1799179</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>dom juan</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2016-12-07 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1799180</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>9.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>le grand t</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>vanishing point</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-04 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1799181</td>\n",
" <td>36984</td>\n",
" <td>409613</td>\n",
" <td>3</td>\n",
" <td>guichet</td>\n",
" <td>2016-04-28 17:58:26+02:00</td>\n",
" <td>12.0</td>\n",
" <td>False</td>\n",
" <td>cirque</td>\n",
" <td>la cite des congres</td>\n",
" <td>abo t gourmand jeune</td>\n",
" <td>a o lang pho</td>\n",
" <td>test 2016/2017</td>\n",
" <td>2017-01-03 00:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492309</th>\n",
" <td>3252232</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492310</th>\n",
" <td>3252233</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492311</th>\n",
" <td>3252234</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492312</th>\n",
" <td>3252235</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>492313</th>\n",
" <td>3252236</td>\n",
" <td>621716</td>\n",
" <td>710062</td>\n",
" <td>1</td>\n",
" <td>guichet</td>\n",
" <td>2023-03-09 12:08:45+01:00</td>\n",
" <td>7.0</td>\n",
" <td>False</td>\n",
" <td>théâtre</td>\n",
" <td>cap nort</td>\n",
" <td>tarif sco co 1 seance scolaire</td>\n",
" <td>sur moi, le temps</td>\n",
" <td>2022/2023</td>\n",
" <td>2023-03-13 14:00:00+01:00</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>492314 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 1799177 36984 409613 2 guichet \n",
"1 1799178 36984 409613 3 guichet \n",
"2 1799179 36984 409613 1 guichet \n",
"3 1799180 36984 409613 1 guichet \n",
"4 1799181 36984 409613 3 guichet \n",
"... ... ... ... ... ... \n",
"492309 3252232 621716 710062 1 guichet \n",
"492310 3252233 621716 710062 1 guichet \n",
"492311 3252234 621716 710062 1 guichet \n",
"492312 3252235 621716 710062 1 guichet \n",
"492313 3252236 621716 710062 1 guichet \n",
"\n",
" purchase_date amount is_full_price name_event_types \\\n",
"0 2016-04-28 17:58:26+02:00 9.0 False danse \n",
"1 2016-04-28 17:58:26+02:00 9.0 False cirque \n",
"2 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"3 2016-04-28 17:58:26+02:00 9.0 False théâtre \n",
"4 2016-04-28 17:58:26+02:00 12.0 False cirque \n",
"... ... ... ... ... \n",
"492309 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492310 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492311 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492312 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"492313 2023-03-09 12:08:45+01:00 7.0 False théâtre \n",
"\n",
" name_facilities name_categories \\\n",
"0 le grand t abo t gourmand jeune \n",
"1 le grand t abo t gourmand jeune \n",
"2 le grand t abo t gourmand jeune \n",
"3 le grand t abo t gourmand jeune \n",
"4 la cite des congres abo t gourmand jeune \n",
"... ... ... \n",
"492309 cap nort tarif sco co 1 seance scolaire \n",
"492310 cap nort tarif sco co 1 seance scolaire \n",
"492311 cap nort tarif sco co 1 seance scolaire \n",
"492312 cap nort tarif sco co 1 seance scolaire \n",
"492313 cap nort tarif sco co 1 seance scolaire \n",
"\n",
" name_events name_seasons start_date_time \\\n",
"0 aringa rossa test 2016/2017 2016-09-27 00:00:00+02:00 \n",
"1 5èmes hurlants test 2016/2017 2016-11-18 00:00:00+01:00 \n",
"2 dom juan test 2016/2017 2016-12-07 00:00:00+01:00 \n",
"3 vanishing point test 2016/2017 2017-01-04 00:00:00+01:00 \n",
"4 a o lang pho test 2016/2017 2017-01-03 00:00:00+01:00 \n",
"... ... ... ... \n",
"492309 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492310 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492311 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492312 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"492313 sur moi, le temps 2022/2023 2023-03-13 14:00:00+01:00 \n",
"\n",
" end_date_time open \n",
"0 1901-01-01 00:09:21+00:09 True \n",
"1 1901-01-01 00:09:21+00:09 True \n",
"2 1901-01-01 00:09:21+00:09 True \n",
"3 1901-01-01 00:09:21+00:09 True \n",
"4 1901-01-01 00:09:21+00:09 True \n",
"... ... ... \n",
"492309 1901-01-01 00:09:21+00:09 True \n",
"492310 1901-01-01 00:09:21+00:09 True \n",
"492311 1901-01-01 00:09:21+00:09 True \n",
"492312 1901-01-01 00:09:21+00:09 True \n",
"492313 1901-01-01 00:09:21+00:09 True \n",
"\n",
"[492314 rows x 16 columns]"
]
},
2024-03-10 17:41:43 +01:00
"execution_count": 7,
2024-03-05 00:36:48 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview of the purchased-products table loaded for company 10 (pandas truncates the display).\n",
"products_purchased_reduced"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 8,
2024-03-02 13:05:51 +01:00
"id": "afd044b8-ac83-4a35-b959-700cae0b3b41",
2024-03-02 12:16:24 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 12:16:24 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 12:16:24 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 13:05:51 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-08 10:30:12 +01:00
"Tables imported for tenant 10\n",
2024-03-05 14:34:43 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-08 10:30:12 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n"
]
},
2024-03-02 12:16:24 +01:00
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_11/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 13:05:51 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-08 10:30:12 +01:00
"Tables imported for tenant 11\n",
2024-03-02 13:05:51 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: DtypeWarning: Columns (4,8,10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_12/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 13:05:51 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-08 10:30:12 +01:00
"Tables imported for tenant 12\n",
2024-03-02 13:05:51 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_13/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 13:05:51 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-08 10:30:12 +01:00
"Tables imported for tenant 13\n",
2024-03-02 13:05:51 +01:00
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:05:51 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
2024-03-02 13:32:54 +01:00
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_14/target_information.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3170175140.py:10: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
2024-03-10 12:31:28 +01:00
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n",
"<string>:28: SettingWithCopyWarning: \n",
2024-03-02 12:16:24 +01:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
]
2024-03-08 10:30:12 +01:00
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables imported for tenant 14\n"
]
2024-03-02 12:16:24 +01:00
}
],
"source": [
"# Build the KPI tables for the 5 performing-arts (\"spectacle\") companies\n",
"\n",
"# Numbers of the performing-arts companies to load\n",
"nb_compagnie=['10','11','12','13','14']\n",
"\n",
"# Per-company tables are collected in lists and concatenated once after the loop:\n",
"# calling pd.concat inside the loop re-copies every accumulated row at each iteration\n",
"customerplus_frames = []\n",
"campaigns_frames = []\n",
"tickets_frames = []\n",
"target_frames = []\n",
"\n",
"# Load the raw tables of each company and derive its KPI tables\n",
"for directory_path in nb_compagnie:\n",
"    df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
"    df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
"    df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
"    df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
"    \n",
"    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n",
"    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
"    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
"\n",
"    # Tag every row with its company number so that results can be aggregated per company.\n",
"    # The column name \"number_compagny\" (sic) is kept as is: later cells filter on it.\n",
"    df_tickets_kpi[\"number_compagny\"]=int(directory_path)\n",
"    df_campaigns_kpi[\"number_compagny\"]=int(directory_path)\n",
"    df_customerplus_clean[\"number_compagny\"]=int(directory_path)\n",
"    df_target_information[\"number_compagny\"]=int(directory_path)\n",
"\n",
"    customerplus_frames.append(df_customerplus_clean)\n",
"    campaigns_frames.append(df_campaigns_kpi)\n",
"    tickets_frames.append(df_tickets_kpi)\n",
"    target_frames.append(df_target_information)\n",
"\n",
"    print(f\"Tables imported for tenant {directory_path}\")\n",
"\n",
"# Stack the per-company tables (original row indexes are kept, as before)\n",
"customerplus_clean_spectacle = pd.concat(customerplus_frames, axis=0)\n",
"campaigns_information_spectacle = pd.concat(campaigns_frames, axis=0)\n",
"products_purchased_reduced_spectacle = pd.concat(tickets_frames, axis=0)\n",
"target_information_spectacle = pd.concat(target_frames, axis=0)"
2024-03-02 13:05:51 +01:00
]
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "code",
"execution_count": 37,
"id": "b5a4a031-9533-4a50-8569-5f4246691a7a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>2</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18031</th>\n",
" <td>2</td>\n",
" <td>319517</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1556</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>2</td>\n",
" <td>2020-01-01 14:06:52+00:00</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>291642</th>\n",
" <td>2</td>\n",
" <td>757541</td>\n",
" <td>303.0</td>\n",
" <td>5.0</td>\n",
" <td>1</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2016-09-08 14:50:00+00:00</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"17 2 139 NaN NaN 0 \n",
"18031 2 319517 NaN NaN 0 \n",
"291642 2 757541 303.0 5.0 1 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"17 875 False NaN 2 False ... \n",
"18031 1556 False NaN 0 True ... \n",
"291642 862 False NaN 1 True ... \n",
"\n",
" purchase_count first_buying_date country gender_label \\\n",
"17 3 NaN NaN other \n",
"18031 2 2020-01-01 14:06:52+00:00 fr female \n",
"291642 3 2016-09-08 14:50:00+00:00 fr male \n",
"\n",
" gender_female gender_male gender_other country_fr has_tags \\\n",
"17 0 0 1 NaN 0 \n",
"18031 1 0 0 1.0 0 \n",
"291642 0 1 0 1.0 1 \n",
"\n",
" number_compagny \n",
"17 10 \n",
"18031 11 \n",
"291642 14 \n",
"\n",
"[3 rows x 29 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==2]"
]
},
{
"cell_type": "code",
2024-03-05 00:36:48 +01:00
"execution_count": 1,
2024-03-03 09:32:45 +01:00
"id": "b9b6ec1f-36fb-4ee9-a1ed-09ff41878005",
"metadata": {},
"outputs": [
{
2024-03-05 00:36:48 +01:00
"ename": "NameError",
"evalue": "name 'customerplus_clean_spectacle' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcustomerplus_clean_spectacle\u001b[49m[customerplus_clean_spectacle[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcustomer_id\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m==\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
"\u001b[0;31mNameError\u001b[0m: name 'customerplus_clean_spectacle' is not defined"
]
2024-03-03 09:32:45 +01:00
}
],
"source": [
"customerplus_clean_spectacle[customerplus_clean_spectacle[\"customer_id\"]==1]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "a12c1b7d-6f6f-483e-b215-6336d7a51057",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'street_id', 'structure_id', 'mcp_contact_id',\n",
" 'fidelity', 'tenant_id', 'is_partner', 'deleted_at', 'gender',\n",
" 'is_email_true', 'opt_in', 'last_buying_date', 'max_price',\n",
" 'ticket_sum', 'average_price', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'gender_label',\n",
" 'gender_female', 'gender_male', 'gender_other', 'country_fr',\n",
" 'has_tags', 'number_compagny'],\n",
" dtype='object')"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customerplus_clean_spectacle.columns"
]
},
2024-03-02 13:05:51 +01:00
{
"cell_type": "code",
2024-03-02 14:05:48 +01:00
"execution_count": 38,
2024-03-02 13:32:54 +01:00
"id": "05b9a396-dcd7-4d3d-8b39-5ca48beba4b0",
2024-03-02 12:16:24 +01:00
"metadata": {},
2024-03-02 14:05:48 +01:00
"outputs": [],
2024-03-02 12:16:24 +01:00
"source": [
2024-03-02 14:05:48 +01:00
"#customerplus_clean_spectacle.isna().sum()\n",
"#campaigns_information_spectacle.isna().sum()\n",
"#products_purchased_reduced_spectacle.isna().sum()\n",
"#target_information_spectacle.isna().sum()"
2024-03-02 12:16:24 +01:00
]
2024-03-03 09:32:45 +01:00
},
{
"cell_type": "markdown",
2024-03-08 10:30:12 +01:00
"id": "81e15508-32ca-46f1-a03d-1febddbbf5b4",
2024-03-03 09:32:45 +01:00
"metadata": {},
"source": [
2024-03-08 10:30:12 +01:00
"### Ajout : importation de la table train_set pour y calculer les statistiques descriptives"
2024-03-03 09:32:45 +01:00
]
},
{
2024-03-08 10:30:12 +01:00
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 9,
2024-03-08 10:30:12 +01:00
"id": "3a1fdd6b-ac43-4e90-9a31-4f522bcc44bb",
2024-03-03 09:32:45 +01:00
"metadata": {},
2024-03-08 10:30:12 +01:00
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"/tmp/ipykernel_427/3450421856.py:9: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
2024-03-08 10:30:12 +01:00
" train_set_spectacle = pd.read_csv(file_in, sep=\",\")\n"
]
}
],
2024-03-03 09:32:45 +01:00
"source": [
"# Import the train_set table for the performing-arts (or music) companies\n",
"\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"path_train_set_spectacle = \"projet-bdc2324-team1/Generalization/musique/Train_set.csv\"\n",
"\n",
"with fs.open(path_train_set_spectacle, mode=\"rb\") as file_in:\n",
"    # low_memory=False makes pandas infer each column's type on the whole file instead of\n",
"    # chunk by chunk; the chunked default raised a DtypeWarning (mixed types) on column 38\n",
"    train_set_spectacle = pd.read_csv(file_in, sep=\",\", low_memory=False)"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 12,
2024-03-08 10:30:12 +01:00
"id": "3a4c1ff4-2861-4e86-99df-26eea0370dc3",
2024-03-03 09:32:45 +01:00
"metadata": {},
2024-03-08 10:30:12 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_299341</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 05:47:26.333333333</td>\n",
2024-03-10 17:41:43 +01:00
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_63788</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>62.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>393.205891</td>\n",
" <td>281.017639</td>\n",
" <td>112.188252</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:13:51</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_759946</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
2024-03-10 17:41:43 +01:00
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_20653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>10.0</td>\n",
" <td>1 days 00:45:54</td>\n",
2024-03-10 17:41:43 +01:00
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_824705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
2024-03-10 17:41:43 +01:00
" <td>0.0</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_299341 0.0 0.0 0.0 0.0 \n",
"1 10_63788 3.0 2.0 62.0 1.0 \n",
"2 10_759946 0.0 0.0 0.0 0.0 \n",
"3 10_20653 0.0 0.0 0.0 0.0 \n",
"4 10_824705 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 NaN NaN \n",
"1 1.0 393.205891 281.017639 \n",
"2 0.0 NaN NaN \n",
"3 0.0 NaN NaN \n",
"4 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... country gender_label \\\n",
"0 NaN 0.0 ... fr male \n",
"1 112.188252 3.0 ... fr female \n",
"2 NaN 0.0 ... NaN other \n",
"3 NaN 0.0 ... fr male \n",
"4 NaN 0.0 ... NaN other \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 0 1 0 1.0 12.0 \n",
"1 1 0 0 1.0 3.0 \n",
"2 0 0 1 NaN 0.0 \n",
"3 0 1 0 1.0 11.0 \n",
"4 0 0 1 NaN 0.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \n",
2024-03-10 17:41:43 +01:00
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
2024-03-08 10:30:12 +01:00
"1 1.0 0 days 05:13:51 1.0 \n",
2024-03-10 17:41:43 +01:00
"2 0.0 NaN 0.0 \n",
"3 10.0 1 days 00:45:54 0.0 \n",
"4 0.0 NaN 0.0 \n",
2024-03-08 10:30:12 +01:00
"\n",
"[5 rows x 40 columns]"
]
},
2024-03-10 17:41:43 +01:00
"execution_count": 12,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-03 09:32:45 +01:00
"source": [
2024-03-08 10:30:12 +01:00
"train_set_spectacle.head()"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 11,
2024-03-08 10:30:12 +01:00
"id": "4632384d-2a06-445d-9fdb-b0c91b37ebaf",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-03-08 10:30:12 +01:00
"array([0., 1.])"
2024-03-03 09:32:45 +01:00
]
},
2024-03-10 17:41:43 +01:00
"execution_count": 11,
2024-03-03 09:32:45 +01:00
"metadata": {},
2024-03-08 10:30:12 +01:00
"output_type": "execute_result"
2024-03-03 09:32:45 +01:00
}
],
"source": [
2024-03-08 10:30:12 +01:00
"# on remplace les valeurs has purchased = NaN par des 0\n",
"train_set_spectacle[\"y_has_purchased\"] = train_set_spectacle[\"y_has_purchased\"].fillna(0)\n",
"train_set_spectacle[\"y_has_purchased\"].unique()"
2024-03-08 08:44:28 +01:00
]
},
{
"cell_type": "code",
2024-03-10 19:08:50 +01:00
"execution_count": 66,
2024-03-08 10:30:12 +01:00
"id": "5fd56696-b479-46c7-8a59-fb8137db5fb5",
2024-03-08 08:44:28 +01:00
"metadata": {},
2024-03-08 10:30:12 +01:00
"outputs": [
{
"data": {
"text/plain": [
"array([10, 11, 12, 13, 14])"
]
},
2024-03-10 19:08:50 +01:00
"execution_count": 66,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-08 08:44:28 +01:00
"source": [
"# Rebuild a column holding the company number: it is the part of customer_id located\n",
"# before the first underscore (ids look like \"10_299341\")\n",
"\n",
"train_set_spectacle[\"number_company\"] = train_set_spectacle[\"customer_id\"].astype(str).str.split(\"_\", n=1).str[0].astype(int)\n",
"train_set_spectacle[\"number_company\"].unique()"
2024-03-08 08:44:28 +01:00
]
},
{
"cell_type": "code",
2024-03-10 19:08:50 +01:00
"execution_count": 71,
2024-03-08 10:30:12 +01:00
"id": "91c6e047-43d2-456c-81f1-087026eef4f0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_299341</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 05:47:26.333333333</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_63788</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>62.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>393.205891</td>\n",
" <td>281.017639</td>\n",
" <td>112.188252</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:13:51</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_759946</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_20653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>10.0</td>\n",
" <td>1 days 00:45:54</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_824705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_299341 0.0 0.0 0.0 0.0 \n",
"1 10_63788 3.0 2.0 62.0 1.0 \n",
"2 10_759946 0.0 0.0 0.0 0.0 \n",
"3 10_20653 0.0 0.0 0.0 0.0 \n",
"4 10_824705 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 NaN NaN \n",
"1 1.0 393.205891 281.017639 \n",
"2 0.0 NaN NaN \n",
"3 0.0 NaN NaN \n",
"4 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
"0 NaN 0.0 ... male \n",
"1 112.188252 3.0 ... female \n",
"2 NaN 0.0 ... other \n",
"3 NaN 0.0 ... male \n",
"4 NaN 0.0 ... other \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 0 1 0 1.0 12.0 \n",
"1 1 0 0 1.0 3.0 \n",
"2 0 0 1 NaN 0.0 \n",
"3 0 1 0 1.0 11.0 \n",
"4 0 0 1 NaN 0.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
"1 1.0 0 days 05:13:51 1.0 \n",
"2 0.0 NaN 0.0 \n",
"3 10.0 1 days 00:45:54 0.0 \n",
"4 0.0 NaN 0.0 \n",
"\n",
" number_company \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"\n",
"[5 rows x 41 columns]"
]
},
2024-03-10 19:08:50 +01:00
"execution_count": 71,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set_spectacle.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21e562d4-035d-4112-9f94-527b7fd935cf",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "fff306c2-1d41-4ef6-867b-ba9a7cf4ee68",
"metadata": {},
"source": [
"## Statistiques descriptives"
]
},
{
"cell_type": "markdown",
"id": "0549bdc4-edd7-4511-916e-26e94b5a30f5",
"metadata": {},
"source": [
"### 0. Détection du client anonyme (outlier) - utile pour la section 3"
]
},
{
"cell_type": "code",
2024-03-10 19:49:34 +01:00
"execution_count": 101,
2024-03-08 10:30:12 +01:00
"id": "5b460061-f8b5-4a6b-ba59-539446d8487f",
"metadata": {},
"outputs": [],
"source": [
"def outlier_detection(directory_path = \"1\", coupure = 1):\n",
"    \"\"\"Plot how the total purchase amount of one company is split between its customers.\n",
"\n",
"    The ticket purchases of the company are loaded, `total_amount` is summed per customer,\n",
"    and a pie chart shows the `coupure` biggest customers plus one 'Autre' slice for all the\n",
"    others. It is used in this notebook to spot an outlier (e.g. anonymous) customer that\n",
"    concentrates a large share of the amount.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str\n",
"        Company (tenant) number, forwarded to `display_databases`.\n",
"    coupure : int\n",
"        Number of top customers displayed as individual slices.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is displayed with `plt.show()`.\n",
"    \"\"\"\n",
"    df_tickets = display_databases(directory_path, file_name = 'products_purchased_reduced' , datetime_col = ['purchase_date'])\n",
"    df_tickets_kpi = tickets_kpi_function(df_tickets)\n",
"\n",
"    # Company 101 has a second purchases file whose KPIs are appended to the first one\n",
"    if directory_path == \"101\" :\n",
"        df_tickets_1 = display_databases(directory_path, file_name = 'products_purchased_reduced_1' , datetime_col = ['purchase_date'])\n",
"        df_tickets_kpi_1 = tickets_kpi_function(df_tickets_1)\n",
"\n",
"        df_tickets_kpi = pd.concat([df_tickets_kpi, df_tickets_kpi_1])\n",
"\n",
"    # Total amount per customer, biggest customers first\n",
"    amount_per_customer = df_tickets_kpi.groupby('customer_id')['total_amount'].sum()\n",
"    df_circulaire = amount_per_customer.sort_values(ascending = False)\n",
"    \n",
"    # Positional split: the `coupure` biggest customers vs. everybody else\n",
"    top = df_circulaire.iloc[:coupure]\n",
"    rest_sum = df_circulaire.iloc[coupure:].sum()\n",
"    \n",
"    # New series with the `coupure` biggest shares and a single 'Autre' entry for the rest\n",
"    new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
"    \n",
"    # Pie chart of the shares\n",
"    plt.figure(figsize=(3, 3))\n",
"    plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
"    plt.axis('equal')  # equal aspect ratio so that the pie is drawn as a circle\n",
"    plt.title('Répartition des montants totaux')\n",
"    plt.show()\n"
]
},
2024-03-10 12:31:28 +01:00
{
"cell_type": "code",
2024-03-10 19:49:34 +01:00
"execution_count": 102,
2024-03-10 12:31:28 +01:00
"id": "cccee90c-67d1-4e14-8410-1210a5ef97d9",
"metadata": {},
"outputs": [],
"source": [
"# Helper that draws a grouped bar plot: several bars per category, one per modality\n",
"\n",
"def multiple_barplot(data, x, y, var_labels, bar_width=0.35,\n",
"                     figsize=(10, 6), xlabel=None, ylabel=None, title=None, dico_labels = None) :\n",
"    \"\"\"Draw a grouped bar chart: one group per value of `x`, one bar per value of `var_labels`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : DataFrame containing the columns `x`, `y` and `var_labels`. When several rows\n",
"        share the same (`x`, `var_labels`) pair only the first one is plotted; a missing\n",
"        pair is drawn as a bar of height 0.\n",
"    x : name of the column whose unique values are the categories on the x axis.\n",
"    y : name of the column holding the bar heights.\n",
"    var_labels : name of the column whose unique values define the bars inside a group.\n",
"    bar_width : width of a single bar.\n",
"    figsize : figure size forwarded to `plt.subplots`.\n",
"    xlabel, ylabel : axis labels; when None, the names `x` and `y` are used.\n",
"    title : title of the plot.\n",
"    dico_labels : optional dict mapping each value of `var_labels` to its legend text;\n",
"        when None the legend shows \"<var_labels>=<value>\".\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is displayed with `plt.show()`.\n",
"    \"\"\"\n",
"\n",
"    # without explicit axis labels, the plot falls back on the names of the x and y variables\n",
"    xlabel = x if xlabel is None else xlabel\n",
"    ylabel = y if ylabel is None else ylabel\n",
"    \n",
"    fig, ax = plt.subplots(figsize=figsize)\n",
"    \n",
"    categories = data[x].unique()\n",
"    labels = data[var_labels].unique()\n",
"    base_positions = np.arange(len(categories))\n",
"    \n",
"    # One series of bars per label, each shifted by one bar width from the previous one\n",
"    for rank, label in enumerate(labels):\n",
"        label_data = data[data[var_labels] == label]\n",
"\n",
"        values = []\n",
"        for category in categories:\n",
"            matches = label_data.loc[label_data[x] == category, y].values\n",
"            # a (label, category) pair may be absent from data: plot 0 instead of raising IndexError\n",
"            values.append(matches[0] if len(matches) else 0)\n",
"        \n",
"        label_printed = f\"{var_labels}={label}\" if dico_labels is None else dico_labels[label]\n",
"        \n",
"        ax.bar(base_positions + rank * bar_width, values, bar_width, label=label_printed)\n",
"\n",
"    # Axis labels, title, ticks and legend\n",
"    ax.set_xlabel(xlabel)\n",
"    ax.set_ylabel(ylabel)\n",
"    ax.set_title(title)\n",
"    # ticks are centred under each group whatever the number of labels\n",
"    # (for two labels this is pos + bar_width / 2, as before)\n",
"    ax.set_xticks(base_positions + bar_width * (len(labels) - 1) / 2)\n",
"    ax.set_xticklabels(categories)\n",
"    ax.legend()\n",
"    \n",
"    # Display the plot. Original author's note on one use of this helper: the share of French\n",
"    # customers is the same whether or not a purchase was made over the period,\n",
"    # except for company 12, and maybe 13\n",
"    plt.show()"
]
},
2024-03-08 10:30:12 +01:00
{
"cell_type": "code",
"execution_count": 48,
"id": "b6417f09-a6c7-4319-95b3-98c95ec5a3b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1173/2987234667.py:8: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" df = pd.read_csv(file_in, sep=\",\", parse_dates = datetime_col, date_parser=custom_date_parser)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUwAAAEQCAYAAADbIk3TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA5TUlEQVR4nO3dd3xT9f7H8VeStkkX3dDSySxbliggU9kgyLiKskVUcKIX0SvrOlB/XkXFLVdEkesGUUQ2IgKCWPYqs0DL6KB7Jd/fH4FAaEtTaHvS5vN8PPrQJCcn76Snb8745hydUkohhBCiVHqtAwghRFUhhSmEEA6SwhRCCAdJYQohhIOkMIUQwkFSmEII4SApTCGEcJAUphBCOEgKUwghHFRtCnPnzp14eXnxzjvvaB1FCFFNOVVhzp8/H51OZ/txc3MjLCyMe+65h0OHDpX4vIyMDIYOHcqjjz7Ko48+WomJi1q2bBkzZ84s9rGYmBjGjBlju3369GlmzpxJXFxckWlnzpyJTqermJDXSafTlfjeXMXevXuZOXMmx44dq5TXe/nll1m8eHG5za888l9rGa/2lBP59NNPFaA+/fRTtWnTJrV27Vr14osvKk9PT1WzZk2VkpJS7POGDRum7rvvPmWxWCo5cVGTJk1SJX2s27dvV/Hx8bbbW7dutb3fqyUkJKhNmzZVVMzrAqgZM2ZoHUNT33zzjQLU2rVrK+X1vL291ejRo8ttfuWR/1rLeHXnpmFXl6hZs2a0bdsWgK5du2I2m5kxYwaLFy9m7NixRab/+uuvKztiEdnZ2Xh5eV1zmlatWjk8v4iICCIiIm40lhCiPGnd2Fe6tIa5detWu/t//vlnBajZs2fb3b9161Y1YMAAFRAQoIxGo2rZsqX66quvip3nihUr1JgxY1RAQIDy8vJS/fv3V4cPH7abdsWKFerOO+9U4eHhymg0qnr16qkJEyaoc+fO2U03Y8YMBai//vpLDRkyRPn7+6vQ0FA1evRoBRT5OXr0qFJKqejoaNvawtq1a4ud9tIa3KXXuJLZbFavvvqqio2NVR4eHiokJESNHDlSJSQk2E3XpUsX1bRpU/Xnn3+q2267TXl6eqo6deqo2bNnK7PZXOrv4cKFC2r8+PEqMDBQeXt7q169eqkDBw4Uu4Z58OBBNXz4cBUSEqI8PDxUo0aN1Ny5c4vkfuGFF1TDhg2VyWRSfn5+qnnz5mrOnDnXzHHpM1q4cKGaMmWKCg0NVd7e3qp///4qKSlJpaenqwceeEAFBQWpoKAgNWbMGJWRkWE3j5ycHDV16lQVExOj3N3dVe3atdXEiRNVamqq3XTR0dGqX79+6pdfflGtWrVSJpNJxcbGqnnz5tmmubQsXf1zaQuhrMvP7t271T333KNq1KihatasqcaOHavS0tJs0xX3Wl26dFFKKZWVlaWeeuopFRMTo4xGowoICFBt2rRRX375ZYmfZ2n5lVJq3rx5qkWLFrZ5Dho0SO3du9f2eGnL+Ny5c1WnTp1USEiI8vLyUs2aNVOvvvqqys/PL/J5F7fm3KVLF9t7VEqpBx98UBmNRrVt2zbbfWazWXXv3l3VrFlTnT59usT3WxGqRGHOnTtXAeq7776z3bdmzRrl4eGhOnXqpL766iu1fPlyNWbMmCILwKV5RkZGqnHjxqlffvlFffTRR6pmzZoqMjLS7g/n/fffV7Nnz1Y//vijWr9+vfrss8/UTTfdpGJjY+1+4ZcW+OjoaPXMM8+olStXqsWLF6v4+Hg1dOhQBahNmzbZfnJzc5VS9gvJhQsXbNmef/5527SXyq+4wpwwYYIC1COPPKKWL1+uPvjgAxUSEqIiIyPt/ii7dOmigoKCVIMGDdQHH3ygVq5cqSZOnKgA9dlnn13zd2CxWFS3bt2U0WhUL730klqxYoWaMWOGqlu3bpHC3LNnj638FixYoFasWKGeeuoppdfr1cyZM23TzZ49WxkMBjVjxg
y1evVqtXz5cjVnzhy7aYpzqTCjo6PVmDFjbO/Zx8dHdevWTfXo0UM9/fTTasWKFerVV19VBoNBPfroo3bvpVevXsrNzU1NmzZNrVixQr3++uvK29tbtWrVyvZ7ufS7iYiIUE2aNFELFixQv/76qxo2bJgC1Pr165VSSp09e1a9/PLLClDvvvuu7Xd29uzZ61p+YmNj1fTp09XKlSvVG2+8oYxGoxo7dqxtuk2bNilPT0/Vt29f22vt2bNHKWUtEi8vL/XGG2+otWvXqp9++km98sor6p133inx8ywt/6XHhg8frn7++We1YMECVbduXeXn56cOHjyolFKlLuNPPvmkev/999Xy5cvVmjVr1JtvvqmCg4Pt3telz9uRwszJyVEtW7ZUdevWtf2tTp8+Xen1erVixYoS32tFccrC3Lx5syooKFAZGRlq+fLlKjQ0VHXu3FkVFBTYpm3UqJFq1aqV3X1KKdW/f38VFhZmW5O6NM+77rrLbrqNGzcqQL344ovFZrFYLKqgoEAdP35cAWrJkiW2xy4t8NOnTy/yvGvt37l6IbnWPsyrC3Pfvn0KUBMnTrSbbsuWLQpQzz33nO2+Ll26KEBt2bLFbtomTZqoXr16FZvtkl9++UUB6q233rK7/6WXXipSmL169VIRERHqwoULdtM+8sgjymQy2fY59+/fX7Vs2fKar1ucS4U5YMAAu/ufeOIJBajHHnvM7v5BgwapwMBA2+3ly5crQL322mt203311VcKUB999JHtvujoaGUymdTx48dt9+Xk5KjAwED14IMP2u5zdB+gI8vP1bkmTpyoTCaT3b74kvZhNmvWTA0aNOiaGYpTUv7U1FRbOV/pxIkTymg0qnvvvdd2n6P7MM1msyooKFALFixQBoPB7hiEo4WplFKHDh1SNWrUUIMGDVKrVq1Ser1ePf/886W/2QrgVEfJL7n11ltxd3fH19eX3r17ExAQwJIlS3Bzs+5yjY+PZ//+/dx3330AFBYW2n769u1LYmIiBw4csJvnpWkv6dChA9HR0axdu9Z239mzZ3nooYeIjIzEzc0Nd3d3oqOjAdi3b1+RnEOGDCnX930tl3JeeZQdoF27djRu3JjVq1fb3R8aGkq7du3s7mvRogXHjx936HWu/rzuvfdeu9u5ubmsXr2au+66Cy8vryK/g9zcXDZv3mzLuGPHDiZOnMivv/5Kenq6Y2/6ov79+9vdbty4MQD9+vUrcn9KSgqZmZkArFmzBij6mQ0bNgxvb+8in1nLli2Jioqy3TaZTDRs2LDUz+ySsi4/d955p93tFi1akJuby9mzZ0t9rXbt2vHLL78wdepU1q1bR05OjkMZS7Jp0yZycnKKfFaRkZF07969yGdVkr///ps777yToKAgDAYD7u7ujBo1CrPZzMGDB68rW/369fn4449ZvHgx/fv3p1OnTpodpXfKwlywYAFbt25lzZo1PPjgg+zbt4/hw4fbHj9z5gwATz/9NO7u7nY/EydOBOD8+fN28wwNDS3yOqGhoSQnJwNgsVjo2bMn33//PVOmTGH16tX8+eeftj/64hbIsLCw8nnDDriUs7jXrF27tu3xS4KCgopMZzQaS/3DSk5Oxs3Nrcjzr/78kpOTKSws5J133inyO+jbty9w+Xfw7LPP8vrrr7N582b69OlDUFAQt99+O9u2bSvlXVsFBgba3fbw8Ljm/bm5uXbvJSQkxG46nU5n97u/5Ho/M7i+5efq1zMajSVOe7W3336bZ555hsWLF9OtWzcCAwMZNGjQNYffXUtZl6/inDhxgk6dOnHq1CneeustNmzYwNatW3n33XcBx95XSfr160etWrXIzc1l8uTJGAyG657XjXDKo+SNGze2HSXv1q0bZrOZTz75hG+//ZahQ4cSHBwMWP8QBw8eXOw8YmNj7W4nJSUVmSYpKYn69esDsHv3bnbs2MH8+fMZPXq0bZr4+PgSc1bmOMlLf1yJiYlFjp6fPn3a9pmUx+
sUFhaSnJxs9wd99ecXEBCAwWBg5MiRTJo0qdh51alTBwA3NzcmT57M5MmTSUtLY9WqVTz33HP06tWLhISEUkcX3Oh
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# outlier à enlever (dépend des stats desc !)\n",
"outlier_detection(directory_path=\"10\") # mettre 2 si on veut le 1er client non anonyme"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f08c082e-f76f-41f3-9530-3e6700eb74d9",
"metadata": {},
"outputs": [],
"source": [
"# Loop to identify the outliers of each company (and the main non-anonymous customer)\n",
"\n",
"# NOTE(review): nb_compagnie is not assigned in this cell (the line below is commented out);\n",
"# presumably it is defined by an earlier cell - confirm before a Restart & Run All\n",
"# nb_compagnie=['10','11','12','13','14']\n",
"for company_number in nb_compagnie :\n",
" print(f\"outlier for tenant {company_number}\")\n",
" outlier_detection(directory_path=company_number, coupure = 2)"
]
},
{
"cell_type": "code",
"execution_count": null,
2024-03-08 08:44:28 +01:00
"id": "dbe1af6a-79e9-45c7-a810-c6df3bf647f7",
"metadata": {},
"outputs": [],
"source": [
"# Company 10: purchase lines of customer 19521\n",
"# (presumably the outlier reported by outlier_detection - TODO confirm)\n",
"products_purchased_reduced_spectacle.loc[\n",
"    lambda purchases: purchases[\"number_compagny\"].eq(10)\n",
"    & purchases[\"customer_id\"].eq(19521)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20e2b8a2-f31c-42a4-8ea5-7ad67ab66915",
"metadata": {},
"outputs": [],
"source": [
"# Company 11: purchase lines of customer 36\n",
"# Odd: no online sales and a single supplier. More than 9k purchases\n",
"products_purchased_reduced_spectacle.loc[\n",
"    lambda purchases: purchases[\"number_compagny\"].eq(11)\n",
"    & purchases[\"customer_id\"].eq(36)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5dbce57c-d091-4ce2-92f9-1201deb2462e",
"metadata": {},
"outputs": [],
"source": [
"# Company 12: purchase lines of customer 1706757\n",
"products_purchased_reduced_spectacle.loc[\n",
"    lambda purchases: purchases[\"number_compagny\"].eq(12)\n",
"    & purchases[\"customer_id\"].eq(1706757)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a243b57-19da-4e29-a53d-bb8d03e2ab77",
"metadata": {},
"outputs": [],
"source": [
"# Company 13: purchase lines of customer 8422\n",
"products_purchased_reduced_spectacle.loc[\n",
"    lambda purchases: purchases[\"number_compagny\"].eq(13)\n",
"    & purchases[\"customer_id\"].eq(8422)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d9b01bc-9584-4882-bd06-7de8acb8a88f",
"metadata": {},
"outputs": [],
"source": [
"# Company 14: purchase lines of customer 6354\n",
"# Is this really an outlier? Bought almost 3k tickets, over 96 purchases\n",
"products_purchased_reduced_spectacle.loc[\n",
"    lambda purchases: purchases[\"number_compagny\"].eq(14)\n",
"    & purchases[\"customer_id\"].eq(6354)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "033c1e00-52bd-4651-b893-57bda531760e",
"metadata": {},
"outputs": [],
"source": [
"# Checks in the customerplus table (outlier status is uncertain for companies 11 and 14)\n",
"\n",
"# Company 11, customer 36\n",
"customerplus_clean_spectacle.loc[\n",
"    lambda customers: customers[\"customer_id\"].eq(36)\n",
"    & customers[\"number_compagny\"].eq(11)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "28ac8cda-32fa-4fb7-a75b-e1cc24871c39",
"metadata": {},
"outputs": [],
"source": [
"# Customer-table row of customer 6354 for company 14\n",
"customerplus_clean_spectacle.loc[\n",
"    lambda customers: customers[\"customer_id\"].eq(6354)\n",
"    & customers[\"number_compagny\"].eq(14)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3faea297-2cc5-4704-af85-77d95f600cc1",
"metadata": {},
"outputs": [],
"source": [
"# Customer-table row of customer 8422 for company 13\n",
"customerplus_clean_spectacle.loc[\n",
"    lambda customers: customers[\"customer_id\"].eq(8422)\n",
"    & customers[\"number_compagny\"].eq(13)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b165ea79-347b-46fb-8217-635d9e888c65",
"metadata": {},
"outputs": [],
"source": [
"# Customer-table row of customer 19521 for company 10\n",
"customerplus_clean_spectacle.loc[\n",
"    lambda customers: customers[\"customer_id\"].eq(19521)\n",
"    & customers[\"number_compagny\"].eq(10)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "282b0a96-5e78-48aa-9c2c-7d00d3907add",
"metadata": {},
"outputs": [],
"source": [
"# List the columns available in the customer table\n",
"customerplus_clean_spectacle.columns"
2024-03-03 09:32:45 +01:00
]
},
2024-03-08 08:44:28 +01:00
{
"cell_type": "code",
"execution_count": null,
"id": "4918db6e-249b-412e-b646-9a6686989b79",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e866edce-f4bc-4627-89d3-3ec7d9ef26e3",
"metadata": {},
"outputs": [],
"source": []
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "markdown",
"id": "42f8171c-e80d-4faa-b278-21fcbe3b242c",
"metadata": {},
"source": [
"### 1. customerplus_clean"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "47f98721-53dd-4f8f-85ac-88043ee8d967",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>14</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>18441</td>\n",
" <td>11139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9231</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9870</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity tenant_id \\\n",
"0 821538 139 NaN NaN 0 875 \n",
"1 809126 1063 NaN NaN 0 875 \n",
"2 11005 1063 NaN NaN 0 875 \n",
"3 17663 12731 NaN NaN 0 875 \n",
"4 38100 12395 NaN NaN 0 875 \n",
"5 307036 139 NaN NaN 0 875 \n",
"6 2946 1063 NaN NaN 0 875 \n",
"7 18441 11139 NaN NaN 0 875 \n",
"8 9231 139 NaN NaN 0 875 \n",
"9 9870 139 NaN NaN 0 875 \n",
"\n",
" is_partner deleted_at gender is_email_true ... purchase_count \\\n",
"0 False NaN 2 True ... 0 \n",
"1 False NaN 2 True ... 0 \n",
"2 False NaN 2 False ... 14 \n",
"3 False NaN 0 False ... 1 \n",
"4 False NaN 0 True ... 1 \n",
"5 False NaN 2 True ... 1 \n",
"6 False NaN 2 False ... 8 \n",
"7 False NaN 2 False ... 3 \n",
"8 False NaN 0 True ... 1 \n",
"9 False NaN 2 True ... 1 \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"0 NaN NaN other 0 0 \n",
"1 NaN fr other 0 0 \n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"5 NaN NaN other 0 0 \n",
"6 NaN fr other 0 0 \n",
"7 NaN fr other 0 0 \n",
"8 NaN NaN female 1 0 \n",
"9 NaN NaN other 0 0 \n",
"\n",
" gender_other country_fr has_tags number_compagny \n",
"0 1 NaN 0 10 \n",
"1 1 1.0 0 10 \n",
"2 1 1.0 0 10 \n",
"3 0 1.0 0 10 \n",
"4 0 1.0 0 10 \n",
"5 1 NaN 0 10 \n",
"6 1 1.0 0 10 \n",
"7 1 1.0 0 10 \n",
"8 0 NaN 0 10 \n",
"9 1 NaN 0 10 \n",
"\n",
"[10 rows x 29 columns]"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview of the table (first 10 rows)\n",
"customerplus_clean_spectacle.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "738e063b-f84e-4a00-b35d-6d1d657e3c09",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 1523688\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"street_id 0\n",
"structure_id 1460624\n",
"mcp_contact_id 729167\n",
"fidelity 0\n",
"tenant_id 0\n",
"is_partner 0\n",
"deleted_at 1523688\n",
"gender 0\n",
"is_email_true 0\n",
"opt_in 0\n",
"last_buying_date 762879\n",
"max_price 762879\n",
"ticket_sum 0\n",
"average_price 667328\n",
"average_purchase_delay 762915\n",
"average_price_basket 762915\n",
"average_ticket_basket 762915\n",
"total_price 95551\n",
"purchase_count 0\n",
"first_buying_date 762879\n",
"country 429486\n",
"gender_label 0\n",
"gender_female 0\n",
"gender_male 0\n",
"gender_other 0\n",
"country_fr 429486\n",
"has_tags 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Total number of rows, then number of missing values (NaN) per column\n",
"print(\"Nombre de lignes de la table : \",customerplus_clean_spectacle.shape[0])\n",
"customerplus_clean_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 148,
"id": "296e51c5-30ae-4ade-ba3d-4ba4981a8758",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>customer_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>45264</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>35313</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>216105</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>388731</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>101642</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny customer_id\n",
"0 10 45264\n",
"1 11 35313\n",
"2 12 216105\n",
"3 13 388731\n",
"4 14 101642"
]
},
"execution_count": 148,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of customers per company: only customers who actually purchased\n",
"# (purchase_count > 0), not those merely targeted by a campaign.\n",
"# Note: the number of customers varies a lot between companies, from 35k to 389k.\n",
"company_nb_clients = (\n",
"    customerplus_clean_spectacle\n",
"    .loc[lambda customers: customers[\"purchase_count\"].gt(0)]\n",
"    .groupby(\"number_compagny\")[\"customer_id\"]\n",
"    .count()\n",
"    .reset_index()\n",
")\n",
"company_nb_clients"
]
},
{
"cell_type": "code",
"execution_count": 151,
"id": "5845aedf-78ca-4d3d-ad61-3561d4fc1886",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHFCAYAAAAUpjivAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABREUlEQVR4nO3dd1RU1/428OdIGYqAAsqAIqBiC2LDqBhFFFRi14jGXEssMdZLwFhijJir2BL7VVOMNYrJVbyxiw1jsCBK7InmYoegiCCIQ9vvH76cnyOgDAwOHJ/PWrOWZ58953xnDwyP+5SRhBACRERERApVydAFEBEREZUlhh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGHSIiIlI0hh0iIiJSNIYdIiIiUjSGnTK2bt06SJIEMzMz3Lx5s8D6Dh06wMPDwwCVAcOGDUPlypUNsu9XkSQJoaGhr3WfHTp0QIcOHV57HXv27Hltr9UQ4+rq6oru3bu/1n1S+Xfjxg1IkoR169aV+b4M8XNvCPfu3UNoaCji4uLKdD+v873TF4ad10Sj0eDzzz83dBmkoxMnTmDkyJFluo89e/Zg1qxZZboPovLG0dERJ06cQLdu3QxdimLcu3cPs2bNKvOwUxEx7LwmXbt2xebNm/H7778buhS9EEIgMzPT0GWUudatW6NmzZqGLoNIcVQqFVq3bo1q1aoZuhR6AzDsvCaTJ0+GnZ0dpkyZ8sq+T58+xbRp0+Dm5gZTU1PUqFED48aNw6NHj7T65R8e2LVrF5o1awZzc3M0bNgQu3btAvDsEFrDhg1haWmJt99+G2fOnCl0f5cuXUKnTp1gaWmJatWqYfz48Xjy5IlWH0mSMH78eKxevRoNGzaESqXC+vXrAQDXrl3DoEGDUL16dahUKjRs2BD//ve/izUuaWlpGDVqFOzs7FC5cmV07doVf/75Z6F9S7OfvLw8LF++HE2bNoW5uTmqVKmC1q1b45dffnnp8wqb/k5MTMTo0aNRs2ZNmJqaws3NDbNmzUJOTo7cJ3+a96uvvsKiRYvg5uaGypUro02bNjh58qTcb9iwYfJrkCRJfty4cQMA8PPPP6NVq1awsbGBhYUFateujeHDh7/y9ZbHcd23bx+aN28Oc3NzNGjQAD/88IPW+vv372Ps2LFo1KgRKleujOrVq6Njx4749ddfC2zr3r17CAwMhJWVFWxsbDBgwACcPHmywNR6YYcmgWfj7urqqtWWlZWF2bNno0GDBlCpVKhWrRo+/PBD3L9/v1hjcerUKfTo0QN2dnYwMzNDnTp1EBQUpNXn+PHj6NSpE6ysrGBhYQFvb2/s3r1bq0/+oe/Dhw/L76G1tTWGDBmCjIwMJCYmIjAwEFWqVIGjoyMmTZqE7Oxs+fn5P3sLFizAnDlzUKtWLZiZmcHLywuHDh3S2tf169fx4Ycfwt3dHRYWFqhRowZ69OiBCxcuFHh9ly5dQufOnWFhYYFq1aph3Lhx2L17NyRJwtGjR7XG3MPDAzExMWjXrp38cztv3jzk5eUVqPPFQyEV4fPkVb+XR48ehSRJ2LRpE4KDg6FWq2Fubg4fHx+cO3euwPbOnDmDnj17wtbWFmZmZmjWrBl++umnAv3u3r2Ljz76CM7OzjA1NYWTkxPee+89/P333zh69ChatmwJAPjwww/lz5L8z68zZ85g4MCBcHV1hbm5OVxdXfH+++8XenrFy/bzMqUZ0zInqEytXbtWABAxMTFi6dKlAoA4dOiQvN7Hx0e89dZb8nJeXp7o0qWLMDY2FjNmzBAHDhwQX331lbC0tBTNmjUTT58+lfu6uLiImjVrCg8PD7FlyxaxZ88e0apVK2FiYiK++OIL0bZtW7F9+3YREREh6tWrJxwcHMSTJ0/k5w8dOlSYmpqKWrVqiTlz5ogDBw6I0NBQYWxsLLp37671OgCIGjVqCE9PT7F582Zx+PBhcfHiRXHp0i
VhY2MjGjduLDZs2CAOHDggQkJCRKVKlURoaOhLxyYvL0/4+voKlUol73/mzJmidu3aAoCYOXOm3Lc0+xFCiMGDBwtJksTIkSPFf//7X7F3714xZ84csXTpUq33wsfHp8Drfr6OhIQE4ezsLFxcXMQ333wjDh48KP71r38JlUolhg0bJveLj48XAISrq6vo2rWr2LFjh9ixY4do3LixqFq1qnj06JEQQojr16+L9957TwAQJ06ckB9Pnz4V0dHRQpIkMXDgQLFnzx5x+PBhsXbtWjF48OAKNa75P6eNGjUSGzZsEPv37xf9+/cXAERUVJTc7+rVq2LMmDEiPDxcHD16VOzatUuMGDFCVKpUSRw5ckTu9+TJE9GwYUNhY2Mjli9fLvbv3y8mTpwoatWqJQCItWvXvvQ9FeLZz76Li4u8nJubK7p27SosLS3FrFmzRGRkpPj+++9FjRo1RKNGjbR+bwqzb98+YWJiIjw9PcW6devE4cOHxQ8//CAGDhwo9zl69KgwMTERLVq0EFu3bhU7duwQnTt3FpIkifDwcLlf/meGm5ubCAkJEQcOHBDz588XRkZG4v333xfNmzcXs2fPFpGRkWLKlCkCgPj666/l5+f/7Dk7O4t33nlHbNu2Tfz888+iZcuWwsTERERHR8t9o6KiREhIiPjPf/4joqKiREREhOjdu7cwNzcXV69elfvdu3dP2NnZiVq1aol169aJPXv2iMGDBwtXV1cBQOv98fHxEXZ2dsLd3V2sXr1aREZGirFjxwoAYv369QXqfP79qgifJ8X5vTxy5Ij8HvTq1Uvs3LlTbNq0SdStW1dYW1uLv/76S+57+PBhYWpqKtq1aye2bt0q9u3bJ4YNG1ZgbO7cuSMcHR2Fvb29WLRokTh48KDYunWrGD58uLhy5YpITU2Vf3Y+//xz+bPk9u3bQgghfv75Z/HFF1+IiIgIERUVJcLDw4WPj4+oVq2auH//frH3Uxbv3evAsFPGng87Go1G1K5dW3h5eYm8vDwhRMGws2/fPgFALFiwQGs7W7duFQDEt99+K7e5uLgIc3NzcefOHbktLi5OABCOjo4iIyNDbt+xY4cAIH755Re5bejQoQKA1h8mIYSYM2eOACCOHz8utwEQNjY24uHDh1p9u3TpImrWrClSU1O12sePHy/MzMwK9H/e3r17X7r/5z+cSrOfY8eOCQBi+vTpRfYRonhhZ/To0aJy5cri5s2bWv2++uorAUBcunRJCPF/HwaNGzcWOTk5cr/Tp08LAGLLli1y27hx40Rh/+/I32Z+MCqu8jauLi4uwszMTGvMMjMzha2trRg9enSRz8vJyRHZ2dmiU6dOok+fPnL7qlWrBADx3//+V6v/qFGjShx2tmzZIgCIbdu2afWLiYkRAMTKlStf+hrr1Kkj6tSpIzIzM4vs07p1a1G9enXx+PFjrdfo4eEhatasKX8m5H9mTJgwQev5vXv3FgDEokWLtNqbNm0qmjdvLi/n/+w5OTlp1ZOWliZsbW2Fn59fkTXm5OSIrKws4e7uLj755BO5/dNPPxWSJMk/3/m6dOlSaNgBIE6dOqXVt1GjRqJLly4F6nz+/aoInyfF+b3MDzvNmzeX31chhLhx44YwMTERI0eOlNsaNGggmjVrJrKzs7W20b17d+Ho6Chyc3OFEEIMHz5cmJiYiMuXLxe53/yf1+fHtCg5OTkiPT1dWFpaao1Zcfaj7/fudeBhrNfI1NQUs2fPxpkzZwqdogSAw4cPA3g2zf68/v37w9LSssA0dNOmTVGjRg15uWHDhgCeTSVbWFgUaC9syvKDDz7QWh40aBAA4MiRI1rtHTt2RNWqVeXlp0+f4tChQ+jTpw8sLCyQk5MjP9599108ffpU65DNi/K3X9T+9bWfvXv3AgDGjRtXZJ/i2rVrF3x9feHk5KRVR0BAAAAgKipKq3+3bt1gZGQkL3t6egIo/H14Uf6UdGBgIH766SfcvXu3WDWWx3Ft2r
QpatWqJS+bmZmhXr16BcZh9erVaN68OczMzGBsbAwTExMcOnQIV65c0Xp9VlZW6Nmz50tfny527dqFKlWqoEePHlr
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the number of purchasing customers per company (in thousands)\n",
"fig, ax = plt.subplots()\n",
"ax.bar(company_nb_clients[\"number_compagny\"], company_nb_clients[\"customer_id\"] / 1000)\n",
"\n",
"# Axis labels and title\n",
"ax.set_xlabel('Company')\n",
"ax.set_ylabel(\"Nombre de clients (milliers)\")\n",
"ax.set_title(\"Nombre de clients de chaque compagnie de spectacle\")\n",
"\n",
"# Render the figure\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-03-08 10:30:12 +01:00
"execution_count": 37,
"id": "884a33d0-c275-4ab4-ab1f-8b53e563fb95",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
2024-03-08 10:30:12 +01:00
"name": "stdout",
"output_type": "stream",
"text": [
" number_compagny already_purchased customer_id\n",
"0 10 True 45264\n",
"1 11 True 35313\n",
"2 12 True 216105\n",
"3 13 True 388731\n",
"4 14 True 101642\n",
" number_compagny already_purchased customer_id\n",
"0 10 False 53530\n",
"1 11 False 35994\n",
"2 12 False 26620\n",
"3 13 False 379005\n",
"4 14 False 241484\n"
]
}
],
"source": [
"# Data for a second customer bar chart: total size of each company's customer base,\n",
"# split between customers who have already purchased and those who have not.\n",
"\n",
"# Purchase flag (adds a column to customerplus_clean_spectacle in place)\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"].gt(0)\n",
"\n",
"nb_customers_purchasing_spectacle = (\n",
"    customerplus_clean_spectacle\n",
"    .loc[lambda customers: customers[\"already_purchased\"]]\n",
"    .groupby([\"number_compagny\", \"already_purchased\"])[\"customer_id\"]\n",
"    .count()\n",
"    .reset_index()\n",
")\n",
"nb_customers_no_purchase_spectacle = (\n",
"    customerplus_clean_spectacle\n",
"    .loc[lambda customers: ~customers[\"already_purchased\"]]\n",
"    .groupby([\"number_compagny\", \"already_purchased\"])[\"customer_id\"]\n",
"    .count()\n",
"    .reset_index()\n",
")\n",
"\n",
"print(nb_customers_purchasing_spectacle)\n",
"print(nb_customers_no_purchase_spectacle)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "41c9fb5a-708b-4f85-9918-00337151f155",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAyYAAAHGCAYAAACB7J+ZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB1nUlEQVR4nO3dd1gU1/s28HulLL0jC4iCCjbARjRKIliwt6ixxhJ7D4rBGKOABQOxa9Svxt41lkRjw0ZU7Ig9RhNETSRYQRGp5/3Dl/m50lYps+L9ua69dGbOnnnOzOzsPpyZMwohhAAREREREZGMysgdABERERERERMTIiIiIiKSHRMTIiIiIiKSHRMTIiIiIiKSHRMTIiIiIiKSHRMTIiIiIiKSHRMTIiIiIiKSHRMTIiIiIiKSHRMTIiIiIiKSHRMTInpn//zzD8qWLYuAgAC5Q8lXr169UKVKFTx8+PCd3p+ZmQkfHx988sknePnyZRFHVzzel31DRESU7a0Sk1WrVkGhUMDAwABxcXE5lvv6+sLd3b3Ignsb/fr1g4mJiSzrLohCoUBwcHCJrtPX1xe+vr4lHseePXtKvK1ye/HiBYKDg3H06FFZ1u/s7Iy2bdsWWX2aticjIwPdu3dHkyZNMHPmzCJbf1FbsmQJDh06hH379sHGxkZt2aJFi7Bq1aoC65g4cSISEhLw66+/wsDAoEjiCg4OhkKhUJun6b68ffs2FApFnrG/L/umqBw9ehQKhUK2z2Bxc3Z2Rr9+/aTpgvZ/aSbH9ym9vX79+sHZ2VnuMADk/nuotAoNDcXOnTuLfT1vnpOK0jv1mKSmpuK7774r6liomJ08eRIDBw4s1nXs2bMHISEhxboObfPixQuEhISUmh9FmrZnwoQJ0NXVxZo1a3L8wNYW0dHRmDRpEvbs2QMXF5ccyzVJTH777TesXbsW+/btg5WVVZHFNnDgQJw8ebLI6nvd+7BvSHM7duzApEmT5A6DSGOTJk3Cjh075A7jg1NSiUlx0n2XN7Vs2RIbNmzAuHHjULNmzaKOqcQJIfDy5UsYGhrKHUqx+vjjj+UOgUqRH374Qe4QClSnTh08ePCgUHW0adMG//zzTxFF9H/KlSuHcuXKFXm9wPuxb/KSkpICAwMDJlSvqV27ttwhlJj09HQoFAro6r7TzxPSEpUqVZI7BHpPvVOPSWBgIKytrTF+/PgCy758+RITJkyAi4sL9PX14ejoiBEjRuDp06dq5bIvYdi9ezdq164NQ0NDVKtWDbt37wbw6jKyatWqwdjYGPXq1cO5c+dyXd/Vq1fRtGlTGBsbw9bWFiNHjsSLFy/UyigUCowcORJLlixBtWrVoFQqsXr1agDAzZs30bNnT5QtWxZKpRLVqlXDjz/+qNF2SUpKwqBBg2BtbQ0TExO0bNkSf/75Z65lC7OerKwsLFiwALVq1YKhoSEsLCzw8ccf49dff833fbl1gcfHx2PIkCEoV64c9PX14eLigpCQEGRkZEhlsi8bmDlzJmbPng0XFxeYmJigQYMGOHXqlFSuX79+UhsUCoX0un37NgBg69atqF+/PszNzWFkZISKFSuif//+Bbb3xx9/RKNGjVC2bFkYGxvDw8MD4eHhSE9Pl8pMnToVurq6uHv3bo739+/fH9bW1tK9AZs3b0bz5s1hb28vHWfffPMNkpOT1d6XfXngrVu30Lp1a5iYmMDJyQkBAQFITU2Vto2trS0AICQkRGpzfl2cL1++REBAAGrVqgVzc3NYWVmhQYMG+OWXX3KUfZt9vW/fPtSpUweGhoaoWrUqVqxYkaNMQftbk/YU5tjV9HyQ1+UamnYfp6WlYdq0aahatSqUSiVsbW3x5ZdfqiUpzs7OuHr1KiIjI6V2vn7pQVJSEsaNG6cWq7+/f47jJC/79u1D06ZNpeO9WrVqmDFjhrQ8t0u5su3YsQOenp4wMDBAxYoVMX/+fI3Wqcm+ycrKwrRp01ClSh
XpmPL09MS8efPyrTv7cql169Zh7NixUKlUMDQ0hI+PDy5cuKBW9ty5c+jevTucnZ1haGgIZ2dn9OjRI8clwNmXBx84cAD9+/eHra0tjIyMpM9Xbv744w+0bNkSRkZGsLGxwdChQ/Hs2bNcyx48eBBNmzaFmZkZjIyM4O3tjUOHDqmVefDgAQYPHgwnJyfpWPH29sbBgwfz3R7Z++/SpUv4/PPPpc/y2LFjkZGRgRs3bqBly5YwNTWFs7MzwsPD1d7/NucBTY77wrbjwoUL6NSpE8zMzGBubo4vvvgiR1KflZWF8PBw6XNVtmxZ9OnTB/fu3dMo3jcvqck+ptauXYuAgAA4OjpCqVTi1q1b+cb8Jk2+xwBg8eLFqFmzJkxMTGBqaoqqVavi22+/LbD+1NRUTJkyBdWqVYOBgQGsra3RuHFjREVFSWVK6rdO9veSJr91NPnuBF79cTY0NBQVKlSAgYEBvLy8EBERkef+2rhxIyZOnAgHBweYmZmhWbNmuHHjRo4437yUSwiBRYsWSd9nlpaW6NKlC/7++2+1chcuXEDbtm2l85iDgwPatGmT4zh7kxAC4eHhUjvq1KmDvXv35lq2MOd3TeLL/p35v//9D25ublAqlahevTo2bdqUoz5Nj9+CjkOFQoHk5GSsXr1a+k7L3n8PHjzA8OHDUb16dZiYmKBs2bJo0qQJjh07liMeTY73ot6masRbWLlypQAgzp49K+bNmycAiEOHDknLfXx8RI0aNaTprKws0aJFC6GrqysmTZokDhw4IGbOnCmMjY1F7dq1xcuXL6WyFSpUEOXKlRPu7u5i48aNYs+ePaJ+/fpCT09PTJ48WXh7e4vt27eLHTt2CDc3N2FnZydevHghvb9v375CX19flC9fXkyfPl0cOHBABAcHC11dXdG2bVu1dgAQjo6OwtPTU2zYsEEcPnxYXLlyRVy9elWYm5sLDw8PsWbNGnHgwAEREBAgypQpI4KDg/PdNllZWaJx48ZCqVRK6w8KChIVK1YUAERQUJBUtjDrEUKI3r17C4VCIQYOHCh++eUXsXfvXjF9+nQxb948tX3h4+OTo92vx3H//n3h5OQkKlSoIP73v/+JgwcPiqlTpwqlUin69esnlYuNjRUAhLOzs2jZsqXYuXOn2Llzp/Dw8BCWlpbi6dOnQgghbt26Jbp06SIAiJMnT0qvly9fiqioKKFQKET37t3Fnj17xOHDh8XKlStF7969C2zvmDFjxOLFi8W+ffvE4cOHxZw5c4SNjY348ssvpTL//fefUCqVYuLEiWrvffTokTA0NBRff/21NG/q1Klizpw54rfffhNHjx4VS5YsES4uLqJx48Zq780+pqpVqyZmzpwpDh48KCZPniwUCoUICQkRQgjx8uVLsW/fPgFADBgwQGrzrVu38mzP06dPRb9+/cTatWvF4cOHxb59+8S4ceNEmTJlxOrVq9XKarKvsz871atXF2vWrBH79+8Xn3/+uQAgIiMj32p/F9Sewn5GND0fvHmsvt7Wvn375ruezMxM0bJlS2FsbCxCQkJERESE+Omnn4Sjo6OoXr26dN6Ijo4WFStWFLVr15baGR0dLYQQIjk5WdSqVUvY2NiI2bNni4MHD4p58+YJc3Nz0aRJE5GVlZVvDD/99JNQKBTC19dXbNiwQRw8eFAsWrRIDB8+XCoTFBQk3jwFV6hQQTg6Oory5cuLFStWiD179ohevXoJAOKHH36QymV/JleuXCnN03TfzJgxQ+jo6IigoCBx6NAhsW/fPjF37twC99+RI0cEAOHk5CQ6dOggdu3aJdatWycqV64szMzMxF9//SWV3bp1q5g8ebLYsWOHiIyMFJs2bRI+Pj7C1tZWPHjwQCqX/Z3i6OgoBg8eLPbu3St+/vlnkZGRkWsM8fHxomzZssLR0VGsXLlS2j7ly5cXAMSRI0eksmvXrhUKhUJ07NhRbN++XezatUu0bdtW6OjoiIMHD0rlWrRoIWxtbc
XSpUvF0aNHxc6dO8XkyZPFpk2b8t0e2fuvSpUqYurUqSIiIkIEBgYKAGLkyJGiatWqYv78+SIiIkJ8+eWXAoDYtm2
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Stacked bar chart: customers who purchased vs. customers who did not, per company.\n",
"# matplotlib applies `bottom` positionally, so both series are first aligned on the\n",
"# company number: each stacked segment then sits on the matching company even if a\n",
"# company is missing from one of the two tables (its count is then taken as 0).\n",
"customers_by_status = pd.concat(\n",
"    [\n",
"        nb_customers_purchasing_spectacle.set_index(\"number_compagny\")[\"customer_id\"].rename(\"has purchased\"),\n",
"        nb_customers_no_purchase_spectacle.set_index(\"number_compagny\")[\"customer_id\"].rename(\"has not purchased\"),\n",
"    ],\n",
"    axis=1,\n",
").fillna(0).sort_index() / 1000  # counts expressed in thousands\n",
"\n",
"plt.bar(customers_by_status.index, customers_by_status[\"has purchased\"], label = \"has purchased\")\n",
"plt.bar(customers_by_status.index, customers_by_status[\"has not purchased\"],\n",
"        bottom = customers_by_status[\"has purchased\"], label = \"has not purchased\")\n",
"\n",
"\n",
"# Axis labels, title and legend\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Nombre de clients (en milliers)\")\n",
"plt.title(\"Nombre de clients ayant acheté ou été ciblés par des mails pour les compagnies de spectacle\")\n",
"plt.legend()\n",
"\n",
"# Render the figure\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 152,
"id": "fd11c547-7128-4ef6-ad7b-4b7c2a30cd9e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
2024-03-03 09:32:45 +01:00
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>max_price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>13823.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>108.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>5000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>3180.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>456.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny max_price\n",
"0 10 13823.0\n",
"1 11 108.0\n",
"2 12 5000.0\n",
"3 13 3180.0\n",
"4 14 456.0"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Highest price paid by a customer for each company - highly variable: from 108 to 13823\n",
"\n",
"company_max_price = (\n",
"    customerplus_clean_spectacle\n",
"    .groupby(\"number_compagny\")[\"max_price\"]\n",
"    .agg(\"max\")\n",
"    .reset_index()\n",
")\n",
"company_max_price"
]
},
{
"cell_type": "code",
"execution_count": 153,
"id": "b8f8f162-4153-4cfe-bfaa-d981d414510d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAAHGCAYAAAC7NbWGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABc0UlEQVR4nO3dd1gUV/828HulCQgrRcBVBCyxgT0iGINGURNKNBoLilhiLyFifRJjSWKPGOUxamKJJWJMwMdYUGxEI1hQYkMTE6yIGMFFLIDLef/wx7yuC8joIgven+vaS/fMmZnvDLvLzZmyCiGEABERERGVWKWyLoCIiIiovGGAIiIiIpKJAYqIiIhIJgYoIiIiIpkYoIiIiIhkYoAiIiIikokBioiIiEgmBigiIiIimRigiIiIiGRigCIiekH/+c9/4OjoiL///rusSyGiV4wBSk/Wrl0LhUIhPYyNjVGzZk0MGjQIN27cKNEyBg4cCFdX19IttAwV7KPLly+/8nVfvnwZCoUCa9eufaH5Dx48CIVCgYMHD+q1rrKwc+dOzJgxo8zWX/A6OHHiRJnVoA+7du1CREQEtm/fjjp16pR1OVSKFArFK3nPuLq6YuDAgaW+nrL24MEDzJgx45V8npbmz44BSs/WrFmD+Ph4xMbGYujQodi0aRPatWuH+/fvP3feadOmITo6+hVUWTb8/PwQHx+P6tWrl3Upr7WdO3di5syZZV1GuXbt2jUMGjQImzdvxptvvlnW5VApi4+Px0cffVTWZVQYDx48wMyZM8v9H6TGZV1ARePu7o5WrVoBADp06ACNRoMvvvgCW7duRb9+/Qqd58GDB7CwsKjwf8VWq1YN1apVK+syiCQF7z25nJ2dkZaWVgoVvbwX3SYqWps2bcq6BDJAHIEqZQVvvCtXrgB4cpiuSpUqOHPmDDp37gwrKyt07NhRmvb0IbzIyEgoFApERERoLXP69OkwMjJCbGxsset2dXWFv78/tm/fjubNm8Pc3BwNGzbE9u3bATw5lNKwYUNYWlqidevWOodUTpw4gT59+sDV1RXm5uZwdXVF3759pW0BACEE3nvvPdjZ2eHq1atS+4MHD9C4cWM0bNhQGn0r7BBe+/bt4e7ujvj4eHh7e0vrWbNmDQBgx44daNGiBSwsLODh4YGYmBitGi9duoRBgwahXr16sLCwQI0aNRAQEIAzZ84Uu2+Kc+HCBXTt2hUWFhawt7fHiBEjcO/evUL77t27Fx07doS1tTUsLCzQtm1b7Nu3r9jl3759G6amppg2bVqh61YoFFiyZInUlpaWhuHDh6NmzZowNTWFm5sbZs6cicePH0t9Cg5RLly4EIsWLYKbmxuqVKkCLy8vJCQkSP0GDhyI//73vwCgdci54GcihMCyZcvQrFkzmJubw8bGBj179sQ///xTon13+PBhdOzYEVZWVrCwsIC3tzd27NhRaN/MzEwMGjQItra2sLS0REBAgM56Tp06BX9/fzg4OMDMzAwqlQp+fn64fv261KekNRe81n777Td4e3vDwsICgwcPRrdu3eDi4oL8/HydGj09PdGiRQvZ6yrMjBkzoFAocOrUKXzwwQewtraGUqlE//79cfv2ba2+mzdvRufOnVG9enXpfTtlyhSdkeziPk+KcuHCBfTt2xeOjo4wMzNDrVq1MGDAAOTk5Eh9zp49i/fffx82NjaoXLkymjVrhh9++EFrOQWHtX/88UdMnjwZ1atXR5UqVRAQEIBbt27h3r17GDZsGOzt7WFvb49BgwYhOztbaxkKhQJjxozBihUr8MYbb8DMzAyNGjVCZGSkVr/bt29j1KhRaNSoEapUqQIHBwe88847OHTokM72Xb9+HT179oSVlRWqVq2Kfv364fjx4zqH8Av23aVLl/Dee++hSpUqcHZ2RlhYmNa+KKjz2cNAJXlfFiUvLw+TJk2Ck5MTLCws8NZbb+HYsWOF9n2Z9ezfvx/t27eHnZ
0dzM3NUatWLfTo0QMPHjwA8P8/N+bPn4+vvvoKtWrVQuXKldGqVatCP8f++usvBAUFSe/Hhg0bSp8nT7t79y7CwsJQu3ZtmJmZwcHBAe+99x4uXLiAy5cvS39Iz5w5U/oMKjh0Keczvbj1FOdl9qkWQXqxZs0aAUAcP35cq/2bb74RAMTKlSuFEEKEhIQIExMT4erqKubMmSP27dsndu/eLU1zcXHRmn/EiBHC1NRUWu6+fftEpUqVxGefffbcmlxcXETNmjWFu7u72LRpk9i5c6fw9PQUJiYm4vPPPxdt27YVUVFRIjo6WrzxxhvC0dFRPHjwQJp/y5Yt4vPPPxfR0dEiLi5OREZGCh8fH1GtWjVx+/Ztqd+///4ratasKTw9PUVubq60Lebm5uL06dM6+yglJUVq8/HxEXZ2dqJ+/fpi1apVYvfu3cLf318AEDNnzhQeHh5S7W3atBFmZmbixo0b0vxxcXEiLCxM/PzzzyIuLk5ER0eLbt26CXNzc3HhwgWpX0pKigAg1qxZU+w+S0tLEw4ODqJGjRpizZo1YufOnaJfv36iVq1aAoA4cOCA1Hf9+vVCoVCIbt26iaioKPHrr78Kf39/YWRkJPbu3Vvserp37y6cnZ2FRqPRap80aZIwNTUV//77rxBCiJs3bwpnZ2fh4uIiVqxYIfbu3Su++OILYWZmJgYOHKizfa6urqJr165i69atYuvWrcLDw0PY2NiIu3fvCiGEuHTpkujZs6cAIOLj46XHo0ePhBBCDB06VJiYmIiwsDARExMjfvzxR9GgQQPh6Ogo0tLSit2mgwcPChMTE9GyZUuxefNmsXXrVtG5c2ehUChEZGSk1K/gdeDs7CwGDx4sdu3aJVauXCkcHByEs7OzyMzMFEIIkZ2dLezs7ESrVq3ETz/9JOLi4sTmzZvFiBEjxPnz56XllbRmHx8fYWtrK5ydncXSpUvFgQMHRFxcnPjf//4nAIjY2Fit7UlOThYAxJIlS2SvqzDTp08XAISLi4uYOHGi2L17t1i0aJGwtLQUzZs3l947QgjxxRdfiPDwcLFjxw5x8OBBsXz5cuHm5iY6dOigtcziPk8Kk5SUJKpUqSJcXV3F8uXLxb59+8SGDRtEr169RFZWlhBCiAsXLggrKytRp04dsW7dOrFjxw7Rt29fAUDMmzdPWtaBAwek7Rk4cKCIiYkRy5cvF1WqVBEdOnQQvr6+YsKECWLPnj1i3rx5wsjISIwdO1arnoLXQaNGjcSmTZvEtm3bRNeuXQUAsWXLFqnfhQsXxMiRI0VkZKQ4ePCg2L59uxgyZIioVKmS1nsyOztb1K1bV9ja2or//ve/Yvfu3eKTTz4Rbm5uOu//kJAQYWpqKho2bCgWLlwo9u7dKz7//HOhUCjEzJkzdeqcPn269Lyk78uihISECIVCISZOnCj27NkjFi1aJGrUqCGsra1FSEiIXtaTkpIiKleuLHx9fcXWrVvFwYMHxcaNG0VwcLD0Hiv43HB2dhZvvfWW+OWXX8SWLVvEm2++KUxMTMSRI0ek5Z07d04olUrh4eEh1q1bJ/bs2SPCwsJEpUqVxIwZM6R+WVlZonHjxsLS0lLMmjVL7N69W/zyyy/i448/Fvv37xePHj0SMTExAoAYMmSI9Bl06dIlIUTJP9Oft57S+tk9jQFKTwp+KSQkJIi8vDxx7949sX37dlGtWjVhZWUlfbiGhIQIAGL16tU6yygsQD169Eg0b95cuLm5ifPnzwtHR0fh4+MjHj9+/NyaXFxchLm5ubh+/brUlpSUJACI6tWri/v370vtW7duFQDEtm3bilze48ePRXZ2trC0tBTffPON1rTDhw8LY2NjERoaKlavXi0AiO+//77QffRsgAIgTpw4IbXduXNHGBkZCXNzc62wVFD707/QCqsxNzdX1KtXT3zyySdSe0kD1OTJk4VCoRBJSUla7b6+vloB6v79+8LW1lYEBARo9dNoNKJp06aidevWxa
5n27ZtAoDYs2ePVu0qlUr06NFDahs+fLioUqWKuHLlitb8CxcuFADEuXPntLbPw8ND67Vx7NgxAUBs2rRJahs9erQ
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the highest observed price per company\n",
"fig, ax = plt.subplots()\n",
"ax.bar(company_max_price[\"number_compagny\"], company_max_price[\"max_price\"])\n",
"\n",
"# Axis labels and title\n",
"ax.set_xlabel('Company')\n",
"ax.set_ylabel(\"Prix maximal d'un billet vendu\")\n",
"ax.set_title(\"Prix maximal de vente observé par compagnie de spectacle\")\n",
"\n",
"# Render the figure\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "bff23e5d-d7ed-4092-ae3c-5df503e54a6d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 762879.000000\n",
"mean 0.079068\n",
"std 3.969729\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 3334.000000\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: distribution of purchase_count for customers with no first_buying_date\n",
"customerplus_clean_spectacle.loc[\n",
"    customerplus_clean_spectacle[\"first_buying_date\"].isna(), \"purchase_count\"\n",
"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "89466dbd-14d2-4ede-9ca0-b9c32b764e25",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 7.608090e+05\n",
"mean 3.863940e+00\n",
"std 1.685825e+03\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 1.469325e+06\n",
"Name: purchase_count, dtype: float64"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# purchase_count summary restricted to customers that do have a first_buying_date\n",
"customerplus_clean_spectacle.loc[customerplus_clean_spectacle[\"first_buying_date\"].notna(), \"purchase_count\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "5f9feae4-35f4-43b6-adeb-f75773900a2d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>821538</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>809126</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343121</th>\n",
" <td>4667645</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534181.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343122</th>\n",
" <td>4667649</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534177.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343123</th>\n",
" <td>4667660</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534165.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343124</th>\n",
" <td>4667679</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1534132.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>343125</th>\n",
" <td>4667686</td>\n",
" <td>122</td>\n",
" <td>NaN</td>\n",
" <td>1567949.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1523688 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 821538 139 NaN NaN 0 \n",
"1 809126 1063 NaN NaN 0 \n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"343121 4667645 122 NaN 1534181.0 0 \n",
"343122 4667649 122 NaN 1534177.0 0 \n",
"343123 4667660 122 NaN 1534165.0 0 \n",
"343124 4667679 122 NaN 1534132.0 0 \n",
"343125 4667686 122 NaN 1567949.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"0 875 False NaN 2 True ... \n",
"1 875 False NaN 2 True ... \n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"... ... ... ... ... ... ... \n",
"343121 862 False NaN 2 True ... \n",
"343122 862 False NaN 2 True ... \n",
"343123 862 False NaN 0 True ... \n",
"343124 862 False NaN 2 True ... \n",
"343125 862 False NaN 0 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"0 NaN NaN other 0 0 \n",
"1 NaN fr other 0 0 \n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"... ... ... ... ... ... \n",
"343121 NaN NaN other 0 0 \n",
"343122 NaN NaN other 0 0 \n",
"343123 NaN NaN female 1 0 \n",
"343124 NaN NaN other 0 0 \n",
"343125 NaN NaN female 1 0 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"0 1 NaN 0 10 False \n",
"1 1 1.0 0 10 False \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"343121 1 NaN 0 14 False \n",
"343122 1 NaN 0 14 False \n",
"343123 0 NaN 0 14 False \n",
"343124 1 NaN 0 14 False \n",
"343125 0 NaN 0 14 False \n",
"\n",
"[1523688 rows x 30 columns]"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Flag customers with a recorded first purchase date (True when first_buying_date is not NaN)\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"first_buying_date\"].notna()\n",
"customerplus_clean_spectacle"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "cec4f1eb-cec8-409d-8b2c-1e01f1bf81ff",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11005</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>17663</td>\n",
" <td>12731</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>38100</td>\n",
" <td>12395</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>307036</td>\n",
" <td>139</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2946</td>\n",
" <td>1063</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>875</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338933</th>\n",
" <td>3625705</td>\n",
" <td>648752</td>\n",
" <td>NaN</td>\n",
" <td>1253864.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338954</th>\n",
" <td>3627626</td>\n",
" <td>636890</td>\n",
" <td>NaN</td>\n",
" <td>1253887.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338959</th>\n",
" <td>3628124</td>\n",
" <td>653042</td>\n",
" <td>NaN</td>\n",
" <td>1253899.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338986</th>\n",
" <td>3631189</td>\n",
" <td>648423</td>\n",
" <td>NaN</td>\n",
" <td>1253928.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339039</th>\n",
" <td>3635380</td>\n",
" <td>659417</td>\n",
" <td>NaN</td>\n",
" <td>1253975.0</td>\n",
" <td>0</td>\n",
" <td>862</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26246 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"2 11005 1063 NaN NaN 0 \n",
"3 17663 12731 NaN NaN 0 \n",
"4 38100 12395 NaN NaN 0 \n",
"5 307036 139 NaN NaN 0 \n",
"6 2946 1063 NaN NaN 0 \n",
"... ... ... ... ... ... \n",
"338933 3625705 648752 NaN 1253864.0 0 \n",
"338954 3627626 636890 NaN 1253887.0 0 \n",
"338959 3628124 653042 NaN 1253899.0 0 \n",
"338986 3631189 648423 NaN 1253928.0 0 \n",
"339039 3635380 659417 NaN 1253975.0 0 \n",
"\n",
" tenant_id is_partner deleted_at gender is_email_true ... \\\n",
"2 875 False NaN 2 False ... \n",
"3 875 False NaN 0 False ... \n",
"4 875 False NaN 0 True ... \n",
"5 875 False NaN 2 True ... \n",
"6 875 False NaN 2 False ... \n",
"... ... ... ... ... ... ... \n",
"338933 862 False NaN 0 True ... \n",
"338954 862 False NaN 0 True ... \n",
"338959 862 False NaN 0 True ... \n",
"338986 862 False NaN 0 True ... \n",
"339039 862 False NaN 1 True ... \n",
"\n",
" first_buying_date country gender_label gender_female gender_male \\\n",
"2 NaN fr other 0 0 \n",
"3 NaN fr female 1 0 \n",
"4 NaN fr female 1 0 \n",
"5 NaN NaN other 0 0 \n",
"6 NaN fr other 0 0 \n",
"... ... ... ... ... ... \n",
"338933 NaN fr female 1 0 \n",
"338954 NaN fr female 1 0 \n",
"338959 NaN fr female 1 0 \n",
"338986 NaN fr female 1 0 \n",
"339039 NaN fr male 0 1 \n",
"\n",
" gender_other country_fr has_tags number_compagny already_purchased \n",
"2 1 1.0 0 10 False \n",
"3 0 1.0 0 10 False \n",
"4 0 1.0 0 10 False \n",
"5 1 NaN 0 10 False \n",
"6 1 1.0 0 10 False \n",
"... ... ... ... ... ... \n",
"338933 0 1.0 0 14 False \n",
"338954 0 1.0 0 14 False \n",
"338959 0 1.0 0 14 False \n",
"338986 0 1.0 0 14 False \n",
"339039 0 1.0 0 14 False \n",
"\n",
"[26246 rows x 30 columns]"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Caution: some customers have no first purchase date even though purchase_count > 0.\n",
"# A NaN first_buying_date therefore does not prove that the customer never bought anything;\n",
"# the date may simply not have been filled in.\n",
"\n",
"customerplus_clean_spectacle.loc[\n",
"    ~customerplus_clean_spectacle[\"already_purchased\"]\n",
"    & customerplus_clean_spectacle[\"purchase_count\"].gt(0)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "b5904039-a967-47d5-ba13-1b805bcd76ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>first_buying_date</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>has_tags</th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"<p>0 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [customer_id, street_id, structure_id, mcp_contact_id, fidelity, tenant_id, is_partner, deleted_at, gender, is_email_true, opt_in, last_buying_date, max_price, ticket_sum, average_price, average_purchase_delay, average_price_basket, average_ticket_basket, total_price, purchase_count, first_buying_date, country, gender_label, gender_female, gender_male, gender_other, country_fr, has_tags, number_compagny, already_purchased]\n",
"Index: []\n",
"\n",
"[0 rows x 30 columns]"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Conversely, a customer with zero purchases should always have a NaN first purchase date:\n",
"# this filter is expected to return no rows.\n",
"customerplus_clean_spectacle.loc[\n",
"    customerplus_clean_spectacle[\"already_purchased\"]\n",
"    & customerplus_clean_spectacle[\"purchase_count\"].eq(0)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "e940bfcf-29cc-4d4c-ae5e-e2a8cecf28af",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"number_compagny already_purchased\n",
"10 False 0.234840\n",
" True 0.236236\n",
"11 False 0.141746\n",
" True 0.002804\n",
"12 False 0.485950\n",
" True 0.244779\n",
"13 False 0.084057\n",
" True 0.177213\n",
"14 False 0.885553\n",
" True 0.308859\n",
"Name: opt_in, dtype: float64"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Differences in consent to e-mail campaigns (opt_in)\n",
"\n",
"# Even among customers with no first purchase date, some have opted in.\n",
"# print() is needed: a bare expression in the middle of a cell is evaluated and then discarded,\n",
"# only the last expression of a cell is displayed.\n",
"print(customerplus_clean_spectacle.loc[customerplus_clean_spectacle[\"first_buying_date\"].isna(), \"opt_in\"].unique())\n",
"\n",
"# Opt-in rate per company and purchase status.\n",
"# NOTE: already_purchased is overwritten here and now means purchase_count > 0,\n",
"# no longer \"first_buying_date is not NaN\" as in the earlier cell.\n",
"customerplus_clean_spectacle[\"already_purchased\"] = customerplus_clean_spectacle[\"purchase_count\"] > 0\n",
"customerplus_clean_spectacle.groupby([\"number_compagny\", \"already_purchased\"])[\"opt_in\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "a5e79beb-9ba0-4c89-b084-e27ff0d65dcc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>already_purchased</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" <td>0.234840</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" <td>0.236236</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>False</td>\n",
" <td>0.141746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>True</td>\n",
" <td>0.002804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>False</td>\n",
" <td>0.485950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>True</td>\n",
" <td>0.244779</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>False</td>\n",
" <td>0.084057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>True</td>\n",
" <td>0.177213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" <td>0.885553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" <td>0.308859</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny already_purchased opt_in\n",
"0 10 False 0.234840\n",
"1 10 True 0.236236\n",
"2 11 False 0.141746\n",
"3 11 True 0.002804\n",
"4 12 False 0.485950\n",
"5 12 True 0.244779\n",
"6 13 False 0.084057\n",
"7 13 True 0.177213\n",
"8 14 False 0.885553\n",
"9 14 True 0.308859"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean opt_in per (company, purchase status), kept as a flat DataFrame for plotting\n",
"df_graph = customerplus_clean_spectacle.groupby(\n",
"    [\"number_compagny\", \"already_purchased\"], as_index=False\n",
")[\"opt_in\"].mean()\n",
"df_graph"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "5be56c41-7697-481a-84ea-f77a2041484b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIhCAYAAABwnkrAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABeY0lEQVR4nO3deZyN9f//8ecx+2aYGWbRmBkZa9aU7ZN9J0lSKEsKSVJkSZmxjaXCRyp8ypCy9Uk+Eso62QmDkCj70giNYYwxM9fvj35zvp1rLHMyM2eMx/12O7eb876213XNe5an93W9j8UwDEMAAAAAAKtCji4AAAAAAPIbghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgAAAABgQlACAAAAABOCEpALZs+eLYvFYn05OzvrgQceUI8ePXT69OkcPVZMTIyWLFlyV/s4duyYLBaLZs+enSM13W8++uijAnntcqJv3cuio6NlsVhs2ho0aKAGDRrYtFksFkVHR+ddYbnEUeeR+fPy2LFjeX5s2Keg9HUguwhKQC6KjY3Vli1btGrVKr300kuaP3++HnvsMV29ejXHjnG//zGbHxCUCqYXX3xRW7ZsueN6W7Zs0YsvvpgHFQGORV/H/cbZ0QUABdlDDz2kGjVqSJIaNmyo9PR0jR49WkuWLFGXLl3uat/Xrl2Th4dHTpQJ4CYeeOABPfDAA3dcr1atWnlQDeB49HXcbxhRAvJQ5i+Z48ePS5JGjhypmjVrys/PT4ULF1b16tX16aefyjAMm+3Cw8PVpk0bLV68WNWqVZO7u7tGjhwpi8Wiq1evas6cOdbb/My3BZmdOXNGHTt2lI+Pj3x9ffXMM8/o3LlzN133xx9/VNu2beXn5yd3d3dVq1ZNixYtyta5Xr9+XaNGjVL58uXl7u4uf39/NWzYUJs3b7auk5KSomHDhikiIkKurq4qUaKEXnnlFf355583Pf+VK1eqevXq8vDwULly5TRr1iyb9ZKTkzVo0CBFRETI3d1dfn5+qlGjhubPn2/3eWXeDrRu3Tq9/PLLCggIkL+/v9q3b68zZ87Y1LZ//37FxcVZvwbh4eHW5ZcvX7bWlHmOAwYMyDKqaLFY1K9fP8XGxqps2bLy8PBQjRo1tHXrVhmGoXfffVcRERHy9vZWo0aNdOTIkSzXfPXq1WrcuLEKFy4sT09P1a1bV2vWrLFZJ/N2sv3796tTp07y9fVVYGCgXnjhBSUmJtrUY2/fym5/vtXtO+Hh4erevfttj5F5m+i7776rCRMmKDw8XB4eHmrQoIF++eUX3bhxQ0OHDlVISIh8fX315JNPKiEhwWYfCxcuVLNmzRQcHCwPDw+VL19eQ4cOzfI1udmtdzdjPp/s9h3pr++TgQMHKigoSJ6enqpXr5527tyZrWshSR9//LGqVKkib29v+fj4qFy5cnrrrbds1jl37px69+6tBx54QK6uroqIiNDIkSOVlpZ2x/3/9NNPeuKJJ1S0aFG5u7uratWqmjNnjs0669evl8Vi0fz58zV8+HCFhISocOHCatKkiQ4dOnTHY9xKdvrz+fPn1atXL4WGhsrNzU3FihVT3bp1tXr16jvu/+eff1anTp0UGBgoNzc3lSxZUl27dtX169f/0fnPmzdPQ4YMUXBwsLy9vfX444/r999/V1JSknr16qWAgAAFBASoR48eunLlis0+Mr//Z8yYoTJlysjNzU0VKlTQggULspxv3759VaFCBXl7e6t48eJq1KiRNmzYkOX8Tp06pQ4dOsjHx0dFihRRly5dtGPHjiy3WXfv3l3e3t46cuSIWrVqJW9vb4WGhmrgwIE21yKzTvP3bnb7V3b6KpDfMKIE5KHMP26LFSsm6a8/+nr37q2SJUtKkrZu3apXX31Vp0+f1ogRI2y23bVrlw4ePKi3335bERER8vLyUrt27dSoUSM1bNhQ77zzjiSpcO
HCtzz+tWvX1KRJE505c0bjxo1TmTJl9O233+qZZ57Jsu66devUokUL1axZU9OnT5evr68WLFigZ555RsnJybf9Iy4tLU0tW7bUhg0bNGDAADVq1EhpaWnaunWrTpw4oTp16sgwDLVr105r1qzRsGHD9Nhjj2nv3r2KiorSli1btGXLFrm5uVn3uWfPHg0cOFBDhw5VYGCgPvnkE/Xs2VOlS5dWvXr1JElvvPGG5s6dqzFjxqhatWq6evWqfvrpJ124cOEfn9eLL76o1q1ba968eTp58qTefPNNPffcc1q7dq0k6euvv1aHDh3k6+urjz76SJKsdScnJ6t+/fo6deqU3nrrLVWuXFn79+/XiBEjtG/fPq1evdrmD/Fly5Zp9+7dGj9+vCwWi4YMGaLWrVurW7du+u233zRt2jQlJibqjTfe0FNPPaX4+Hjr9p9//rm6du2qJ554QnPmzJGLi4tmzJih5s2b67vvvlPjxo1tzuupp57SM888o549e2rfvn0aNmyYJFnD55YtW+zqW5J9/fluffjhh6pcubI+/PBD/fnnnxo4cKAef/xx1axZUy4uLpo1a5aOHz+uQYMG6cUXX9TSpUut2x4+fFitWrXSgAED5OXlpZ9//lkTJkzQ9u3brV/XnHCnviNJPXr00MKFCzV48GA1atRIBw4c0JNPPqnLly/fcf8LFixQ37599eqrr+q9995ToUKFdOTIER04cMC6zrlz5/Too4+qUKFCGjFihB588EFt2bJFY8aM0bFjxxQbG3vL/R86dEh16tRR8eLFNXXqVPn7++vzzz9X9+7d9fvvv2vw4ME267/11luqW7euPvnkE12+fFlDhgzR448/roMHD8rJycmua5fd/vz8889r165dGjt2rMqUKaM///xTu3btsvmev5k9e/boX//6lwICAjRq1ChFRkbq7NmzWrp0qVJTU+Xm5vaPzr9hw4aaPXu2jh07pkGDBqlTp05ydnZWlSpVNH/+fO3evVtvvfWWfHx8NHXqVJvtly5dqnXr1mnUqFHy8vLSRx99ZN2+Q4cOkqSLFy9KkqKiohQUFKQrV67o66+/VoMGDbRmzRrrf2ZcvXpVDRs21MWLFzVhwgSVLl1aK1euvOnPekm6ceOG2rZtq549e2rgwIH64YcfNHr0aPn6+t72eze7/Ss7fRXIlwwAOS42NtaQZGzdutW4ceOGkZSUZCxbtswoVqyY4ePjY5w7dy7LNunp6caNGzeMUaNGGf7+/kZGRoZ1WVhYmOHk5GQcOnQoy3ZeXl5Gt27dslXXxx9/bEgy/ve//9m0v/TSS4YkIzY21tpWrlw5o1q1asaNGzds1m3Tpo0RHBxspKen3/I4n332mSHJ+M9//nPLdVauXGlIMiZOnGjTvnDhQkOSMXPmTGtbWFiY4e7ubhw/ftzadu3aNcPPz8/o3bu3te2hhx4y2rVrd8tj2nNemV/Dvn372qw3ceJEQ5Jx9uxZa1vFihWN+vXrZznWuHHjjEKFChk7duywaf/vf/9rSDKWL19ubZNkBAUFGVeuXLG2LVmyxJBkVK1a1aY/TJkyxZBk7N271zAMw7h69arh5+dnPP744zbHSU9PN6pUqWI8+uij1raoqKibXve+ffsa7u7uNsexp2+Z3a4/SzKioqKybBMWFnbH4x09etSQZFSpUsWmD2Zek7Zt29qsP2DAAEOSkZiYeNP9ZWRkGDdu3DDi4uIMScaePXusyzKv1d/Vr18/y9fafD7Z7Tv79+83JBlDhgyxWW/+/PmGpDtei379+hlFihS57Tq9e/c2vL29bb53DMMw3nvvPUOSsX///luex7PPPmu4ubkZJ06csNm2ZcuWhqenp/Hnn38ahmEY69atMyQZrVq1sllv0aJFhiRjy5Ytt60x83odPXrUMAz7+rO3t7cxYMCA2+7/Zho1amQUKVLESEhIuOU69p6/ud7Mvte/f3+b9nbt2hl+fn42bZIMDw8Pm98NaWlpRrly5Y
zSpUvfssa0tDTjxo0bRuPGjY0nn3zS2v7hhx8akowVK1bYrN+7d+8sP+u7detmSDIWLVpks26rVq2MsmXLZqnz730
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Grouped bar chart: opt-in rate (%) per company, one bar per purchase status\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"# One row per company, one column per purchase status. A missing (company, status)\n",
"# pair becomes NaN (drawn as a zero-height bar below) instead of raising IndexError.\n",
"opt_in_pct = 100 * df_graph.pivot(index=\"number_compagny\", columns=\"already_purchased\", values=\"opt_in\")\n",
"\n",
"categories = opt_in_pct.index.to_numpy()\n",
"bar_width = 0.35\n",
"bar_positions = np.arange(len(categories))\n",
"\n",
"# Draw each purchase status as a series of bars shifted by one bar width per status\n",
"for group_rank, label in enumerate(opt_in_pct.columns):\n",
"    label_printed = \"purchased\" if label else \"no purchase\"\n",
"    ax.bar(bar_positions + group_rank * bar_width, opt_in_pct[label].fillna(0), bar_width, label=label_printed)\n",
"\n",
"# Axis labels, title and legend; each tick is centred under its group of bars\n",
"ax.set_xlabel('Numero de compagnie')\n",
"ax.set_ylabel('Part de consentement (%)')\n",
"ax.set_title('Part de consentement au mailing selon les compagnies')\n",
"ax.set_xticks(bar_positions + bar_width * (len(opt_in_pct.columns) - 1) / 2)\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Render the figure\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-03-10 19:08:50 +01:00
"execution_count": 70,
2024-03-08 10:30:12 +01:00
"id": "91b743c4-5473-41e1-b97e-cf06904f0fa8",
"metadata": {
"scrolled": true
},
2024-03-03 09:32:45 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-08 10:30:12 +01:00
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>opt_in</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>22.681533</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-08 10:30:12 +01:00
" <td>10</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>45.617174</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-08 10:30:12 +01:00
" <td>11</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>8.681794</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-08 10:30:12 +01:00
" <td>11</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.034686</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-08 10:30:12 +01:00
" <td>12</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>38.730755</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.046081</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>12.596642</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>16.709675</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
2024-03-03 09:32:45 +01:00
" <td>14</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>77.789137</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>17.561409</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased opt_in\n",
"0 10 0.0 22.681533\n",
"1 10 1.0 45.617174\n",
"2 11 0.0 8.681794\n",
"3 11 1.0 0.034686\n",
"4 12 0.0 38.730755\n",
"5 12 1.0 0.046081\n",
"6 13 0.0 12.596642\n",
"7 13 1.0 16.709675\n",
"8 14 0.0 77.789137\n",
"9 14 1.0 17.561409"
2024-03-03 09:32:45 +01:00
]
},
2024-03-10 19:08:50 +01:00
"execution_count": 70,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-08 10:30:12 +01:00
"# on refait le graphique sur le train set\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-08 10:30:12 +01:00
"df_graph = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"opt_in\"].mean().reset_index()\n",
2024-03-10 12:31:28 +01:00
"df_graph[\"opt_in\"] = 100 * df_graph[\"opt_in\"]\n",
2024-03-08 10:30:12 +01:00
"df_graph"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 96,
2024-03-08 10:30:12 +01:00
"id": "728e0021-4f95-4601-bb01-032db2cf6571",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
2024-03-08 10:30:12 +01:00
"name": "stdout",
"output_type": "stream",
"text": [
"0.43578991448407206\n",
"0.2889600758160463\n"
]
2024-03-03 09:32:45 +01:00
}
],
"source": [
2024-03-08 10:30:12 +01:00
"# Why does the mean opt_in differ so much between the train set and the full customer table?\n",
"print(\n",
"    train_set_spectacle[\"opt_in\"].mean(),\n",
"    customerplus_clean_spectacle[\"opt_in\"].mean(),\n",
"    sep=\"\\n\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "274b4bc5-277f-476a-8bc1-c1764b1df2de",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8473746548562269\n",
"0.7573747808905485\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 76,
"id": "e1d837e1-c445-424b-867a-48b1e790f703",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"genre = homme : \n",
"0.3175633912091978\n",
"0.3103916287323914\n",
"email vérifié : \n",
"0.9581971527197163\n",
"0.9360131470484772\n",
"nationalité française : \n",
"0.8473746548562269\n",
"0.7573747808905485\n",
"nbre d'achats : \n",
"2.925387603847428\n",
"1.968932616126136\n"
]
}
],
"source": [
"# pour les autres variables, la distribution semble similaire\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-08 10:30:12 +01:00
"print(\"genre = homme : \")\n",
"print(train_set_spectacle[\"gender_male\"].mean())\n",
"print(customerplus_clean_spectacle[\"gender_male\"].mean())\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-08 10:30:12 +01:00
"print(\"email vérifié : \")\n",
"print(train_set_spectacle[\"is_email_true\"].mean())\n",
"print(customerplus_clean_spectacle[\"is_email_true\"].mean())\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-08 10:30:12 +01:00
"print(\"nationalité française : \")\n",
"print(train_set_spectacle[\"country_fr\"].mean())\n",
"print(customerplus_clean_spectacle[\"country_fr\"].mean())\n",
"\n",
"# sauf pr nbre d'achats - à verif\n",
"print(\"nbre d'achats : \")\n",
"print(train_set_spectacle[\"purchase_count\"].mean())\n",
"print(customerplus_clean_spectacle[\"purchase_count\"].mean())"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 98,
"id": "43deeeb5-8092-42fc-b80b-59d2c58093de",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIiCAYAAAD2CjhuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABsWUlEQVR4nO3deVyU5f7/8ffIvisomyFg4r6WJ1NTQHPXMi0zzbVFMytSczlWYioulZqZ2mJii0udzOMxtdzLnVTU1MwMt5QoNXAFhfv3Rz/m6wygjAID+Ho+HvN4ONe9fe6Zi4G393VfYzIMwxAAAAAAwKyMvQsAAAAAgOKGoAQAAAAAVghKAAAAAGCFoAQAAAAAVghKAAAAAGCFoAQAAAAAVghKAAAAAGCFoAQAAAAAVghKAAAAAGCFoIQSIT4+XiaTyfxwdHTUXXfdpX79+un3338v0GPFxcVp6dKlt7WPo0ePymQyKT4+vkBqutPMmjWrVL52BdG3SrLY2FiZTCaLtqioKEVFRVm0mUwmxcbGFl1hhcRe55H9eXn06NEiPzZsY68+cuTIEbm4uGjr1q3mtgULFmj69OmFdsywsDD17du30PZ/O7Zs2aLY2Fj9/fffOZY1b95cMTExRV4TigeCEkqUefPmaevWrVq9erWeeeYZLVy4UM2aNdPFixcL7Bh3+h+zxQFBqXR6+umnLf4wy8vWrVv19NNPF0FFgH3Zq68PGzZMrVq1UuPGjc1thR2Uvv76a7322muFtv/bsWXLFo0dOzbXoDRu3DjNmjVLhw4dKvrCYHeO9i4AsEXt2rXVsGFDSVJ0dLQyMzM1btw4LV26VD179rytfV++fFlubm4FUSaAXNx111266667brre/fffXwTVAPZnj75+8OBBLV26VKtWrbrlfWRmZuratWtycXHJ9zYNGjS45ePZU2RkpKpVq6a3335bH3zwgb3LQRHjihJKtOxfMseOHZMkjR07Vo0aNZKvr6+8vb11zz33aO7cuTIMw2K7sLAwdezYUUuWLFGDBg3k6uqqsWPHymQy6eLFi5o/f755mJ/1sCBrp06dUrdu3eTl5SUfHx89/vjjSk5OznXdH3/8UQ899JB8fX3l6uqqBg0a6IsvvsjXuaanp+uNN95QjRo15OrqKj8/P0VHR2vLli3mda5cuaJRo0YpPDxczs7Oqlixop5//vkc/0uWff6rVq3SPffcIzc3N1WvXl0ff/yxxXqXLl3SsGHDFB4eLldXV/n6+qphw4ZauHChzeeVPRxo/fr1eu6551S+fHn5+fmpS5cuOnXqlEVt+/fv18aNG83vQVhYmHl5Wlqauabsc4yJiclxVdFkMmnw4MGaN2+eqlWrJjc3NzVs2FDbtm2TYRh68803FR4eLk9PT7Vo0UK//vprjtd8zZo1atmypby9veXu7q6mTZtq7dq1FutkDyfbv3+/nnjiCfn4+CggIED9+/dXamqqRT229q389ue8hu/kZ6hL9jDRN998U5MnT1ZYWJjc3NwUFRWlX375RVevXtXIkSMVHBwsHx8fPfLII0pJSbHYx+LFi9W6dWsFBQXJzc1NNWrU0MiRI3O8J7kNvcuN9fnkt+9I//ycDB06VIGBgXJ3d1fz5s21c+fOfA/7mT17turVqydPT095eXmpevXq+ve//22xTnJysgYMGKC77rpLzs7OCg8P19ixY3Xt2rWb7v+nn37Sww8/rHLlysnV1VX169fX/PnzLdbZsGGDTCaTFi5cqNGjRys4OFje3t568MEHb+t/tfPTn//88089++yzCgkJkYuLiypUqKCmTZtqzZo1N93/zz//rCeeeEIBAQFycXFRpUqV1Lt3b6Wnp9/S+S9YsEAjRoxQUFCQPD091alTJ/3xxx86f/68nn32WZUvX17ly5dXv379dOHCBYt9ZP/8v//++6patapcXFxUs2ZNLVq0KMf5Dho0SDVr1pSnp6f8/f3VokUL/fDDDz
nO7+TJk3r00Ufl5eWlsmXLqmfPnkpISMgxzLpv377y9PTUr7/+qvbt28vT01MhISEaOnSoxWuRXaf1z25++1d++mpuZs+ercDAQLVq1crcFhUVpW+++UbHjh2zGOYu/d9nxJQpUzR+/HiFh4fLxcVF69ev15UrVzR06FDVr19fPj4+8vX1VePGjfXf//43x3GtfwZvt5/nt6/erN/HxsbqlVdekSSFh4ebz33Dhg3mdXr16qUFCxbo/PnzN60LpQtXlFCiZf9xW6FCBUn/fKAPGDBAlSpVkiRt27ZNL7zwgn7//Xe9/vrrFtvu2rVLBw8e1Kuvvqrw8HB5eHioc+fOatGihaKjo81DBLy9vfM8/uXLl/Xggw/q1KlTmjhxoqpWrapvvvlGjz/+eI51169fr7Zt26pRo0aaM2eOfHx8tGjRIj3++OO6dOnSDf+Iu3btmtq1a6cffvhBMTExatGiha5du6Zt27bp+PHjatKkiQzDUOfOnbV27VqNGjVKzZo10969ezVmzBht3bpVW7dutfjfvz179mjo0KEaOXKkAgIC9NFHH+mpp55SlSpV1Lx5c0nSkCFD9Omnn2r8+PFq0KCBLl68qJ9++klnzpy55fN6+umn1aFDBy1YsEAnTpzQK6+8oieffFLr1q2T9M/wjEcffVQ+Pj6aNWuWJJnrvnTpkiIjI3Xy5En9+9//Vt26dbV//369/vrr2rdvn9asWWPxh/jy5cu1e/duTZo0SSaTSSNGjFCHDh3Up08f/fbbb5o5c6ZSU1M1ZMgQde3aVYmJiebtP/vsM/Xu3VsPP/yw5s+fLycnJ73//vtq06aNvv32W7Vs2dLivLp27arHH39cTz31lPbt26dRo0ZJkjl8bt261aa+JdnWn2/Xe++9p7p16+q9997T33//raFDh6pTp05q1KiRnJyc9PHHH+vYsWMaNmyYnn76aS1btsy87eHDh9W+fXvFxMTIw8NDP//8syZPnqwdO3aY39eCcLO+I0n9+vXT4sWLNXz4cLVo0UIHDhzQI488orS0tJvuf9GiRRo0aJBeeOEFvfXWWypTpox+/fVXHThwwLxOcnKy7rvvPpUpU0avv/667r77bm3dulXjx4/X0aNHNW/evDz3f+jQITVp0kT+/v6aMWOG/Pz89Nlnn6lv3776448/NHz4cIv1//3vf6tp06b66KOPlJaWphEjRqhTp046ePCgHBwcbHrt8tufe/XqpV27dmnChAmqWrWq/v77b+3atcviZz43e/bs0QMPPKDy5cvrjTfeUEREhE6fPq1ly5YpIyNDLi4ut3T+0dHRio+P19GjRzVs2DA98cQTcnR0VL169bRw4ULt3r1b//73v+Xl5aUZM2ZYbL9s2TKtX79eb7zxhjw8PDRr1izz9o8++qgk6ezZs5KkMWPGKDAwUBcuXNDXX3+tqKgorV271vyfGRcvXlR0dLTOnj2ryZMnq0qVKlq1alWun/WSdPXqVT300EN66qmnNHToUH3//fcaN26cfHx8bvizm9/+lZ++mpdvvvlGzZs3V5ky//d/5bNmzdKzzz6rI0eO6Ouvv851uxkzZqhq1ap666235O3trYiICKWnp+vs2bMaNmyYKlasqIyMDK1Zs0ZdunTRvHnz1Lt375vWc6v9PD99NT/9/umnn9bZs2f17rvvasmSJQoKCpIk1axZ07yfqKgojRgxQhs2bFCnTp1uek4oRQygBJg3b54hydi2bZtx9epV4/z588by5cuNChUqGF5eXkZycnKObTIzM42rV68ab7zxhuHn52dkZWWZl4WGhhoODg7GoUOHcmzn4eFh9OnTJ191zZ4925Bk/Pe//7Vof+aZZwxJxrx588xt1atXNxo0aGBcvXrVYt2OHTsaQUFBRmZmZp7H+eSTTwxJxocffpjnOqtWrTIkGVOmTLFoX7x4sSHJ+OCDD8xtoaGhhqurq3Hs2DFz2+XLlw1fX19jwIAB5rbatWsbnTt3zvOYtp
xX9ns4aNAgi/WmTJliSDJOnz5tbqtVq5YRGRmZ41gTJ040ypQpYyQkJFi0/+c//zEkGStWrDC3STICAwONCxcumNu
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# with the generic function\n",
"multiple_barplot(df_graph, x=\"number_company\", y=\"opt_in\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de consentement (%)\", \n",
" title = \"Part de consentement au mailing selon les compagnies (train set)\")"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "32960530-cb46-4eeb-a6d2-1dcf5fb640d8",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_compagny</th>\n",
2024-03-08 10:30:12 +01:00
" <th>gender_male</th>\n",
2024-03-10 12:31:28 +01:00
" <th>gender_female</th>\n",
2024-03-08 10:30:12 +01:00
" <th>gender_other</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.181580</td>\n",
" <td>0.343837</td>\n",
" <td>0.474583</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>0.179520</td>\n",
" <td>0.314443</td>\n",
" <td>0.506037</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
" <td>0.346380</td>\n",
" <td>0.454036</td>\n",
" <td>0.199584</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>13</td>\n",
" <td>0.318108</td>\n",
" <td>0.503092</td>\n",
" <td>0.178800</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.331954</td>\n",
" <td>0.316181</td>\n",
" <td>0.351865</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_compagny gender_male gender_female gender_other\n",
"0 10 0.181580 0.343837 0.474583\n",
"1 11 0.179520 0.314443 0.506037\n",
"2 12 0.346380 0.454036 0.199584\n",
"3 13 0.318108 0.503092 0.178800\n",
"4 14 0.331954 0.316181 0.351865"
2024-03-08 10:30:12 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 79,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# genre \n",
"\n",
"company_genders = customerplus_clean_spectacle.groupby(\"number_compagny\")[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
"company_genders"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 80,
"id": "1b4a49d7-7bfe-4e80-aa7e-c9c6d4bc46e2",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 12:31:28 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHFCAYAAAAOmtghAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYJ0lEQVR4nO3dd1RU1/428GdoQxNQkaJS7QUVQSMYNUQFS9TYO9Z7LTEWNCrXKFgxJlFMwZKIaEIMMZZEQ1RiISqWiGiM2BuoIIoKVpCZ/f7hy/wyDugcGBgcn89as5azZ59zvmdP4fFUmRBCgIiIiMhAGOm7ACIiIiJdYrghIiIig8JwQ0RERAaF4YaIiIgMCsMNERERGRSGGyIiIjIoDDdERERkUBhuiIiIyKAw3BAREZFBYbjRgyNHjqBnz55wdXWFXC6Ho6Mj/Pz8MHXqVH2XJplMJkN4eLi+y9DwYl379u2DTCbDvn37ynS5UVFRiImJKdNlAOW3Pv929epVyGQyfPbZZ+W2THo9xMTEQCaT4erVq2W6HH187vUlKSkJ4eHhuH//fpkup7zeu/LGcFPOfvvtN/j7+yM3NxdLlizBrl27sHz5crRu3RpxcXH6Ls9gNW/eHIcOHULz5s3LdDnlFW6IKpKuXbvi0KFDcHZ21ncpBiMpKQlz584t83BjqEz0XcCbZsmSJfDw8MDOnTthYvJ/wz9gwAAsWbJEj5UZNhsbG7Rq1UrfZRAZpGrVqqFatWr6LoNIhVtuyll2djbs7e3Vgk0hIyPNtyMuLg5+fn6wsrKCtbU1goKCkJKSonr9wIEDMDU1xbRp09SmK9zUuGbNGlXbhQsXMGjQIDg4OEAul6NBgwb4+uuvtao7NzcX//nPf1C1alVYW1ujU6dOOH/+fJF9tVmOUqnEggULUK9ePVhYWMDOzg5NmjTB8uXLX1nL/fv3MXXqVHh6ekIul8PBwQFdunTB2bNni52muM3Zx44dQ/fu3VGlShWYm5vD29sbP/30k1qfwrHcu3cvxo0bB3t7e1StWhW9evXCzZs3Vf3c3d1x+vRpJCYmQiaTQSaTwd3dvdTre/bsWXTq1AmWlpawt7fH2LFj8eDBgyL7/vHHH2jfvj1sbGxgaWmJ1q1bY/fu3a9cBiBtXJcuXQoPDw9YW1vDz88Phw8fVnv92LFjGDBgANzd3WFhYQF3d3cMHDgQ165d05jX4cOH0bp1a5ibm6N69eoIDQ3FN998o7GpvLhdoO7u7hg+fLhaW2ZmJsaMGYOaNWvCzMwMHh4emDt3LgoKCrQaix9++AF+fn6wtraGtbU1mjVrpvZdAoDo6Gg0bdoU5ubmqFKlCnr27IkzZ86o9Rk+fDisra1x9uxZBAUFwcrKCs7Ozli8eLFq3d9++21YWVmhbt26WLdundr0hZ+9hIQEjBgxAlWqVIGVlRW6deuGy5cvq/VNSEhAjx49ULNmTZibm6N27doYM2YM7ty5o7F+v/zyC5o0aQK5XA5PT08sX74c4eHhkMlkav1kMhkmTJiA7777Dg0aNIClpSWaNm2K7du3F1nni7s2SvN5LI/PvTbfy8JxSUlJQa9evWBjYwNbW1sMGTIEt2/f1pjnq36zCx05cgTdunVD1apVYW5ujlq1amHy5MmqZX700UcAAA8PD9XvSeHvV1xcHAIDA+Hs7AwLCws0aNAAM2fOxKNHjyQt52VK895VCILK1ejRowUA8eGHH4rDhw+L/Pz8YvsuXLhQyGQyMXLkSLF9+3axefNm4efnJ6ysrMTp06dV/RYvXiwAiF9++UUIIcQ///wjLC0txZAhQ1R9Tp8+LWxtbYWXl5dYv3692LVrl5g6daowMjIS4eHhL61ZqVSKgIAAIZfLxcKFC8WuXbtEWFiY8PT0FABEWFiY5OVEREQIY2NjERYWJnbv3i127NghIiMjX1lLbm6uaNSokbCyshLz5s0TO3fuFJs2bRKTJk0Se/bsUfV7sa
69e/cKAGLv3r2qtj179ggzMzPRpk0bERcXJ3bs2CGGDx8uAIi1a9eq+q1du1YAEJ6enuLDDz8UO3fuFN9++62oXLmyCAgIUPU7fvy48PT0FN7e3uLQoUPi0KFD4vjx46Va38zMTOHg4CBq1Kgh1q5dK+Lj48XgwYOFq6urxvp89913QiaTiffff19s3rxZbNu2Tbz33nvC2NhY/PHHH6Ue1ytXrggAwt3dXXTq1Els3bpVbN26VXh5eYnKlSuL+/fvq+a3ceNGMWfOHLFlyxaRmJgofvzxR9GuXTtRrVo1cfv2bVW/06dPC0tLS9GwYUOxYcMG8csvv4igoCDV+l25cqXY97SQm5ubGDZsmOp5RkaGcHFxEW5ubmLVqlXijz/+EPPnzxdyuVwMHz78peMghBCzZ88WAESvXr3Exo0bxa5du8TSpUvF7NmzVX0WLVokAIiBAweK3377Taxfv154enoKW1tbcf78eVW/YcOGCTMzM9GgQQOxfPlykZCQIEaMGCEAiNDQUFG3bl2xZs0asXPnTvHee+8JAOLYsWOq6Qs/ey4uLmLkyJHi999/F6tXrxYODg7CxcVF3Lt3T9V3xYoVIiIiQvz6668iMTFRrFu3TjRt2lTUq1dP7Xfm999/F0ZGRuKdd94RW7ZsERs3bhRvvfWWcHd3Fy/+SSh8v1u2bCl++uknER8fL9555x1hYmIiLl26pFHnv9+v0nwey+tzr833MiwsTAAQbm5u4qOPPhI7d+4US5cuFVZWVsLb21ttbLX9zd6xY4cwNTUVTZo0ETExMWLPnj0iOjpaDBgwQAghRHp6uvjwww8FALF582bV70lOTo4QQoj58+eLZcuWid9++03s27dPrFy5Unh4eKj9HmmznLJ47yoKhptydufOHfH2228LAAKAMDU1Ff7+/iIiIkI8ePBA1S8tLU2YmJiIDz/8UG36Bw8eCCcnJ9GvXz9Vm1KpFF26dBF2dnbin3/+EQ0bNhT169cXDx8+VPUJCgoSNWvWVH05Ck2YMEGYm5uLu3fvFlvz77//LgCI5cuXq7UvXLhQ4w+Otst57733RLNmzV4xWprmzZsnAIiEhISX9tMm3NSvX194e3uLZ8+eqU373nvvCWdnZ6FQKIQQ//flHz9+vFq/JUuWCAAiIyND1daoUSPRrl07jXpKur4zZswQMplMnDhxQq29Y8eOauvz6NEjUaVKFdGtWze1fgqFQjRt2lS0bNnypcvRZlwLw42Xl5coKChQtR89elQAEBs2bCh22oKCAvHw4UNhZWWl9jnq37+/sLCwEJmZmWp969evX+JwM2bMGGFtbS2uXbum1u+zzz4TANT+yLzo8uXLwtjYWAwePLjYPvfu3RMWFhaiS5cuau1paWlCLpeLQYMGqdqGDRsmAIhNmzap2p49eyaqVasmAKjCrxBCZGdnC2NjYxESEqJqK/zs9ezZU21ZBw8eFADEggULiqxRqVSKZ8+eiWvXrqn9x0cIIVq0aCFcXFxEXl6equ3BgweiatWqRYYbR0dHkZubq2rLzMwURkZGIiIiQqPOwvertJ/H8vrca/O9LAw3U6ZMUWuPjY0VAMT3338vhJD2m12rVi1Rq1Yt8eTJk2KX++mnn2p8B4pS+F4nJiYKAOLkyZOSlqPr966i4G6pcla1alXs378ff/31FxYvXowePXrg/PnzCA0NhZeXl2oT8s6dO1FQUIDg4GAUFBSoHubm5mjXrp3a7hWZTIb169ejUqVK8PX1xZUrV/DTTz/BysoKAPD06VPs3r0bPXv2hKWlpdr8unTpgqdPn2rsVvi3vXv3AgAGDx6s1j5o0CC151KW07JlS5w8eRLjx4/Hzp07kZubq9X4/f7776hbty46dOigVf/iXLx4EWfPnlWt04u1ZmRk4Ny5c2rTdO/eXe15kyZNAKDIXS0vKun67t27F40aNULTpk3V2l8c+6SkJNy9exfDhg1TWxelUolOnTrhr7/+KnKTdSEp49
q1a1cYGxurnhc1Dg8fPsSMGTNQu3ZtmJiYwMTEBNbW1nj06JHarpu9e/eiffv2cHR0VLUZGxujf//+r6yjONu3b0d
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_male\"], label = \"Homme\")\n",
"plt.bar(company_genders[\"number_compagny\"], company_genders[\"gender_female\"], \n",
" bottom = company_genders[\"gender_male\"], label = \"Femme\")\n",
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients de chaque sexe\")\n",
"plt.title(\"Sexe des clients de chaque compagnie de spectacle\")\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "c7348c95-e506-4002-90d9-d3b6768af985",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
2024-03-08 10:30:12 +01:00
" <th>gender_male</th>\n",
2024-03-10 12:31:28 +01:00
" <th>gender_female</th>\n",
2024-03-08 10:30:12 +01:00
" <th>gender_other</th>\n",
2024-03-10 12:31:28 +01:00
" <th>share_of_women</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>0.171838</td>\n",
" <td>0.333929</td>\n",
" <td>0.494232</td>\n",
" <td>66.024263</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>1.0</td>\n",
" <td>0.312165</td>\n",
" <td>0.683363</td>\n",
" <td>0.004472</td>\n",
" <td>68.643306</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.151162</td>\n",
" <td>0.273204</td>\n",
" <td>0.575635</td>\n",
" <td>64.379376</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.328477</td>\n",
" <td>0.597641</td>\n",
" <td>0.073881</td>\n",
" <td>64.531835</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.334546</td>\n",
" <td>0.433672</td>\n",
" <td>0.231782</td>\n",
" <td>56.451654</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.366020</td>\n",
" <td>0.506659</td>\n",
" <td>0.127321</td>\n",
" <td>58.057873</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>6</th>\n",
" <td>13</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.314243</td>\n",
" <td>0.503242</td>\n",
" <td>0.182515</td>\n",
" <td>61.559817</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
2024-03-08 10:30:12 +01:00
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.351721</td>\n",
" <td>0.504910</td>\n",
" <td>0.143369</td>\n",
" <td>58.941356</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>8</th>\n",
" <td>14</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.317971</td>\n",
" <td>0.296388</td>\n",
" <td>0.385641</td>\n",
" <td>48.243443</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>9</th>\n",
2024-03-08 10:30:12 +01:00
" <td>14</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.451289</td>\n",
" <td>0.485106</td>\n",
" <td>0.063605</td>\n",
" <td>51.805692</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased gender_male gender_female gender_other \\\n",
"0 10 0.0 0.171838 0.333929 0.494232 \n",
"1 10 1.0 0.312165 0.683363 0.004472 \n",
"2 11 0.0 0.151162 0.273204 0.575635 \n",
"3 11 1.0 0.328477 0.597641 0.073881 \n",
"4 12 0.0 0.334546 0.433672 0.231782 \n",
"5 12 1.0 0.366020 0.506659 0.127321 \n",
"6 13 0.0 0.314243 0.503242 0.182515 \n",
"7 13 1.0 0.351721 0.504910 0.143369 \n",
"8 14 0.0 0.317971 0.296388 0.385641 \n",
"9 14 1.0 0.451289 0.485106 0.063605 \n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
" share_of_women \n",
"0 66.024263 \n",
"1 68.643306 \n",
"2 64.379376 \n",
"3 64.531835 \n",
"4 56.451654 \n",
"5 58.057873 \n",
"6 61.559817 \n",
"7 58.941356 \n",
"8 48.243443 \n",
"9 51.805692 "
2024-03-08 10:30:12 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 82,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"company_genders = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
"company_genders[\"share_of_women\"] = 100 * (company_genders[\"gender_female\"]/(1-company_genders[\"gender_other\"]))\n",
"company_genders"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 84,
"id": "b36e5a8f-45dc-4b74-8137-80b7e916aa84",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 12:31:28 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIiCAYAAAD2CjhuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABkGklEQVR4nO3dfXzO9f////thZ3bOxs6EjUbOhpKzYiRnoROVikLnkspZyrv6mjBnJUWREkpSvclbCMvJIqdhCAlNJGtCzhnb8/dHvx0fx2sbx8G2Y+Z2vVyOy8Xr+Tp7vF7Hc8d293q9nofNGGMEAAAAALAr4e4CAAAAAKCoISgBAAAAgAVBCQAAAAAsCEoAAAAAYEFQAgAAAAALghIAAAAAWBCUAAAAAMCCoAQAAAAAFgQlAAAAALAgKOG6MnXqVNlsNvvL09NTN9xwgx5//HEdOHAgX/eVmJioOXPmXNU29u7dK5vNpqlTp+ZLTdnbbNeunUJCQmSz2dS7d+982zYcFcT756yEhATZbLZC3y9c484+YrV8+XLZbDYtX77c3aUUuFWrVikhIUH//PNPge4n+3fO3r1783W7K1askI+Pj37//Xd72wcffFCg/chmsykhIaHAtn81FixYkGtt58+fV+XKlTV27NhCrwnFA0EJ16UpU6Zo9erVSkpK0tNPP60vvvhCTZo00alTp/JtH/kRlApCnz59tHbtWn3yySdavXq1+vTp4+6SgOtWZGSkVq9erXbt2rm7lOvKqlWrNHjw4AIPSgXBGKPevXvr6aefVsWKFe3tBR2UVq9eraeeeqrAtn81FixYoMGDB+do9/Ly0v/7f/9Pb775pg4fPuyGynCtIyjhulSzZk01bNhQzZs316BBgzRgwAClpqbmS7A5c+bM1RdYgH7++WfVr19f9957rxo2bOjwixZA4fLx8VHDhg1VtmxZd5eCa8TChQu1ceNGvfDCC1e8jfPnz+vChQsurdOwYUPdcMMNV7xPd3nkkUdks9n04YcfursUXIMISoD+/QUgyX4bw+DBg9WgQQOFhIQoKChIN998syZPnixjjMN60dHRat++vWbPnq26deuqZMmSGjx4sGw2m06dOqVp06bZb/Nr1qzZJWv4888/1alTJwUGBio4OFgPPfSQ0tLScl32p59+0t13362QkBCVLFlSdevW1VdffXXJ7WffVrN7925999139rqybwk5fvy4+vfvr5iYGHl7e6tcuXLq3bt3jqtsNptNvXr10pQpU1S1alX5+vqqXr16WrNmjYwxGj16tGJiYhQQEKA77rhDu3fvdli/WbNmqlmzplavXq3GjRvL19dX0dHRmjJliiRp/vz5uvnmm+Xn56datWpp4cKFOY5l165d6ty5s8LCwuTj46Nq1arp/fffd1gmKytLQ4cOtddYqlQpxcXF6d13373keXJ2PWdqyMvKlSvVokULBQYGys/PT40bN9b8+fMdlsm+ZWfZsmV67rnnVKZMGYWGhqpjx476888/ndpPbr788ks1atRI/v7+CggIUOvWrbVp0yaHZX777Tc9/PDDioqKko+Pj8LDw9WiRQulpKRcdvtr165Vhw4dFBoaqpIlS6py5co5bu905fiXLl2qp59+WqGhoQoKClLXrl116tQppaWlqVOnTipVqpQiIyPVv39/nT9/3r5+9i1to0aN0rBhw1ShQgWVLFlS9erV05IlSxz2tXv3bj3++OOKjY2Vn5+fypUrpw4dOmjr1q05jm/btm1q1aqV/Pz8VLZsWT3//POaP39+jlvWsvv5+vXr1aRJE/n5+alSpUoaMWKEsrKyctRpvRJQkH1ckn755Re1adNGfn5+KlOmjHr06KETJ07kuuz333+vFi1aKCgoSH5+frrttttynMPcOFNf9i2imzZtUseOHRUUFKTg4GA9+uijOnToUI5tOtN/pUv3w4SEBL388suSpJiYGPtnYfb79+WXX6pVq1aKjIyUr6
+vqlWrpldffTXXOw6c6e/5eU4lacKECbr11ltVtWpVe1t0dLS2bdum5ORk+/FER0dL+r/P/s8++0z9+vVTuXLl5OPjo927d+vQoUPq2bOnqlevroCAAIWFhemOO+7QihUrcuzXeuvd1X5GOfs5c7n3vHv37vafjYtvrc/+3ebt7a2HHnpIkyZNyvE7HLgsA1xHpkyZYiSZ9evXO7S/++67RpKZNGmSMcaY7t27m8mTJ5ukpCSTlJRkhgwZYnx9fc3gwYMd1qtYsaKJjIw0lSpVMp988olZtmyZWbdunVm9erXx9fU1d911l1m9erVZvXq12bZtW551nT592lSrVs0EBwebcePGmUWLFpkXX3zRVKhQwUgyU6ZMsS+7dOlS4+3tbZo0aWK+/PJLs3DhQtO9e/ccy1kdO3bMrF692kRERJjbbrvNXtfZs2fNqVOnTJ06dUyZMmXMmDFjzPfff2/effddExwcbO644w6TlZVl344kU7FiRdO4cWMze/Zs880335gqVaqYkJAQ06dPH3PPPfeYefPmmc8//9yEh4ebuLg4h/Xj4+NNaGioqVq1qpk8ebJZtGiRad++vZFkBg8ebGrVqmW++OILs2DBAtOwYUPj4+NjDhw4YF9/27ZtJjg42NSqVct8+umnZvHixaZfv36mRIkSJiEhwb7c8OHDjYeHhxk0aJBZsmSJWbhwoRk7dqzDMrlxZj1na0hNTc3xvixfvtx4eXmZW265xXz55Zdmzpw5plWrVsZms5mZM2fal8vuq5UqVTIvvPCCWbRokfn4449N6dKlTfPmzS95DMYYM2jQIGP9iB82bJix2WzmiSeeMPPmzTOzZ882jRo1Mv7+/g79s2rVqubGG280n332mUlOTjazZs0y/fr1M8uWLbvkPhcuXGi8vLxMXFycmTp1qlm6dKn55JNPzMMPP3zFxx8TE2P69etnFi9ebEaOHGk8PDzMI488Ym6++WYzdOhQk5SUZF555RUjybz99ts5zn358uXN7bffbmbNmmW+/vprc+uttxovLy+zatUq+7LJycmmX79+5r///a9JTk4233zzjbn33nuNr6+v+eWXX+zL/fnnnyY0NNRUqFDBTJ061SxYsMA89thjJjo62khyOD/Z/Tw2NtZMnDjRJCUlmZ49expJZtq0aTnqvLiPFHQfT0tLM2FhYaZcuXJmypQpZsGCBaZLly72z5uLj+Ozzz4zNpvN3HvvvWb27Nnm22+/Ne3btzceHh7m+++/v+R+nKkvu59WrFjRvPzyy2bRokVmzJgxxt/f39StW9dkZGTYl3W2/16uH+7fv9+88MILRpKZPXu2/bPw2LFjxhhjhgwZYt555x0zf/58s3z5cjNx4kQTExOT4+fOmf6e3Y9TU1Pz5ZyeO3fO+Pr6mgEDBji0b9y40VSqVMnUrVvXfjwbN240xhizbNkyI8mUK1fOPPDAA2bu3Llm3rx55vDhw+aXX34xzz33nJk5c6ZZvny5mTdvnnnyySdNiRIlcvy8SzKDBg3KcWxX+hnlzOeMM+/57t27zQMPPGAk2Y89+3dbti+//NJIMlu2bLlsXcDFCEq4rmR/sK9Zs8acP3/enDhxwsybN8+ULVvWBAYGmrS0tBzrZGZmmvPnz5s333zThIaGOvzRX7FiRePh4WF27tyZYz1/f3/TrVs3p+qaMGGCkWT+97//ObQ//fTTOf6Iuummm0zdunXN+fPnHZZt3769iYyMNJmZmZfcV8WKFU27du0c2oYPH25KlCiRI0D+97//NZLMggUL7G2STEREhDl58qS9bc6cOUaSqVOnjsP5GTt2bI5fTvHx8UaS+emnn+xthw8fNh4eHsbX19chFKWkpBhJ5r333rO3tW7d2txwww32P2qy9erVy5QsWdIcOXLEfj7q1KlzyXORG2fWc7aG3P4IbtiwoQkLCzMnTpywt124cMHUrFnT3HDDDfbzl91Xe/bs6bCPUaNGGUnm4MGDl6zRGpT27d
tnPD09zQsvvOCw3IkTJ0xERITp1KmTMcaYv//+20gyY8eOveT2c1O5cmVTuXJlc+bMmTyXcfX4rfXee++9RpIZM2a
2024-03-08 10:30:12 +01:00
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# création barplot avec la fonction générique\n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"multiple_barplot(company_genders, x=\"number_company\", y=\"share_of_women\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de femmes (%)\", \n",
" title = \"Part de femmes selon les compagnies de spectacle (train set)\")"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 87,
"id": "ed6374e5-f36c-4f8e-9dba-602715b726f1",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
2024-03-10 12:31:28 +01:00
" <th>country_fr</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.996136</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.994838</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.002119</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.831795</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.993978</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_compagny country_fr\n",
"0 10 0.996136\n",
"1 11 0.994838\n",
"2 12 0.002119\n",
"3 13 0.831795\n",
"4 14 0.993978"
2024-03-08 10:30:12 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 87,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# pays d'origine (France VS reste du monde)\n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"company_country_fr = customerplus_clean_spectacle.groupby(\"number_compagny\")[\"country_fr\"].mean().reset_index()\n",
"company_country_fr"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 88,
"id": "8d95cdd9-2ab3-4c9a-8442-bb9b98e0dd18",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 12:31:28 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHGCAYAAACIDqqPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABINElEQVR4nO3deVxU9f7H8fcAAoKAggliCph7LrmUe7jhkqm3zCXNLe1qWl63NDOXvC7pLTMrtXJBy7pmmql5UzIzS819yy1LwQUXRMUVFc7vDx/Mz3FAZ2Bw9PR6Ph7zeDjf8z3nfM53zgxvzzJjMQzDEAAAgEl4uLsAAAAAVyLcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcAAAAUyHcIFddvHhRZcqUUZs2bZSenu7ucgAAfwOEm1vExsbKYrHI19dX8fHxdtPr1aun8uXLZ2vZX3zxhSZPnpzpNIvFolGjRmVrua6WMQaHDx+2tnXt2lWRkZE2/caNG6fFixffdXkvvfSSQkND9fnnn8vDI/u7W2RkpLp27Zrt+XPL7XUdPnxYFotFsbGxubreO+1PrnSvtud2FotFr7zyyj1dJ+5/P/30kywWi3766adcXY+79nt32LNnj0aNGmXzmZ8b7tVrl4Fwk4nU1FS9+eabLl3mnf4YrV+/Xj169HDp+lxp+PDh+uabb2zaHAk3H330kXbu3Klvv/1WPj4+uVjh/aNw4cJav369mjdvnqvruVfhBrifVKlSRevXr1eVKlXcXYpp7NmzR2+99Vauh5t7jXCTiaZNm+qLL77Qjh077sn6atSooYcffvierCs7HnnkEVWuXNnp+fr06aPff/9d+fPnd31R9ykfHx/VqFFDDz30kLtLAUwnMDBQNWrUUGBgoLtLwX2OcJOJwYMHKyQkREOGDLlr348++khPPvmkChUqJH9/f1WoUEETJ07U9evXrX3q1aun7777TvHx8bJYLNZHhsxOS+3evVutWrVSgQIF5Ovrq8cee0xz5syx6ZNxmO/LL7/UsGHDFB4ersDAQDVq1Ej79++36RsXF6dWrVrp4Ycflq+vr0qUKKGePXsqKSnprtt4+2kpi8WiS5cuac6cOdZtqVevnnX6iRMn1LNnTz388MPy9vZWVFSU3nrrLd24ceOu67p+/boGDx6ssLAw+fn5qU6dOtq4cWOmfR1dz7Rp01SpUiXly5dPAQEBKlOmjN5444271pKamqrRo0erbNmy8vX1VUhIiOrXr69169ZlOU9Wh7P/+OMPdejQQYUKFZKPj4/Kli2rjz76yKaPo6/n3fan7G7v8ePH1bZtWwUEBCgoKEjt2rXTiRMnMu27efNmtWzZUsHBwfL19VXlypX11Vdf3XUdknPj+tlnn6ls2bLy8/NTpUqVtGzZMpvpBw8eVLdu3VSyZEn5+fmpSJEiatGihXbt2mW3rH379qlp06by8/NTwYIF1atXLy1dutTuUHlWp0Dr1atns59LUkpKigYNGqSoqCh5e3urSJEi6tevny5duuTQWHz//fdq2LChgoKC5Ofnp7Jly2r8+PE2fZYsWaKaNWvKz89PAQEBiomJ0fr16236jBo1ShaLRTt37lSbNm0UFBSk4OBgDRgwQDdu3ND+/fvVtGlTBQQEKDIyUhMnTrSZP2Pf+/zzzzVgwACFhYUpb968io6O1rZt22z6bt68We3bt1dkZKTy5s2ryMhIPf/885meyv/ll19Us2ZN+fr6qkiRIho+fLhmzJhhd+o7MjJSTz/9tL7//ntVqVJFefPmVZkyZTRr1qxM67z91EZO9sd7td/f7X2ZcUlAXFycunXrpuDgYPn7+6tFixb666+/7Jb3ww8/qGHDhgoMDJSfn59q166tVatW2fXbt2+fnn/+eYWGhsrHx0fFihVT586dlZqaqtjYWLVp00aSVL9+fetnScbnlzN/N+60njvJyZjeiVeOl2
BCAQEBevPNN/Wvf/1LP/74oxo0aJBl3z///FMdOnSwfrjt2LFDY8eO1b59+6xvzKlTp+qf//yn/vzzT7vTO5nZv3+/atWqpUKFCmnKlCkKCQnR559/rq5du+rkyZMaPHiwTf833nhDtWvX1owZM5SSkqIhQ4aoRYsW2rt3rzw9Pa111qxZUz169FBQUJAOHz6sSZMmqU6dOtq1a5fy5Mnj8PisX79eDRo0UP369TV8+HBJsv5P6sSJE3riiSfk4eGhESNG6JFHHtH69es1ZswYHT58WLNnz77jsl966SXNnTtXgwYNUkxMjHbv3q1nn31WFy5csOnn6Hr++9//qnfv3nr11Vf1zjvvyMPDQwcPHtSePXvuWMeNGzfUrFkzrV27Vv369VODBg1048YNbdiwQQkJCapVq5bD47Vnzx7VqlVLxYoV07vvvquwsDCtWLFCffv2VVJSkkaOHGnT/26v5532p+xu75UrV9SoUSMdP35c48ePV6lSpfTdd9+pXbt2dn1Xr16tpk2bqnr16po+fbqCgoL03//+V+3atdPly5fveG2UM+P63XffadOmTRo9erTy5cuniRMn6plnntH+/ftVvHhxSTf/MIWEhOjtt9/WQw89pOTkZM2ZM0fVq1fXtm3bVLp0aUnSyZMnFR0drTx58mjq1KkKDQ3VvHnzcnRdz+XLlxUdHa2jR4/qjTfeUMWKFfX7779rxIgR2rVrl3744Qeb0Hm7mTNn6qWXXlJ0dLSmT5+uQoUK6cCBA9q9e7e1zxdffKGOHTuqcePG+vLLL5WamqqJEyeqXr16WrVqlerUqWOzzLZt2+qFF15Qz549FRcXZ/2P1g8//KDevXtr0KBB+uKLLzRkyBCVKFFCzz77rM38b7zxhqpUqaIZM2bo/PnzGjVqlOrVq6dt27ZZx/zw4cMqXbq02rdvr+DgYCUmJmratGl6/PHHtWfPHhUsWFCStHPnTsXExKhUqVKaM2eO/Pz8NH36dH3++eeZjseOHTs0cOBAvf766woNDdWMGTPUvXt3lShRQk8++WSW45iT/fFe7ffOvC+7d++umJgYffHFFzpy5IjefPNN1atXTzt37rQeBf/888/VuXNntWrVSnPmzFGePHn08ccfq0mTJlqxYoUaNmxoHdM6deqoYMGCGj16tEqWLKnExEQtWbJE165dU/PmzTVu3Di98cYb+uijj6yn+x555BFJjv/duNt6srosISdjelcGrGbPnm1IMjZt2mSkpqYaxYsXN6pVq2akp6cbhmEY0dHRxqOPPprl/Glpacb169eNuXPnGp6enkZycrJ1WvPmzY2IiIhM55NkjBw50vq8ffv2ho+Pj5GQkGDTr1mzZoafn59x7tw5wzAMY/Xq1YYk46mnnrLp99VXXxmSjPXr12e6vvT0dOP69etGfHy8Icn49ttv7cbg0KFD1rYuXbrY1e7v72906dLFbtk9e/Y08uXLZ8THx9u0v/POO4Yk4/fff8+0JsMwjL179xqSjP79+9u0z5s3z5Bksz5H1/PKK68Y+fPnz3KdWZk7d64hyfj000/v2C8iIsKmrkOHDhmSjNmzZ1vbmjRpYjz88MPG+fPnbeZ95ZVXDF9fX+t+4szrmdX+lN3tnTZtmt2+YBiG8dJLL9ltT5kyZYzKlSsb169ft+n79NNPG4ULFzbS0tKyXI+j4yrJCA0NNVJSUqxtJ06cMDw8PIzx48dnOd+NGzeMa9euGSVLlrTZj4YMGWJYLBZj+/btNv1jYmIMScbq1autbbe/phmio6ON6Oho6/Px48cbHh4exqZNm2z6ff3114YkY/ny5VnWeeHCBSMwMNCoU6eO9fPldmlpaUZ4eLhRoUIFmzG9cOGCUahQIaNWrVrWtpEjRxqSjHfffddmGY899pghyVi0aJG17fr168ZDDz1kPPvss9a2jH2vSpUqNvUcPnzYyJMnj9GjR48st+XGjRvGxYsXDX9/f+P999+3trdp08bw9/c3Tp
8+bbNN5cqVs/uMiYiIMHx9fW3ez1euXDGCg4ONnj172tV56+uVk/3xXu33jrwvMz57n3nmGZv2X3/91ZBkjBkzxjA
2024-03-08 10:30:12 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
2024-03-10 12:31:28 +01:00
"plt.bar(company_country_fr[\"number_compagny\"], company_country_fr[\"country_fr\"])\n",
2024-03-08 10:30:12 +01:00
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
2024-03-10 12:31:28 +01:00
"plt.ylabel(\"Part de clients français\")\n",
"plt.title(\"Nationalité des clients de chaque compagnie de spectacle\")\n",
2024-03-08 10:30:12 +01:00
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 90,
"id": "b459f81f-6d30-44fa-ad65-e85acbf12fd2",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
2024-03-10 12:31:28 +01:00
" <th>country_fr</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>99.542095</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>99.909747</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>99.543280</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>99.501602</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.156470</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.265579</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>84.389610</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>77.596741</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>99.520205</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>98.471506</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased country_fr\n",
"0 10 0.0 99.542095\n",
"1 10 1.0 99.909747\n",
"2 11 0.0 99.543280\n",
"3 11 1.0 99.501602\n",
"4 12 0.0 0.156470\n",
"5 12 1.0 0.265579\n",
"6 13 0.0 84.389610\n",
"7 13 1.0 77.596741\n",
"8 14 0.0 99.520205\n",
"9 14 1.0 98.471506"
2024-03-08 10:30:12 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 90,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# graphique sur le train set\n",
"\n",
"company_country_fr = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"country_fr\"]].mean().reset_index()\n",
"company_country_fr[\"country_fr\"] = 100 * company_country_fr[\"country_fr\"]\n",
"company_country_fr"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 92,
"id": "4a037b48-1d65-4ed3-a012-7d6f5a312533",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 12:31:28 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAIiCAYAAADCc/lyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABptElEQVR4nO3dd3QUZf/+8WuTkF5IQipCEjpIF6lCQKRXURFBqgVERASkiEiQjgoIUh4VCYqUR0VEpIqA9B5ARAQMRcpDbwECJPP7g1/2y5IEdiAhm/B+nbPnZO9pnymZzZWZuddiGIYhAAAAAIDdnLK6AAAAAADIbghSAAAAAGASQQoAAAAATCJIAQAAAIBJBCkAAAAAMIkgBQAAAAAmEaQAAAAAwCSCFAAAAACYRJACAAAAAJMIUnjkxMbGymKxWF8uLi567LHH1LFjRx09ejRDlzV8+HDNmzfvgeZx8OBBWSwWxcbGZkhN9oqMjFSHDh0eeh0zZ87UuHHjMnSeBw8eVKNGjRQQECCLxaIePXpk6PwftoexL2JiYmSxWDJt/sgYK1eulMVi0cqVK7O6FOu59eDBg1ldSqZbuHChYmJiMn05mfV7+PXXXysoKEiXLl2ytmXE51V6supzzF7pfe6cO3dOuXPnzrTtguyPIIVH1rRp07R+/XotW7ZMr732mmbNmqXq1asrISEhw5aRmR9MD1tYWJjWr1+vRo0aZepyMiNIvfPOO9q4caO++uorrV+/Xu+8806Gzv9he1j7Ao6vfPnyWr9+vcqXL5/VpTxSFi5cqMGDB2d1GfflypUreu+999S3b1/5+PhY2zPz88rRz1npfe74+/vrnXfe0bvvvqvr168//MLg8FyyugAgq5QsWVIVKlSQJNWqVUtJSUkaMmSI5s2bpzZt2jzQvK9evSoPD4+MKNNhuLm5qXLlylldxn35448/VLFiRTVv3vyu4924ccN6ldKRZed9gYzl6+vLsQBTpk+frjNnzujVV1+973lcvXpV7u7udl8ty87nrC5dumjo0KH6/vvv1bp166wuBw6GK1LA/5dykj906JAkafDgwapUqZICAgLk6+ur8uXLa+rUqTIMw2a6yMhINW7cWHPnzlW5cuXk7u6uwYMHy2KxKCEhQdOnT7feRlizZs271nDs2DG1bNlSPj4+8vPz04svvqgTJ06kOe6WLVvUtGlTBQQEyN3dXeXKldN///tfu9Y1MTFRH374oYoXLy53d3cFBgaqVq1aWrduXbrTpHdrxr59+9S6dWsFBwfLzc1NxYsX18SJE23GSbn9aNasWRowYIDCw8Pl6+urZ555Rnv37rWOV7NmTf3yyy86dOiQze2XKSZPnqwyZcrI29tbPj4+KlasmN577710a05Z7v79+7Vo0SLr/A4ePGgd9s0336hXr17Kmzev3NzctH//fp06dUpdu3ZViRIl5O3treDgYD399NNavXp1mtvk448/1pgxYxQVFSVvb29VqVJFGzZsSFXPxo0b1aRJEwUGBsrd3V0FCxa0uc1w//796tixowoXLixPT0/lzZtXTZo00a5du+65L06dOqXXX39d+fLlk5ubm4KCglStWjX9+uuv6W6fFL/88ovKli0rNzc3RUVF6eOPP05zPMMwNGnSJJUtW1YeHh7y9/fX888/r3/++cdmvO3bt6tx48bWYyI8PFyNGjXSv//+e89aFi9erNq1a8vPz0+enp4qXry4RowYYTPO/PnzVaVKFXl6esrHx0d16tTR+vXrbcZJuSVq586deuGFF+Tn56eAgAD17NlTN2/e1N69e1W/fn35+PgoMjJSo0ePtpk+5fiYMWOGevbsqdDQUHl4eCg6Olrbt2+3GXfLli1q1aqVIiMj5eHhocjISL300kvWc8nt1qxZoypVqsjd3V158+bVwIED9eWXX6a6JS7lvLJ48WKVL19eHh4eKlasmL766qs067zz1j57zg9XrlxR7969FRUVJX
d3dwUEBKhChQqaNWvWXfeRJG3YsEHVqlWTu7u7wsPD1b9/f924cSPNcefMmaMqVarIy8tL3t7eqlevXqptmBZ76uvQoYO8vb21e/du1a5dW15eXgoKClK3bt105coVm/nZe/xKdz8OO3ToYD3H3X6eStl/EydOVI0aNRQcHCwvLy+VKlVKo0ePTnP72HO8Z+Q2lW6dR5s0aaLcuXNb2+72eZVyy+bSpUvVqVMnBQUFydPTU4mJiQ90zkr5Hd29e7deeukl+fn5KSQkRJ06ddKFCxfuuR72nGfs2ef3+twJCQlRnTp1NGXKFLu2Lx4tjv1vV+Ah2r9/vyQpKChI0q0Tf+fOnZU/f35Jt/5weOutt3T06FF98MEHNtNu27ZNe/bs0fvvv6+oqCh5eXmpefPmevrpp1WrVi0NHDhQ0q3/Hqfn6tWreuaZZ3Ts2DGNGDFCRYoU0S+//KIXX3wx1bgrVqxQ/fr1ValSJU2ZMkV+fn6aPXu2XnzxRV25csXm2aY73bx5Uw0aNNDq1avVo0cPPf3007p586Y2bNigw4cPq2rVqnZvsz///FNVq1ZV/vz59cknnyg0NFRLlixR9+7ddfr0aQ0aNMhm/Pfee0/VqlXTl19+qYsXL6pv375q0qSJ9uzZI2dnZ02aNEmvv/66Dhw4oB9//NFm2tmzZ6tr165666239PHHH8vJyUn79+/Xn3/+mW59Kbc9PfvssypYsKA1IISFhVn/6Onfv7+qVKmiKVOmyMnJScHBwTp16pQkadCgQQoNDdXly5f1448/qmbNmlq+fHmqQDxx4kQVK1bMemvIwIED1bBhQ8XHx8vPz0+StGTJEjVp0kTFixfXmDFjlD9/fh08eFBLly61zufYsWMKDAzUyJEjFRQUpLNnz2r69OmqVKmStm/frqJFi6a7rm3bttW2bds0bNgwFSlSROfPn9e2bdt05syZ9HegpOXLl6tZs2aqUqWKZs+eraSkJI0ePVr/+9//Uo3buXNnxcbGqnv37ho1apTOnj2rDz/8UFWrVtWOHTsUEhKihIQE1alTR1FRUZo4caJCQkJ04sQJrVixwuZ5jLRMnTpVr732mqKjozVlyhQFBwfr77//1h9//GEdZ+bMmWrTpo3q1q2rWbNmKTExUaNHj7bum6eeespmni1bttTLL7+szp07a9myZdY/aH/99Vd17dpVvXv31syZM9W3b18VKlRILVq0sJn+vffeU/ny5fXll1/qwoULiomJUc2aNbV9+3YVKFBA0q1zRdGiRdWqVSsFBATo+PHjmjx5sp588kn9+eefypMnjyRp586dqlOnjooUKaLp06fL09NTU6ZM0YwZM9LcHjt27FCvXr3Ur18/hYSE6Msvv9Qrr7yiQoUKqUaNGuluR3vPDz179tQ333yjoUOHqly5ckpISNAff/xxz2Pmzz//VO3atRUZGanY2Fh5enpq0qRJmjlzZqpxhw8frvfff18dO3bU+++/r+vXr+ujjz5S9erVtWnTJpUoUSLd5dhb340bN9SwYUN17txZ/fr107p16zR06FAdOnRIP//8s3U8e45f6d7H4cCBA5WQkKDvv//eJsCHhYVJkg4cOKDWrVsrKipKrq6u2rFjh4YNG6a//vrLJgjbc7yn5UG26b///qtdu3bpjTfesGlfv379PT+vOnXqpEaNGumbb75RQkKCcuXK9UDnrBTPPfecXnzxRb3yyivatWuX+vfvL0mp/mlwO3vPM/bs87t97qSoWbOm+vfvr/Pnz9sEUEAG8IiZNm2aIcnYsGGDcePGDePSpUvGggULjKCgIMPHx8c4ceJEqmmSkpKMGzduGB9++KERGBhoJCcnW4dFREQYzs7Oxt69e1NN5+XlZbRv396uuiZPnmxIMn766Seb9tdee82QZEybNs3aVqxYMaNcuXLGjRs3bMZt3LixERYWZiQlJaW7nK+//tqQZHzxxRd3rSciIsKm9vj4+FR11KtXz3jssceMCxcu2E
zbrVs3w93d3Th79qxhGIaxYsUKQ5LRsGFDm/H++9//GpKM9evXW9saNWpkREREpKqnW7duRu7cue9a893WpVGjRjZ
2024-03-08 10:30:12 +01:00
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# generic function to generate the barplot - nationality\n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"multiple_barplot(company_country_fr, x=\"number_company\", y=\"country_fr\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de clients français (%)\", \n",
" title = \"Part de clients français des compagnies de spectacle (train set)\")"
]
},
{
"cell_type": "markdown",
"id": "ecfd112e-270a-4223-b80f-7e95e57d199d",
"metadata": {},
"source": [
"### 2. campaigns_information"
2024-03-08 10:30:12 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 189,
"id": "b37e7ddf-321a-4ebe-9742-9e760a541d29",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 688953\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_campaigns 0\n",
"nb_campaigns_opened 0\n",
"time_to_open 301495\n",
"number_compagny 0\n",
"dtype: int64"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",campaigns_information_spectacle.shape[0])\n",
"campaigns_information_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 192,
"id": "de1ecaac-25bb-4853-b8ab-3ef2ca6917ed",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>customer_id</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
2024-03-08 10:30:12 +01:00
" <th>number_compagny</th>\n",
2024-03-10 12:31:28 +01:00
" <th>no_campaign_opened</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2024-03-10 12:31:28 +01:00
" <td>29</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
2024-03-08 10:30:12 +01:00
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>True</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>37</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>39</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:16:38</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>41</td>\n",
" <td>4</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:12:29</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 12:31:28 +01:00
" <td>44</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254699</th>\n",
" <td>6837769</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 23:42:15</td>\n",
2024-03-08 10:30:12 +01:00
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254700</th>\n",
" <td>6875038</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254701</th>\n",
" <td>6875066</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254702</th>\n",
" <td>6875099</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>NaT</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254703</th>\n",
" <td>6875143</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0 days 01:17:01</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-03-10 12:31:28 +01:00
"<p>688953 rows × 6 columns</p>\n",
2024-03-03 09:32:45 +01:00
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" customer_id nb_campaigns nb_campaigns_opened time_to_open \\\n",
"0 29 4 0.0 NaT \n",
"1 37 3 0.0 NaT \n",
"2 39 4 1.0 0 days 05:16:38 \n",
"3 41 4 1.0 0 days 01:12:29 \n",
"4 44 4 0.0 NaT \n",
"... ... ... ... ... \n",
"254699 6837769 1 1.0 0 days 23:42:15 \n",
"254700 6875038 1 0.0 NaT \n",
"254701 6875066 1 0.0 NaT \n",
"254702 6875099 1 0.0 NaT \n",
"254703 6875143 1 1.0 0 days 01:17:01 \n",
"\n",
" number_compagny no_campaign_opened \n",
"0 10 True \n",
"1 10 True \n",
"2 10 False \n",
"3 10 False \n",
"4 10 True \n",
"... ... ... \n",
"254699 14 False \n",
"254700 14 True \n",
"254701 14 True \n",
"254702 14 True \n",
"254703 14 False \n",
"\n",
"[688953 rows x 6 columns]"
2024-03-03 09:32:45 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 192,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# part de clients n'ouvrant jamais les mails par compagnie\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"campaigns_information_spectacle[\"no_campaign_opened\"] = pd.isna(campaigns_information_spectacle[\"time_to_open\"])\n",
"campaigns_information_spectacle"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 197,
"id": "b5a0060f-a9dd-435b-844f-b24674b8bc27",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 12:31:28 +01:00
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.605656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>0.294001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>0.475719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>0.353820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>0.428148</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny no_campaign_opened\n",
"0 10 0.605656\n",
"1 11 0.294001\n",
"2 12 0.475719\n",
"3 13 0.353820\n",
"4 14 0.428148"
]
},
"execution_count": 197,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_lazy_customers = campaigns_information_spectacle.groupby(\"number_compagny\")[\"no_campaign_opened\"].mean().reset_index()\n",
"company_lazy_customers"
]
},
{
"cell_type": "code",
"execution_count": 198,
"id": "788c90e0-f13a-4804-ace7-e5159fddd7fd",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAo0AAAHFCAYAAACXTsPRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABcSElEQVR4nO3dd1gUV9sG8HvpHQWlKQJGQRSwR9EodoJEsEUNKoqaiMauMXYFNagxSkxiS1Q0lhBrjDEFu4ldQE00dgEVREHFCric7w8/9s2yy+6CyADev+vaS/bMzDnPzM7OPp6ZMyMTQggQEREREWmgJ3UARERERFT2MWkkIiIiIq2YNBIRERGRVkwaiYiIiEgrJo1EREREpBWTRiIiIiLSikkjEREREWnFpJGIiIiItGLSSERERERaFSlpjImJgUwmU7wMDAxQvXp1hIWF4datWyUa2GeffYYdO3a8Uh03btyATCZDTExMicSkK1dXVwwcOLDU49i4cSOio6NfaxuFmTVrFlxdXSVpu7iePn2KWbNm4cCBA1KHUip2796NWbNmSR3GG0kmkylt+wMHDkAmk1W4fa/gsY/KplmzZkEmk0kdBgBg4MCB5e63o7iWLl1aKvlImzZt0KZNm9dSd7F6GtesWYOjR48iLi4OH374ITZt2oRWrVrhyZMnJRZYSSSNZYWjoyOOHj2KwMDA19qOlEljefT06VNERERUuB/uwuzevRsRERFSh/FGOnr0KIYMGSJ1GEQAgCFDhuDo0aNSh/HGKa2k8XUyKM5CXl5eaNKkCQCgbdu2kMvlmD17Nnbs2IG+ffu+UkDPnj2DqanpK9VR1hgbG6N58+ZSh0FvqKdPn8LMzEzqMN5oFeX7L5fL8eLFCxgbG0sdCr2C6tWro3r16lKHQeVQiVzTmH9ATEpKAgBERESgWbNmsLGxgZWVFRo1aoRVq1ZBCKG0nKurK9577z1s27YNDRs2hImJCSIiIiCTyfDkyROsXbtWcSpcW1fr7du30atXL1haWsLa2hq9e/dGWlqa2nlPnTqFoKAg2NjYwMTEBA0bNsSPP/6o07pmZ2cjMjISnp6eMDExga2tLdq2bYsjR44Uukxhp6cvX76MkJAQ2NnZwdjYGJ6envjmm2+U5sk/jbVp0yZMnToVTk5OsLKyQocOHXDx4kXFfG3atMEvv/yCpKQkpUsI8i1btgz169eHhYUFLC0tUadOHUyZMkXjuubHvXDhQixatAhubm6wsLCAr68vjh07pnVbffPNN2jdujXs7Oxgbm4Ob29vLFiwALm5uYp5Zs+eDQMDA6SkpKgsP2jQINja2uL58+cAgNjYWHTq1AmOjo4wNTWFp6cnJk2apNLDPXDgQFhYWODKlSvo3LkzLCws4OzsjPHjxyM7O1uxblWrVgUAxT4nk8m0nlpLTk5Gv379lD6zL774Anl5eYp5Cjv1WHA/iI6Ohkwmw5UrV1Ta+fTTT2FkZIR79+4pyvbs2YP27dvDysoKZmZmaNmyJfbu3au0XP5pp/j4ePTs2ROVK1fGW2+9hYEDByr2rf/uHzdu3Ch0XePi4hAcHIzq1avDxMQEtWrVwtChQ5Viyt/e6k4vqTsFlpeXh6+++goNGjSAqakpKlWqhObNm2Pnzp2KeQqeys1X8NRn/uUy+/fvx7Bhw1ClShXY2tqie/fuuH37dqHr9d+4LSws8O+//8Lf3x/m5uZwdHTEvHnzAADHjh3DO++8A3Nzc7i7u2Pt2rVKy9+9exfDhw9H3bp1YWFhATs7O7Rr1w6HDx9WaauwddImfx3j4uIQFhYGGxsbmJubo0uXLrh27ZrK/KtXr0b9+vVhYmICGxsbdOvWDRcuXFCap7BTVwU/x/z9dcGCBZgzZw7c3NxgbGyM/fv3F2kdsrKyMGHCBLi5ucHIyAjVqlXDmDFjVL63mzdvRrNmzWBtbQ0zMzPUrFkTgwYN0lq/LvtUXl4eFixYgDp16sDY2B
h2dnYIDQ3FzZs3VbaNl5cXjh49ihYtWsDU1BSurq5Ys2YNAOCXX35Bo0aNYGZmBm9vb/z2229Ky+fv8wkJCejevTusrKxgbW2Nfv364e7du0rz6no8A4Bvv/0W7u7uMDY2Rt26dbFx48ZCPy9djteFnZ6OjY2Fr68vzM3NYWFhAX9/fyQkJCjNc+3aNfTp0wdOTk4wNjaGvb092rdvj8TExMI/pP8XExMDDw8PxbFz3bp1aufLycnBnDlzFJ9X1apVERYWprIN1dElvvzcY/v27fDx8YGJiQlq1qyJJUuWqNSn6/6rbT90dXXFP//8g4MHDyqOv/mf3/PnzzF+/Hg0aNAA1tbWsLGxga+vL3766SeVeHTZ30t6m/5XsXoaC8r/0cv/Eb5x4waGDh2KGjVqAHh58B05ciRu3bqFGTNmKC0bHx+PCxcuYNq0aXBzc4O5uTm6du2Kdu3aoW3btpg+fToAwMrKqtD2nz17hg4dOuD27duIioqCu7s7fvnlF/Tu3Vtl3v379+Pdd99Fs2bNsHz5clhbW+OHH35A79698fTpU41Jw4sXLxAQEIDDhw9jzJgxaNeuHV68eIFjx44hOTkZLVq00HmbnT9/Hi1atECNGjXwxRdfwMHBAb///jtGjRqFe/fuYebMmUrzT5kyBS1btsR3332HrKwsfPrpp+jSpQsuXLgAfX19LF26FB999BGuXr2K7du3Ky37ww8/YPjw4Rg5ciQWLlwIPT09XLlyBefPn9cp1m+++QZ16tRRnPqePn06OnfujOvXr8Pa2hrAy4NQwR/Fq1evIiQkRPFlO3PmDObOnYt///0Xq1evBgAMHToUc+fOxYoVKzBnzhzFspmZmfjhhx8wYsQImJiYAHiZZHfu3BljxoyBubk5/v33X8yfPx8nTpzAvn37lNrOzc1FUFAQBg8ejPHjx+PQoUOYPXs2rK2tMWPGDDg6OuK3337Du+++i8GDBytOHebvw+rcvXsXLVq0QE5ODmbPng1XV1fs2rULEyZMwNWrV7F06VKdtme+fv364dNPP0VMTIzSusvlcqxfvx5dunRBlSpVAADr169HaGgogoODsXbtWhgaGmLFihXw9/fH77//jvbt2yvV3b17d/Tp0wfh4eF48uQJvLy88OTJE2zZskXptJSjo2Oh8V29ehW+vr4YMmQIrK2tcePGDSxatAjvvPMOzp07B0NDwyKtL/AyMVm/fj0GDx6MyMhIGBkZIT4+XmPyqs2QIUMQGBiIjRs3IiUlBZ988gn69eunsk+ok5ubi+7duyM8PByffPIJNm7ciMmTJyMrKwtbt27Fp59+iurVq+Orr77CwIED4eXlhcaNGwN4uY8CwMyZM+Hg4IDHjx9j+/btaNOmDfbu3Vui1xQNHjwYHTt2VKzjtGnT0KZNG5w9exaVKlUCAERFRWHKlCn44IMPEBUVhYyMDMyaNQu+vr44efIkateuXay2lyxZAnd3dyxcuBBWVlZFqufp06fw8/PDzZs3MWXKFPj4+OCff/7BjBkzcO7cOezZswcymQxHjx5F79690bt3b8yaNQsmJiZISkrS6TPUZZ8aNmwYVq5ciREjRuC9997DjRs3MH36dBw4cADx8fGK7xkApKWlISwsDBMnTlR89oMGDUJKSgq2bNmCKVOmwNraGpGRkejatSuuXbsGJycnpZi6deuGXr16ITw8HP/88w+mT5+O8+fP4/jx44rvja7Hs5UrV2Lo0KHo0aMHFi9ejIcPHyIiIkLxH+CCdDleq/PZZ59h2rRpCAsLw7Rp05CTk4PPP/8crVq1wokTJ1C3bl0AQOfOnSGXy7FgwQLUqFED9+7dw5EjR/DgwQONn1NMTAzCwsIQHByML774Ag8fPsSsWbOQnZ0NPb3/9V/l5eUhODgYhw8fxsSJE9GiRQskJSVh5syZaNOmDU6dOqXxbKSu8SUmJmLMmDGYNWsWHBwcsGHDBowePRo5OTmYMGECAN33X0D7frh9+3
b07NkT1tbWit+K/B777OxsZGZmYsKECahWrRpycnKwZ88edO/eHWvWrEFoaKgi7uIcQ191myoRRbBmzRoBQBw7dkz
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_lazy_customers[\"number_compagny\"], company_lazy_customers[\"no_campaign_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Part de clients n'ayant ouvert aucun mail\")\n",
"plt.title(\"Part de clients n'ayant ouvert aucun mail pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
2024-03-03 09:32:45 +01:00
"plt.show()"
]
},
2024-03-08 10:30:12 +01:00
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 203,
"id": "c48015c2-6451-4089-93b7-6d55d3b2e553",
2024-03-08 10:30:12 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_compagny</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>ratio_campaigns_opened</th>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>734772</td>\n",
" <td>126151.0</td>\n",
" <td>0.171687</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>342396</td>\n",
" <td>129833.0</td>\n",
" <td>0.379190</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
" <td>3168123</td>\n",
" <td>810722.0</td>\n",
" <td>0.255900</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>13</td>\n",
" <td>3218569</td>\n",
" <td>793581.0</td>\n",
" <td>0.246563</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 12:31:28 +01:00
" <td>14</td>\n",
" <td>2427043</td>\n",
" <td>723846.0</td>\n",
" <td>0.298242</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
"0 10 734772 126151.0 0.171687\n",
"1 11 342396 129833.0 0.379190\n",
"2 12 3168123 810722.0 0.255900\n",
"3 13 3218569 793581.0 0.246563\n",
"4 14 2427043 723846.0 0.298242"
]
},
"execution_count": 203,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# taux d'ouverture des campaigns\n",
"\n",
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
"company_campaigns_stats"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 15,
2024-03-10 12:31:28 +01:00
"id": "d06ab865-4832-4fe9-918b-e5ff72bebee4",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'company_campaigns_stats' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
2024-03-10 17:41:43 +01:00
"Cell \u001b[0;32mIn[15], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Création du barplot\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mbar(\u001b[43mcompany_campaigns_stats\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnumber_compagny\u001b[39m\u001b[38;5;124m\"\u001b[39m], \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m*\u001b[39m company_campaigns_stats[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mratio_campaigns_opened\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Ajout de titres et d'étiquettes\u001b[39;00m\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mxlabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCompany\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
2024-03-10 12:31:28 +01:00
"\u001b[0;31mNameError\u001b[0m: name 'company_campaigns_stats' is not defined"
]
}
],
"source": [
"# Création du barplot\n",
"plt.bar(company_campaigns_stats[\"number_compagny\"], 100 * company_campaigns_stats[\"ratio_campaigns_opened\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Taux d'ouverture (%)\")\n",
"plt.title(\"Taux d'ouverture des campagnes de mails pour les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 219,
"id": "5c37e063-a717-4a8c-828e-b386b87e8409",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkoAAAHFCAYAAAANLdYJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABbXUlEQVR4nO3dd1gUV/828HulLB0EgQVdARWxYSUSNQloFHuPPSoajQY1sT2WqBEsoEYRyyMao4IFjcYSW1RsmAQL9l4fFI0iiQUUpJ/3D1/m5wJDE1jU+3Nde13OmTMz351dZm/PzOwqhBACRERERJRDOW0XQERERFRWMSgRERERyWBQIiIiIpLBoEREREQkg0GJiIiISAaDEhEREZEMBiUiIiIiGQxKRERERDIYlIiIiIhklKmgFBISAoVCgdOnT8v2uXv3LhQKBUJCQkqvsBIUFhaGoKCgUtteUlISfH19cfTo0VLbZnHz9vaGo6OjtssoUQ8fPoSvry/Onz+v7VIK7H3728zO09MTnp6eGm0KhQK+vr6lVsP78PdbkOO8tuT2Gpc0X19fKBSKUq/j6tWr8PX1xd27d0t0O0WVV325fQY8ffoUvXv3ho2NDRQKBbp06VJstegW25pKiZ2dHY4fP46qVatqu5RiERYWhsuXL2P06NGlsr2kpCT4+fkBQKkfEKjgHj58CD8/Pzg6OqJ+/fraLocALFu2TNsl8O/3A1Ea77WrV6/Cz88Pnp6eZfI/nnnVN23aNHz33XcabTNnzsT27duxevVqVK1aFZaWlsVWyzsXlJRKJT7++GNtl6EVGRkZSE9Ph1Kp1HYpVEKyXuOSXj/fQ4VXq1YtbZdAeB0WjYyMtF1GieJ7LW+5DZRcvnwZVatWRb9+/Yp9e2Xq1FtByA3v37p1C3379oWNjQ2USiVq1qyJ//73vxp9jh49CoVCgY0bN2LKlCmwt7eHmZkZWrZsiRs3bhRo+8W5HU9PT+zZswf37t2DQqGQHm8+z3nz5mHWrFlwcnKCUqnEkSNHAACnT59Gp06dYGlpCQMDAzRo0ACbN2/Od99ZW1sDAPz8/KTteXt7AwBu376NQYMGwdnZGUZGRqhYsSI6duyIS5cuaawna+g8+5Bo1vPOOi1w69YtmJmZoUePHhr9Dh8+DB0dHUybNi3f/R0SEgIXFxdpX69duzbfZbJkZmZi3rx5qFGjBpRKJWxsbDBgwAA8ePBAo5+jo6O0D9705vD3P//8A319/Vxrvn79OhQKBRYvXiy1xcbGYtiwYahUqRL09fXh5OQEPz8/jRCU12v80UcfAQAGDRokvU5Zp3jkhuWzD0eXxHsoy8OHD9GzZ0+YmprC3NwcvXr1QmxsbK59C7KdpKQkjB8/Hk5OTjAwMIClpSXc3NywcePGPOvIei8ePnwYQ4cOhZWVFczMzDBgwAAkJiYiNjYWPXv2hIWFBezs7DB+/HikpaVprMPPzw/u7u6wtLSEmZkZGjZsiFWrViH774UX5HRIUZ8HkP97Jr+/39wU9pi3evVq1KtXT6q9a9euuHbtmkYfb29vmJiY4Pr162jdujWMjY1hZ2eHOXPmAABOnDiBTz75BMbGxqhevTpCQ0Nzre3Zs2cYNGgQLC0tYWxsjI4dO+J///ufRh9PT0/UqVMHx44dQ9OmTWFkZITBgwcDABISEqR9ra+vj4oVK2L06NFITEzMd18LITBv3jw4ODjAwMAADRs2xO+//55r37fZDgDs27cPn3/+OczNzWFkZISaNWsiICAgz2Vye6+lpqZi1qxZ0vHM2toagwYNwj///KPRz9HRER06dMC+ffvQsGFDGBoaokaNGli9erXUJyQkRDouN2/eXHovZX2unjt3Dh06dJA+5+zt7dG+ffscx87swsPD0blzZ1SqVAkGBgaoVq0ahg0bhn///TdH3+vXr6NPnz6wtbWFUqlE5cqVMWDAAKSkpORb35vHuqzj3MGDB3Ht2jWpb9
bnUHBwMOrVqwcTExOYmpqiRo0a+P777/N8HjmIMmTNmjUCgIiKipLtEx0dLQCINWvWSG1XrlwR5ubmwtXVVaxdu1YcOHBAjBs3TpQrV074+vpK/Y4cOSIACEdHR9GvXz+xZ88esXHjRlG5cmXh7Ows0tPT86yvuLdz5coV0axZM6FSqcTx48elx5vPs2LFiqJ58+bi119/FQcOHBDR0dHi8OHDQl9fX3z66afil19+Efv27RPe3t459kt2ycnJYt++fQKA+Oqrr6Tt3b59WwghREREhBg3bpz49ddfRUREhNi+fbvo0qWLMDQ0FNevX8/xOkVHR2usP+t5HzlyRGrbtGmTACAWLVokhBDi0aNHwtbWVnh4eOS7v7O207lzZ7Fr1y6xfv16Ua1aNaFWq4WDg0OeywohxNdffy0AiJEjR4p9+/aJ5cuXC2tra6FWq8U///wj9XNwcBADBw7MsbyHh4fw8PCQprt27SrUarXIyMjQ6DdhwgShr68v/v33X+k5ZtW4YsUKcfDgQTFz5kyhVCqFt7e3tJzca3zhwgXpuU+dOlV6ne7fv59rXVkGDhyosV9K4j0khBBJSUmiZs2awtzcXCxZskTs379ffPvtt6Jy5co5li/odoYNGyaMjIxEYGCgOHLkiNi9e7eYM2eOWLJkSZ61ZO0nJycnMW7cOHHgwAExd+5coaOjI/r06SMaNmwoZs2aJcLDw8XEiRMFALFgwQKNdXh7e4tVq1aJ8PBwER4eLmbOnCkMDQ2Fn5+fRr/c9jsAMX369Ld+HgV5z+T395ubwhzz/P39BQDRp08fsWfPHrF27VpRpUoVYW5uLm7evCn1GzhwoNDX1xc1a9YUixYtEuHh4WLQoEECgJg8ebKoXr26WLVqldi/f7/o0KGDACBOnz6d4zVTq9Vi8ODB4vfffxc//fSTsLGxEWq1Wjx79kxjn1taWgq1Wi2WLFkijhw5IiIiIkRiYqKoX7++qFChgggMDBQHDx4UixYtEubm5qJFixYiMzMzz/09ffp0aT9mbb9ixYpCpVJpvMZvu52ff/5ZKBQK4enpKcLCwsTBgwfFsmXLhI+PT45a3pT9vZaRkSHatGkjjI2NhZ+fnwgPDxc///yzqFixoqhVq5ZISkqS+jo4OIhKlSqJWrVqibVr14r9+/eLHj16CAAiIiJCCCFEXFyc9Hr/97//ld5LcXFx4uXLl8LKykq4ubmJzZs3i4iICPHLL7+I4cOHi6tXr+b5fIODg0VAQIDYuXOniIiIEKGhoaJevXrCxcVFpKamSv3Onz8vTExMhKOjo1i+fLk4dOiQWL9+vejZs6dISEjIsz4hNI91ycnJ4vjx46JBgwaiSpUqUt/4+HixceNGAUCMGjVKHDhwQBw8eFAsX75cfPvtt3k+j+zei6DUunVrUalSJREfH6/Rd+TIkcLAwEA8ffpUCPF/B4127dpp9Nu8ebMAIIUUOSWxnfbt2+f6oZ/1PKtWrarxBhNCiBo1aogGDRqItLQ0jfYOHToIOzu7HB/kb/rnn39yHNzlpKeni9TUVOHs7CzGjBkjtRcmKAkhxDfffCP09fXF8ePHRYsWLYSNjY14+PBhntvOyMgQ9vb2omHDhhoHo7t37wo9Pb18g9K1a9cEAI0DkhBCnDx5UgAQ33//vdRW0KC0c+dOAUAcOHBAaktPTxf29vaie/fuUtuwYcOEiYmJuHfvnsb65s+fLwCIK1euCCHyfo2joqJkQ0thg1Jxv4eCg4MFAPHbb79ptA8dOjRHzQXdTp06dUSXLl1ktykn6704atQojfYuXboIACIwMFCjvX79+qJhw4ay68vIyBBpaWlixowZwsrKSuO9V5CgVNTnUdD3TGH+foUo+LHo2bNnwtDQMEe/mJgYoVQqRd++faW2gQMHCgBi69atUltaWpqwtrYWAMTZs2el9idPnggdHR0xduxYqS3rNevatavGtv766y8BQMyaNUtq8/DwEADEoUOHNPoGBA
SIcuXK5fis+PXXXwUAsXfvXtl98uzZM2FgYCC7/Tdf47fZzosXL4SZmZn45JNP8gxUBQlKWR/4b+5zIf7vOLFs2TK
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Grouped bar chart showing both indicators side by side for each company\n",
"\n",
"# Width shared by both series; it must be defined before the first plt.bar call.\n",
"bar_width = 0.4\n",
"companies = company_campaigns_stats[\"number_compagny\"]\n",
"\n",
"# First series: campaign opening rate, centred on the company number.\n",
"# The width is passed explicitly because matplotlib's default bar width (0.8)\n",
"# would make this series overlap the second one.\n",
"plt.bar(companies, 100 * company_campaigns_stats[\"ratio_campaigns_opened\"],\n",
"        label = \"taux d'ouverture\", alpha = 0.7, width = bar_width)\n",
"\n",
"# Second series: share of customers who opened at least one mail,\n",
"# shifted right by one bar width so it sits next to the first series.\n",
"# NOTE(review): values are matched to companies by position, so company_lazy_customers\n",
"# must list the same companies in the same order as company_campaigns_stats - confirm.\n",
"indices2 = companies + bar_width\n",
"plt.bar(indices2, 100 * (1 - company_lazy_customers[\"no_campaign_opened\"]), \n",
"        label='Part de clients ouvrant des mails', alpha=0.7, width=bar_width)\n",
"\n",
"# Put one tick under the middle of each pair of bars, labelled with the company number\n",
"plt.xticks(companies + bar_width / 2, companies)\n",
"\n",
"# Axis labels, title and legend\n",
"plt.xlabel('Compagnie')\n",
"plt.ylabel('Taux (%)')\n",
"plt.title('Lien entre taux d ouverture des mails et nombre de clients actifs')\n",
"plt.legend()\n",
"\n",
"# Display the figure\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 102,
"id": "4fdf4134-d32c-42c3-ab4f-36ad4783332c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_299341</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 05:47:26.333333333</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_63788</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>62.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>393.205891</td>\n",
" <td>281.017639</td>\n",
" <td>112.188252</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:13:51</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_759946</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_20653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>10.0</td>\n",
" <td>1 days 00:45:54</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_824705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_299341 0.0 0.0 0.0 0.0 \n",
"1 10_63788 3.0 2.0 62.0 1.0 \n",
"2 10_759946 0.0 0.0 0.0 0.0 \n",
"3 10_20653 0.0 0.0 0.0 0.0 \n",
"4 10_824705 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 NaN NaN \n",
"1 1.0 393.205891 281.017639 \n",
"2 0.0 NaN NaN \n",
"3 0.0 NaN NaN \n",
"4 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
"0 NaN 0.0 ... male \n",
"1 112.188252 3.0 ... female \n",
"2 NaN 0.0 ... other \n",
"3 NaN 0.0 ... male \n",
"4 NaN 0.0 ... other \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 0 1 0 1.0 12.0 \n",
"1 1 0 0 1.0 3.0 \n",
"2 0 0 1 NaN 0.0 \n",
"3 0 1 0 1.0 11.0 \n",
"4 0 0 1 NaN 0.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
"1 1.0 0 days 05:13:51 1.0 \n",
"2 0.0 NaN 0.0 \n",
"3 10.0 1 days 00:45:54 0.0 \n",
"4 0.0 NaN 0.0 \n",
"\n",
" number_company \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"\n",
"[5 rows x 41 columns]"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# same statistics on the train set\n",
"\n",
"train_set_spectacle.head()"
]
},
{
"cell_type": "code",
"execution_count": 105,
"id": "14ff9886-742c-4a60-8824-5d31f7c76aea",
"metadata": {},
"outputs": [],
"source": [
"train_set_spectacle[\"no_campaign_opened\"] = train_set_spectacle[\"nb_campaigns_opened\"]==0"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "16285593-a0fa-461c-aeb8-c64ffdf9a0d6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>91.227517</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>62.343470</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>84.608320</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>78.598682</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>90.124799</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>94.158651</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>72.903385</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>73.549517</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased no_campaign_opened\n",
"0 10 0.0 91.227517\n",
"1 10 1.0 62.343470\n",
"2 11 0.0 84.608320\n",
"3 11 1.0 78.598682\n",
"4 12 0.0 100.000000\n",
"5 12 1.0 100.000000\n",
"6 13 0.0 90.124799\n",
"7 13 1.0 94.158651\n",
"8 14 0.0 72.903385\n",
"9 14 1.0 73.549517"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"company_lazy_customers = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"no_campaign_opened\"].mean().reset_index()\n",
"company_lazy_customers[\"no_campaign_opened\"] = 100 * company_lazy_customers[\"no_campaign_opened\"] \n",
"company_lazy_customers"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "d35f00e3-b9b0-42b3-9dce-785c1ad5506c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAIiCAYAAADCc/lyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB0wklEQVR4nO3dd3yN9///8edJZItIIlNJqL2pUZRQtTcdSqtolaqqVaVLjNpVRbWlilaNalHV0qrV2jGCqpoxanzslSAk1+8Pv5yvIwnn4qQ5icf9dsvtlvO+1vO6znXOySvv63ofi2EYhgAAAAAAdnPJ7AAAAAAAkNVQSAEAAACASRRSAAAAAGAShRQAAAAAmEQhBQAAAAAmUUgBAAAAgEkUUgAAAABgEoUUAAAAAJhEIQUAAAAAJlFIIU3Tp0+XxWKx/uTIkUOPPPKIOnbsqGPHjjl0W8OGDdPChQsfaB2HDh2SxWLR9OnTHZLJXpGRkerQocN/nmPWrFkaN25chm5DyrzjCvPuPBczk8ViUXR0dGbHyHDHjx9XdHS0YmNjM3Q7mfk6TPkseBj9V++zsJXW+Z5yHh46dMiudQwePFglSpRQcnKyJCkhIUHR0dFatWqV4wNLWrVqlSwWS4at/0FNmjQpzfePvXv3yt3dXVu3bv3vQ2UTFFK4q2nTpmn9+vVatmyZOnfurNmzZ6tGjRqKj4932DYcUUg5i7CwMK1fv16NGzfO0O3wAY87LViwQO+//35mx3ioHD9+XIMGDcrwQgqZg/fZzPGgn6PHjx/XqFGjNHjwYLm43PozNyEhQYMGDcqwQqdChQpav369KlSokCHrf1DpFVJFihRRu3bt1KtXr/8+VDaRI7MDwLmVKlVKFStWlCTVrl1bSUlJGjJkiBYuXKh27do90LqvXr0qLy8vR8R0Gh4eHnr88cczOwYeQuXLl8/sCMB/7urVq/L09Hxoe82yowf9HP3kk0+UO3dutWrV6r7XkZCQIG9vb7vnz5UrV5b97O/evbsqVqyodevWqVq1apkdJ8uhRwqmpLxRHD58WJI0aNAgValSRQEBAcqVK5cqVKigqVOnyjAMm+UiIyPVpEkTzZ8/X+XLl5enp6cGDRoki8Wi+Ph4zZgxw3oZYa1ate6a4fjx43r22Wfl6+srPz8/Pffcczp58mSa827evFnNmjVTQECAPD09Vb58eX333Xd27ev169c1ePBgFS9eXJ6engoMDFTt2rW1bt26dJdJ7xKcffv2qW3btgoODpaHh4eKFy+uTz/91GaelEsDZs+erXfffVfh4eHKlSuXnnrqKe3Zs8c6X61atfTzzz/r8OHDNpdfpvjss89UtmxZ5cyZU76+vipWrJjeeeede+6vo49rQkKC+vbtqwIFCsjT01MBAQGqWLGiZs+efc8sx44d06uvvqp8+fLJ3d1d4eHhevrpp/W///3POs+RI0f0wgsv2BzTjz76yHoph/R/z8fo0aM1cuRIRUZGysvLS7Vq1dLevXt148YN9e/fX+Hh4fLz81PLli116tQpmywp5+6CBQtUpkwZeXp6qmDBgho/frzNfNeuXVOfPn1Urlw5+fn5KSAgQFWrVtWPP/6Yav8uXLigl19+WQEBAcqZM6caN26sgwcPprokLjo6WhaLRbt27dLzzz8vPz8/hYSEqFOnTrp48WKqnHde2nfp0iXrc+Du7q68efOqZ8+eqXqU582bpypVqsjPz0/e3t4qWLCgOnXqdM/n6dKlS+rcubMCAwOVM2dONWjQQHv37k1zXnteA+m5V76U187MmTPVu3dvhYaGysvLS1FRUdq2bVuq9dn7vnC383DVqlWqVKmSJKljx47W12HK87d582a1adPGes5FRkbq+eeft7532rudu3mQY2qxWNS9e3d98803Kl68uLy9vVW2bFktXrz4nssmJydr1KhRKlasmDw8PBQcHKz27dvr33//tZkvvctNa9WqZX2fP336tN
zd3dPsTf3nn39ksVisr7WUy7t+++03derUSUFBQfL29tb169e1f/9+dezYUYULF5a3t7fy5s2rpk2baufOnTbrdNT7bFrmzp2revXqKSwsTF5eXipevLj69++f6vV2+/7frkOHDoqMjLRpu9fn0N0u+3yQ95O01KpVS6VKldL69etVrVo163k9bdo0SdLPP/+sChUqyNvbW6VLl9bSpUttlrf3OXqQS1kTExM1depUtW3b1tobdejQIQUFBUmS9e8Oi8ViPTdTjsvWrVv19NNPy9/fX48++qgk+1/HaV3a16FDB+XMmVP79+9Xo0aNlDNnTuXLl099+vTR9evX77kvK1asUK1atRQYGCgvLy/lz59frVu3VkJCgs3+Dh061PpaDAoKUseOHXX69GnrPJGRkdq1a5dWr15t3ffbz7PHHntMxYsX1+eff27qWOMWeqRgyv79+yXJ+qZ06NAhdenSRfnz55ckbdiwQW+88YaOHTumDz74wGbZrVu3avfu3XrvvfdUoEAB+fj4qEWLFnryySdVu3Zt6wdprly50t3+1atX9dRTT+n48eMaPny4ihQpop9//lnPPfdcqnlXrlypBg0aqEqVKvr888/l5+enOXPm6LnnnlNCQsJd7ye5efOmGjZsqD///FM9e/bUk08+qZs3b2rDhg06cuSIqf/a/P3336pWrZry58+vjz76SKGhofr111/Vo0cPnTlzRgMHDrSZ/5133lH16tX15Zdf6tKlS3r77bfVtGlT7d69W66urpo0aZJeffVVHThwQAsWLLBZds6cOerWrZveeOMNjRkzRi4uLtq/f7/+/vvvu2bMiOPau3dvffPNNxo6dKjKly+v+Ph4/fXXXzp79uxdsxw7dkyVKlXSjRs39M4776hMmTI6e/asfv31V50/f14hISE6ffq0qlWrpsTERA0ZMkSRkZFavHix+vbtqwMHDmjSpEk26/z0009VpkwZffrpp7pw4YL69Omjpk2bqkqVKnJzc9NXX32lw4cPq2/fvnrllVe0aNEim+VjY2PVs2dPRUdHKzQ0VN9++63efPNNJSYmqm/fvpJu/cFz7tw59e3bV3nz5lViYqJ+//13tWrVStOmTVP79u0l3fojtGnTptq8ebOio6Otl4Q0aNAg3WPSunVrPffcc3r55Ze1c+dODRgwQJL01VdfpbtMQkKCoqKi9O+//1qP465du/TBBx9o586d+v3332WxWLR+/Xo999xzeu655xQdHS1PT08dPnxYK1asuOvzZBiGWrRooXXr1umDDz5QpUqVtHbtWjVs2DDVvGZfA7czk++dd95RhQoV9OWXX+rixYuKjo5WrVq1tG3bNhUsWFCS/efvvc7DChUqaNq0aerYsaPee+8962VIjzzyiKRb741FixZVmzZtFBAQoBMnTuizzz5TpUqV9PfffytPnjx2bSckJCTN4/IgxzTFzz//rJiYGA0ePFg5c+bUqFGj1LJlS+3Zs8d6vDp06JDqvfK1117T5MmT1b17dzVp0kSHDh3S+++/r1WrVmnr1q3WfbNHUFCQmjRpohkzZmjQoEHWP36lW5eWu7u7p7r6oVOnTmrcuLG++eYbxcfHy83NTcePH1dgYKBGjBihoKAgnTt3TjNmzFCVKlW0bds2FS1a1GYdD/I+m559+/apUaNG6tmzp3x8fPTPP/9o5MiR2rRp0z1fT2lx5OfQ7e7n/STFyZMn1bFjR/Xr10+PPPKIJkyYoE6dOuno0aP6/vvv9c4778jPz0+DBw9WixYtdPDgQYWHh0uS6efofmzcuFFnz55V7dq1rW1hYWFaunSpGjRooJdfflmvvPKKpP/7OyZFq1at1KZNG3Xt2tVa/Nr7Ok7PjRs31KxZM7388svq06eP/vjjDw0ZMkR+fn6p/ka63aFDh9S4cWPVqFFDX331lXLnzq1jx45p6dKlSkxMlLe3t5KTk9W8eXP9+eef6tevn6pVq6bDhw9r4MCBqlWrljZv3i
wvLy8tWLBATz/9tPz8/KyfjR4eHjbbq1WrlubNmyfDMOjdNcsA0jBt2jRDkrFhwwbjxo0bxuXLl43FixcbQUFBhq+
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"multiple_barplot(company_lazy_customers, x=\"number_company\", y=\"no_campaign_opened\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"aucun achat\", 1 : \"achat durant la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de clients n'ayant ouvert aucun mail (%)\", \n",
" title = \"Part de clients des compagnies de spectacle n'ouvrant aucun mail (train set)\")"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "b391f5b2-2424-4758-8ae5-f0fdacdfae66",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" <th>no_campaign_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_299341</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 05:47:26.333333333</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_63788</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>62.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>393.205891</td>\n",
" <td>281.017639</td>\n",
" <td>112.188252</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:13:51</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_759946</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_20653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>10.0</td>\n",
" <td>1 days 00:45:54</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_824705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697292</th>\n",
" <td>14_119950</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697293</th>\n",
" <td>14_938</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697294</th>\n",
" <td>14_5004707</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>2 days 16:42:51</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697295</th>\n",
" <td>14_108184</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697296</th>\n",
" <td>14_4663981</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-10 12:31:28 +01:00
" <td>NaN</td>\n",
2024-03-08 10:30:12 +01:00
" <td>0.0</td>\n",
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>True</td>\n",
2024-03-08 10:30:12 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-03-10 12:31:28 +01:00
"<p>697297 rows × 42 columns</p>\n",
2024-03-08 10:30:12 +01:00
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_299341 0.0 0.0 0.0 0.0 \n",
"1 10_63788 3.0 2.0 62.0 1.0 \n",
"2 10_759946 0.0 0.0 0.0 0.0 \n",
"3 10_20653 0.0 0.0 0.0 0.0 \n",
"4 10_824705 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"697292 14_119950 0.0 0.0 0.0 0.0 \n",
"697293 14_938 0.0 0.0 0.0 0.0 \n",
"697294 14_5004707 0.0 0.0 0.0 0.0 \n",
"697295 14_108184 0.0 0.0 0.0 0.0 \n",
"697296 14_4663981 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 NaN NaN \n",
"1 1.0 393.205891 281.017639 \n",
"2 0.0 NaN NaN \n",
"3 0.0 NaN NaN \n",
"4 0.0 NaN NaN \n",
"... ... ... ... \n",
"697292 0.0 NaN NaN \n",
"697293 0.0 NaN NaN \n",
"697294 0.0 NaN NaN \n",
"697295 0.0 NaN NaN \n",
"697296 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_female \\\n",
"0 NaN 0.0 ... 0 \n",
"1 112.188252 3.0 ... 1 \n",
"2 NaN 0.0 ... 0 \n",
"3 NaN 0.0 ... 0 \n",
"4 NaN 0.0 ... 0 \n",
"... ... ... ... ... \n",
"697292 NaN 0.0 ... 0 \n",
"697293 NaN 0.0 ... 0 \n",
"697294 NaN 0.0 ... 0 \n",
"697295 NaN 0.0 ... 0 \n",
"697296 NaN 0.0 ... 0 \n",
"\n",
" gender_male gender_other country_fr nb_campaigns \\\n",
"0 1 0 1.0 12.0 \n",
"1 0 0 1.0 3.0 \n",
"2 0 1 NaN 0.0 \n",
"3 1 0 1.0 11.0 \n",
"4 0 1 NaN 0.0 \n",
"... ... ... ... ... \n",
"697292 1 0 1.0 0.0 \n",
"697293 1 0 1.0 0.0 \n",
"697294 1 0 1.0 2.0 \n",
"697295 0 1 1.0 0.0 \n",
"697296 0 1 NaN 0.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
"1 1.0 0 days 05:13:51 1.0 \n",
"2 0.0 NaN 0.0 \n",
"3 10.0 1 days 00:45:54 0.0 \n",
"4 0.0 NaN 0.0 \n",
"... ... ... ... \n",
"697292 0.0 NaN 0.0 \n",
"697293 0.0 NaN 0.0 \n",
"697294 1.0 2 days 16:42:51 0.0 \n",
"697295 0.0 NaN 0.0 \n",
"697296 0.0 NaN 0.0 \n",
"\n",
" number_company no_campaign_opened \n",
"0 10 False \n",
"1 10 False \n",
"2 10 True \n",
"3 10 False \n",
"4 10 True \n",
"... ... ... \n",
"697292 14 True \n",
"697293 14 True \n",
"697294 14 False \n",
"697295 14 True \n",
"697296 14 True \n",
"\n",
"[697297 rows x 42 columns]"
2024-03-08 10:30:12 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 111,
2024-03-08 10:30:12 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"# part de mails ouverts de chaque compagnie\n",
2024-03-08 10:30:12 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"train_set_spectacle"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 112,
"id": "dc8cfd36-0eb2-4ef3-877d-626fd0a9ced4",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_compagny</th>\n",
2024-03-03 09:32:45 +01:00
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
2024-03-10 12:31:28 +01:00
" <th>ratio_campaigns_opened</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>734772</td>\n",
" <td>126151.0</td>\n",
" <td>0.171687</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>342396</td>\n",
" <td>129833.0</td>\n",
" <td>0.379190</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
" <td>3168123</td>\n",
" <td>810722.0</td>\n",
" <td>0.255900</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>13</td>\n",
" <td>3218569</td>\n",
" <td>793581.0</td>\n",
" <td>0.246563</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>2427043</td>\n",
" <td>723846.0</td>\n",
" <td>0.298242</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_compagny nb_campaigns nb_campaigns_opened ratio_campaigns_opened\n",
"0 10 734772 126151.0 0.171687\n",
"1 11 342396 129833.0 0.379190\n",
"2 12 3168123 810722.0 0.255900\n",
"3 13 3218569 793581.0 0.246563\n",
"4 14 2427043 723846.0 0.298242"
2024-03-03 09:32:45 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 112,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
    "# taux d'ouverture des campagnes par compagnie (table campaigns_information)\n",
2024-03-03 09:32:45 +01:00
"\n",
2024-03-10 12:31:28 +01:00
"company_campaigns_stats = campaigns_information_spectacle.groupby(\"number_compagny\")[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
"company_campaigns_stats[\"ratio_campaigns_opened\"] = company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"]\n",
"company_campaigns_stats"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 119,
"id": "30b28426-088a-4153-b2aa-c20f11b2b771",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>perc_campaigns_opened</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>61668.0</td>\n",
" <td>8240.0</td>\n",
" <td>13.361873</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>4361.0</td>\n",
" <td>2002.0</td>\n",
" <td>45.906902</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>37799.0</td>\n",
" <td>12286.0</td>\n",
" <td>32.503505</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>8824.0</td>\n",
" <td>4493.0</td>\n",
" <td>50.917951</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 12:31:28 +01:00
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>505008.0</td>\n",
" <td>118071.0</td>\n",
" <td>23.380026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>45824.0</td>\n",
" <td>17233.0</td>\n",
" <td>37.606931</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
2024-03-03 09:32:45 +01:00
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>1176373.0</td>\n",
" <td>313379.0</td>\n",
" <td>26.639425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>129157.0</td>\n",
" <td>47987.0</td>\n",
" <td>37.154006</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased nb_campaigns nb_campaigns_opened \\\n",
"0 10 0.0 61668.0 8240.0 \n",
"1 10 1.0 4361.0 2002.0 \n",
"2 11 0.0 37799.0 12286.0 \n",
"3 11 1.0 8824.0 4493.0 \n",
"4 12 0.0 0.0 0.0 \n",
"5 12 1.0 0.0 0.0 \n",
"6 13 0.0 505008.0 118071.0 \n",
"7 13 1.0 45824.0 17233.0 \n",
"8 14 0.0 1176373.0 313379.0 \n",
"9 14 1.0 129157.0 47987.0 \n",
"\n",
" perc_campaigns_opened \n",
"0 13.361873 \n",
"1 45.906902 \n",
"2 32.503505 \n",
"3 50.917951 \n",
"4 NaN \n",
"5 NaN \n",
"6 23.380026 \n",
"7 37.606931 \n",
"8 26.639425 \n",
"9 37.154006 "
2024-03-03 09:32:45 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 119,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"company_campaigns_stats = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"nb_campaigns\", \"nb_campaigns_opened\"]].sum().reset_index()\n",
"company_campaigns_stats[\"perc_campaigns_opened\"] = 100* (company_campaigns_stats[\"nb_campaigns_opened\"] / company_campaigns_stats[\"nb_campaigns\"])\n",
"company_campaigns_stats"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 120,
"id": "9cebe912-fce1-4f4f-9d87-9649605296c8",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-03-10 12:31:28 +01:00
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
2024-03-03 09:32:45 +01:00
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
2024-03-10 12:31:28 +01:00
" <th>perc_campaigns_opened</th>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>61668.0</td>\n",
" <td>8240.0</td>\n",
" <td>13.361873</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 12:31:28 +01:00
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>4361.0</td>\n",
" <td>2002.0</td>\n",
" <td>45.906902</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>37799.0</td>\n",
" <td>12286.0</td>\n",
" <td>32.503505</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-10 12:31:28 +01:00
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>8824.0</td>\n",
" <td>4493.0</td>\n",
" <td>50.917951</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
2024-03-03 09:32:45 +01:00
" <td>13</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>505008.0</td>\n",
" <td>118071.0</td>\n",
" <td>23.380026</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 12:31:28 +01:00
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>45824.0</td>\n",
" <td>17233.0</td>\n",
" <td>37.606931</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
2024-03-03 09:32:45 +01:00
" <td>14</td>\n",
2024-03-10 12:31:28 +01:00
" <td>0.0</td>\n",
" <td>1176373.0</td>\n",
" <td>313379.0</td>\n",
" <td>26.639425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>129157.0</td>\n",
" <td>47987.0</td>\n",
" <td>37.154006</td>\n",
2024-03-03 09:32:45 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 12:31:28 +01:00
" number_company y_has_purchased nb_campaigns nb_campaigns_opened \\\n",
"0 10 0.0 61668.0 8240.0 \n",
"1 10 1.0 4361.0 2002.0 \n",
"2 11 0.0 37799.0 12286.0 \n",
"3 11 1.0 8824.0 4493.0 \n",
"6 13 0.0 505008.0 118071.0 \n",
"7 13 1.0 45824.0 17233.0 \n",
"8 14 0.0 1176373.0 313379.0 \n",
"9 14 1.0 129157.0 47987.0 \n",
"\n",
" perc_campaigns_opened \n",
"0 13.361873 \n",
"1 45.906902 \n",
"2 32.503505 \n",
"3 50.917951 \n",
"6 23.380026 \n",
"7 37.606931 \n",
"8 26.639425 \n",
"9 37.154006 "
2024-03-03 09:32:45 +01:00
]
},
2024-03-10 12:31:28 +01:00
"execution_count": 120,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"company_campaigns_stats = company_campaigns_stats[company_campaigns_stats[\"number_company\"]!=12]\n",
2024-03-03 09:32:45 +01:00
"company_campaigns_stats"
]
},
{
"cell_type": "code",
2024-03-10 12:31:28 +01:00
"execution_count": 123,
"id": "8418531b-4f30-4d96-8035-f3630c789d6f",
2024-03-03 09:32:45 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 12:31:28 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIjCAYAAAA9VuvLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABqBElEQVR4nO3deZyN9f//8ecx+86MWTUMWcueJYSR7Pta+FhbCElItJlS9iRLKsmIUD7JR0JkK/uQQUjSWMJk3waDmev3R785X+eaGc5hZs7gcb/dzu3mvK/rXNfrWp3nXNf1PhbDMAwBAAAAAKzyOLsAAAAAAMhtCEoAAAAAYEJQAgAAAAATghIAAAAAmBCUAAAAAMCEoAQAAAAAJgQlAAAAADAhKAEAAACACUEJAAAAAEwISgCAXGvnzp3y9vbWpEmTnF0KAOABQ1DKQhaLxa7XmjVrnF3qLR08eFAWi0WxsbHWtpiYGEVFRTmtpqy2Z88excTE6ODBg84u5a5069btjrdLVFSUmjZtmqX1REVFqVu3bnf8eYvFopiYmCyrJzfJaFvd7fq63128eFFt27bVSy+9pJdeesnZ5WSZNWvW3BP/FyB3HaP38/nxZseOHVNMTIzi4+OzdT4ZfdfJCufOnVP+/Pk1b948a9uSJUuyddtFR0crOjo626Z/N271fatz585q2bJljtfkCFdnF3A/2bhxo8374cOHa/Xq1Vq1apVN+yOPPJKTZSEDe/bs0TvvvKPo6Oj7KgAi93rrrbf08ssvO7uMe8qzzz6rKlWqaNSoUc4uBQ+o7777Tv7+/s4u44Fy7NgxvfPOO4qKilL58uWdXY7D3nnnHUVEROjpp5+2ti1ZskRTpkzJtrD08ccfZ8t0s8Ktvm/FxMSoZMmSWrVqlZ588knnFHgbBKUs9Pjjj9u8Dw4OVp48edK1w3muX78ui8WS7dN3deXQgq2HH37Y2SXcc7755htnl3DHLl++LG9vb2eXgbtUoUIFZ5eAe8iZM2f06aef6sMPP7zj7xqGYejq1avy8vKy+zP36h/gH374YTVs2FCjRo3KtUGJW+9y2JQpU1SrVi2FhITIx8dHZcqU0ZgxY3T9+nWb8TK73G++vNqrVy95enpq27Zt1rbU1FTVrVtXoaGhOn78+C3rOXbsmNq3by8/Pz8FBATo6aefVmJiol3LcvXqVQ0dOlSFCxeWu7u7ChQooD59+ujcuXM242V2u8DNy7hjxw5ZLBZNnz493XhLly6VxWLRokWLrG379+9Xx44dFRISIg8PD5UqVUpTpkyx+Vza7S2zZs3SwIEDVaBAAXl4eOjzzz9Xu3btJEl16tSx3hKZdvnd3nWf2fT//PNPSdJPP/2kunXryt/fX97e3qpRo4ZWrlx5m7X6r927d6t+/fry9vZWcHCw+vTpox9++MGu23Xs3S5pvvvuO5UtW1aenp4qUqSIJk6cmG56AwcOVPny5RUQEKDAwEBVq1ZN//vf/+xaloxcuHBBzz//vIKCguTr66uGDRvqjz/+yHBce7Z1amqq3nvvPZUoUUJeXl7KmzevypYtq48++uiWdaRtwzlz5ui1115TeHi4fH191axZM/3zzz+6ePGiXnjhBeXPn1/58+dX9+7ddenSJZtp2HtM23Ob5J0uh/TvOh00aJDNdu/fv7+SkpJsxrNYLOrbt69mzZqlUqVKydvbW+XKldPixYut4yxcuFAWiyXD/XXq1KmyWCzauXOntW3RokWqVq2avL295efnp3r16tlcYf/ll19ksVg0d+7cdNP78ssvZbFYFBcXZ23bunWrmjdvrsDAQHl6eqpChQrpQtPly5ety+vp6anAwEBVqlQpw3ncLDY2VhaLRStWrFD37t0VGBgoHx8fNWvWTH/99ZfNuCtWrFCLFi300EMPydPTU0WLFlXPnj116tQpm/FiYmJksVj066+/qm3btsqXL98dBePsXG5JOnr0qF544QVFRkbK3d
1dERERatu2rf755x/rOIcPH9Z//vMfm+Ptgw8+UGpqqnWctFuWxo4dq9GjRysqKkpeXl6Kjo7WH3/8oevXr2vIkCGKiIhQQECAWrVqpRMnTtjUknbrb1aef86dO6dnn31WgYGB8vX1VZMmTfTXX3+l+z8obXvt3r1bHTp0UEBAgEJDQ9WjRw+dP38+XZ3m/w/sPdbmz5+vqlWrKiAgQN7e3ipSpIh69Ohx2+2U1efHzNyuvrTz4+zZszVgwACFhYXJy8tLtWvX1vbt29NNz579V7r1frhmzRpVrlxZktS9e3fr/89p22/r1q165plnrPtcVFSUOnTooEOHDjk0n1u5m3UaGxurGzdu2FxN6tatm/XzNz+GkXYrWto5+ZNPPlGpUqXk4eGhmTNnSvr36lTVqlUVGBgof39/VaxYUdOnT5dhGDbzNX8/STtGx40bp/Hjx6tw4cLy9fVVtWrVtGnTptsuh73nmdtt89jY2Ft+35L+vf3up59+0oEDB26/gp2AP3vnsAMHDqhjx47WE+yOHTv0/vvv6/fff9cXX3zh8PQmTJigzZs3q3379tq2bZvy5s2rd955R2vWrNGyZcsUHh6e6WevXLmip556SseOHdPIkSNVvHhx/fDDDzYHeJqYmBib/2gMw1DLli21cuVKDR06VDVr1tTOnTs1bNgwbdy4URs3bpSHh4fdy1GuXDlVqFBBM2bM0LPPPmszLDY2ViEhIWrcuLGkfy/jVq9eXQULFtQHH3ygsLAw/fjjj+rXr59OnTqlYcOG2Xx+6NChqlatmj755BPlyZNHlSpV0tmzZ/X6669rypQpqlixoqQ7/4u/efohISGaPXu2unTpohYtWmjmzJlyc3PTp59+qgYNGujHH39U3bp1M53e8ePHVbt2bfn4+Gjq1KkKCQnR3Llz1bdv39vW4uh2iY+PV//+/RUTE6OwsDB99dVXevnll3Xt2jUNGjRIkpScnKwzZ85o0KBBKlCggK5du6affvpJrVu31owZM9SlSxeH1ldajRs2bNDbb7+typUra/369WrUqFG6ce3d1mPGjFFMTIzefPNN1apVS9evX9fvv/+eaTg0e/3111WnTh3Fxsbq4MGDGjRokDp06CBXV1eVK1dOc+fO1fbt2/X666/Lz8/P5stcVh7Td7ocly9fVu3atfX333/r9ddfV9myZbV79269/fbb2rVrl3766Sebv27+8MMPiouL07vvvitfX1+NGTNGrVq10r59+1SkSBE1bdpUISEhmjFjRrp9NTY2VhUrVlTZsmUlSXPmzFGnTp1Uv359zZ07V8nJyRozZoyio6O1cuVKPfHEE6pZs6YqVKigKVOmqEOHDjbTmzx5sipXrmz9crR69Wo1bNhQVatW1SeffKKAgADNmzdPTz/9tC5fvmz90jpgwADNmjVL7733nipUqKCkpCT99ttvOn36tF3r+tlnn1W9evU0Z84cHTlyRG+++aaio6O1c+dO5c2bV9K/27ZatWp67rnnFBAQoIMHD2r8+PF64okntGvXLrm5udlMs3Xr1nrmmWfUq1evdF+abye7l/vo0aOqXLmyrl+/bt1HTp8+rR9//FFnz55VaGioTp48qerVq+vatWsaPny4oqKitHjxYg0aNEgHDhxId3vPlClTVLZsWU2ZMkXnzp3TwIED1axZM1WtWlVubm764osvdOjQIQ0aNEjPPfeczR+7pKw9/6SmpqpZs2baunWrYmJiVLFiRW3cuFENGzbMdJ20adNGTz/9tJ599lnt2rVLQ4cOlaRbHrf2HmsbN27U008/raeffloxMTHy9PTUoUOH0t2Kb5Yd58eMOFLf66+/rooVK+rzzz/X+fPnFRMTo+joaG3fvl1FihSRZP/+e7v9sGLFipoxY4a6d++uN998U02aNJEkPfTQQ5L+DQAlSpTQM888o8DAQB0/flxTp05V5cqVtWfPHuXPn9+u+YSGhma4Xu5mnUr/nlsrVKhgPYdI/952nZ
SUpP/+9782f0C6+fvZwoUL9csvv+jtt99WWFiYQkJCrMvbs2dPFSxYUJK0adMmvfTSSzp69KjefvvtW9Yi/XuMlix
2024-03-03 09:32:45 +01:00
"text/plain": [
2024-03-10 12:31:28 +01:00
"<Figure size 1000x600 with 1 Axes>"
2024-03-03 09:32:45 +01:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-10 12:31:28 +01:00
"multiple_barplot(company_campaigns_stats, x=\"number_company\", y=\"perc_campaigns_opened\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Part de mails ouverts (%)\", \n",
" title = \"Taux d'ouverture global des mails envoyés par les compagnies de spectacle (train set)\")"
2024-03-03 09:32:45 +01:00
]
},
{
"cell_type": "markdown",
"id": "783f6fb2-5f26-42a9-a22d-f4ece44bfaf2",
"metadata": {},
"source": [
"### 3. products_purchased_reduced"
]
},
2024-03-05 03:15:03 +01:00
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 16,
2024-03-03 09:32:45 +01:00
"id": "74534ded-8121-43fb-8cf8-af353bed2c77",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 764880\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 0\n",
"purchase_date_max 0\n",
"time_between_purchase 0\n",
"nb_tickets_internet 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
2024-03-10 17:41:43 +01:00
"execution_count": 16,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",products_purchased_reduced_spectacle.shape[0])\n",
"products_purchased_reduced_spectacle.isna().sum()"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 21,
2024-03-05 03:15:03 +01:00
"id": "6db089d5-5517-4aee-a5fd-53f20ae3f0d7",
2024-03-05 00:36:48 +01:00
"metadata": {},
"outputs": [],
2024-03-05 03:15:03 +01:00
"source": [
"#importation librairies\n",
"import warnings\n",
"warnings.simplefilter(\"ignore\")\n",
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy.stats import shapiro\n",
"from numpy.random import randn\n",
"import scipy.stats as st\n",
2024-03-05 03:25:59 +01:00
"%matplotlib inline\n",
"\n",
"#col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]"
]
},
{
"cell_type": "code",
2024-03-10 17:41:43 +01:00
"execution_count": 39,
"id": "943b8088-9ca2-40a4-b658-2cfae1589fac",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"30.0\n",
"62.0\n",
"120.0\n",
"90.0\n",
"Moustache inferieure -105.0\n",
"Moustache superieure 255.0\n"
]
}
],
"source": [
"# Identification des valeurs aberrantes de total_amount (règle de l'écart interquartile)\n",
"# Calcul des quartiles de la variable total_amount\n",
"Q1=np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], 25) # Q1\n",
"Q2=np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], 50) # Q2\n",
"Q3=np.percentile(products_purchased_reduced_spectacle[\"total_amount\"], 75) # Q3\n",
"print(Q1)\n",
"print(Q2)\n",
"print(Q3)\n",
"\n",
"# Intervalle interquartile de la variable total_amount\n",
"\n",
"IQ=Q3-Q1\n",
"print(IQ)\n",
"\n",
"# Bornes inférieure et supérieure des moustaches de la variable total_amount\n",
"\n",
"M_inf=Q1-1.5*IQ\n",
"M_sup=Q3+1.5*IQ\n",
"\n",
"print(\"Moustache inferieure\",M_inf)#moustache inferieur\n",
"print(\"Moustache superieure\",M_sup)#moustache sup\n"
]
},
2024-03-10 19:08:50 +01:00
{
"cell_type": "code",
"execution_count": 62,
"id": "c3adb0cd-8292-4c6f-9d4e-8352a6967022",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id int64\n",
"nb_tickets int64\n",
"nb_purchases int64\n",
"total_amount float64\n",
"nb_suppliers int64\n",
"vente_internet_max int64\n",
"purchase_date_min float64\n",
"purchase_date_max float64\n",
"time_between_purchase float64\n",
"nb_tickets_internet float64\n",
"number_compagny int64\n",
"dtype: object"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced_spectacle.dtypes"
]
},
2024-03-10 17:41:43 +01:00
{
"cell_type": "markdown",
"id": "a63e6d13-429b-4b01-ad11-27e5eea68cbd",
2024-03-05 03:25:59 +01:00
"metadata": {},
"source": [
"Histogrammes des variables quantitatives (code conservé en Markdown, non exécuté) :\n",
"\n",
"```python\n",
"col_purchase=[\"nb_tickets\",\"nb_purchases\",\"total_amount\",\"nb_suppliers\",\"time_between_purchase\",\"nb_tickets_internet\"]\n",
"for col in col_purchase:\n",
" plt.figure()\n",
" sns.histplot(products_purchased_reduced_spectacle[col], kde=True, color='red')\n",
"```"
2024-03-05 03:15:03 +01:00
]
},
{
"cell_type": "code",
2024-03-10 19:08:50 +01:00
"execution_count": 86,
2024-03-10 17:41:43 +01:00
"id": "5a08b5a5-7d56-4543-945a-38f6219d831d",
2024-03-05 03:15:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-10 17:41:43 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAn8AAAHGCAYAAAAFY+3bAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABPdUlEQVR4nO3deZyNdeP/8feZ7cxqmMEsmhn7zqgUoZA9S8gt6S6kRaSEW8mNUZZSSTepW1mTVPeNmzZGlrIVoZJSSoaYRmMZ6xgzn98f/eZ8ndnNnHFm5no9H4/z4FzXdT6fz7We93yu5diMMUYAAACwBA93NwAAAADXDuEPAADAQgh/AAAAFkL4AwAAsBDCHwAAgIUQ/gAAACyE8AcAAGAhhD8AAAALIfwBAABYiOXC3549exQcHKx//etf7m4KAADANXdV4W/hwoWy2WxOr0qVKqlNmzb68MMPC92IqlWrauDAgY73R48eVVxcnPbs2VPoMnPTpEkTrVixQuPGjdO2bdtcXn5JNHXqVK1cubLY67HZbHrssceKvZ686o+Li7umdQ4cOFCBgYEFmjan9n322Wdq2rSpAgICZLPZHOvpvffeU4MGDeTn5yebzVYs+0J+Bg4cqKpVqzoNs9lsWrhwYaHK2717t1q3bq3g4GDZbDbNnDlTUu7LoCB+++23IrXJXTKPpb/99ts1r7tNmzZq06bNNa8XV8ed20hWcXFxstls7m7GNbF06VLHsak45XR8vZa8CvOhBQsWqG7dujLGKDExUbNnz1b37t21atUqde/e/arLW7FihcqVK+d4f/ToUU2aNElVq1ZVkyZNCtPEPN1+++1688031a9fP+3cuVOVKlVyeR0lydSpU9WnTx/17NnT3U2xtG3btum6665zvDfGqG/fvqpdu7ZWrVqlgIAA1alTR8ePH9d9992nzp07a86cObLb7apdu7YbW+4aDzzwgM6dO6dly5apQoUKqlq1aq7LoKAiIiK0bds21ahRoxhbDlx7Xbt21bZt2xQREeHupljK0qVLtXfvXo0YMcLdTSlWhQp/DRs2VNOmTR3vO3furAoVKujdd98tVPi7/vrrC9OMIunXr5/69et3zeuFdTVv3tzp/dGjR3XixAn16tVL7dq1cwzfsmWL0tLS9Pe//12tW7fOs8zz58/L39+/WNrranv37tVDDz2kLl26OIb9/vvvOS6DgrLb7dmWa05K03ICJKlSpUplvmMC7uOSa/58fX3l4+Mjb29vp+EnTpzQ0KFDVaVKFfn4+Kh69eoaN26cUlNTnaa78rTvxo0bddNNN0mSBg0a5Di9fOXpsp07d6pHjx4KCQmRr6+vrr/+er3//vsFauukSZPUrFkzhYSEqFy5crrhhhs0b948GWPy/WzmKb4ff/xRnTp1UkBAgCIiIvT8889LkrZv365WrVopICBAtWvX1qJFi7KVsXfvXt15552qUKGCfH191aRJk2zT5dbdv3HjRtlsNm3cuNExbPfu3erWrZsqV64su92uyMhIde3aVUeOHJH012m6c+fOadGiRY5lmXnK5/jx4xo6dKjq16+vwMBAVa5cWbfffru++OKLbO1OTU3Vs88+q3r16snX11ehoaFq27attm7dmm3at99+W/Xq1ZO/v79iY2NzvCTg559/Vv/+/R3trlevnl577bU8l3+mlJQUPfTQQwoNDVVgYKA6d+6sn376Kdt0uXWrX80pjE8//VTt2rVTcHCw/P39Va9ePU2bNi3bdAcOHNAdd9yhwMBARUVFadSoUdm28yu347i4OEcv4FNPPSWbzebYD1q1aiVJuvvuu53WV+b2991336ljx44KCgpyBKZLly5p8uTJqlu3rux2uypVqqRBgwbp+PHjBZrPhQsXqk6dOo51sXjx4gJ97sCBAxo0aJBq1aolf39/ValSRd27d9d3333nVLbNZtPly5f1+uuvO+3TOS2DgpYr5XzaN3P97tq1S3369F
GFChUcPYPGGM2ZM0dNmjSRn5+fKlSooD59+ujXX3/Nd16PHz+uhx9+WFFRUY5l3LJlS61bt85punXr1qldu3YqV66c/P391bJlS3322WcFWp7z589XbGysfH19FRISol69eumHH35wmiZzOyjINldQBd1+1q9frzZt2ig0NFR+fn6Kjo7WXXfdpfPnz+dbx9KlS3XLLbcoMDBQgYGBatKkiebNm1fo+S/scThze4yPj9egQYMUEhKigIAAde/ePdt2EB8frzvvvFPXXXedfH19VbNmTT3yyCP6888/s83f//73PzVu3Fh2u13Vq1fXq6++muOxJvPymPyOk7l9DxRk+yrotpqTjz76SE2aNJHdble1atX00ksv5Thdce9Lbdq0UcOGDfXFF1+oefPm8vPzU5UqVTR+/Hilp6c7lXc1x7+8tsM2bdroo48+0qFDh5wub8t0NfmhINt7YZdpft/7BWKuwoIFC4wks337dpOWlmYuXbpkDh8+bB5//HHj4eFhPv30U8e0Fy5cMI0bNzYBAQHmpZdeMmvXrjXjx483Xl5e5o477nAqNyYmxgwYMMAYY8zp06cd9fzzn/8027ZtM9u2bTOHDx82xhizfv164+PjY2699Vbz3nvvmU8//dQMHDjQSDILFizIdx4GDhxo5s2bZ+Lj4018fLx57rnnjJ+fn5k0aVK+nx0wYIDx8fEx9erVM6+++qqJj483gwYNMpLM2LFjTe3atc28efPMmjVrTLdu3Ywks3PnTsfnf/zxRxMUFGRq1KhhFi9ebD766CNzzz33GEnmhRdeyLacDx486FT/hg0bjCSzYcMGY4wxZ8+eNaGhoaZp06bm/fffN5s2bTLvvfeeGTJkiNm3b58xxpht27YZPz8/c8cddziW5ffff+9oz6OPPmqWLVtmNm7caD788EMzePBg4+Hh4ajDGGPS0tJM27ZtjZeXlxk9erT5+OOPzapVq8wzzzxj3n33Xcd0kkzVqlXNzTffbN5//33z8ccfmzZt2hgvLy/zyy+/OKb7/vvvTXBwsGnUqJFZvHixWbt2rRk1apTx8PAwcXFxea6DjIwM07ZtW2O3282UKVPM2rVrzcSJE0316tWNJDNx4kSn9RUTE5OtjIkTJ5qCbPpvvfWWsdlspk2bNmbp0qVm3bp1Zs6cOWbo0KFOdWRuEy+99JJZt26dmTBhgrHZbNm2qSvbd/jwYbN8+XIjyQwfPtxs27bN7Nq1yxw4cMC89tprRpKZOnWq0/oaMGCA8fb2NlWrVjXTpk0zn332mVmzZo1JT083nTt3NgEBAWbSpEkmPj7evPXWW6ZKlSqmfv365vz583nOZ+b2duedd5rVq1ebJUuWmJo1a5qoqKgcl9+VNm3aZEaNGmX+85//mE2bNpkVK1aYnj17Gj8/P/Pjjz8aY4xJSkoy27ZtM5JMnz59nPbpnJZBQcs1xpiDBw9m2/cz129MTIx56qmnTHx8vFm5cqUxxpiHHnrIeHt7m1GjRplPP/3ULF261NStW9eEhYWZxMTEPOe1U6dOplKlSmbu3Llm48aNZuXKlWbChAlm2bJljmnefvttY7PZTM+ePc3y5cvN6tWrTbdu3Yynp6dZt25dtmV+5T4+depUI8ncc8895qOPPjKLFy821atXN8HBweann35yTHc121xOWrdubVq3bu14X9Dt5+DBg8bX19d06NDBrFy50mzcuNG888475r777jMnT57Ms87x48cbSaZ3797mgw8+MGvXrjUzZsww48ePL9L8F+Y4nLnso6KizAMPPGA++eQTM3fuXFO5cmUTFRXlNC+vv/66mTZtmlm1apXZtGmTWbRokYmNjTV16tQxly5dckz3ySefGA8PD9OmTRuzYsUK88EHH5hmzZqZqlWrZjvWFPQ4mdM2UtDtqyDbak7WrVtnPD09TatWrczy5cvNBx98YG666SYTHR2dbT6Ke19q3bq1CQ0NNZGRkeZf//qXWbNmjXn88ceNJD
Ns2DDHdFdz/MtvO/z+++9Ny5YtTXh4uOM4tW3bNsfnC5ofCrK95/T9VJBlWpDv/YIoVPjL+rLb7WbOnDlO077xxht
2024-03-05 03:15:03 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2024-03-05 00:36:48 +01:00
"source": [
2024-03-05 03:57:08 +01:00
"# Répartition du chiffre d'affaires selon les compagnies de spectacle\n",
2024-03-05 00:36:48 +01:00
"\n",
2024-03-10 17:41:43 +01:00
"# Filtrer les données : ne garder que les valeurs strictement positives de total_amount et exclure les valeurs aberrantes (au-delà de la moustache supérieure, 255)\n",
"filtered_products_purchased_reduced_spectacle = products_purchased_reduced_spectacle[(products_purchased_reduced_spectacle['total_amount'] > 0) & (products_purchased_reduced_spectacle['total_amount'] <= 255)]\n",
"\n",
"# Créer le graphique en utilisant les données filtrées\n",
"sns.boxplot(data=filtered_products_purchased_reduced_spectacle, y=\"total_amount\", x=\"number_compagny\", showfliers=False, showmeans=True)\n",
"\n",
"# Titre du graphique\n",
"plt.title(\"Boite à moustache du chiffre d'affaire selon les compagnies de spectacles\")\n",
"\n",
"# Afficher le graphique\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
2024-03-10 19:08:50 +01:00
"execution_count": 87,
2024-03-05 15:37:29 +01:00
"id": "76e08ece-0b58-4b3a-abca-53e30ccc907b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-10 17:41:43 +01:00
"Statistique F : 317.1792172580724\n",
"Valeur de p : 3.665389608154993e-273\n",
2024-03-05 15:37:29 +01:00
"Nombre de degrés de liberté entre les groupes : 4\n",
2024-03-10 17:41:43 +01:00
"Nombre de degrés de liberté à l'intérieur des groupes : 670581\n",
"Il y a des différences significatives entre au moins une des entrepries .\n"
2024-03-05 15:37:29 +01:00
]
}
],
"source": [
"# Test ANOVA pour voir si la différence de chiffre d'affaires entre compagnies est statistiquement significative\n",
"\n",
"from scipy.stats import f_oneway\n",
"\n",
"# Créez une liste pour stocker les données de chaque groupe\n",
"groupes = []\n",
"\n",
"# Parcourez chaque modalité de la variable catégorielle et divisez les données en groupes\n",
2024-03-10 17:41:43 +01:00
"for modalite in filtered_products_purchased_reduced_spectacle['number_compagny'].unique():\n",
" groupe = filtered_products_purchased_reduced_spectacle[filtered_products_purchased_reduced_spectacle['number_compagny'] == modalite]['total_amount']\n",
2024-03-05 15:37:29 +01:00
" groupes.append(groupe)\n",
"\n",
"# Effectuez le test ANOVA\n",
"f_statistic, p_value = f_oneway(*groupes)\n",
"\n",
"# Nombre total d'observations\n",
"N = sum(len(groupe) for groupe in groupes)\n",
"\n",
"# Nombre de groupes ou de catégories\n",
"k = len(groupes)\n",
"\n",
"# Degrés de liberté entre les groupes\n",
"df_between = k - 1\n",
"\n",
"# Degrés de liberté à l'intérieur des groupes\n",
"df_within = N - k\n",
"\n",
"# Affichez les résultats\n",
"print(\"Statistique F :\", f_statistic)\n",
"print(\"Valeur de p :\", p_value)\n",
"\n",
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
"\n",
"if p_value < 0.05:\n",
" print(\"Il y a des différences significatives entre au moins une des entrepries .\")\n",
"else:\n",
" print(\"Il n'y a pas de différences significatives entre les entreprises .\")"
]
},
2024-03-05 03:15:03 +01:00
{
"cell_type": "code",
2024-03-10 19:08:50 +01:00
"execution_count": 88,
"id": "6b55de4b-913e-4bc1-b4f2-cc0b1824d0e2",
"metadata": {},
"outputs": [],
"source": [
"#graphe sur le taux de ticket acheté"
]
},
{
"cell_type": "code",
"execution_count": 89,
2024-03-05 03:15:03 +01:00
"id": "aacf2c34-f7ea-4d6e-935b-c5db01f03bbe",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_compagny</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>Taux_ticket_internet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>492314</td>\n",
" <td>126262.0</td>\n",
" <td>25.646640</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11</td>\n",
" <td>318969</td>\n",
" <td>16348.0</td>\n",
" <td>5.125263</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12</td>\n",
" <td>591028</td>\n",
" <td>42045.0</td>\n",
" <td>7.113876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>7024227</td>\n",
" <td>1247482.0</td>\n",
" <td>17.759705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>14</td>\n",
" <td>335741</td>\n",
" <td>125638.0</td>\n",
" <td>37.421107</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_compagny nb_tickets nb_tickets_internet Taux_ticket_internet\n",
"0 10 492314 126262.0 25.646640\n",
"1 11 318969 16348.0 5.125263\n",
"2 12 591028 42045.0 7.113876\n",
"3 13 7024227 1247482.0 17.759705\n",
"4 14 335741 125638.0 37.421107"
]
},
2024-03-10 19:08:50 +01:00
"execution_count": 89,
2024-03-05 03:15:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Taux de tickets achetés sur internet selon les compagnies\n",
"\n",
"purchase_spectacle = products_purchased_reduced_spectacle.groupby(\"number_compagny\")[[\"nb_tickets\", \"nb_tickets_internet\"]].sum().reset_index()\n",
"purchase_spectacle[\"Taux_ticket_internet\"] = purchase_spectacle[\"nb_tickets_internet\"]*100 / purchase_spectacle[\"nb_tickets\"]\n",
"purchase_spectacle"
]
},
{
"cell_type": "code",
2024-03-10 19:08:50 +01:00
"execution_count": 90,
2024-03-05 03:15:03 +01:00
"id": "f71bb53d-724b-454d-8743-305d20eec2b0",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlcAAAHFCAYAAADffdxRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABULUlEQVR4nO3dd1QU5/4G8GelLH0VUIoiRUFERY0olqhgQbHFbjQxEKMxlnhtMZaoYMPeYtTotccWr+VaKTYSu0YQYzdBUSOioIAIqPD+/vDHXpeiuzjLsvp8ztlzmHdmZ555mR2+TFuZEEKAiIiIiCRRRtcBiIiIiN4nLK6IiIiIJMTiioiIiEhCLK6IiIiIJMTiioiIiEhCLK6IiIiIJMTiioiIiEhCLK6IiIiIJMTiioiIiEhCWimuZDKZWq+jR49qY/GSuXXrFmQyGdauXatsCwkJgYuLi9aWGRwcDAsLC0nnuXTpUpV1KC6ZTIaQkJB3no+6/vnnH4SEhCA2NrbAuJCQEMhkMo3n6efnh5o1a0qQDrh8+TJCQkJw69YtSeZXkoKDgwtsxy4uLggODtZJHl0orA9KSkl/lqh4dLmN5Ofn5wc/Pz9dxygRM2bMwK5du7S+HG3u8wy1MdOTJ0+qDE+dOhVHjhzB4cOHVdq9vLy0sXjKZ+nSpbC1tdW7P5z//PMPQkND4eLigjp16qiM69+/P9q2baubYP/v8uXLCA0NhZ+fX6nZAb+LnTt3wsrKStcxiEqNiRMn4l//+peuY3xwZsyYge7du6Nz5866jlJsWimuGjZsqDJcvnx5lClTpkA7UXFVqlQJlSpV0nWM90rdunV1HYGoVKlSpYquI5Ce0tk1Vz/99BOaNWuGChUqwNzcHLVq1cLs2bPx4sULlemKOmyX/xDpN998AxMTE/zxxx/KttzcXLRs2RJ2dna4f//+G/P8888/6NmzJywtLaFQKNCrVy8kJiZKui4AEB4ejpYtW0KhUMDMzAzVq1dHWFhYgelu3ryJdu3awcLCAk5OThg1ahSys7NVpgkNDYWvry+sra1hZWWFjz76CKtWrcLr38Xt4uKCS5cuITo6Wnk69m1HWdLS0jBgwADY2NjAwsICbdu2xfXr1wud9saNG+jTpw8qVKgAuVyO6tWr46efflKZJjc3F9OmTUO1atVgamqKsmXLwtvbG4sWLSoyw9GjR1G/fn0AwJdffqnMnncqpajTgps2bUKjRo1gYWEBCwsL1KlTB6tWrXrj+u7cuRNmZmbo378/Xr58CQA4d+4cOnXqBGtra5iYmKBu3br49ddfle9Zu3YtevToAQDw9/dX5ss7/RoTE4MOHToo+8XR0RHt27fH3bt335gFAA4ePIiWLVvCysoKZmZmaNKkCQ4dOqQyTd76X7p0Cb1794ZCoYCdnR369euH1NTUty6jMIV91i5duoSAgACYmZmhfPnyGDJkCPbt21fgtH7e6dazZ8+iadOmMDMzg5ubG2bOnInc3FyVeaalpWH06NFwdXWFsbExKlasiOHDhyMjI+OtGdXpVyEEli5dijp16sDU1BTlypVD9+7d8ffff791/llZWRg3bpxKtiFDhuDJkycF+qpDhw4IDw/HRx99BFNTU3h6emL16tVvXUZREhMTMXDgQFSqVAnGxsZwdXVFaGiocpvMs2zZMtSuXRsWFhawtLSEp6cnxo8f/9b5Z2dnY8qUKahevTpMTExgY2MDf39/nDhxotjrv3fvXtStWxempqaoXr069u7dC+DV56N69eowNzdHgwYNcO7cOZX3513+cOnSJbRs2RLm5uYoX748hg4dimfPnqlMq+7+VQiBGTNmwNnZGSYmJvDx8UFUVFSBvxVHjx6FTCbD5s2bMWHCBDg6OsLKygqtWrXCtWvXCuTMv79Ud/sq7j5ACIHZs2cr1+Ojjz7CgQMHCp1W258lmUyGoUOH4ueff4aHhwfkcjm8vLywZcuWAvNTd/t923
Yok8mQkZGBdevWKfereb+/hw8fYvDgwfDy8oKFhQUqVKiAFi1a4Pfffy+QR53tXeo+VSFKQFBQkDA3N1dpGzFihFi2bJkIDw8Xhw8fFgsWLBC2trbiyy+/VJnO2dlZBAUFFZhn8+bNRfPmzZXDmZmZok6dOsLNzU08fvxYCCHEpEmTRJkyZURkZOQb8z179kxUr15dKBQK8eOPP4qIiAgxbNgwUblyZQFArFmz5o3vV3dd/v3vfwuZTCb8/PzEpk2bxMGDB8XSpUvF4MGDVfrK2NhYVK9eXcydO1ccPHhQTJo0SchkMhEaGqoyv+DgYLFq1SoRFRUloqKixNSpU4WpqanKdOfPnxdubm6ibt264uTJk+LkyZPi/PnzRa5Lbm6u8Pf3F3K5XEyfPl1ERkaKyZMnCzc3NwFATJ48WTntpUuXhEKhELVq1RLr168XkZGRYtSoUaJMmTIiJCREOV1YWJgwMDAQkydPFocOHRLh4eFi4cKFKtPkl5qaKtasWSMAiB9++EGZ/c6dO0IIISZPnizyb74TJ04UAETXrl3Ftm3bRGRkpJg/f76YOHGicprmzZuLGjVqKIfnz58vDAwMxNSpU5Vthw8fFsbGxqJp06Zi69atIjw8XAQHB6tsC0lJSWLGjBkCgPjpp5+U+ZKSksTTp0+FjY2N8PHxEb/++quIjo4WW7duFd988424fPlykesshBAbNmwQMplMdO7cWezYsUPs2bNHdOjQQRgYGIiDBw8qp8tb/2rVqolJkyaJqKgoMX/+fCGXywtsd4UJCgoSzs7OKm35P2v//POPsLGxEZUrVxZr164V+/fvF3379hUuLi4CgDhy5IhKv9rY2Ah3d3exfPlyERUVJQYPHiwAiHXr1imny8jIEHXq1BG2trZi/vz54uDBg2LRokVCoVCIFi1aiNzc3CIzq9uvAwYMEEZGRmLUqFEiPDxcbNq0SXh6ego7OzuRmJhYZB/k5uaKNm3aCENDQzFx4kQRGRkp5s6dK8zNzUXdunVFVlaWSl9VqlRJeHl5ifXr14uIiAjRo0cPAUBER0e/tf/zf5bu378vnJychLOzs/j555/FwYMHxdSpU4VcLhfBwcHK6TZv3iwAiG+//VZERkaKgwcPiuXLl4thw4a9cXkvXrwQ/v7+wtDQUIwePVrs379f7N69W4wfP15s3ry52Otfs2ZNsXnzZrF//37h6+srjIyMxKRJk0STJk3Ejh07xM6dO4WHh4ews7MTz549U+l7Y2NjUblyZeV+JiQkRBgaGooOHTqoZFd3/zpu3DgBQHz99dciPDxcrFy5UlSuXFk4ODio/K04cuSIACBcXFzEZ599Jvbt2yc2b94sKleuLNzd3cXLly9Vcub/nKizfb3LPiDvs/3VV1+JAwcOiBUrVoiKFSsKe3t7lfUoic8SAOHk5CS8vLzE5s2bxe7du0Xbtm0FALFt2zbldOpuv+pshydPnhSmpqaiXbt2yv3qpUuXhBBCXL16VQwaNEhs2bJFHD16VOzdu1d89dVXokyZMir7I3WWI0TBfd679Gl+OiuuXpeTkyNevHgh1q9fLwwMDERKSopynLrFlRBC3LhxQ1hZWYnOnTuLgwcPijJlyogffvjhrfmWLVsmAIj//ve/Ku0DBgxQq7hSZ13S09OFlZWV+Pjjj9/4CwoKChIAxK+//qrS3q5dO1GtWrW3LnfKlCnCxsZGZRk1atQo0FdFOXDggAAgFi1apNI+ffr0An8Q2rRpIypVqiRSU1NVph06dKgwMTFRrnuHDh1EnTp11Fr+686ePVtk/+cvrv7++29hYGAgPvvsszfOM6+4ysnJEUOHDhXGxsbil19+UZnG09NT1K1bV7x48UKlvUOHDsLBwUHk5OQIIYTYtm1bgSJDCCHOnTsnAIhdu3ZpsLavPtjW1taiY8eOKu05OTmidu3aokGDBsq2vPWfPXu2yrSDBw8WJiYmb90JqFNcfffdd0Imkyl3bHnatGlTaH
EFQJw+fVplWi8vL9GmTRvlcFhYmChTpow4e/asynT/+c9/BACxf//+IjOr068nT54UAMS8efNU2u/cuSNMTU3FmDF
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Création du barplot\n",
"plt.bar(purchase_spectacle[\"number_compagny\"], purchase_spectacle[\"Taux_ticket_internet\"])\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Company')\n",
"plt.ylabel(\"Taux d'achat de tickets en ligne (%)\")\n",
"plt.title(\"Taux d'achat des tickets en ligne selon les compagnies de spectacle\")\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
2024-03-05 00:36:48 +01:00
]
},
2024-03-10 19:08:50 +01:00
{
"cell_type": "code",
"execution_count": 94,
"id": "69aad59a-e93d-4edc-a559-8f2452d7f19d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" <th>number_company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10_299341</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 05:47:26.333333333</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10_63788</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>62.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>393.205891</td>\n",
" <td>281.017639</td>\n",
" <td>112.188252</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>0 days 05:13:51</td>\n",
" <td>1.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10_759946</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10_20653</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>11.0</td>\n",
" <td>10.0</td>\n",
" <td>1 days 00:45:54</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10_824705</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697292</th>\n",
" <td>14_119950</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697293</th>\n",
" <td>14_938</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697294</th>\n",
" <td>14_5004707</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>2 days 16:42:51</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697295</th>\n",
" <td>14_108184</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>697296</th>\n",
" <td>14_4663981</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>other</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>697297 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 10_299341 0.0 0.0 0.0 0.0 \n",
"1 10_63788 3.0 2.0 62.0 1.0 \n",
"2 10_759946 0.0 0.0 0.0 0.0 \n",
"3 10_20653 0.0 0.0 0.0 0.0 \n",
"4 10_824705 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"697292 14_119950 0.0 0.0 0.0 0.0 \n",
"697293 14_938 0.0 0.0 0.0 0.0 \n",
"697294 14_5004707 0.0 0.0 0.0 0.0 \n",
"697295 14_108184 0.0 0.0 0.0 0.0 \n",
"697296 14_4663981 0.0 0.0 0.0 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 NaN NaN \n",
"1 1.0 393.205891 281.017639 \n",
"2 0.0 NaN NaN \n",
"3 0.0 NaN NaN \n",
"4 0.0 NaN NaN \n",
"... ... ... ... \n",
"697292 0.0 NaN NaN \n",
"697293 0.0 NaN NaN \n",
"697294 0.0 NaN NaN \n",
"697295 0.0 NaN NaN \n",
"697296 0.0 NaN NaN \n",
"\n",
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
"0 NaN 0.0 ... male \n",
"1 112.188252 3.0 ... female \n",
"2 NaN 0.0 ... other \n",
"3 NaN 0.0 ... male \n",
"4 NaN 0.0 ... other \n",
"... ... ... ... ... \n",
"697292 NaN 0.0 ... male \n",
"697293 NaN 0.0 ... male \n",
"697294 NaN 0.0 ... male \n",
"697295 NaN 0.0 ... other \n",
"697296 NaN 0.0 ... other \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 0 1 0 1.0 12.0 \n",
"1 1 0 0 1.0 3.0 \n",
"2 0 0 1 NaN 0.0 \n",
"3 0 1 0 1.0 11.0 \n",
"4 0 0 1 NaN 0.0 \n",
"... ... ... ... ... ... \n",
"697292 0 1 0 1.0 0.0 \n",
"697293 0 1 0 1.0 0.0 \n",
"697294 0 1 0 1.0 2.0 \n",
"697295 0 0 1 1.0 0.0 \n",
"697296 0 0 1 NaN 0.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \\\n",
"0 3.0 0 days 05:47:26.333333333 0.0 \n",
"1 1.0 0 days 05:13:51 1.0 \n",
"2 0.0 NaN 0.0 \n",
"3 10.0 1 days 00:45:54 0.0 \n",
"4 0.0 NaN 0.0 \n",
"... ... ... ... \n",
"697292 0.0 NaN 0.0 \n",
"697293 0.0 NaN 0.0 \n",
"697294 1.0 2 days 16:42:51 0.0 \n",
"697295 0.0 NaN 0.0 \n",
"697296 0.0 NaN 0.0 \n",
"\n",
" number_company \n",
"0 10 \n",
"1 10 \n",
"2 10 \n",
"3 10 \n",
"4 10 \n",
"... ... \n",
"697292 14 \n",
"697293 14 \n",
"697294 14 \n",
"697295 14 \n",
"697296 14 \n",
"\n",
"[697297 rows x 41 columns]"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set_spectacle"
]
},
{
"cell_type": "code",
2024-03-10 19:49:34 +01:00
"execution_count": 97,
2024-03-10 19:08:50 +01:00
"id": "86fa4d7f-9b5f-4487-beb8-eb23771f724c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
2024-03-10 19:49:34 +01:00
" <th>y_has_purchased</th>\n",
2024-03-10 19:08:50 +01:00
" <th>nb_tickets</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>Taux_ticket_internet</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>9957.0</td>\n",
" <td>5450.0</td>\n",
" <td>54.735362</td>\n",
2024-03-10 19:08:50 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-10 19:49:34 +01:00
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>7941.0</td>\n",
" <td>3424.0</td>\n",
" <td>43.117995</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-10 19:08:50 +01:00
" <td>11</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>10361.0</td>\n",
2024-03-10 19:08:50 +01:00
" <td>5.0</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.048258</td>\n",
2024-03-10 19:08:50 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 19:49:34 +01:00
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>9638.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-10 19:08:50 +01:00
" <td>12</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>35600.0</td>\n",
2024-03-10 19:08:50 +01:00
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
2024-03-10 19:49:34 +01:00
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>11520.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
2024-03-10 19:08:50 +01:00
" <td>13</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>131759.0</td>\n",
" <td>105406.0</td>\n",
" <td>79.999089</td>\n",
2024-03-10 19:08:50 +01:00
" </tr>\n",
" <tr>\n",
2024-03-10 19:49:34 +01:00
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>1004076.0</td>\n",
" <td>13902.0</td>\n",
" <td>1.384557</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
2024-03-10 19:08:50 +01:00
" <td>14</td>\n",
2024-03-10 19:49:34 +01:00
" <td>0.0</td>\n",
" <td>44596.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>16694.0</td>\n",
2024-03-10 19:08:50 +01:00
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-10 19:49:34 +01:00
" number_company y_has_purchased nb_tickets nb_tickets_internet \\\n",
"0 10 0.0 9957.0 5450.0 \n",
"1 10 1.0 7941.0 3424.0 \n",
"2 11 0.0 10361.0 5.0 \n",
"3 11 1.0 9638.0 0.0 \n",
"4 12 0.0 35600.0 0.0 \n",
"5 12 1.0 11520.0 0.0 \n",
"6 13 0.0 131759.0 105406.0 \n",
"7 13 1.0 1004076.0 13902.0 \n",
"8 14 0.0 44596.0 0.0 \n",
"9 14 1.0 16694.0 0.0 \n",
"\n",
" Taux_ticket_internet \n",
"0 54.735362 \n",
"1 43.117995 \n",
"2 0.048258 \n",
"3 0.000000 \n",
"4 0.000000 \n",
"5 0.000000 \n",
"6 79.999089 \n",
"7 1.384557 \n",
"8 0.000000 \n",
"9 0.000000 "
2024-03-10 19:08:50 +01:00
]
},
2024-03-10 19:49:34 +01:00
"execution_count": 97,
2024-03-10 19:08:50 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-10 19:49:34 +01:00
"# Taux de tickets achetés en ligne selon y_has_purchased, par compagnie, sur la base d'entraînement (train set)\n",
2024-03-10 19:08:50 +01:00
"\n",
2024-03-10 19:49:34 +01:00
"purchase_spectacle_train = train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[[\"nb_tickets\", \"nb_tickets_internet\"]].sum().reset_index()\n",
2024-03-10 19:08:50 +01:00
"purchase_spectacle_train[\"Taux_ticket_internet\"] = purchase_spectacle_train[\"nb_tickets_internet\"]*100 / purchase_spectacle_train[\"nb_tickets\"]\n",
"purchase_spectacle_train"
]
},
{
"cell_type": "code",
2024-03-10 19:49:34 +01:00
"execution_count": 106,
2024-03-10 19:08:50 +01:00
"id": "d11335b7-e35a-44c7-8ce4-661216978151",
"metadata": {},
2024-03-10 19:49:34 +01:00
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1UAAAIjCAYAAADr8zGuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACHE0lEQVR4nOzdeZyN9f//8edhzIrBMJtlZjB2QsqWxj6WLElIYSgR5WMJ+UiGLKFQfCIqS2VpQSXZskSWxpayhWZsmcg29mHm/fvDb87XMTOcOedoZvK4327ndpvzvt7Xdb2u67yv61yveV/X+1iMMUYAAAAAAIfkyOwAAAAAACA7I6kCAAAAACeQVAEAAACAE0iqAAAAAMAJJFUAAAAA4ASSKgAAAABwAkkVAAAAADiBpAoAAAAAnEBSBQAAAABOIKkC7HTixAn5+/trwIABDi9j9+7d8vb21pQpU1wYGQAAADJTtk6qLBaLXa9169Zldqh3FRcXJ4vFotmzZ7tkeVeuXFF0dHSa2z179mxZLBbFxcVlaJlRUVHKnTu3S+L7888/FR0drV27drlkeRllsVj08ssvZ2iemzdvqkOHDqpfv77efvttm2n2bs/FixfVtm1bvfLKK3rllVcyGvZ9ZbFYFB0dbX2/bt26bHHs3G9169ZV3bp1MzsMpzjS3h9EKefGbdu2ZXYoGRIVFaXQ0NDMDgP/IFdfMzjjQfqu2LRpk6Kjo3X+/Pn7uh5Hr9PuZcOGDfLw8NCRI0esZe+///59bUd3XltkJcuWLUszths3bqhEiRKaPHlyhpeZrZOqzZs327yaNWsmLy+vVOVVq1bN7FD/UVeuXNGIESPSPMk1b95cmzdvVlBQ0D8f2P/3559/asSIEZmWVDliyJAhcnNz09y5c2WxWGym2bs9zz//vB599FG99dZb9zFS16hateoDeewAQFYXFBSkzZs3q3nz5pkdygNl06ZNGjFixH1Pqu4HY4z69u2r7t27KyQkxFp+v5OqzZs364UXXrhvy3fGsmXLNGLEiFTluXLl0htvvKGRI0fqzJkzGVqmm6uCyww1atSweV+oUCHlyJEjVTn+T6FChVSoUKHMDiPbmTBhgtPL+Pzzz10QyT8jb968HEfI9owxunbtmry8vDI7lAcC+/uf4eHhwfkZGbJ8+XLt2LFD8+bNc3gZN27ckMVikZub/alDdm2nzzzzjPr3768PPvhA//3vf+2eL1v3VNnjf//7nx5//HH5+/vLx8dHFStW1Pjx43Xjxg2beqGhoYqKiko1/523//Ts2VOenp7avn27tSw5OVkNGjRQQECATp48edd4/vzzT7Vr10558uSRr6+v2rdvr/j4+DTrbtu2TS1btlSBAgXk6empKlWq3PPCPC4uzpo0jRgxwnoLZMq2pdetvHz5cjVo0EC+vr7y9vZW2bJlNXbs2Luu66efflLBggX1xBNP6PLly5KkgwcPqmPHjvL395eHh4fKli2r//3vf9Z51q1bp0ceeUSS1LVrV2t8d+sePn36tHr16qVy5copd+7c8vf3V/369bVhw4ZUda9fv66RI0eqbNmy8vT0lJ+fn+rVq6dNmzalqvvJJ5+obNmy8vb21kMPPaSlS5emquOK7XHkc0yRmJioUaNGqUyZMvLw8FChQoXUtWtXnT592qZeaGionnjiCS1fvlxVq1aVl5eXypQpo48//tiu9dwpvVs6Zs6cqVKlSsnDw0PlypXTvHnzUt1+lHJryttvv62JEycqLCxMuXPnVs2aNbVly5ZU63Jm/6RlzZo1qlu3rvz8/OTl5aVixYrpqaee0pUrV6x17N2vaTl79qx69eqlwoULy93dXcWLF9fQoUN1/fp1m3opt93Z085ud+nSJeXLl089evRINS0uLk45c+Z0KMm/VxyHDh1S165dFR4eLm9vbxUuXFgtWrTQr7/+alMvOTlZo0aNUunSpeXl5aV8+fKpUqVKevfdd+2OJa
WNjB8/XqNHj1axYsXk6empatWq6YcffrCpm97tbdHR0al6jVP2+fTp01W2bFl5eHhozpw5kqT9+/frmWeeUUBAgDw8PFSsWDF17tw51ed28eJFvfTSSypYsKD8/PzUpk0b/fnnnzZ1Fi5cqMaNGysoKEheXl4qW7asXnvtNet5MMUff/yhDh06KDg4WB4eHgoICFCDBg1S9WovXLhQNWvWlI+Pj3Lnzq3IyEjt3Lkz1TbPnj1bpUuXtp6L5s6de9f9fLuUc8TixYtVqVIleXp6qnjx4nrvvfds6l27dk0DBgxQ5cqV5evrqwIFCqhmzZr6+uuvUy3zbvs7I7Zu3aoWLVrIz89Pnp6eKlGihPr27WtTZ+PGjWrQoIHy5Mkjb29v1apVS999951NnZTvtzVr1qh79+7y8/NT3rx51blzZ12+fFnx8fFq166d8uXLp6CgIL366qs21wIZaZf2Hi+StGfPHjVu3Fje3t4qVKiQevfure+++y7VObZu3bqqUKGCYmJiVKdOHXl7e6t48eJ66623lJycnCrOO3sY7vV9JTl3/O7fv19NmjSRt7e3ChYsqJ49e+rixYtp1l29erUaNGigvHnzytvbW7Vr1061D9NiT3wpx/7OnTvVpk0b5c2bV76+vnruuefSPIfbe3zdrR1GR0dr4MCBkqSwsLBUj5fYe06413ruxtF9KknTpk3TI488otKlS1vLQkNDtWfPHq1fv966PSnn2pRrgE8++UQDBgxQ4cKF5eHhoUOHDmXomuzO66GUY3Tt2rX3PM+mxVXn1KioKOuxcfvjQinXxu7u7mrfvr1mzJghY4xd+1iSZP5FunTpYnx8fGzK+vXrZ6ZNm2aWL19u1qxZYyZNmmQKFixounbtalMvJCTEdOnSJdUyIyIiTEREhPX91atXTeXKlU3x4sXNuXPnjDHGvPHGGyZHjhxm5cqVd43vypUrpmzZssbX19dMmTLFrFixwvTp08cUK1bMSDKzZs2y1l2zZo1xd3c3derUMQsXLjTLly83UVFRqerd6dq1a2b58uVGknn++efN5s2bzebNm82hQ4eMMcbMmjXLSDKxsbHWeT788ENjsVhM3bp1zbx588zq1avN+++/b3r16pXuvl24cKHx8PAwL730krl586Yxxpg9e/YYX19fU7FiRTN37lyzcuVKM2DAAJMjRw4THR1tjDHmwoUL1hhef/11a3zHjh1Ld5v2799vXnrpJbNgwQKzbt06s3TpUvP888+bHDlymLVr11rr3bhxw9SrV8+4ubmZV1991Sxbtsx888035r///a+ZP3++tZ4kExoaah599FHz+eefm2XLlpm6desaNzc3c/jwYWs9V2yPo5+jMcYkJSWZJk2aGB8fHzNixAizatUq8+GHH5rChQubcuXKmStXrljrhoSEmCJFiphy5cqZuXPnmhUrVpinn37aSDLr16+/63pS9snw4cOt79euXWsk2ezfDz74wEgyTz31lFm6dKn57LPPTKlSpUxISIgJCQmx1ouNjbXu4yZNmpglS5aYJUuWmIoVK5r8+fOb8+fPW+s6s3/SEhsbazw9PU2jRo3MkiVLzLp168xnn31mOnXqZD1eM7Jf0zr+K1WqZHx8fMzbb79tVq5caYYNG2bc3NxMs2bNUu1Te9pZWvr162d8fHxs9pUxxgwcONB4enqav//+2+59Ym8c69evNwMGDDBffvmlWb9+vVm8eLFp3bq18fLyMvv377fWGzt2rMmZM6cZPny4+eGHH8zy5cvN5MmTrceEPVLaSNGiRc1jjz1mvvrqK/PFF1+YRx55xOTKlcts2rTJWrdLly427SvF8OHDzZ1fYZJM4cKFTaVKlcy8efPMmjVrzG+//WZ27dplcufObUJDQ8306dPNDz/8YD799FPTrl07k5CQYIz5v3Nj8eLFzSuvvGJWrFhhPvzwQ5M/f35Tr149m/W8+eabZtKkSea7774z69atM9OnTzdhYWGp6pUuXd
qULFnSfPLJJ2b9+vXmq6++MgMGDLA5rkaPHm0sFovp1q2bWbp0qVm0aJGpWbOm8fHxMXv27LHWS4mvVatW5ttvvzW
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"multiple_barplot(purchase_spectacle_train, x=\"number_company\", y=\"Taux_ticket_internet\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Taux de ticket acheté par internet (%)\", \n",
" title = \"Taux de ticket achété en ligne selon y_has_purchased par compagnies de spectacle (train set)\")"
]
},
{
"cell_type": "code",
"execution_count": 107,
"id": "9ba02de7-3087-4b0c-884a-dc4a6ca92c3b",
"metadata": {},
2024-03-10 19:08:50 +01:00
"outputs": [],
2024-03-10 19:49:34 +01:00
"source": [
"#stat sur la variable temps ecoulé entre le premier et le dernier achat"
]
2024-03-10 19:08:50 +01:00
},
2024-03-05 03:51:39 +01:00
{
"cell_type": "code",
2024-03-10 19:49:34 +01:00
"execution_count": 108,
2024-03-05 03:51:39 +01:00
"id": "59a95248-0261-4970-9e91-e43d50cf4d69",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles')"
]
},
2024-03-10 19:49:34 +01:00
"execution_count": 108,
2024-03-05 03:51:39 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA6UAAAHGCAYAAACM3i2bAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABvz0lEQVR4nO3dd3gUVfv/8c+SnhAWkpCGSei9g9JUQGpoYkNAKYJgRRGw8KACPhRFaV9QEaUJEbAAKiBIVwggVYqIoFQhBCkJoYSQnN8f/rIPSxJIINmhvF/XtRfMmTMz95Sd2Ttn5ozNGGMEAAAAAIAF8lkdAAAAAADgzkVSCgAAAACwDEkpAAAAAMAyJKUAAAAAAMuQlAIAAAAALENSCgAAAACwDEkpAAAAAMAyJKUAAAAAAMuQlAIAAAAALHPHJaVbt26V3W7X//3f/1kdCoBckJqaqvr16+vee+/VhQsXrA4HAAAAOZSjpHTq1Kmy2WxOn8KFC6tBgwaaP3/+dQdRtGhRde3a1TF85MgRDRo0SFu3br3ueWalatWqmjt3rgYMGKC1a9fm+vxvRsOGDdO8efPyfDk2m00vvvhini/nassfNGhQrs3viy++0JgxY3Jtfrg+V54frjRgwADFx8fru+++k7e3t+sCy4G8PKdlZdCgQbLZbC5b3s2ma9euKlq0qNVhOPntt980aNAg7d+/P1v106+52a1/vVauXCmbzaaVK1fm6XJu1uXfiKJFi6pVq1a5Nr9z585p0KBBlmyL/fv3y2azaerUqS5f9p1+vrpVWHmMXOlWPm/kVGxsrAYNGqTTp0/n6XJcdc3JynW1lE6ZMkVr165VbGysJk6cKDc3N7Vu3Vrff//9dQUxd+5cvfXWW47hI0eOaPDgwXn2A+6BBx7Qp59+qvbt2+v48eN5soybiauS0tsNSenNb8GCBZo+fboWLVqkgIAAq8PJUl6f05DRW2+9pblz51odhpPffvtNgwcPtuyCf7OqXr261q5dq+rVq1sdiuXOnTunwYMH3xE/tHHrCQsL09q1a9WyZUurQ7mjxMbGavDgwXmelFrN/XomqlixomrWrOkYbt68uQoVKqSZM2eqdevWOZ5ftWrVrieMG9K+fXu1b9/e5csFkHtatmypv//+2+owct25c+fk6+trdRi5ytXrVKJEiVyd3+24T/JKTrdVgQIFVLt27Vxb/vnz5+Xt7U3LG5DLvLy8cvW7ClwuV54p9fb2lqenpzw8PJzKT548qeeff15FihSRp6enihcvrgEDBig5Odmp3uW3561cuVJ33323JOmpp55y3CZ8+W2ZGzduVJs2bRQQECBvb29Vq1ZNX375ZbZiHTx4sGrVqqWAgAAVKFBA1atX16RJk2SMuea0Xbt2Vf78+fX777+rWbNm8vPzU1hYmN59911J0rp163TvvffKz89PpUuX1rRp0zLMY8eOHXrwwQdVqFAheXt7q2rVqhnqZdV8ntmtClu2bFGrVq0UHBwsLy8vhYeHq2XLljp8+LCkf29pPXv2rKZNm+bYlg0aNJAkHT9+XM8//7zKly+v/PnzKzg4WA888IB+/vnnDHEnJyfrnXfeUbly5eTt7a3AwEA1bNhQsbGxGepOnz5d5cqVk6+vr6pUqZLprd179uxRx44dHXGXK1dOH3744VW3f7rExET16NFDgYGByp8/v5o3b64//vgjQ72sbt/Lzm1CDRo00IIFC3TgwAGn29XTXbx4UUOGDFHZsmXl5eWlwoUL66mnnsrQ8p5+a9f8+fNVrVo1+fj4qFy5co5tMnXqVJUrV05+fn665557tHHjxgzrkD9/fu3cuVONGjWSn5+fChcurBdffFHnzp1zqvvVV1+pVq1astvt8vX1VfHixdWtW7errqckGWP00UcfqWrVqvLx8VGhQoX06KOP6q+//spQd9GiRWrUqJFjGeXKldPw4cOd6nz33XeqU6eOfH195e/vryZNmmS4Vf
5G9o307zHQr18/FStWTJ6enipSpIh69+6ts2fPunSbNGjQQBUrVtSGDRt03333OZbx7rvvKi0tTdK1z2np+3j79u1q2rSp/P391ahRI0nZP85yYvbs2apTp478/PyUP39+NWvWTFu2bLnmdOnnpSVLluipp55SQECA/Pz81Lp16yy3y08//aS6devK19fXsd2zu+/SHweYMmWKypQpIx8fH9WsWVPr1q2TMUbvv/++ihUrpvz58+uBBx7Q3r17nabP7BjL6X7NLP6sXOu6NHXqVD322GOSpIYNGzqOg+u5DW7p0qVq1KiRChQoIF9fX9WrV0/Lli3L1rS///67mjdvLl9fXwUFBenZZ5/VmTNnrns56d/ZzZs369FHH1WhQoUcfxBIP/8tWrRI1atXl4+Pj8qWLavJkyc7zSOr2/Cyc61PPy5//PFHdevWTYULF5avr2+G3xnpLly4oL59+6pq1aqy2+0KCAhQnTp19O2332aom5aWpnHjxjmOl4IFC6p27dr67rvvMtS91jpm53q7f/9+FS5cWNK/v1XSj5GrPb6QlpamIUOGOL4jBQsWVOXKlTV27FinejdyzV29erUaNWokf39/+fr6qm7dulqwYIFTnfT9sGLFCj333HMKCgpSYGCgHn74YR05ciRby8lMds5Xf/31l9q3b6/w8HB5eXkpJCREjRo1ytadKevXr1fr1q0VGBgob29vlShRQr17977u9V++fLnj90mBAgXUuXNnnT17VnFxcWrXrp0KFiyosLAw9evXTykpKY7p02+LHTFihIYOHarIyEh5e3urZs2aGb5ze/fu1VNPPaVSpUrJ19dXRYoUUevWrbV9+/YM67dz5041bdpUvr6+Kly4sF544QUtWLAgw/ctO9eyy+O88ryVneMru8dqZnL7vJWZ7MSXfr7bsmWLHn74YRUoUEB2u11PPvlkptfl7F5vr3YcDho0SK+++qokqVixYo7zQvr+mz17tpo2baqwsDDHb8w33ngjwzX1Wsu5muxs0+PHj6tnz56KiIhw/F6pV6+eli5des35O5gcmDJlipFk1q1bZ1JSUszFixfNoUOHzEsvvWTy5ctnFi1a5Kh7/vx5U7lyZePn52c++OAD8+OPP5q33nrLuLu7mxYtWjjNNyoqynTp0sUYY0xCQoJjOW+++aZZu3atWbt2rTl06JAxxpjly5cbT09Pc99995nZs2ebRYsWma5duxpJZsqUKddch65du5pJkyaZJUuWmCVLlpj//ve/xsfHxwwePPia03bp0sV4enqacuXKmbFjx5olS5aYp556ykgy/fv3N6VLlzaTJk0yixcvNq1atTKSzMaNGx3T//7778bf39+UKFHCfP7552bBggWmQ4cORpJ57733Mmznffv2OS1/xYoVRpJZsWKFMcaYpKQkExgYaGrWrGm+/PJLs2rVKjN79mzz7LPPmt9++80YY8zatWuNj4+PadGihWNb7ty50xHPc889Z2bNmmVWrlxp5s+fb7p3727y5cvnWIYxxqSkpJiGDRsad3d3069fP7Nw4ULz3Xffmf/85z9m5syZjnqSTNGiRc0999xjvvzyS7Nw4ULToEED4+7ubv78809HvZ07dxq73W4qVapkPv/8c/Pjjz+avn37mnz58plBgwZddR+kpaWZhg0bGi8vLzN06FDz448/moEDB5rixYsbSWbgwIFO+ysqKirDPAYOHGiudejv3LnT1KtXz4SGhjq229q1a40xxqSmpprmzZsbPz8/M3jwYLNkyRLz2WefmSJFipjy5cubc+fOOeYTFRVl7rrrLlOxYkUzc+ZMs3DhQlOrVi3j4eFh3n77bVOvXj0zZ84cM3fuXFO6dGkTEhLiNH36MRcZGelY30GDBhl3d3fTqlUrR73Y2Fhjs9lM+/btzcKFC83y5cvNlClTTKdOna66nsYY06NHD+Ph4WH69u1rFi1aZL744gtTtmxZExISYuLi4hz1PvvsM2Oz2UyDBg
3MF198YZYuXWo++ugj8/zzzzvqxMTEGEmmadOmZt68eWb27NmmRo0axtPT0/z888/XtW8uPz8YY8zZs2dN1apVTVB
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-10 19:49:34 +01:00
"#répartition des clients selon le temps écoulé entre le premier et le dernier achat, par compagnie\n",
2024-03-05 03:51:39 +01:00
"\n",
"sns.boxplot(data=products_purchased_reduced_spectacle, y=\"time_between_purchase\",x=\"number_compagny\",showfliers=False,showmeans=True)\n",
"plt.title(\"Boite à moustache du temps ecoulés entre le premier et le dernier achat selon les compagnies de spectacles\")"
]
},
{
"cell_type": "code",
2024-03-10 19:49:34 +01:00
"execution_count": 109,
2024-03-05 14:34:43 +01:00
"id": "e2c51e28-6197-48f0-ab6d-9fc7b3b0de74",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Statistique F : 7956.05932109542\n",
"Valeur de p : 0.0\n",
"Nombre de degrés de liberté entre les groupes : 4\n",
"Nombre de degrés de liberté à l'intérieur des groupes : 764875\n",
2024-03-05 14:36:03 +01:00
"Il y a des différences significatives entre au moins une des entrepries .\n"
2024-03-05 14:34:43 +01:00
]
}
],
"source": [
2024-03-05 15:37:29 +01:00
"#test d'anova pour voir si la difference de temps entre le premier et le dernier achat est statistiquement significative\n",
"\n",
2024-03-05 14:34:43 +01:00
"from scipy.stats import f_oneway\n",
"\n",
"# One-way ANOVA: does the mean of time_between_purchase differ across companies?\n",
"\n",
"# Build one sample per company with a single groupby pass instead of re-filtering\n",
"# the whole frame once per company. Missing values are dropped first: a NaN in any\n",
"# sample would otherwise make the F statistic and the p-value NaN, and the final\n",
"# test below would then silently report \"no significant difference\".\n",
"groupes = [\n",
"    groupe.dropna()\n",
"    for _, groupe in products_purchased_reduced_spectacle.groupby('number_compagny')['time_between_purchase']\n",
"]\n",
"\n",
"# Run the ANOVA test\n",
"f_statistic, p_value = f_oneway(*groupes)\n",
"\n",
"# Total number of observations actually used by the test\n",
"N = sum(len(groupe) for groupe in groupes)\n",
"\n",
"# Number of groups (one per company)\n",
"k = len(groupes)\n",
"\n",
"# Between-group and within-group degrees of freedom\n",
"df_between = k - 1\n",
"df_within = N - k\n",
"\n",
"# Report the results\n",
"print(\"Statistique F :\", f_statistic)\n",
"print(\"Valeur de p :\", p_value)\n",
"\n",
"print(\"Nombre de degrés de liberté entre les groupes :\", df_between)\n",
"print(\"Nombre de degrés de liberté à l'intérieur des groupes :\", df_within)\n",
"\n",
"# Decision at the 5% significance level\n",
"if p_value < 0.05:\n",
"    print(\"Il y a des différences significatives entre au moins une des entrepries .\")\n",
"else:\n",
"    print(\"Il n'y a pas de différences significatives entre les entreprises .\")"
]
2024-03-05 03:51:39 +01:00
},
2024-03-10 19:49:34 +01:00
{
"cell_type": "code",
"execution_count": 111,
"id": "75a003ab-f42a-4b2d-a0a8-284e673e71f7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>time_between_purchase</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>45.791114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>193.080793</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>27.640469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>129.853892</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>16.418446</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>58.548598</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>10.012525</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>93.545373</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>3.879196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>10.745213</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased time_between_purchase\n",
"0 10 0.0 45.791114\n",
"1 10 1.0 193.080793\n",
"2 11 0.0 27.640469\n",
"3 11 1.0 129.853892\n",
"4 12 0.0 16.418446\n",
"5 12 1.0 58.548598\n",
"6 13 0.0 10.012525\n",
"7 13 1.0 93.545373\n",
"8 14 0.0 3.879196\n",
"9 14 1.0 10.745213"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#temps moyen écoulé entre le premier et le dernier achat, par compagnie et selon y_has_purchased (base de train)\n",
"purchase_train_time= train_set_spectacle.groupby([\"number_company\", \"y_has_purchased\"])[\"time_between_purchase\"].mean().reset_index()\n",
"purchase_train_time"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "f27921a9-1253-4c02-9bff-8cd3c4a9a5d9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA9AAAAIiCAYAAAAggyBOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACN30lEQVR4nOzdeXhN1/7H8c+RWUQIIgmRpOZZ0JpaxBRDqaqiE0HR1nBNNdxWDW0pHehwaZVKKaXtRRU1lRiK1lg1ljbGSqOmEEQk6/dHfznXkYQTEseJ9+t58jw5a0/fvc/aw/esvde2GGOMAAAAAADATeVxdAAAAAAAADgDEmgAAAAAAOxAAg0AAAAAgB1IoAEAAAAAsAMJNAAAAAAAdiCBBgAAAADADiTQAAAAAADYgQQaAAAAAAA7kEADAAAAAGCHLCXQGzdu1KhRo3Tu3LkcCge5zZw5czRp0qS7ukyLxaJRo0bd1WXeKw4fPiyLxaLo6GhHh2Jj8uTJWYopNDRUUVFRORZPmoYNG6phw4Y5vpx7dfm3Kzo6WhaLRVu3bs22eS5dutRh+21UVJRCQ0MdsuzsEhoaqkcffdTRYdzzYmJiZLFY9M033zg6lCwZNWqULBaLo8PAXXavXM/cq9cWOWHv3r0aNWqUDh8+nKPLSTsWxcTEZOt8f//9d3l4eGjTpk3Wspy+Fr9b12y342a5a/369dW/f//bmm+WE+jRo0eTQMNujkig72eBgYHatGmTWrVq5ehQbGQ1gb5fTJ48WZMnT3Z0GPeEpUuXavTo0Y4OAwDuGZs2bdLzzz/v6DDuK3v37tXo0aNzPIHOKYMHD1bTpk1Vp04da1lOX4svWLBAI0aMyLH534mb5a6vv/66Jk+erAMHDmR5vq7ZEBuQLVJSUnTt2jV5eHg4OpRsk5ycLIvFIlfXu7OreXh4qHbt2tk2v7sdvzMzxujKlSvy8vKye5oKFSpkawyXL1/O0vKB3IK6f3exve+O7DyfI/fbt2+fFi5cqGXLlt32PG7nWjw8PPy2l+dIDRo0UNmyZfXuu+9q6tSpWZrW7hboUaNG6eWXX5YkhYWFyWKxpLv1YN68eapTp468vb2VL18+RUZGaseOHTbziYqKUr58+bR//35FRkbK29tbgYGBeuuttyRJmzdv1sMPPyxvb2+VKVNGn3/+uc30abcOrly5Ul27dpWfn5+8vb3VunVr/fHHHzbj7tixQ48++qj8/f3l4eGhoKAgtWrVSsePH7/pujZs2FCVKlXSpk2bVLduXXl5eSk0NFQzZsyQJC1ZskTVq1dX3rx5Vbly5Qwr6oYNG9S4cWP5+Pgob968qlu3rpYsWWIdfvjwYbm6umrcuHHppl23bp0sFou+/vpra9nBgwf19NNPW9elfPny+s9//mMzXdrtIF9++aVeeeUVBQUFKX/+/GrSpIndv65k53IaNmyoJUuW6MiRI9b6knYLWtrtQBMmTNAbb7yhsLAweXh4aM2aNZKkrVu3qk2bNvLz85Onp6fCw8P11Vdf2bUOGYmLi1OvXr1UvHhxubu7KywsTKNHj9a1a9duOW3arZELFixQlSpV5OnpqQceeEAffPBBhttl1qxZGjRokIoVKyYPDw8dOnRIkrRq1So1btxY+fPnV968eVWvXj398MMPNvNIu01v165devLJJ+Xr6ys/Pz8NHDhQ165d04EDB9S8eXP5+PgoNDRUEyZMsJk+s9ussvK9ZhZ/Rq5evao33nhD5cqVk4eHh4oUKaKuXbvq1KlTNttvz549Wrt2rbUO3M7tsgkJCRo8eLDCwsLk7u6uYsWKqX///kpMTLzltMYYTZgwQSEhIfL09FT16tX1/fff39FyLBaL+vTpo48//ljly5eXh4eHPv/8c+sxas2aNXrxxRdVuHBhFSpUSO3atdOff/5pM4+MbuG2Z5tK/6uX8+fPV3h4uDw9PW/agrty5Uo99thjKl68uDw9PVWqVCn16t
VLf//9d7px9+/fr6eeekpFixaVh4eHSpQooc6dOyspKclmvAsXLtxyHefNm6dmzZopMDBQXl5eKl++vIYNG2azPaOioqz18fpjxc1aAOw5vhtjNHnyZFWrVk1eXl4qWLCg2rdvn+5ckZErV65o+PDhNvWgd+/e6X7FTvseli1bpurVq8vLy0vlypXTZ599dtP5G2NUunRpRUZGpht28eJF+fr6qnfv3reM80a3iuPUqVN66aWXVKFCBeXLl0/+/v5q1KiR1q9fn25eU6ZMUdWqVZUvXz75+PioXLly+ve//52leNL2k08++URlypSRh4eHKlSooLlz59qMl9ktymn70/V14WZ1/8SJE+rZs6eCg4Pl7u6uoKAgtW/fXn/99ZfNfJOTk295nrR3nzl16pR1mWn7bL169bRq1Sqb8ew5B0j/XGNUq1ZNHh4eCgsL0zvvvHPrDf3/0q5f1q9fr9q1a8vLy0vFihXTiBEjlJKSYjPu6NGjVatWLfn5+Sl//vyqXr26pk+fLmOMzXhZPdZkxp7jyu7du/XYY4+pYMGC8vT0VLVq1dJdB6adq+bMmaOhQ4cqMDBQ+fLlU+vWrfXXX3/pwoUL6tmzpwoXLqzChQura9euunjxos087K2XWdlfjh8/rvbt28vHx0cFChTQM888oy1btqQ7J6ddBx86dEgtW7ZUvnz5FBwcrEGDBqU7xmZ0C7e91zO3u//++eef6tChg3x8fOTr66uOHTsqLi4uw3Hv5FrtVvFl5Xpfsn//ulk9jI6O1pNPPilJioiIsJ6L0r6/nDiPZvc2DQgIUNOmTa1lt3stfuXKFQ0aNEjVqlWzXovWqVNH3377bbrl3ngL953mI9l1TLUnd33uuec0Z84cXbhwwa5tbGXsdOzYMdO3b18jycyfP99s2rTJbNq0yZw/f94YY8ybb75pLBaL6datm1m8eLGZP3++qVOnjvH29jZ79uyxzqdLly7G3d3dlC9f3rz//vtm5cqVpmvXrkaSGT58uClTpoyZPn26Wb58uXn00UeNJLN161br9DNmzDCSTHBwsOnWrZv5/vvvzdSpU42/v78JDg42Z8+eNcYYc/HiRVOoUCFTs2ZN89VXX5m1a9eaefPmmRdeeMHs3bv3puvaoEEDU6hQIVO2bNl0sYwePdpUrlzZfPnll2bp0qWmdu3axsPDw5w4ccI6fUxMjHFzczM1atQw8+bNMwsXLjTNmjUzFovFzJ071zre448/bkqUKGGuXbtms/wnn3zSBAUFmeTkZGOMMXv27DG+vr6mcuXKZubMmWbFihVm0KBBJk+ePGbUqFHW6dasWWMkmdDQUPPMM8+YJUuWmC+//NKUKFHClC5dOt1ybpTdy9mzZ4+pV6+eCQgIsNaXTZs2GWOMiY2NNZJMsWLFTEREhPnmm2/MihUrTGxsrFm9erVxd3c3jzzyiJk3b55ZtmyZiYqKMpLMjBkzbroOxhgjyYwcOdL6+eTJkyY4ONiEhISYTz75xKxatcq8/vrrxsPDw0RFRd1yfiEhIaZYsWKmRIkS5rPPPjNLly41zzzzjJFk3n777XTbpVixYqZ9+/Zm0aJFZvHixeb06dNm1qxZxmKxmLZt25r58+eb7777zjz66KPGxcXFrFq1yjqPkSNHGkmmbNmy5vXXXzcrV640Q4YMMZJMnz59TLly5cwHH3xgs9/897//tU6ftl2v305Z/V4zij8jKSkppnnz5sbb29uMHj3arFy50kybNs0UK1bMVKhQwVy6dMkYY8z27dvNAw88YMLDw611YPv27bfc5l26dLF+TkxMNNWqVTOFCxc27733nlm1apV5//33ja+vr2nUqJFJTU296fzStmv37t2tx4xixYqZgIAA06BBg9taTtq2qlKlipkzZ45ZvXq12b17t/UY9cADD5i+ffua5cuXm2nTppmCBQuaiIgIm7gaNGhgs3x7t2naNgoMDDQPPPCA+eyzz8yaNWvMzz//nOk2mDJlih
k3bpxZtGiRWbt2rfn8889N1apVTdmyZc3Vq1et4+3cudPky5fPhIaGmo8//tj88MMP5osvvjAdOnQwCQkJxhiTpXV
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar plot of the mean time between first and last purchase, per company and per\n",
"# y_has_purchased value (train set). The y-axis label names the plotted variable\n",
"# (time_between_purchase), not the internet ticket rate used by the earlier figure.\n",
"multiple_barplot(purchase_train_time, x=\"number_company\", y=\"time_between_purchase\", var_labels=\"y_has_purchased\",\n",
"                 dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
"                 xlabel = \"Numéro de compagnie\", ylabel = \"Temps moyen entre le premier et le dernier achat\", \n",
"                 title = \"temps moyen entre le premier et le dernier achat selon y_has_purchased par compagnies de spectacle (train set)\")"
]
},
2024-03-05 15:37:29 +01:00
{
"cell_type": "code",
"execution_count": 33,
"id": "74f06e96-3c25-4eca-8190-25b0a4ab0d75",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id int64\n",
"nb_tickets int64\n",
"nb_purchases int64\n",
"total_amount float64\n",
"nb_suppliers int64\n",
"vente_internet_max int64\n",
"purchase_date_min float64\n",
"purchase_date_max float64\n",
"time_between_purchase float64\n",
"nb_tickets_internet float64\n",
"number_compagny int64\n",
"dtype: object"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced_spectacle.dtypes"
]
},
{
"cell_type": "code",
2024-03-10 20:30:08 +01:00
"execution_count": 114,
"id": "aa6655c0-c602-4485-8b38-3117227464e1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>number_compagny</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19482</td>\n",
" <td>88</td>\n",
" <td>29</td>\n",
" <td>872.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2643.092500</td>\n",
" <td>718.149398</td>\n",
" <td>1924.943102</td>\n",
" <td>8.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19484</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>62.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1745.021736</td>\n",
" <td>1743.045035</td>\n",
" <td>1.976701</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19485</td>\n",
" <td>131</td>\n",
" <td>21</td>\n",
" <td>1878.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2649.044745</td>\n",
" <td>85.240845</td>\n",
" <td>2563.803900</td>\n",
" <td>84.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19486</td>\n",
" <td>10</td>\n",
" <td>4</td>\n",
" <td>96.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1944.077604</td>\n",
" <td>1742.794225</td>\n",
" <td>201.283380</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19487</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>33.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1742.877766</td>\n",
" <td>1742.877766</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99580</th>\n",
" <td>6884747</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.193750</td>\n",
" <td>0.193750</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99581</th>\n",
" <td>6884748</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.186806</td>\n",
" <td>0.186806</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99582</th>\n",
" <td>6884750</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>80.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.136111</td>\n",
" <td>0.136111</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99583</th>\n",
" <td>6884751</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.122917</td>\n",
" <td>0.122917</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99584</th>\n",
" <td>6884753</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>40.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.047222</td>\n",
" <td>0.047222</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>764880 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 19482 88 29 872.0 2 \n",
"1 19484 3 2 62.0 1 \n",
"2 19485 131 21 1878.0 2 \n",
"3 19486 10 4 96.0 1 \n",
"4 19487 2 1 33.0 1 \n",
"... ... ... ... ... ... \n",
"99580 6884747 2 1 40.0 1 \n",
"99581 6884748 2 1 40.0 1 \n",
"99582 6884750 4 1 80.0 1 \n",
"99583 6884751 2 1 40.0 1 \n",
"99584 6884753 2 1 40.0 1 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 1 2643.092500 718.149398 \n",
"1 0 1745.021736 1743.045035 \n",
"2 1 2649.044745 85.240845 \n",
"3 0 1944.077604 1742.794225 \n",
"4 0 1742.877766 1742.877766 \n",
"... ... ... ... \n",
"99580 0 0.193750 0.193750 \n",
"99581 0 0.186806 0.186806 \n",
"99582 0 0.136111 0.136111 \n",
"99583 0 0.122917 0.122917 \n",
"99584 0 0.047222 0.047222 \n",
"\n",
" time_between_purchase nb_tickets_internet number_compagny \n",
"0 1924.943102 8.0 10 \n",
"1 1.976701 0.0 10 \n",
"2 2563.803900 84.0 10 \n",
"3 201.283380 0.0 10 \n",
"4 0.000000 0.0 10 \n",
"... ... ... ... \n",
"99580 0.000000 0.0 14 \n",
"99581 0.000000 0.0 14 \n",
"99582 0.000000 0.0 14 \n",
"99583 0.000000 0.0 14 \n",
"99584 0.000000 0.0 14 \n",
"\n",
"[764880 rows x 11 columns]"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_purchased_reduced_spectacle"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be04e2f9-60b9-4b44-ab36-06a365b21e32",
"metadata": {},
"outputs": [],
"source": [
"#Stat sur les canaux de vente"
]
},
{
"cell_type": "code",
"execution_count": 118,
2024-03-05 15:37:29 +01:00
"id": "20a70ec0-38f6-470e-a442-7884a150613a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArMAAAIhCAYAAABdSTJTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABSMUlEQVR4nO3deXwNZ///8feRPUgQJKIRQe1ro4uoBrW3aG8tym0pelNK0YXcrQalKV3u9Naquu2tqmrRUkXUVkurtmotXRRRErG0Yg1Jrt8ffjlfR04ih8QxvJ6Px3k8zDXXzHzmzJzkbXLNHJsxxggAAACwoELuLgAAAAC4VoRZAAAAWBZhFgAAAJZFmAUAAIBlEWYBAABgWYRZAAAAWBZhFgAAAJZFmAUAAIBlEWYBAABgWYRZuN2MGTNks9nk6+urAwcOZJvfuHFj1axZ0w2VST179lSRIkXcsm0417hxYzVu3NjdZdw2bDabRo4c6e4yrtuuXbs0cuRI7d+/392lWMKGDRs0cuRI/f333+4uJUcjR46UzWZzdxm4CRBmcdNIS0vTyy+/7O4yANyCdu3apVGjRhFm82jDhg0aNWrUTR1m+/Tpo40bN7q7DNwECLO4abRq1Uoff/yxfvzxR3eXki+MMTp37py7ywBwCzh37pyMMe4uI0c38mfd2bNnJUl33HGH7rvvvhu2Xdy8CLO4abz44osKCgrSsGHDrtr3/PnziomJUUREhLy9vVW2bFkNGDAg21WE8uXL6+GHH9bixYtVr149+fn5qVq1alq8eLGkS0McqlWrpsKFC+uee+7R5s2bnW5v586devDBB1W4cGGVKlVKzzzzjP0HahabzaZnnnlGkyZNUrVq1eTj46OZM2dKkn777Td16dJFpUuXlo+Pj6pVq6b33nsvT+9L1nqnT5+uKlWqyM/PT/Xr19d3330nY4zeeOMNRUREqEiRImratKl+//33bOuYNm2a6tSpI19fX5UoUUKPPvqodu/ebZ//4YcfymazOb3KMXr0aHl5eenw4cP2thUrVujBBx9UQECA/P391bBhQ33zzTcOy2X9CXDnzp164oknFBgYqODgYPXq1UsnT5686n4bYzR+/HiFh4fL19dXd911l77++munfVNTU/X88887nA+DBw/WmTNnrrodSVq6dKkefPBBBQYGyt/fX9WqVVNcXJx9/ubNm9W5c2eVL19efn5+Kl++vJ544olsw2KyhsysWrVKTz/9tEqWLKmgoCD94x//cHj/JGnu3Llq0aKFypQpYz8vhw8fnq3mnIZV9OzZU+XLl7dPv/766ypUqJAWLVqUrZ+/v79++umnXN+D1NRUPfXUUwoKClKRIkXUqlUr/frrr077Xuv5XK9ePTVq1Chbe0ZGhsqWLat//OMf9rYLFy5ozJgxqlq1qnx8fFSqVCk9+eSTOnr0qMOyWZ/xpUuX6q677pKfn5+qVq2qadOm2fvMmDFDjz/+uCSpSZMmstlsstlsmjFjhr1PXs5pZ1avXi2bzaaPPvpIQ4cOVUhIiPz8/BQdHa1t27Y59HX1PFq+fLl69eqlUqVKyd/fX2lpaTnWkdfPQNbPkw8//FDVqlWTv7+/6tSpY/+ZKF367L7wwguSpIiICPv7tXr1aof3fP78+apXr558fX01atQoSVJycrL69u2rO+64Q97e3oqIiNCoUaOUnp5uX//+/ftls9k0fvx4jR07VuXKlZOvr6/q16+f48+RrVu36rHHHlPx4sVVsWJFh3mXW7lypRo3bqygoCD5+fmpXLly6tChg8PP67yeW7AQA7jZ9OnTjSTzww8/mHfeecdIMt988419fnR0tKlRo4Z9OjMz07Rs2dJ4enqaESNGmOXLl5s333zTFC5c2NSrV8+cP3/e3jc8PNzccccdpmbNmmbOnDlmyZIl5t577zVeXl7mlVdeMQ0bNjTz5883CxYsMJUrVzbBwcHm7Nmz9uV79OhhvL29Tb
ly5czYsWPN8uXLzciRI42np6d5+OGHHfZDkilbtqypXbu2+fjjj83KlSvNzz//bHbu3GkCAwNNrVq1zKxZs8zy5cvNc889ZwoVKmRGjhx51fdHkgkPDzdRUVEOtZYoUcIMGTLEtG/f3ixevNjMnj3bBAcHm9q1a5vMzEz78q+99pqRZJ544gnz1VdfmVmzZpkKFSqYwMBA8+uvvxpjjElLSzMhISGma9euDtu+ePGiCQ0NNY8//ri97cMPPzQ2m8088sgjZv78+WbRokXm4YcfNh4eHmbFihX2frGxsUaSqVKlinnllVdMQkKCefvtt42Pj4958sknr7rfWcv37t3bfP3112by5MmmbNmyJiQkxERHR9v7nTlzxtStW9eULFnSvP3222bFihXmnXfeMYGBgaZp06YO74UzU6ZMMTabzTRu3Nh8/PHHZsWKFWbixImmf//+9j7z5s0zr7zyilmwYIFZs2aN+eSTT0x0dLQpVaqUOXr0qL1f1rlcoUIFM3DgQLNs2TIzZcoUU7x4cdOkSROH7b766qvmP//5j/nqq6/M6tWrzaRJk0xERES2ftHR0Q77m6VHjx4mPDzcPp2ZmWnatGljihcvbvbv32+MMWbatGlGkpkyZUqu70FmZqZp0qSJ8fHxsZ/nsbGxpkKFCkaSiY2Ntfe9nvM56/Oddd5lWbJkiZFkvvzyS2OMMRkZGaZVq1amcOHCZtSoUSYhIcFMmTLFlC1b1lSvXt3hM5r1Ga9evbqZNWuWWbZsmXn88ceNJLNmzRpjjDEpKSn2z8F7771nNm7caDZu3GhSUlKMMXk/p51ZtWqVkWTCwsJM+/btzaJFi8xHH31kKlWqZAICAszevXvtfV09j8qWLWv+9a9/ma+//tp89tlnJj093WkNrnwGJJny5cube+65x3z66admyZIlpnHjxsbT09Ne68GDB83AgQONJDN//nz7+3Xy5En7e16mTBlToUIFM23aNLNq1SqzadMmk5SUZMLCwkx4eLj54IMPzIoVK8yrr75qfHx8TM+ePe017Nu3z/6e3X///ebzzz838+bNM3fffbfx8vIyGzZssPfN+jkQHh5uhg0bZhISEszChQsd5l2+Xl9fX9O8eXOzcOFCs3r1ajN79mzTrVs389dff7l8bsE6CLNwu8vDbFpamqlQoYKpX7++/QfwlWF26dKlRpIZP368w3rmzp1rJJnJkyfb28LDw42fn5/5888/7W3bt283kkyZMmXMmTNn7O0LFy50+IVqzKXAIMm88847DtsaO3askWTWrVtnb5NkAgMDzYkTJxz6tmzZ0txxxx32XwRZnnnmGePr65ut/5UkmZCQEHP69OlstdatW9fhF1V8fLyRZHbs2GGMMeavv/4yfn5+pk2bNg7rTExMND4+PqZLly72ttjYWOPt7W2OHDlib8t6T7NCwZkzZ0yJEiVM27ZtHdaXkZFh6tSpY+655x6H9Tk7Tv379ze+vr65hsy//vrL+Pr6mkcffdShff369UaSQ7iLi4szhQoVMj/88IND388++8xIMkuWLMlxO6dOnTIBAQHm/vvvv2rovVx6ero5ffq0KVy4sMO5kXUuXx6EjTFm/PjxRpJJSkpyur7MzExz8eJFs2bNGiPJ/Pjjj/Z5eQ2zxhhz7Ngxc8cdd5h77rnHbN261fj7+5t//vOfV92fr7/+Otfz/PIwez3n87Fjx4y3t7f597//7dDesWNHExwcbC5evGiMMWbOnDlGkvn8888d+v3www9Gkpk4caK9LTw83Pj6+poDBw7Y286dO2dKlChh+vbta2+bN2+ekWRWrVrlsE5XzmlnssLsXXfd5XAO7d+/33h5eZk+ffrkuOzVzqPu3bvnuu0srnwGJJng4GCTmppqb0tOTjaFChUycXFx9rY33njDSDL79u3Ltr3w8HDj4eFhfvnlF4f2vn37miJFijgcC2OMefPNN40ks3PnTmPM/4XZ0NBQc+7cOXu/1NRUU6JECd
OsWTN7W9bPkVdeeSVbHVeG2az93b59u9P3yRjXzi1YB8MMcFPx9vbWmDFjtHnzZn366adO+6xcuVLSpT+fXu7xxx9
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#Nombre moyen de canaux de vente par entreprise\n",
2024-03-10 20:30:08 +01:00
"\n",
"# Filtrer les données pour ne conserver que les lignes avec au moins un ticket (nb_tickets > 0)\n",
"purchase_canaux = products_purchased_reduced_spectacle[(products_purchased_reduced_spectacle['nb_tickets'] > 0) ]\n",
"\n",
2024-03-05 15:37:29 +01:00
"plt.figure(figsize=(8, 6))\n",
2024-03-10 20:30:08 +01:00
"sns.barplot(x='number_compagny', y='nb_suppliers', data=purchase_canaux, ci=None) # ci=None pour ne pas afficher les intervalles de confiance\n",
2024-03-05 15:37:29 +01:00
"plt.title('Nombre moyen de canaux de vente par entreprise')\n",
"plt.xlabel('number_compagny')\n",
"plt.ylabel('Nombre moyen de caneaux ')\n",
"plt.show()"
]
},
2024-03-10 20:30:08 +01:00
{
"cell_type": "code",
"execution_count": 120,
"id": "ee901539-37d1-4dfa-8e78-38e4947c3d35",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 697297.000000\n",
"mean 0.110917\n",
"std 0.319561\n",
"min 0.000000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 0.000000\n",
"max 8.000000\n",
"Name: nb_suppliers, dtype: float64"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_set_spectacle[\"nb_suppliers\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 125,
"id": "7389053e-54ae-4167-9afd-aa5d194822ef",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>number_company</th>\n",
" <th>y_has_purchased</th>\n",
" <th>nb_suppliers</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>0.0</td>\n",
" <td>1.118250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>1.0</td>\n",
" <td>1.340136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>11</td>\n",
" <td>0.0</td>\n",
" <td>1.033992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11</td>\n",
" <td>1.0</td>\n",
" <td>1.155239</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>12</td>\n",
" <td>0.0</td>\n",
" <td>0.153296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12</td>\n",
" <td>1.0</td>\n",
" <td>0.220174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>13</td>\n",
" <td>0.0</td>\n",
" <td>1.007711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13</td>\n",
" <td>1.0</td>\n",
" <td>1.083750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>14</td>\n",
" <td>0.0</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14</td>\n",
" <td>1.0</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" number_company y_has_purchased nb_suppliers\n",
"0 10 0.0 1.118250\n",
"1 10 1.0 1.340136\n",
"2 11 0.0 1.033992\n",
"3 11 1.0 1.155239\n",
"4 12 0.0 0.153296\n",
"5 12 1.0 0.220174\n",
"6 13 0.0 1.007711\n",
"7 13 1.0 1.083750\n",
"8 14 0.0 1.000000\n",
"9 14 1.0 1.000000"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#repartition des client selon le nombre moyen de canaux utilisé pour l'achat de ticket par compagnie sur base de train\n",
"\n",
"#purchase_train_canaux = train_set_spectacle[(train_set_spectacle['nb_tickets'] > 0) ]\n",
"\n",
"purchase_train_canaux_filtered= purchase_train_canaux.groupby([\"number_company\", \"y_has_purchased\"])[\"nb_suppliers\"].mean().reset_index()\n",
"purchase_train_canaux_filtered"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "e4079e46-db8b-4a25-9da6-37b1405c57d9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2QAAAIiCAYAAACnngsNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACE4ElEQVR4nOzdd3gUVfv/8c+G9ATSgBAgJKGjtABSpXcQQSwoSkdBRKpSHqWKUhQE8UFEQZAHEFRABQRC70gVpYgiTQhdWoCEJOf3B7/sl01jFxIWwvt1Xbmu7JkzM/fMnpnde8/MGYsxxggAAAAAcN+5ODsAAAAAAHhUkZABAAAAgJOQkAEAAACAk5CQAQAAAICTkJABAAAAgJOQkAEAAACAk5CQAQAAAICTkJABAAAAgJOQkAEAAACAkzwUCdn06dNlsVjk6empo0ePppheq1YtlSxZ0gmRSe3bt5evr69T1o3U1apVS7Vq1XJ2GI8Mi8WioUOHWl8nHa+ZZejQobJYLDp37twd637wwQdauHBhpsWSnuT75WGzZs0aWSwWfffdd84O5YH3sH4OhIeHq3379s4OA/dR0vn5yJEjzg7Fei5/FMyePVvjx4/P9PW0b99e4eHhGb7c4cOH67HHHlNiYqIk6dq1axo6dKjWrFmT4euS/u/zJ7OWf68mTZqk6dOnpyg/ePCg3N3dtXPnToeX+VAkZEliY2P17rvvOjsMAA8JZyZkAPCgadq0qTZv3qyQkBBnh/JIuV8JWWY4efKkxowZo+HDh8vF5VbacO3aNQ0bNizTEqZy5cpp8+bNKleuXKYs/16llZAVLVpUL7/8snr37u3wMh+qhKxRo0aaPXu2fv31V2eHkiGMMbp+/bqzwwAAJHPz5k3Fx8c7O4xHBvv7/siVK5cqV64sDw8PZ4eCh8SECRPk7++vli1b3vUyrl275lD9HDlyqHLlysqRI8ddr9NZunfvrnXr1mnTpk0OzfdQJWT9+vVTUFCQ+vfvf8e6N27c0MCBAxURESF3d3fly5dPb7zxhi5evGhTLzw8XE899ZQWLVqkyMhIeXl5qUSJElq0aJGkW937JUqUkI+PjypWrKjt27enur69e/eqbt268vHxUa5cudS9e/cUDdBisah79+6aPHmySpQoIQ8PD82YMUOS9Oeff6p169bKnTu3PDw8VKJECf33v/+1a78kLferr75SsWLF5OXlpQoVKmjLli0yxujDDz9URESEfH19VadOHf31118pljFt2jSVKVNGnp6eCgwM1DPPPKP9+/dbp8+cOVMWi0WbN29OMe/w4cPl5uamkydPWstWrFihunXrKkeOHPL29la1atW0cuVKm/mSLlfYu3evXnrpJfn5+Sk4OFgdO3bUpUuX7rjdxhiNGTNGYWFh8vT0VLly5fTzzz+nWvfy5ct66623bNpDr169FBMTc8f1SNLSpUtVt25d+fn5ydvbWyVKlNDIkSOt07dv364XX3xR4eHh8vLyUnh4uF566aUUl9gmXS6yevVqvf7668qZM6eCgoLUsmVLm/0nSXPnzlWDBg0UEhJibZcDBgxIEXNal2gmv3Rh1KhRcnFx0U8//ZSinre3t3777bd098Hly5f16quvKigoSL6+vmrUqJEOHjyY7jyOboskbd26Vc2aNVNQUJA8PT1VqFAh9erVK0W906dPp9tuLBaLYmJiNGPGDFksFlksljteyvrZZ5+pTJky8vX1Vfbs2VW8eHH95z//salz6tQpdenSRfnz55e7u7siIiI0bNgwu75M/v7772revLkCAgLk6empsmXLWs8BSZIu1ZgzZ47eeecd5c2bVzly5FC9evX0xx9/pLv89evXW+dN7uuvv5bFYtG2bdvuGOftbt68ecc4oqKi1Lx5c+XPn1+enp4qXLiwunTpkuKy0rNnz+q1115TaGioPDw8lCtXLlWrVk0rVqywO56kYygqKkodOnRQYGCgfHx81KxZM/
399982ddO6JC/5MZO0z2fOnKm+ffsqX7588vDwsJ4r73T8J/nrr7/UpEkT+fr6KjQ0VH379lVsbKxNnWHDhqlSpUoKDAxUjhw5VK5cOU2dOlXGGJt6q1atUq1atRQUFCQvLy8VKFBAzz77rM3nSlxcnEaMGKHixYtb92eHDh109uxZm2XdvHlT/fr1U548eeTt7a0nn3xSv/zyi137+8iRI7JYLBozZozef/99FShQQJ6enqpQoUKKc/pff/2lDh06qEiRIvL29la+fPnUrFmzFOeWO+1vR8yePVtVqlSRr6+vfH19VbZsWU2dOtWmzp0+36T/u+z0wIEDatiwoXx8fBQSEqJRo0ZJkrZs2aInn3xSPj4+Klq0aIrj1pF2ae/xIkk//PCDSpcuLQ8PDxUsWFATJkxI9VK/pO8BM2fOVIkSJeTt7a0yZcpYv8skjzP5JYv2fGbfy/G7ePFilS1bVh4eHoqIiNBHH32Uaj1jjCZNmqSyZcvKy8tLAQEBeu6551Lsw9TYE1/SLS7r169X5cqV5eXlpXz58mnQoEFKSEiwWZ69x5eUfjusVauWFi9erKNHj1o/i25//+w9J9xpPWm5l30aFxenqVOnqnXr1tbesSNHjihXrlzW2JO2J+lcm9Q+d+7cqeeee04BAQEqVKiQJPu/K6V2yWLSMWrPeTY1GXVODQ8P1969e7V27Vrrtt/+Xat8+fIqUaKEJk+efMeYbJiHwFdffWUkmW3btpkJEyYYSWblypXW6TVr1jSPP/649XViYqJp2LChcXV1NYMGDTLLly83H330kfHx8TGRkZHmxo0b1rphYWEmf/78pmTJkmbOnDlmyZIlplKlSsbNzc0MHjzYVKtWzcyfP98sWLDAFC1a1AQHB5tr165Z52/Xrp1xd3c3BQoUMO+//75Zvny5GTp0qHF1dTVPPfWUzXZIMvny5TOlS5c2s2fPNqtWrTK///672bt3r/Hz8zOlSpUyX3/9tVm+fLnp27evcXFxMUOHDr3j/pFkwsLCTNWqVW1iDQwMNL179zbNmzc3ixYtMrNmzTLBwcGmdOnSJjEx0Tr/Bx98YCSZl156ySxevNh8/fXXpmDBgsbPz88cPHjQGGNMbGysyZMnj3n55Zdt1n3z5k2TN29e8/zzz1vLZs6caSwWi2nRooWZP3+++emnn8xTTz1lsmXLZlasWGGtN2TIECPJFCtWzAwePNhERUWZcePGGQ8PD9OhQ4c7bnfS/J06dTI///yzmTJlismXL5/JkyePqVmzprVeTEyMKVu2rMmZM6cZN26cWbFihZkwYYLx8/MzderUsdkXqfnyyy+NxWIxtWrVMrNnzzYrVqwwkyZNMt26dbPW+fbbb83gwYPNggULzNq1a80333xjatasaXLlymXOnj1rrZfUlgsWLGjefPNNs2zZMvPll1+agIAAU7t2bZv1vvfee+bjjz82ixcvNmvWrDGTJ082ERERKerVrFnTZnuTtGvXzoSFhVlfJyYmmiZNmpiAgABz5MgRY4wx06ZNM5LMl19+me4+SExMNLVr1zYeHh7Wdj5kyBBTsGBBI8kMGTIk3fnt3ZalS5caNzc3U7p0aTN9+nSzatUqM23aNPPiiy9a69jbbjZv3my8vLxMkyZNzObNm83mzZvN3r1704xxzpw5RpJ58803zfLly82KFSvM5MmTTY8ePax1oqOjTWhoqAkLCzOff/65WbFihXnvvfeMh4eHad++vc3yku+XAwcOmOzZs5tChQqZr7/+2ixevNi89NJLRpIZPXq0td7q1auNJBMeHm5efvlls3jxYjNnzhxToEABU6RIERMfH5/uvo6MjDTVqlVLUf7EE0+YJ554It15b+dIHJ999pkZOXKk+fHHH83atWvNjBkzTJkyZUyxYsVMXFyctV7Dhg1Nrly5zJQpU8yaNWvMwoULzeDBg80333xjd1xJx1BoaKjp2LGj9djPnTu3CQ0NNf/++6+1blhYmGnXrl
2KZSQ/ZpK2NV++fOa5554zP/74o1m0aJE5f/68Xcd/0udAiRIlzEcffWRWrFhhBg8ebCwWixk2bJjNutu3b2+mTp1
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"multiple_barplot(purchase_train_canaux_filtered, x=\"number_company\", y=\"nb_suppliers\", var_labels=\"y_has_purchased\",\n",
" dico_labels = {0 : \"clients n'ayant pas acheté\", 1 : \"clients ayant acheté sur la période\"},\n",
" xlabel = \"Numéro de compagnie\", ylabel = \"Nombre moyen de canaux d'achat\", \n",
" title = \"Nombre moyen de canaux d'acht selon y_has_purchased par compagnies de spectacle (train set)\")"
]
},
2024-03-03 09:32:45 +01:00
{
"cell_type": "markdown",
"id": "b9e84af4-a02b-4f83-81ae-b7a73475d060",
"metadata": {},
"source": [
"### 4. target_information"
]
},
{
"cell_type": "code",
2024-03-05 14:34:43 +01:00
"execution_count": 11,
2024-03-03 09:32:45 +01:00
"id": "2867eceb-1f72-406c-adc2-adfedcaf60e6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nombre de lignes de la table : 6240166\n"
]
},
{
"data": {
"text/plain": [
"id 0\n",
"customer_id 0\n",
"target_name 0\n",
"target_type_is_import 0\n",
"target_type_name 0\n",
"number_compagny 0\n",
"dtype: int64"
]
},
2024-03-05 14:34:43 +01:00
"execution_count": 11,
2024-03-03 09:32:45 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# nombre de nan\n",
"print(\"Nombre de lignes de la table : \",target_information_spectacle.shape[0])\n",
"target_information_spectacle.isna().sum()"
]
2024-03-05 00:36:48 +01:00
},
{
"cell_type": "code",
2024-03-05 15:50:46 +01:00
"execution_count": 47,
"id": "561f361d-7d39-430a-9e27-a32f6c2f7b50",
2024-03-05 00:36:48 +01:00
"metadata": {},
2024-03-05 15:50:46 +01:00
"outputs": [],
2024-03-05 00:36:48 +01:00
"source": [
2024-03-05 15:50:46 +01:00
"# pas exploitable"
2024-03-05 00:36:48 +01:00
]
},
2024-03-05 02:43:40 +01:00
{
"cell_type": "code",
"execution_count": null,
"id": "904cbf32-77b6-49dd-a96c-9e7e5a0175c3",
2024-03-05 00:36:48 +01:00
"metadata": {},
"outputs": [],
"source": []
2024-03-02 11:37:44 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}