2024-02-10 19:37:38 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "3f41343f-7205-41d9-89dd-88039e301413",
"metadata": {},
"source": [
"# Statistiques descriptives"
]
},
{
"cell_type": "code",
2024-02-13 18:45:33 +01:00
"execution_count": 1,
2024-02-10 19:37:38 +01:00
"id": "abfaf341-7b35-4407-9133-d21336c04027",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
2024-02-10 22:05:09 +01:00
"import re\n",
2024-02-13 23:41:13 +01:00
"import matplotlib.pyplot as plt\n",
"import matplotlib.dates as mdates\n",
"from datetime import datetime, date, timedelta\n",
"from dateutil.relativedelta import relativedelta"
2024-02-10 19:37:38 +01:00
]
},
{
"cell_type": "code",
2024-02-13 18:45:33 +01:00
"execution_count": 2,
2024-02-10 19:37:38 +01:00
"id": "7fb72fa3-7940-496f-ac78-c2837f65eefa",
"metadata": {},
"outputs": [],
"source": [
2024-02-10 23:19:35 +01:00
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
2024-02-10 19:37:38 +01:00
]
},
{
"cell_type": "markdown",
"id": "45d5261f-4d46-49cb-8582-dd2121122b05",
"metadata": {},
"source": [
"# 1 - Comportement d'achat"
]
},
2024-02-13 23:41:13 +01:00
{
"cell_type": "markdown",
"id": "3479960c-0d23-45f1-8fff-d87395205731",
"metadata": {},
"source": [
"## Outlier"
]
},
2024-02-10 19:37:38 +01:00
{
"cell_type": "code",
2024-02-10 22:05:09 +01:00
"execution_count": 3,
2024-02-10 19:37:38 +01:00
"id": "9376af51-4320-44b6-8f30-1e1234371556",
"metadata": {},
"outputs": [],
"source": [
"# Chargement des données temporaires\n",
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Temp/Company 1 - Purchasing behaviour.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" tickets_kpi = pd.read_csv(file_in, sep=\",\")"
]
},
{
"cell_type": "code",
2024-02-10 22:05:09 +01:00
"execution_count": 4,
2024-02-10 19:37:38 +01:00
"id": "1855dcca-cfce-4c54-90ae-55d9a1ab5d45",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>event_type_id</th>\n",
" <th>nb_tickets</th>\n",
2024-02-13 18:45:33 +01:00
" <th>nb_purchases</th>\n",
2024-02-10 19:37:38 +01:00
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
2024-02-13 18:45:33 +01:00
" <th>name_event_types</th>\n",
" <th>avg_amount</th>\n",
2024-02-10 19:37:38 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>384226</td>\n",
2024-02-13 18:45:33 +01:00
" <td>194790</td>\n",
2024-02-10 19:37:38 +01:00
" <td>2686540.5</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>3262.190868</td>\n",
" <td>4.179306</td>\n",
" <td>3258.011562</td>\n",
2024-02-10 19:37:38 +01:00
" <td>51.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>offre muséale individuel</td>\n",
" <td>6.150659</td>\n",
2024-02-10 19:37:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>453242</td>\n",
2024-02-13 18:45:33 +01:00
" <td>228945</td>\n",
2024-02-10 19:37:38 +01:00
" <td>3248965.5</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>3698.198229</td>\n",
" <td>5.221840</td>\n",
" <td>3692.976389</td>\n",
2024-02-10 19:37:38 +01:00
" <td>2988.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>spectacle vivant</td>\n",
" <td>7.762474</td>\n",
2024-02-10 19:37:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>201750</td>\n",
2024-02-13 18:45:33 +01:00
" <td>107110</td>\n",
2024-02-10 19:37:38 +01:00
" <td>1459190.0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>3803.369792</td>\n",
" <td>0.146331</td>\n",
" <td>3803.223461</td>\n",
2024-02-10 19:37:38 +01:00
" <td>9.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>offre muséale groupe</td>\n",
" <td>4.452618</td>\n",
2024-02-10 19:37:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>217356</td>\n",
2024-02-13 18:45:33 +01:00
" <td>111786</td>\n",
2024-02-10 19:37:38 +01:00
" <td>1435871.5</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>2502.715509</td>\n",
" <td>1408.715532</td>\n",
" <td>1093.999977</td>\n",
2024-02-10 19:37:38 +01:00
" <td>5.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
2024-02-10 19:37:38 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>143</td>\n",
2024-02-13 18:45:33 +01:00
" <td>143</td>\n",
2024-02-10 19:37:38 +01:00
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>2041.274549</td>\n",
" <td>1340.308160</td>\n",
" <td>700.966389</td>\n",
2024-02-10 19:37:38 +01:00
" <td>0.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>offre muséale individuel</td>\n",
" <td>6.150659</td>\n",
2024-02-10 19:37:38 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-02-13 18:45:33 +01:00
" customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
"0 1 2 384226 194790 2686540.5 \n",
"1 1 4 453242 228945 3248965.5 \n",
"2 1 5 201750 107110 1459190.0 \n",
"3 1 6 217356 111786 1435871.5 \n",
"4 2 2 143 143 0.0 \n",
"\n",
" nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 7 1 3262.190868 4.179306 \n",
"1 6 1 3698.198229 5.221840 \n",
"2 6 1 3803.369792 0.146331 \n",
"3 5 1 2502.715509 1408.715532 \n",
"4 1 0 2041.274549 1340.308160 \n",
2024-02-10 19:37:38 +01:00
"\n",
2024-02-13 18:45:33 +01:00
" time_between_purchase nb_tickets_internet name_event_types \\\n",
"0 3258.011562 51.0 offre muséale individuel \n",
"1 3692.976389 2988.0 spectacle vivant \n",
"2 3803.223461 9.0 offre muséale groupe \n",
"3 1093.999977 5.0 formule adhésion \n",
"4 700.966389 0.0 offre muséale individuel \n",
2024-02-10 19:37:38 +01:00
"\n",
2024-02-13 18:45:33 +01:00
" avg_amount \n",
"0 6.150659 \n",
"1 7.762474 \n",
"2 4.452618 \n",
"3 6.439463 \n",
"4 6.150659 "
2024-02-10 19:37:38 +01:00
]
},
2024-02-10 22:05:09 +01:00
"execution_count": 4,
2024-02-10 19:37:38 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tickets_kpi.head()"
]
2024-02-10 22:05:09 +01:00
},
{
"cell_type": "code",
2024-02-10 23:19:35 +01:00
"execution_count": 5,
2024-02-10 22:05:09 +01:00
"id": "0e5d3b2e-1a75-4d46-80e6-c306e9f8de84",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-02-13 18:45:33 +01:00
"Index(['customer_id', 'event_type_id', 'nb_tickets', 'nb_purchases',\n",
" 'total_amount', 'nb_suppliers', 'vente_internet_max',\n",
" 'purchase_date_min', 'purchase_date_max', 'time_between_purchase',\n",
" 'nb_tickets_internet', 'name_event_types', 'avg_amount'],\n",
2024-02-10 22:05:09 +01:00
" dtype='object')"
]
},
2024-02-10 23:19:35 +01:00
"execution_count": 5,
2024-02-10 22:05:09 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tickets_kpi.columns"
]
},
{
"cell_type": "code",
2024-02-10 23:19:35 +01:00
"execution_count": 6,
2024-02-10 22:05:09 +01:00
"id": "7667e8eb-9a1e-4216-96f4-bf987c6e30b5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>event_type_id</th>\n",
" <th>nb_tickets</th>\n",
2024-02-13 18:45:33 +01:00
" <th>nb_purchases</th>\n",
2024-02-10 22:05:09 +01:00
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
2024-02-13 18:45:33 +01:00
" <th>name_event_types</th>\n",
" <th>avg_amount</th>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>453242</td>\n",
2024-02-13 18:45:33 +01:00
" <td>228945</td>\n",
2024-02-10 22:05:09 +01:00
" <td>3248965.5</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>3698.198229</td>\n",
" <td>5.221840</td>\n",
" <td>3692.976389</td>\n",
2024-02-10 22:05:09 +01:00
" <td>2988.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>spectacle vivant</td>\n",
" <td>7.762474</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>384226</td>\n",
2024-02-13 18:45:33 +01:00
" <td>194790</td>\n",
2024-02-10 22:05:09 +01:00
" <td>2686540.5</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>3262.190868</td>\n",
" <td>4.179306</td>\n",
" <td>3258.011562</td>\n",
2024-02-10 22:05:09 +01:00
" <td>51.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>offre muséale individuel</td>\n",
" <td>6.150659</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>217356</td>\n",
2024-02-13 18:45:33 +01:00
" <td>111786</td>\n",
2024-02-10 22:05:09 +01:00
" <td>1435871.5</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>2502.715509</td>\n",
" <td>1408.715532</td>\n",
" <td>1093.999977</td>\n",
2024-02-10 22:05:09 +01:00
" <td>5.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>201750</td>\n",
2024-02-13 18:45:33 +01:00
" <td>107110</td>\n",
2024-02-10 22:05:09 +01:00
" <td>1459190.0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>3803.369792</td>\n",
" <td>0.146331</td>\n",
" <td>3803.223461</td>\n",
2024-02-10 22:05:09 +01:00
" <td>9.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>offre muséale groupe</td>\n",
" <td>4.452618</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" <tr>\n",
" <th>5032</th>\n",
" <td>6733</td>\n",
" <td>6</td>\n",
" <td>14208</td>\n",
2024-02-13 18:45:33 +01:00
" <td>114</td>\n",
2024-02-10 22:05:09 +01:00
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>2492.187199</td>\n",
" <td>1442.405116</td>\n",
" <td>1049.782083</td>\n",
2024-02-10 22:05:09 +01:00
" <td>13497.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" <tr>\n",
" <th>5029</th>\n",
" <td>6733</td>\n",
" <td>2</td>\n",
" <td>11656</td>\n",
2024-02-13 18:45:33 +01:00
" <td>158</td>\n",
2024-02-10 22:05:09 +01:00
" <td>471.0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>2982.237384</td>\n",
" <td>489.495324</td>\n",
" <td>2492.742060</td>\n",
2024-02-10 22:05:09 +01:00
" <td>9815.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>offre muséale individuel</td>\n",
" <td>6.150659</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" <tr>\n",
" <th>5030</th>\n",
" <td>6733</td>\n",
" <td>4</td>\n",
" <td>7440</td>\n",
2024-02-13 18:45:33 +01:00
" <td>162</td>\n",
2024-02-10 22:05:09 +01:00
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>1036.392674</td>\n",
" <td>426.201944</td>\n",
" <td>610.190729</td>\n",
2024-02-10 22:05:09 +01:00
" <td>7419.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>spectacle vivant</td>\n",
" <td>7.762474</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>41</td>\n",
" <td>6</td>\n",
" <td>6583</td>\n",
2024-02-13 18:45:33 +01:00
" <td>634</td>\n",
2024-02-10 22:05:09 +01:00
" <td>12546.5</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>2501.337905</td>\n",
" <td>1409.370521</td>\n",
" <td>1091.967384</td>\n",
2024-02-10 22:05:09 +01:00
" <td>6391.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>41</td>\n",
" <td>2</td>\n",
" <td>6514</td>\n",
2024-02-13 18:45:33 +01:00
" <td>812</td>\n",
2024-02-10 22:05:09 +01:00
" <td>22423.0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>3576.106609</td>\n",
" <td>247.232697</td>\n",
" <td>3328.873912</td>\n",
2024-02-10 22:05:09 +01:00
" <td>5321.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>offre muséale individuel</td>\n",
" <td>6.150659</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" <tr>\n",
" <th>36376</th>\n",
" <td>63488</td>\n",
" <td>4</td>\n",
" <td>5750</td>\n",
2024-02-13 18:45:33 +01:00
" <td>9</td>\n",
2024-02-10 22:05:09 +01:00
" <td>63250.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2024-02-13 18:45:33 +01:00
" <td>887.298484</td>\n",
" <td>440.265162</td>\n",
" <td>447.033322</td>\n",
2024-02-10 22:05:09 +01:00
" <td>5750.0</td>\n",
2024-02-13 18:45:33 +01:00
" <td>spectacle vivant</td>\n",
" <td>7.762474</td>\n",
2024-02-10 22:05:09 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-02-13 18:45:33 +01:00
" customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
"1 1 4 453242 228945 3248965.5 \n",
"0 1 2 384226 194790 2686540.5 \n",
"3 1 6 217356 111786 1435871.5 \n",
"2 1 5 201750 107110 1459190.0 \n",
"5032 6733 6 14208 114 0.0 \n",
"5029 6733 2 11656 158 471.0 \n",
"5030 6733 4 7440 162 0.0 \n",
"60 41 6 6583 634 12546.5 \n",
"57 41 2 6514 812 22423.0 \n",
"36376 63488 4 5750 9 63250.0 \n",
"\n",
" nb_suppliers vente_internet_max purchase_date_min purchase_date_max \\\n",
"1 6 1 3698.198229 5.221840 \n",
"0 7 1 3262.190868 4.179306 \n",
"3 5 1 2502.715509 1408.715532 \n",
"2 6 1 3803.369792 0.146331 \n",
"5032 3 1 2492.187199 1442.405116 \n",
"5029 3 1 2982.237384 489.495324 \n",
"5030 2 1 1036.392674 426.201944 \n",
"60 4 1 2501.337905 1409.370521 \n",
"57 6 1 3576.106609 247.232697 \n",
"36376 1 1 887.298484 440.265162 \n",
2024-02-10 22:05:09 +01:00
"\n",
2024-02-13 18:45:33 +01:00
" time_between_purchase nb_tickets_internet name_event_types \\\n",
"1 3692.976389 2988.0 spectacle vivant \n",
"0 3258.011562 51.0 offre muséale individuel \n",
"3 1093.999977 5.0 formule adhésion \n",
"2 3803.223461 9.0 offre muséale groupe \n",
"5032 1049.782083 13497.0 formule adhésion \n",
"5029 2492.742060 9815.0 offre muséale individuel \n",
"5030 610.190729 7419.0 spectacle vivant \n",
"60 1091.967384 6391.0 formule adhésion \n",
"57 3328.873912 5321.0 offre muséale individuel \n",
"36376 447.033322 5750.0 spectacle vivant \n",
2024-02-10 22:05:09 +01:00
"\n",
2024-02-13 18:45:33 +01:00
" avg_amount \n",
"1 7.762474 \n",
"0 6.150659 \n",
"3 6.439463 \n",
"2 4.452618 \n",
"5032 6.439463 \n",
"5029 6.150659 \n",
"5030 7.762474 \n",
"60 6.439463 \n",
"57 6.150659 \n",
"36376 7.762474 "
2024-02-10 22:05:09 +01:00
]
},
2024-02-10 23:19:35 +01:00
"execution_count": 6,
2024-02-10 22:05:09 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Présence d'outlier\n",
"tickets_kpi.sort_values(by = ['nb_tickets'], axis = 0, ascending = False).head(10)"
]
},
{
"cell_type": "code",
2024-02-10 23:19:35 +01:00
"execution_count": 7,
2024-02-10 22:05:09 +01:00
"id": "9b2e27f2-703d-465b-a0f9-76e996de617c",
"metadata": {},
"outputs": [],
"source": [
"# Part du CA par customer\n",
"total_amount_share = tickets_kpi.groupby('customer_id')['total_amount'].sum().reset_index()\n",
"total_amount_share['total_amount_entreprise'] = total_amount_share['total_amount'].sum()\n",
"total_amount_share['share_total_amount'] = total_amount_share['total_amount']/total_amount_share['total_amount_entreprise']\n",
"\n",
"total_amount_share_index = total_amount_share.set_index('customer_id')\n",
"df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)\n"
]
},
{
"cell_type": "code",
2024-02-10 23:19:35 +01:00
"execution_count": 8,
2024-02-10 22:05:09 +01:00
"id": "36141803-8865-4210-bd39-0a980301fd0c",
"metadata": {},
"outputs": [
{
"data": {
2024-02-13 18:45:33 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEWCAYAAAAtl/EzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3MElEQVR4nO3dd3hTZfsH8G9WkzbdpXtSRpllgyCIRZFVEARUZBVQprhFRFkKIv4c+IIvLhRRUMSBIoJFlsree7RQWlZp6V5pM+7fH30bSQddSc85yf25rl6Q5OSc+5ycfHPm88iIiMAYYwKTC10AY4wBHEaMMZHgMGKMiQKHEWNMFDiMGGOiwGHEGBMFDiPGmChwGDHGRIHDiDEmCg0WRidPnoSLiwuWL1/eUJNkjElIrcJo9erVkMlk5j+lUonAwEA8/vjjSEhIqPJ9eXl5GDFiBGbOnImZM2fWu+j6+P3337FgwYJKX4uIiEBcXJz58Y0bN7BgwQIcP368wrALFiyATCazTZF1JJPJqpw3R3H27FksWLAAV65caZDpvfXWW9i4caPVxmeN+u+2josa1cKXX35JAOjLL7+kffv20c6dO2nRokXk7OxMfn5+lJmZWen7Ro4cSaNHjyaTyVSbydnEjBkzqKrZPnr0KCUmJpofHzp0yDy/5V29epX27dtnqzLrBADNnz9f6DIEtWHDBgJAO3fubJDpabVaGj9+vNXGZ43677aOi5myLgHWpk0bdO7cGQBw//33w2g0Yv78+di4cSMmTJhQYfjvv/++jlFpPYWFhXBxcbnrMB06dKjx+EJCQhASElLfshhjZWqTXGVbRocOHbJ4fvPmzQSAlixZYvH8oUOHaPDgweTl5UVqtZrat29P69evr3Sc8fHxFBcXR15eXuTi4kKxsbF06dIli2Hj4+NpyJAhFBwcTGq1mpo0aUKTJ0+m9PR0i+Hmz59PAOjIkSM0fPhw8vT0pICAABo/fjwBqPCXlJRERETh4eHmX7mdO3dWOmzZlkfZNO5kNBpp6dKlFBUVRU5OTuTr60tjx46lq1evWgzXu3dvat26NR08eJB69uxJzs7O1LhxY1qyZAkZjcZqP4ecnBx68sknydvbm7RaLfXr148uXLhQ6ZbRxYsXadSoUeTr60tOTk7UokULWrFiRYW633zzTWrevDlpNBry8PCgtm3b0rJly+5aR9kyWrt2Lc2aNYsCAgJIq9VSbGwspaamUm5uLj311FPk4+NDPj4+FBcXR3l5eRbjKCoqotmzZ1NERASpVCoKCgqi6dOnU1ZWlsVw4eHhNGjQINqyZQt16NCBNBoNRUVF0apVq8zDlK1L5f/Ktmxru/6cPn2aHn/8cXJ3dyc/Pz+aMGECZWdnm4erbFq9e/cmIqKCggJ68cUXKSIigtRqNXl5eVGnTp1o3bp1VS7P6uonIlq1ahVFR0ebxzl06FA6e/as+fXq1vEVK1ZQr169yNfXl1xcXKhNmza0dOlSKikpqbC8K9vi6927t3keiYimTJlCarWaDh8+bH7OaDRSnz59yM/Pj27cuFHl/JZnlTBasWIFAaAff/zR/NyOHTvIycmJevXqRevXr6etW7dSXFxchYVbNs7Q0FCaOHEibdmyhT799FPy8/Oj0NBQi5Vy5cqVtGTJEvr1119p9+7d9NVXX1G7du0oKirKYmGWrUzh4eH0yiuv0LZt22jjxo2UmJhII0aMIAC0b98+859OpyMiyw8gJyfHXNvrr79uHrYsWCoLo8mTJxMAevrpp2nr1q308ccfk6+vL4WGhlqs8L179yYfHx9q1qwZffzxx7Rt2zaaPn06AaCvvvrqrp+ByWSimJgYUqvVtHjxYoqPj6f58+dTZGRkhTA6c+aMOVjWrFlD8fHx9OKLL5JcLqcFCxaYh1uyZAkpFAqaP38+bd++nbZu3UrLli2zGKYyZWEUHh5OcXFx5nl2dXWlmJgY6tu3L7300ksUHx9PS5cuJYVCQTNnzrSYl379+pFSqaS5c+dSfHw8vfvuu6TVaqlDhw7mz6XsswkJCaFWrVrRmjVr6I8//qCRI0cSANq9ezcREaWlpdFbb71FAOijjz4yf2ZpaWl1Wn+ioqJo3rx5tG3bNnr//fdJrVbThAkTzMPt27ePnJ2daeDAgeZpnTlzhohKv6QuLi70/vvv086dO+m3336jt99+m5YvX17l8qyu/rLXRo0aRZs3b6Y1a9ZQZGQkeXh40MWLF4mIql3Hn3/+eVq5ciVt3bqVduzYQR988AE1atTIYr7KlndNwqioqIjat29PkZGR5u/qvHnzSC6XU3x8fJXzWpk6hdH+/ftJr9dTXl4ebd26lQICAui+++4jvV5vHrZFixbUoUMHi+eIiGJjYykwMNC8BVA2zmHDhlkMt2fPHgJAixYtqrQWk8lEer2ekpOTCQD98ssv5tfKVqZ58+ZVeN/d9qfLfwB3O2ZUPozOnTtHAGj69OkWwx04cIAA0Jw5c8zP9e7dmwDQgQMHLIZt1aoV9evXr9LaymzZsoUA0Icffmjx/OLFiyuEUb9+/SgkJIRycnIshn366adJo9GYj/HFxsZS+/bt7zrdypSF0eDBgy2ef+655wgAPfPMMxbPDx06lLy9vc2Pt27dSgDonXfesRhu/fr1BIA+/fRT83Ph4eGk0WgoOTnZ/FxRURF5e3vTlClTzM/V9JhLTdaf8nVNnz6dNBqNxbHPqo4ZtWnThoYOHXrXGipTVf1ZWVnm4LtTSkoKqdVqeuKJJ8zP1fSYkdFoJL1eT2vWrCGFQmFxzLemYURElJCQQO7u7jR06FD6888/SS6X0+uvv179zJZTp1P799xzD1QqFdzc3NC/f394eXnhl19+gVJZeggqMTER58+fx+jRowEABoPB/Ddw4EDcvHkTFy5csBhn2bBlevTogfDwcOzcudP8XFpaGqZOnYrQ0FAolUqoVCqEh4cDAM6dO1ehzuHDh9dl9uqkrM47z8YBQNeuXdGyZUts377d4vmAgAB07drV4rno6GgkJyfXaDrll9cTTzxh8Vin02H79u0YNmwYXFxcKnwGOp0O+/fvN9d44sQJTJ8+HX/88Qdyc3NrNtP/Exsba/G4ZcuWAIBBgwZVeD4zMxP5+fkAgB07dgCouMxGjhwJrVZbYZm1b98eYWFh5scajQbNmzevdpmVqe36M2TIEIvH0dHR0Ol0SEtLq3ZaXbt2xZYtWzB79mzs2rULRUVFNaqxKvv27UNRUVGFZRUaGoo+ffpUWFZVOXbsGIYMGQIfHx8oFAqoVCqMGzcORqMRFy9erFNtTZs2xWeffYaNGzciNjYWvXr1qtPZvDqF0Zo1a3Do0CHs2LEDU6ZMwblz5zBq1Cjz67du3QIAvPTSS1CpVBZ/06dPBwDcvn3bYpwBAQEVphMQEICMjAwAgMlkwkMPPYSffvoJs2bNwvbt23Hw4EHzF6qyDzswMLAus1cnZXVWNs2goCDz62V8fHwqDKdWq6tdaTMyMqBUKiu8v/zyy8jIgMFgwPLlyyt8BgMHDgTw72fw6quv4t1338X+/fsxYMAA+Pj44IEHHsDhw4ermetS3t7eFo+dnJzu+rxOp7OYF19fX4vhZDKZxWdfpq7LDKjb+lN+emq1usphy/vPf/6DV155BRs3bkRMTAy8vb0xdOjQu14Ccze1Xb8qk5KSgl69euH69ev48MMP8ffff+PQoUP46KOPANRsvqoyaNAg+Pv7Q6fT4YUXXoBCoaj1OOp0Nq1ly5bms2kxMTEwGo34/PPP8cMPP2DEiBFo1KgRgNKV/JFHHql0HFFRURaPU1NTKwyTmpqKpk2bAgBOnz6NEydOYPXq1Rg/frx5mMTExCrrbMjrgMpW3Js3b1Y4y3bjxg3zMrHGdAwGAzIyMiy+LOWXn5eXFxQKBcaOHYsZM2ZUOq7GjRsDAJRKJV544QW88MILyM7Oxp9//ok5c+agX79+uHr1arVnIes7L+np6RaBRERITU1Fly5drDatuqw/9aHVarFw4UIsXLgQt27dMm8lDR48GOfPn6/1+O5cv8qr6fq1ceNGFBQU4KeffjJ
2024-02-10 22:05:09 +01:00
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Costumer 1 vs others customers\n",
"coupure = 1\n",
"\n",
"top = df_circulaire[:coupure]\n",
"rest = df_circulaire[coupure:]\n",
"\n",
"# Calculez la somme du reste\n",
"rest_sum = rest.sum()\n",
"\n",
"# Créez une nouvelle série avec les cinq plus grandes parts et 'Autre'\n",
"new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
"\n",
"# Créez le graphique circulaire\n",
"plt.figure(figsize=(3, 3))\n",
"plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
"plt.axis('equal') # Assurez-vous que le graphique est un cercle\n",
"plt.title('Répartition des montants totaux')\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
2024-02-10 23:19:35 +01:00
"execution_count": 9,
2024-02-10 22:05:09 +01:00
"id": "94cf1a25-9ded-48f2-b1b2-75225bdaf49d",
"metadata": {},
"outputs": [],
"source": [
"tickets_kpi_filtered = tickets_kpi[tickets_kpi['customer_id'] != 1]"
]
},
2024-02-13 23:41:13 +01:00
{
"cell_type": "markdown",
"id": "dbebfa92-310a-417b-a7fa-36ac3593db06",
"metadata": {},
"source": [
"## Evolution des commandes"
]
},
2024-02-13 18:45:33 +01:00
{
"cell_type": "code",
2024-02-21 23:08:33 +01:00
"execution_count": 6,
2024-02-13 23:41:13 +01:00
"id": "06137694-7f50-47ba-8749-68471ececc1e",
2024-02-13 18:45:33 +01:00
"metadata": {},
2024-02-21 23:08:33 +01:00
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_2168/3643128924.py:11: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n",
"/tmp/ipykernel_2168/3643128924.py:19: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = ['sent_at'], date_parser=custom_date_parser)\n"
]
}
],
2024-02-13 18:45:33 +01:00
"source": [
2024-02-13 23:41:13 +01:00
"# Importation - Chargement des données temporaires\n",
2024-02-21 23:08:33 +01:00
"def custom_date_parser(date_string):\n",
" return pd.to_datetime(date_string, utc = True, format = 'ISO8601')\n",
2024-02-13 18:45:33 +01:00
"\n",
2024-02-13 23:41:13 +01:00
"# Achat\n",
2024-02-13 18:45:33 +01:00
"BUCKET = \"projet-bdc2324-team1\"\n",
2024-02-21 23:08:33 +01:00
"FILE_KEY_S3 = \"0_Input/Company_1/products_purchased_reduced.csv\"\n",
2024-02-13 18:45:33 +01:00
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
2024-02-21 23:08:33 +01:00
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n",
2024-02-13 23:41:13 +01:00
" \n",
"# Emails\n",
"BUCKET = \"projet-bdc2324-team1\"\n",
2024-02-21 23:08:33 +01:00
"FILE_KEY_S3 = \"0_Input/Company_1/campaigns_information.csv\"\n",
2024-02-13 23:41:13 +01:00
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
2024-02-21 23:08:33 +01:00
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = ['sent_at'], date_parser=custom_date_parser)\n"
2024-02-13 18:45:33 +01:00
]
},
{
"cell_type": "code",
2024-02-21 23:08:33 +01:00
"execution_count": 8,
2024-02-13 23:41:13 +01:00
"id": "e6b962d4-1a30-4133-ac0f-359f7afef42c",
2024-02-13 18:45:33 +01:00
"metadata": {},
2024-02-13 23:41:13 +01:00
"outputs": [],
2024-02-13 18:45:33 +01:00
"source": [
2024-02-13 23:41:13 +01:00
"# Mois du premier achat\n",
2024-02-21 23:08:33 +01:00
"purchase_min = purchases.groupby(['customer_id'])['purchase_date'].min().reset_index()\n",
2024-02-13 23:41:13 +01:00
"purchase_min.rename(columns = {'purchase_date' : 'first_purchase_event'}, inplace = True)\n",
"purchase_min['first_purchase_event'] = pd.to_datetime(purchase_min['first_purchase_event'])\n",
"purchase_min['first_purchase_month'] = pd.to_datetime(purchase_min['first_purchase_event'].dt.strftime('%Y-%m'))\n",
"\n",
"# Mois du premier mails\n",
"first_mail_received = campaigns.groupby('customer_id')['sent_at'].min().reset_index()\n",
"first_mail_received.rename(columns = {'sent_at' : 'first_email_reception'}, inplace = True)\n",
"first_mail_received['first_email_reception'] = pd.to_datetime(first_mail_received['first_email_reception'])\n",
"first_mail_received['first_email_month'] = pd.to_datetime(first_mail_received['first_email_reception'].dt.strftime('%Y-%m'))\n",
"\n",
"# Fusion \n",
2024-02-21 23:08:33 +01:00
"known_customer = pd.merge(purchase_min[['customer_id', 'first_purchase_month']], \n",
2024-02-13 23:41:13 +01:00
" first_mail_received[['customer_id', 'first_email_month']], on = 'customer_id', how = 'outer')\n",
"\n",
"# Mois à partir duquel le client est considere comme connu\n",
"known_customer['known_date'] = pd.to_datetime(known_customer[['first_email_month', 'first_purchase_month']].min(axis = 1), utc = True, format = 'ISO8601')"
2024-02-13 18:45:33 +01:00
]
},
{
"cell_type": "code",
2024-02-21 23:08:33 +01:00
"execution_count": 9,
2024-02-13 23:41:13 +01:00
"id": "9c56e5ac-cbf4-4343-80ba-be2ab8b60eab",
2024-02-13 18:45:33 +01:00
"metadata": {},
"outputs": [],
"source": [
2024-02-13 23:41:13 +01:00
"# Nombre de commande par mois\n",
2024-02-21 23:08:33 +01:00
"purchases_count = pd.merge(purchases[['customer_id', 'purchase_id', 'purchase_date']].drop_duplicates(), known_customer[['customer_id', 'known_date']], on = ['customer_id'], how = 'inner')\n",
2024-02-13 23:41:13 +01:00
"purchases_count['is_customer_known'] = purchases_count['purchase_date'] > purchases_count['known_date'] + pd.DateOffset(months=1)\n",
"purchases_count['purchase_date_month'] = pd.to_datetime(purchases_count['purchase_date'].dt.strftime('%Y-%m'))\n",
"purchases_count = purchases_count[purchases_count['customer_id'] != 1]\n",
2024-02-13 18:45:33 +01:00
"\n",
2024-02-13 23:41:13 +01:00
"# Nombre de commande par mois par type de client\n",
2024-02-21 23:08:33 +01:00
"nb_purchases_graph = purchases_count.groupby(['purchase_date_month', 'is_customer_known'])['purchase_id'].count().reset_index()\n",
2024-02-13 23:41:13 +01:00
"nb_purchases_graph.rename(columns = {'purchase_id' : 'nb_purchases'}, inplace = True)\n",
2024-02-13 18:45:33 +01:00
"\n",
2024-02-21 23:08:33 +01:00
"nb_purchases_graph_2 = purchases_count.groupby(['purchase_date_month', 'is_customer_known'])['customer_id'].nunique().reset_index()\n",
2024-02-13 23:41:13 +01:00
"nb_purchases_graph_2.rename(columns = {'customer_id' : 'nb_new_customer'}, inplace = True)"
2024-02-13 18:45:33 +01:00
]
},
{
"cell_type": "code",
2024-02-13 23:41:13 +01:00
"execution_count": 13,
"id": "8c1aed44-03d3-49f9-b96c-b06a0df03dde",
2024-02-13 18:45:33 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-02-21 23:08:33 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnsAAAHGCAYAAAAMiE2hAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAByqUlEQVR4nO3deVxUZfs/8M/IMizCyL4oArmQCrhRiKaAgkjiXpYormm5hstjLplYLkm5PVim5r7hUy6ZKYkLloKKKG6RqeGWIKYIiOzcvz/8cn4ODMgyLI6f9+s1L537XHPOdc+cmbm4z7nPyIQQAkRERESkkerVdgJEREREVH1Y7BERERFpMBZ7RERERBqMxR4RERGRBmOxR0RERKTBWOwRERERaTAWe0REREQajMUeERERkQZjsUdERESkwVjsEdErY82aNTA0NMTp06drOxWil9bFixdhYGCAsLCw2k6Fyqlair2NGzdCJpNBT08Pt27dKrHcy8sLzs7O1bHpFxo+fDjq169fK9t+EZlMhpCQEOl+0fNYXUJCQiCTyfDvv/+qbZ3bt2/H8uXL1bY+Up+i11tdivbPmzdvSm3Dhw+Hl5eX2rahTvHx8ZgyZQrCw8Ph7u6utvUmJSXh008/hYeHB8zNzWFsbIz27dtjzZo1KCgoKBH/5MkTBAcHw9bWFnp6emjTpg3Cw8OVYgoKCrB06VL06NEDjRo1goGBAVq0aIEZM2bg8ePHSrGZmZl4//334eTkBCMjIxgaGqJVq1aYP38+MjMz1dbPusLBwQHDhw+X7t+8eRMymQxRUVGVWt/OnTvRqlUr6OvrQyaTIT4+HgAQFhaGpk2bQldXFzKZrMTzrk51+X1TXEZGBt555x1MnDgREydOrPR6in/fUflERUWV+Nwtj2od2cvJycGnn35anZugOobFHtVF6enpePfdd7F06VL06tVLreuOi4vD5s2b0a1bN2zevBm7du2Cp6cnxo4di9GjR5eI79+/PzZt2oS5c+fi4MGDeOONNzBo0CBs375disnKykJISAjs7e2xfPlyHDhwAKNHj8aaNWvQqVMnZGVlSbF5eXkQQmDKlCnYtWsXfvrpJwwYMACff/45+vTpo9a+apoHDx4gKCgITZo0QUREBGJiYtC8eXPEx8dj0qRJ8Pb2xtGjRxETEwMjI6PaTrdOGDVqFN588018+eWXtZ0KVYB2da68R48e2L59O6ZNm4bWrVtX56ZqhBAC2dnZ0NfXr+1USINlZWVBT0+vWkd1XzXGxsa4du1atay7U6dOuHHjBnR0dKQ2X19f5Obm4ptvvsG8efNgZ2cHADhw4AAiIyOxfft2DBo0CADg7e2NW7du4T//+Q/ee+89aGlpQV9fH4mJiTAzM5PW6eXlhcaNG+Pdd9/Frl27MGTIEABAgwYNsHPnTqWcfHx8kJOTg9DQUPz999947bXXqqXv1aGgoAD5+fmQy+XVvq2//voLeXl5GDJkCDw9PaX2K1euAABGjx6NN998s8x1PH36FAYGBtWaZ13yv//9r7ZTULtX4TO3Wkf2pk+fDjMzM3zyyScvjM3OzsbMmTPh6OgIXV1dNGzYEOPHjy8xdO7g4ICAgADs378fbdu2hb6+Plq0aIH9+/cDeHZoqUWLFjA0NMSbb76Js2fPqtzelStX0K1bNxgaGsLCwgITJkzA06dPlWJkMhkmTJiA7777Di1atIBcLsemTZsAANeuXUNgYCAsLS0hl8vRokULfPPNN+V6XtLT0zF69GiYmZmhfv366NGjB/76669yPXbnzp3o3r07bGxspL7PmDFD5eGa06dPo1evXjAzM4Oenh6aNGmC4ODgEnH379/HoEGDoFAoYGVlhZEjRyItLU0p5ptvvkGXLl1gaWkJQ0NDuLi4IDQ0FHl5eVKMl5cXfvnlF9y6dQsymUy6FVm1ahVat26N+vXrw8jICK+//jpmzZr1wj7PmzcP7u7uMDU1hbGxMdq1a4d169ZBCKEUV9phgeKHfUqTk5ODzz//HC1atICenh7MzMzg7e2N6OhoKaa8+2l5cyk6FHro0CGMHDkSFhYWMDAwQE5ODh48eIAxY8bAzs4OcrkcFhYW6NSpEw4fPvzCvvzyyy9o06YN5HI5HB0d8fXXX6uME0Lg22+/RZs2baCvrw8TExO88847+Pvvv1+4jfIq7+tX9N6OiIhAu3btoK+vj9dffx3r169Xiit6zo4dO4axY8fC3NwcZmZm6N+/P+7du1di+zt37oSHhwcMDQ1Rv359+Pn54fz58yXizp49i969e8PU1BR6enpo27Ztub7YTExMlAq9IkVFwt27d6W2PXv2oH79+nj33XeVYkeMGIF79+5J5xJqaWkpFXrF13nnzp0X5mVhYQEA0NYu+2/6ouczMjISI0aMgKmpKQwNDdGrVy+V+8H69evRunVr6OnpwdTUFP369UNCQoJSjJeXl8rDksOHD4eDg4N0v+gQbGhoKObPnw9HR0fI5XIcO3bshf17kX379sHDwwMGBgYwMjKCr68vYmJilHJ56623AADvvfceZDKZlHdRIe3u7g6ZTCa9Z4tOQfrtt9/QsWNHGBgYYOTIkQCefa5PmzZN6bMhODi4UofSi56Xr776CosXL4aDgwP09fXh5eUlFagzZsyAra0tFAoF+vXrh5SUFKV1lPcz6OnTp1LeRa+pm5sbduzYofS48rw/Hjx4gHHjxqFly5aoX78+LC0t0bVrV/z+++/l6ndycjI+/PBDNGrUCLq6unB0dMS8efOQn5//wsfm5ORg6tSpsLa2hoGBAbp06YK4uLgKfeYWFhYiNDQUr7/+OuRyOSwtLTF06FCl97Cq57BI8f2+6JDr1q1bMWXKFFhbW0NfXx+enp5q/QwqF1ENNmzYIACI2NhYsWLFCgFAHDlyRFru6ekpWrVqJd0vLCwUfn5+QltbW8yZM0ccOnRIfP3118LQ0FC0bdtWZGdnS7H29vaiUaNGwtnZWezYsUMcOHBAuLu7Cx0dHfHZZ5+JTp06id27d4s9e/aI5s2bCysrK/H06VPp8cOGDRO6urqicePGYsGCBeLQoUMiJCREaGtri4CAAKV+ABANGzYUrq6uYvv27eLo0aPi8uXL4sqVK0KhUAgXFxexefNmcejQITF16lRRr149ERISUuZzU1hYKLy9vYVcLpe2P3fuXPHaa68JAGLu3LllPv6LL74Qy5YtE7/88ouIiooS3333nXB0dBTe3t5KcREREUJHR0e4urqKjRs3iqNHj4r169eL999/X4qZO3euACCcnJzEZ599JiIjI8XSpUuFXC4XI0aMUFrf5MmTxapVq0RERIQ4evSoWLZsmTA3N1eKu3LliujUqZOwtrYWMTEx0k0IIXbs2CEAiIkTJ4pDhw6Jw4cPi++++05MmjSpzP4KIcTw4cPFunXrRGRkpIiMjBRffPGF0NfXF/PmzVOKK+35s7e3F8OGDStzG3l5ecLb21toa2uLadOmiQMHDoh9+/aJWbNmiR07dgghKrafljeXovdKw4YNxZgxY8TBgwfFjz/+KPLz84Wfn5+wsLAQa9asEVFRUWLv3r3is88+E+Hh4WX25fDhw0JLS0u89dZbYvfu3eKHH34Qb7zxhmjcuLEo/pYfPXq00NHREVOnThURERFi+/bt4vXXXxdWVlYiOTm5zO0U5Z6YmFhmXHlfv6L3dsuWLcXmzZvFr7/+Kt59910BQBw/frzEdl977TUxceJE8euvv4rvv/9emJiYlHgfLFiwQMhkMjFy5Eixf/9+sXv3buHh4SEMDQ3FlStXpLijR48KXV1d0blzZ7Fz504REREhhg8fLgCIDRs2lNm/0gwbNkxoa2uLf//9V2rr0KGDeOONN0rEXr58WQAQq1evLnOdRX3/6aefSiwrLCwUeXl5Ii0tTRw8eFBYW1uLQYMGvTDPonXa2dmJkSNHioMHD4o1a9YIS0tLYWdnJ1JTU6XYhQsXCgBi0KBB4pdffhGbN28Wr732mlAoFOKvv/6S4jw9PYWnp6fK58Te3l66n5iYKO3/3t7e4scffxSHDh0qc58qz/t527Z
2024-02-13 18:45:33 +01:00
"text/plain": [
2024-02-13 23:41:13 +01:00
"<Figure size 640x480 with 1 Axes>"
2024-02-13 18:45:33 +01:00
]
},
"metadata": {},
2024-02-13 23:41:13 +01:00
"output_type": "display_data"
2024-02-13 18:45:33 +01:00
}
],
"source": [
2024-02-13 23:41:13 +01:00
"# Graphique en nombre de commande\n",
2024-02-21 23:08:33 +01:00
"purchases_graph = nb_purchases_graph\n",
2024-02-13 23:41:13 +01:00
"\n",
2024-02-21 23:08:33 +01:00
"purchases_graph_used = purchases_graph[purchases_graph[\"purchase_date_month\"] >= datetime(2021,4,1)]\n",
2024-02-13 23:41:13 +01:00
"purchases_graph_used_0 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==False]\n",
"purchases_graph_used_1 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==True]\n",
"\n",
"\n",
"# Création du barplot\n",
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_0[\"nb_purchases\"], width=12, label = \"Nouveau client\")\n",
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_1[\"nb_purchases\"], \n",
" bottom = purchases_graph_used_0[\"nb_purchases\"], width=12, label = \"Ancien client\")\n",
"\n",
"\n",
"# commande pr afficher slt\n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))\n",
"\n",
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Mois')\n",
"plt.ylabel(\"Nombre d'achats\")\n",
"plt.title(\"Nombre d'achats au cours de l'année 2023 pour l'offre 'muséale groupe'\")\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
2024-02-13 18:45:33 +01:00
]
},
{
"cell_type": "code",
2024-02-21 23:08:33 +01:00
"execution_count": 16,
2024-02-13 23:41:13 +01:00
"id": "d312276c-4c46-4d29-b6d6-ed110f59890d",
2024-02-13 18:45:33 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-02-21 23:08:33 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoIAAAHGCAYAAADg0eryAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB5XElEQVR4nO3dd1gU1/s28Hul1xWkK82GDexRNAp2VLAmdgR7YotRYzTGiIk91mBi1Ngb6jdqrCgWMCooqNhjjMEOYqQp0jnvH77Mz6XJwi6g3J/r2kv2zNmZ58ye3X08M2dGJoQQICIiIqIKp1JZB0BEREREZYOJIBEREVEFxUSQiIiIqIJiIkhERERUQTERJCIiIqqgmAgSERERVVBMBImIiIgqKCaCRERERBUUE0EiIiKiCoqJIBG9t7755htYWlri3r17ZR0KEf1/x44dg5aWFvbv31/WoVARlCgR3LRpE2QyGXR1dfHgwYM8y93d3dGgQYOSbKLYfH19YWhoWCbbfheZTAY/P79S3aa7uzvc3d1LPY4jR46UelvV7ZdffsGmTZvKOowPnp+fH2QyWYHLjx49ilWrVuHQoUOoUaNGKUZWuKdPn8LPzw+RkZF5lr2rTQVR5XfprVu34Ofnh/v376tkfeVN7u+14OBgyGSyYrfX398fNWvWhLa2NmQyGRISEgAA3377Lezs7KCpqYnKlSuXOO7CuLu7w9fXV63bUJXHjx9jyJAhWLlyJXr16lWsddy/fx8ymYzfs8WQk5cpQyUjgmlpafj2229VsSoqRaGhoRg5cqRat3HkyBHMmTNHrdsobUwEy96jR48wbNgw7Nq1C82bNy/rcBQ8ffoUc+bMyTcRHDlyJEJDQ0s/qLfcunULc+bM+WATQVWKjIzExIkT0a5dO5w6dQqhoaEwMjLCH3/8gXnz5mHo0KEICQnBiRMnyjrUciEzMxP9+/fH6NGjMXbs2LIOh4pIUxUr8fDwwI4dOzB16lQ0bNhQFassU0IIpKamQk9Pr6xDUauWLVuWdQhExWJra4uYmJiyDkNp1apVQ7Vq1co6jPdaRkYGZDIZNDVV8vNVqJs3bwIARo0ahY8++kgqv3HjBgBg4sSJsLCwKHQdKSkpH/xvSQ5NTU2cO3eurMNQudevX0NfX7+sw1AblYwITps2DVWqVMHXX3/9zrqpqamYMWMGHB0doa2tjapVq2LcuHHScHsOBwcHeHp64tChQ2jcuDH09PRQt25dHDp0CMCb4c+6devCwMAAH330ESIiIvLd3s2bN9GhQwcYGBjA3Nwc48ePx+vXrxXqyGQyjB8/Hr/++ivq1q0LHR0dbN68GQBw9+5dDBo0CBYWFtDR0UHdunXx888/F2m/JCUlYdSoUahSpQoMDQ3h4eGBv//+O9+6JdlOdnY2/P390ahRI+jp6aFy5cpo2bIlDhw4UOjr8js0HBMTgzFjxqBatWrQ1taGo6Mj5syZg8zMTKlOzrD9kiVLsGzZMjg6OsLQ0BCurq4ICwuT6vn6+kptkMlk0qOwkYigoCD07NkT1apVg66uLmrWrIkxY8bgv//+k+r8+eefkMlk2LlzZ57Xb9myBTKZDOHh4QCAiIgIDBgwAA4ODtDT04ODgwMGDhyY51SGnOH006dP4/PPP4eZmRmqVKmCPn364OnTp1I9BwcH3Lx5EyEhIVJ7HBwcCt3PRXl/srOzsXjxYtSpUwc6OjqwsLDA0KFD8fjxY4V15RwiDA0NRatWraQ2bdy4EQBw+PBhNGnSBPr6+nB2dkZgYKDC63MOTV67dg2ffvop5HI5TE1NMXnyZGRmZuLOnTvw8PCAkZERHBwcsHjxYoXXp6amYsqUKWjUqJH0WldXV/zxxx952p3zudq6dSvq1q0LfX19NGzYUPoMv+3w4cNo1KgRdHR04OjoiCVLluS7L4UQ+OWXX6R9aWJigk8++QT//vtvoe8B8KY/5vde5Xe4VpnY3xYcHCyNUA4bNkzqIzmfs4IODe/YsQOurq4wNDSEoaEhGjVqhPXr1xe6rX379kFfXx8jR46UPp8RERHo0aMHTE1Noauri8aNG2P37t3SazZt2oRPP/0UANCuXTspvsJGuHNivnLlCvr06QNjY2PI5XIMGTIEz58/V6hb1H7s4OCQ76HO3Kew5BzW3bp1K6ZMmYKqVatCR0cH//zzT6H7pig2bNiAhg0bQldXF6ampujduzdu376tEMuQIUMAAC1atIBMJpP6UM4RMEtLS4X3N+d3a+/evWjcuDF0dXWlIyJF+W4tqpz9smPHDnz99dewtraGoaEhvLy88OzZM7x8+RKjR4+GmZkZzMzMMGzYMLx69Up6fWGHXnP/Ljx//hyjR4+Gra0tdHR0YG5ujtatW+cZBT1x4gQ6dOgAY2Nj6Ovro3Xr1jh58qRCnX/++QfDhg1DrVq1oK+vj6pVq8LLywvXr18vUrtL8juZkJCAESNGwNTUFIaGhujevTv+/fffPO3N6e+XL1/GJ598AhMTE+nUk6LmLwWddpW73+f87gQFBWHYsGEwNTWFgYEBvLy88v1OK8o+LhZRAhs3bhQARHh4uFi5cqUAIE6ePCktd3NzE/Xr15eeZ2dniy5dughNTU0xa9Yscfz4cbFkyRJhYGAgGjduLFJTU6W69vb2olq1aqJBgwZi586d4siRI6JFixZCS0tLfPfdd6J169Zi7969Yt++faJ27drC0tJSvH79Wnq9j4+P0NbWFnZ2dmLevHni+PHjws/PT2hqagpPT0+FdgAQVatWFS4uLmLHjh3i1KlT4saNG+LmzZtCLpcLZ2dnsWXLFnH8+HExZcoUUalSJeHn51fovsnOzhbt2rUTOjo60vZnz54tqlevLgCI2bNnS3VLsh0hhPD29hYymUyMHDlS/PHHH+Lo0aNi3rx5YuXKlQrvhZubW552vx1HdHS0sLW1Ffb29mLNmjXixIkT4ocffhA6OjrC19dXqhcVFSUACAcHB+Hh4SH2798v9u/fL5ydnYWJiYlISEgQQgjxzz//iE8++UQAEKGhodLj7fc5t9WrV4sFCxaIAwcOiJCQELF582bRsGFD4eTkJNLT06V6jRs3Fq1bt87z+ubNm4vmzZtLz/fs2SO+++47sW/fPhESEiICAgKEm5ubMDc3F8+fP5fq5fTl6tWriwkTJohjx46J3377TZiYmIh27dpJ9S5fviyqV68uGjduLLXn8uXLhbw7RXt/Ro8eLQCI8ePHi8DAQPHrr78Kc3NzYWtrqxCnm5ubqFKlinBychLr168Xx44dE56engKAmDNnjnB2dpY+Ly1bthQ6OjriyZMn0utnz54tAAgnJyfxww8/iKCgIDFt2jRp23Xq1BE//fSTCAoKEsOGDRMAxO+//y69PiEhQfj6+oqtW7eKU6dOicDAQDF16lRRqVIlsXnzZoV25/SRjz76SOzevVscOXJEuLu7C01NTXHv3j2p3okTJ4SGhob4+OOPxd69e8WePXtE8+bNhZ2dncj9FTVq1CihpaUlpkyZIgIDA8WOHTtEnTp1hKWlpYiJiSn0ffDx8RH29vZ5ynP2SXFizy0xMVHqS99++63URx49elTgtmbNmiUAiD59+og9e/aI48ePi2XLlolZs2ZJdXJ/ly5btkxoaGiIH374QSo7deqU0NbWFm3atBG7du0SgYGBwtfXVwAQGzduFEIIERsbK+bPny8AiJ9//lmKLzY2tsA25cRsb28vvvrqK3Hs2DGxbNky6Xv77c9lUfuxvb298PHxybOt3N9Tp0+flr6fP/nkE3HgwAFx6NAh8eLFiwLjzf29lp+cfTBw4EBx+PBhsWXLFlG9enUhl8vF33//LYR487387bffSvsvNDRU/PPPP+Ly5ctixIgRAoAIDAxUeH/t7e2FtbW1qF69utiwYYM4ffq0uHjxYpG/W4sqZ7/Y29sLX19faV8bGhqKdu3aiU6dOompU6eK48ePi0WLFgkNDQ0xYcIE6fU53+E5/aKw/delSxdhbm4u1q5dK4KDg8X+/fvFd999JwICAqQ6W7duFTKZTPTq1Uvs3btXHDx4UHh6egoNDQ1
2024-02-13 18:45:33 +01:00
"text/plain": [
2024-02-13 23:41:13 +01:00
"<Figure size 640x480 with 1 Axes>"
2024-02-13 18:45:33 +01:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-02-13 23:41:13 +01:00
"# graphique en nombre de client ayant commandé\n",
2024-02-21 23:08:33 +01:00
"purchases_graph = nb_purchases_graph_2\n",
2024-02-13 18:45:33 +01:00
"\n",
2024-02-21 23:08:33 +01:00
"purchases_graph_used = purchases_graph[purchases_graph[\"purchase_date_month\"] >= datetime(2021,4,1)]\n",
2024-02-13 23:41:13 +01:00
"purchases_graph_used_0 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==False]\n",
"purchases_graph_used_1 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==True]\n",
2024-02-13 18:45:33 +01:00
"\n",
"\n",
2024-02-13 23:41:13 +01:00
"# Création du barplot\n",
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_0[\"nb_new_customer\"], width=12, label = \"Nouveau client\")\n",
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_1[\"nb_new_customer\"], \n",
" bottom = purchases_graph_used_0[\"nb_new_customer\"], width=12, label = \"Ancien client\")\n",
2024-02-13 18:45:33 +01:00
"\n",
"\n",
2024-02-13 23:41:13 +01:00
"# commande pr afficher slt\n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))\n",
2024-02-13 18:45:33 +01:00
"\n",
2024-02-13 23:41:13 +01:00
"\n",
"# Ajout de titres et d'étiquettes\n",
"plt.xlabel('Mois')\n",
"plt.ylabel(\"Nombre de client ayant commandé\")\n",
"plt.title(\"Nombre de client ayant commandé un ticket pour l'offre 'muséale groupe'\")\n",
"plt.legend()\n",
"\n",
"# Affichage du barplot\n",
"plt.show()"
2024-02-13 18:45:33 +01:00
]
},
2024-02-14 12:44:57 +01:00
{
"cell_type": "markdown",
"id": "82895dfc-e5ca-4be0-af24-93c1be8f6248",
"metadata": {},
"source": [
"### Proportion de tickets de prix 0"
]
},
{
"cell_type": "code",
2024-02-21 23:08:33 +01:00
"execution_count": 17,
2024-02-14 12:44:57 +01:00
"id": "6e27dd83-f188-43a5-b595-618b4922a358",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ticket_id 0.418220\n",
"customer_id 0.418220\n",
"purchase_id 0.418220\n",
"event_type_id 0.418220\n",
"supplier_name 0.418220\n",
"purchase_date 0.418220\n",
"type_of_ticket_name 0.418220\n",
"amount 0.418220\n",
"children 0.418220\n",
"is_full_price 0.418220\n",
"name_event_types 0.418220\n",
"name_facilities 0.418220\n",
"name_categories 0.402548\n",
"name_events 0.175585\n",
"name_seasons 0.418220\n",
"dtype: float64"
]
},
2024-02-21 23:08:33 +01:00
"execution_count": 17,
2024-02-14 12:44:57 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases[purchases['amount'] == 0].count()/len(purchases)"
]
},
{
"cell_type": "code",
2024-02-21 23:08:33 +01:00
"execution_count": 18,
2024-02-14 12:44:57 +01:00
"id": "f663d68b-8a5c-4804-b31a-4477a03ca1e4",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>purchase_id</th>\n",
" <th>ticket_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>73518.000000</td>\n",
" <td>7.351800e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>10.096167</td>\n",
" <td>2.484660e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>2367.702603</td>\n",
" <td>4.636993e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.000000</td>\n",
" <td>2.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.000000</td>\n",
" <td>3.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>641981.000000</td>\n",
" <td>1.256574e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" purchase_id ticket_id\n",
"count 73518.000000 7.351800e+04\n",
"mean 10.096167 2.484660e+01\n",
"std 2367.702603 4.636993e+03\n",
"min 1.000000 1.000000e+00\n",
"25% 1.000000 1.000000e+00\n",
"50% 1.000000 2.000000e+00\n",
"75% 1.000000 3.000000e+00\n",
"max 641981.000000 1.256574e+06"
]
},
2024-02-21 23:08:33 +01:00
"execution_count": 18,
2024-02-14 12:44:57 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases.groupby('customer_id')[['purchase_id', 'ticket_id']].nunique().describe()"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 17,
2024-02-14 12:44:57 +01:00
"id": "d1212b10-3933-450a-b001-9e2cbf308f79",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>type_of_ticket_name</th>\n",
" <th>amount</th>\n",
" <th>children</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13070859</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>8.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13070860</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13070861</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13070862</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13070863</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826667</th>\n",
" <td>20662815</td>\n",
" <td>1256135</td>\n",
" <td>8007697</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 17:23:54+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826668</th>\n",
" <td>20662816</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826669</th>\n",
" <td>20662817</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826670</th>\n",
" <td>20662818</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826671</th>\n",
" <td>20662819</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1826672 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 13070859 48187 5107462 4 vente en ligne \n",
"1 13070860 48187 5107462 4 vente en ligne \n",
"2 13070861 48187 5107462 4 vente en ligne \n",
"3 13070862 48187 5107462 4 vente en ligne \n",
"4 13070863 48187 5107462 4 vente en ligne \n",
"... ... ... ... ... ... \n",
"1826667 20662815 1256135 8007697 5 vente en ligne \n",
"1826668 20662816 1256136 8007698 5 vente en ligne \n",
"1826669 20662817 1256136 8007698 5 vente en ligne \n",
"1826670 20662818 1256137 8007699 5 vente en ligne \n",
"1826671 20662819 1256137 8007699 5 vente en ligne \n",
"\n",
" purchase_date type_of_ticket_name amount \\\n",
"0 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
"1 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"2 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"3 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"4 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"... ... ... ... \n",
"1826667 2023-11-08 17:23:54+00:00 Atelier 11.0 \n",
"1826668 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826669 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826670 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"1826671 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"\n",
" children is_full_price name_event_types name_facilities \\\n",
"0 pricing_formula False spectacle vivant mucem \n",
"1 pricing_formula False spectacle vivant mucem \n",
"2 pricing_formula False spectacle vivant mucem \n",
"3 pricing_formula False spectacle vivant mucem \n",
"4 pricing_formula False spectacle vivant mucem \n",
"... ... ... ... ... \n",
"1826667 pricing_formula False offre muséale groupe mucem \n",
"1826668 pricing_formula False offre muséale groupe mucem \n",
"1826669 pricing_formula False offre muséale groupe mucem \n",
"1826670 pricing_formula False offre muséale groupe mucem \n",
"1826671 pricing_formula False offre muséale groupe mucem \n",
"\n",
" name_categories name_events name_seasons \n",
"0 indiv prog enfant l'école des magiciens 2018 \n",
"1 indiv prog enfant l'école des magiciens 2018 \n",
"2 indiv prog enfant l'école des magiciens 2018 \n",
"3 indiv prog enfant l'école des magiciens 2018 \n",
"4 indiv prog enfant l'école des magiciens 2018 \n",
"... ... ... ... \n",
"1826667 indiv entrées tp NaN 2023 \n",
"1826668 indiv entrées tp NaN 2023 \n",
"1826669 indiv entrées tp NaN 2023 \n",
"1826670 indiv entrées tp NaN 2023 \n",
"1826671 indiv entrées tp NaN 2023 \n",
"\n",
"[1826672 rows x 15 columns]"
]
},
2024-02-19 23:11:28 +01:00
"execution_count": 17,
2024-02-14 12:44:57 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases"
]
},
2024-02-11 11:47:58 +01:00
{
"cell_type": "markdown",
"id": "b8a90eaa-c383-4f73-9fd6-6fbbe8eeefb8",
"metadata": {},
"source": [
"# 2 - Comportement d'achat bis (Alexis)"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 18,
2024-02-11 11:47:58 +01:00
"id": "dc45c1cd-2a78-48a6-aa2b-6a501254b6f2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-02-13 23:41:13 +01:00
"(156289, 40)\n"
2024-02-11 11:47:58 +01:00
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>birthdate</th>\n",
" <th>street_id</th>\n",
" <th>is_partner</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>...</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>name_event_types</th>\n",
" <th>avg_amount</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
2024-02-13 23:41:13 +01:00
" <td>3262.190868</td>\n",
" <td>4.179306</td>\n",
" <td>3258.011562</td>\n",
2024-02-11 11:47:58 +01:00
" <td>51.0</td>\n",
" <td>offre muséale individuel</td>\n",
" <td>6.150659</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
2024-02-13 23:41:13 +01:00
" <td>2502.715509</td>\n",
" <td>1408.715532</td>\n",
" <td>1093.999977</td>\n",
2024-02-11 11:47:58 +01:00
" <td>5.0</td>\n",
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
2024-02-13 23:41:13 +01:00
" <td>3698.198229</td>\n",
" <td>5.221840</td>\n",
" <td>3692.976389</td>\n",
2024-02-11 11:47:58 +01:00
" <td>2988.0</td>\n",
" <td>spectacle vivant</td>\n",
" <td>7.762474</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
2024-02-13 23:41:13 +01:00
" <td>3803.369792</td>\n",
" <td>0.146331</td>\n",
" <td>3803.223461</td>\n",
2024-02-11 11:47:58 +01:00
" <td>9.0</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>4.452618</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
2024-02-13 23:41:13 +01:00
" <td>1705.261192</td>\n",
" <td>1456.333715</td>\n",
" <td>248.927477</td>\n",
2024-02-11 11:47:58 +01:00
" <td>0.0</td>\n",
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
" <td>4.0</td>\n",
2024-02-13 23:41:13 +01:00
" <td>NaN</td>\n",
2024-02-11 11:47:58 +01:00
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-02-13 23:41:13 +01:00
"<p>5 rows × 40 columns</p>\n",
2024-02-11 11:47:58 +01:00
"</div>"
],
"text/plain": [
2024-02-13 23:41:13 +01:00
" customer_id birthdate street_id is_partner gender is_email_true \\\n",
"0 1 NaN 2 False 2 True \n",
"1 1 NaN 2 False 2 True \n",
"2 1 NaN 2 False 2 True \n",
"3 1 NaN 2 False 2 True \n",
"4 2 NaN 2 False 1 True \n",
2024-02-11 11:47:58 +01:00
"\n",
2024-02-13 23:41:13 +01:00
" opt_in structure_id profession language ... vente_internet_max \\\n",
"0 False NaN NaN NaN ... 1.0 \n",
"1 False NaN NaN NaN ... 1.0 \n",
"2 False NaN NaN NaN ... 1.0 \n",
"3 False NaN NaN NaN ... 1.0 \n",
"4 True NaN NaN NaN ... 0.0 \n",
2024-02-11 11:47:58 +01:00
"\n",
2024-02-13 23:41:13 +01:00
" purchase_date_min purchase_date_max time_between_purchase \\\n",
"0 3262.190868 4.179306 3258.011562 \n",
"1 2502.715509 1408.715532 1093.999977 \n",
"2 3698.198229 5.221840 3692.976389 \n",
"3 3803.369792 0.146331 3803.223461 \n",
"4 1705.261192 1456.333715 248.927477 \n",
2024-02-11 11:47:58 +01:00
"\n",
2024-02-13 23:41:13 +01:00
" nb_tickets_internet name_event_types avg_amount nb_campaigns \\\n",
"0 51.0 offre muséale individuel 6.150659 NaN \n",
"1 5.0 formule adhésion 6.439463 NaN \n",
"2 2988.0 spectacle vivant 7.762474 NaN \n",
"3 9.0 offre muséale groupe 4.452618 NaN \n",
"4 0.0 formule adhésion 6.439463 4.0 \n",
2024-02-11 11:47:58 +01:00
"\n",
2024-02-13 23:41:13 +01:00
" nb_campaigns_opened time_to_open \n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
2024-02-11 11:47:58 +01:00
"\n",
2024-02-13 23:41:13 +01:00
"[5 rows x 40 columns]"
2024-02-11 11:47:58 +01:00
]
},
2024-02-19 23:11:28 +01:00
"execution_count": 18,
2024-02-11 11:47:58 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Chargement des données temporaires\n",
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Temp/Company 1 - customer_event.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" customer = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(customer.shape)\n",
"customer.head()"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 19,
2024-02-11 11:47:58 +01:00
"id": "89fcb455-efb4-4ad4-ab88-efd6c8a76287",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'birthdate', 'street_id', 'is_partner', 'gender',\n",
" 'is_email_true', 'opt_in', 'structure_id', 'profession', 'language',\n",
" 'mcp_contact_id', 'last_buying_date', 'max_price', 'ticket_sum',\n",
" 'average_price', 'fidelity', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'age', 'tenant_id',\n",
2024-02-13 23:41:13 +01:00
" 'event_type_id', 'nb_tickets', 'nb_purchases', 'total_amount',\n",
" 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n",
" 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
" 'name_event_types', 'avg_amount', 'nb_campaigns', 'nb_campaigns_opened',\n",
" 'time_to_open'],\n",
2024-02-11 11:47:58 +01:00
" dtype='object')"
]
},
2024-02-19 23:11:28 +01:00
"execution_count": 19,
2024-02-11 11:47:58 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer.columns"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 20,
2024-02-11 11:47:58 +01:00
"id": "d7b2356a-d5fc-4547-b3ff-fded0e304fb6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>average_price</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>purchase_count</th>\n",
" <th>total_price</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>7.030122</td>\n",
" <td>-67.790969</td>\n",
" <td>13.751530</td>\n",
" <td>1.956087</td>\n",
2024-02-13 23:41:13 +01:00
" <td>641472</td>\n",
2024-02-11 11:47:58 +01:00
" <td>8821221.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
2024-02-13 23:41:13 +01:00
" <td>307</td>\n",
2024-02-11 11:47:58 +01:00
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>3</td>\n",
" <td>18.333333</td>\n",
" <td>30.666667</td>\n",
" <td>36.666667</td>\n",
" <td>2.000000</td>\n",
2024-02-13 23:41:13 +01:00
" <td>3</td>\n",
2024-02-11 11:47:58 +01:00
" <td>110.0</td>\n",
" <td>222.0</td>\n",
" <td>124.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>4</td>\n",
" <td>10.250000</td>\n",
" <td>5.000000</td>\n",
" <td>20.500000</td>\n",
" <td>2.000000</td>\n",
2024-02-13 23:41:13 +01:00
" <td>2</td>\n",
2024-02-11 11:47:58 +01:00
" <td>41.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>5</td>\n",
" <td>9.500000</td>\n",
" <td>0.000000</td>\n",
" <td>19.000000</td>\n",
" <td>2.000000</td>\n",
2024-02-13 23:41:13 +01:00
" <td>1</td>\n",
2024-02-11 11:47:58 +01:00
" <td>19.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id average_price average_purchase_delay average_price_basket \\\n",
"0 1 7.030122 -67.790969 13.751530 \n",
"4 2 0.000000 0.000000 0.000000 \n",
"6 3 18.333333 30.666667 36.666667 \n",
"7 4 10.250000 5.000000 20.500000 \n",
"9 5 9.500000 0.000000 19.000000 \n",
"\n",
" average_ticket_basket purchase_count total_price nb_campaigns \\\n",
2024-02-13 23:41:13 +01:00
"0 1.956087 641472 8821221.5 0.0 \n",
"4 1.000000 307 0.0 4.0 \n",
"6 2.000000 3 110.0 222.0 \n",
"7 2.000000 2 41.0 7.0 \n",
"9 2.000000 1 19.0 4.0 \n",
2024-02-11 11:47:58 +01:00
"\n",
" nb_campaigns_opened \n",
"0 0.0 \n",
"4 0.0 \n",
"6 124.0 \n",
"7 7.0 \n",
"9 0.0 "
]
},
2024-02-19 23:11:28 +01:00
"execution_count": 20,
2024-02-11 11:47:58 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"achat = ['customer_id', 'average_price', 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'purchase_count', 'total_price', 'nb_campaigns',\n",
" 'nb_campaigns_opened']\n",
"\n",
"customer_achat = customer[achat].drop_duplicates(subset = ['customer_id'])\n",
"customer_achat['nb_campaigns'] = customer_achat['nb_campaigns'].fillna(0)\n",
"customer_achat['nb_campaigns_opened'] = customer_achat['nb_campaigns_opened'].fillna(0)\n",
"customer_achat = customer_achat.fillna(0)\n",
"customer_achat.head()"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 21,
2024-02-11 11:47:58 +01:00
"id": "5559748f-1745-4651-a9f6-94702c7ee66f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>average_price</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>purchase_count</th>\n",
" <th>total_price</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
2024-02-13 23:41:13 +01:00
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
2024-02-11 11:47:58 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
2024-02-13 23:41:13 +01:00
" <td>5.252070</td>\n",
" <td>-206.581486</td>\n",
" <td>11.451596</td>\n",
" <td>1.723372</td>\n",
" <td>0.655148</td>\n",
" <td>16.994064</td>\n",
" <td>40.923241</td>\n",
" <td>7.870681</td>\n",
2024-02-11 11:47:58 +01:00
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
2024-02-13 23:41:13 +01:00
" <td>7.915955</td>\n",
" <td>2996.743657</td>\n",
" <td>48.271194</td>\n",
" <td>7.045950</td>\n",
" <td>5.694038</td>\n",
" <td>313.099102</td>\n",
" <td>70.445724</td>\n",
" <td>23.119061</td>\n",
2024-02-11 11:47:58 +01:00
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
2024-02-13 23:41:13 +01:00
" <td>-44863.000000</td>\n",
2024-02-11 11:47:58 +01:00
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2024-02-13 23:41:13 +01:00
" <td>5.000000</td>\n",
2024-02-11 11:47:58 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>11.000000</td>\n",
" <td>0.000000</td>\n",
2024-02-13 23:41:13 +01:00
" <td>19.000000</td>\n",
2024-02-11 11:47:58 +01:00
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
2024-02-13 23:41:13 +01:00
" <td>20.000000</td>\n",
" <td>32.000000</td>\n",
" <td>3.000000</td>\n",
2024-02-11 11:47:58 +01:00
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
2024-02-13 23:41:13 +01:00
" <td>320.000000</td>\n",
2024-02-11 11:47:58 +01:00
" <td>1914.000000</td>\n",
" <td>9900.000000</td>\n",
" <td>900.000000</td>\n",
" <td>1508.000000</td>\n",
" <td>64350.000000</td>\n",
" <td>439.000000</td>\n",
" <td>434.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" average_price average_purchase_delay average_price_basket \\\n",
2024-02-13 23:41:13 +01:00
"count 151865.000000 151865.000000 151865.000000 \n",
"mean 5.252070 -206.581486 11.451596 \n",
"std 7.915955 2996.743657 48.271194 \n",
"min 0.000000 -44863.000000 0.000000 \n",
2024-02-11 11:47:58 +01:00
"25% 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 \n",
2024-02-13 23:41:13 +01:00
"75% 11.000000 0.000000 19.000000 \n",
"max 320.000000 1914.000000 9900.000000 \n",
2024-02-11 11:47:58 +01:00
"\n",
" average_ticket_basket purchase_count total_price nb_campaigns \\\n",
2024-02-13 23:41:13 +01:00
"count 151865.000000 151865.000000 151865.000000 151865.000000 \n",
"mean 1.723372 0.655148 16.994064 40.923241 \n",
"std 7.045950 5.694038 313.099102 70.445724 \n",
2024-02-11 11:47:58 +01:00
"min 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 2.000000 \n",
2024-02-13 23:41:13 +01:00
"50% 0.000000 0.000000 0.000000 5.000000 \n",
"75% 2.000000 1.000000 20.000000 32.000000 \n",
2024-02-11 11:47:58 +01:00
"max 900.000000 1508.000000 64350.000000 439.000000 \n",
"\n",
" nb_campaigns_opened \n",
2024-02-13 23:41:13 +01:00
"count 151865.000000 \n",
"mean 7.870681 \n",
"std 23.119061 \n",
2024-02-11 11:47:58 +01:00
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 1.000000 \n",
2024-02-13 23:41:13 +01:00
"75% 3.000000 \n",
2024-02-11 11:47:58 +01:00
"max 434.000000 "
]
},
2024-02-19 23:11:28 +01:00
"execution_count": 21,
2024-02-11 11:47:58 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer_wto_outlier = customer_achat[customer_achat['customer_id']!=1]\n",
"\n",
"customer_wto_outlier[['average_price', 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'purchase_count', 'total_price', 'nb_campaigns', 'nb_campaigns_opened']].describe()"
]
},
{
"cell_type": "markdown",
"id": "b49c9e93-f324-42ee-a262-34ffb44a2261",
"metadata": {},
"source": [
2024-02-13 23:41:13 +01:00
"# Event"
2024-02-11 11:47:58 +01:00
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 22,
2024-02-11 11:47:58 +01:00
"id": "4971e35d-a762-4e18-9443-fd9571bd3f1e",
"metadata": {},
"outputs": [
{
"data": {
2024-02-13 23:41:13 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAJgCAYAAACXyFewAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACRpklEQVR4nOzdeXxM1/8/8NdkXyQjkZ2IRAiRxFqRqJ2EirVKG9LYtUpqSa211lZ76aeWqtrXompLY6/YhSRUKBUEiSAbQURyfn/45X6NCZ1h4o54PR+PeTzMuWfuvGZJvHPuuecqhBACRERERPRKBnIHICIiInoXsGgiIiIi0gCLJiIiIiINsGgiIiIi0gCLJiIiIiINsGgiIiIi0gCLJiIiIiINsGgiIiIi0gCLJiIiIiINsGgiIiLSUwkJCbCwsMD8+fPfaB9KpfKN9kHPsGgqJsuWLYNCoYCZmRmuXbumtr1x48bw8fGRIRnQvXt3lCpVSpbn/i8KhQLjx4+XOwa9hp07d/Kze8GBAwegUChw4MABuaO8886fP4/x48fj6tWrckd5bY0bN0bjxo2l+1evXn3l9+P+/fvo1KkTBg4ciIEDB7728/r5+WHLli0YNWoUjh8//tr7KQmOHDmC8ePHIzMz87Uez6KpmOXm5uLbb7+VOwZRsdu5cycmTJggdwy9UqtWLRw9ehS1atWSO8o77/z585gwYcI7XTRpq1evXqhbty6mTZv2xvtq2rQpFi1ahC5duuDevXs6SPduOnLkCCZMmMCiSV+1bNkSa9asQXx8vNxRdEIIgUePHskdg0hNXl4enj59KncMFdbW1qhXrx6sra3ljvJOePjwodwR9MqGDRuwatUqKBQKnewvNDQUV69eRZkyZXSyv/cRi6ZiNmzYMJQpUwbDhw//z76PHz/GyJEj4e7uDhMTE5QtWxZfffWVWkVcoUIFhISEYPv27ahZsybMzc1RtWpVbN++HcCzQ4NVq1aFpaUl6tati1OnThX5fH///TeaNWsGS0tL2NvbY8CAAWq/tBQKBQYMGICFCxeiatWqMDU1xfLlywEAly5dQmhoKBwcHGBqaoqqVavif//7n0bvS3Z2Nvr06YMyZcqgVKlSaNmyJf75558i+77J8xQUFGD+/PmoUaMGzM3NUbp0adSrVw9//PGHSp/p06ejSpUqMDU1hYODAz7//HPcuHFDZV+Fh1RPnjyJBg0awMLCAh4eHpg2bRoKCgpU9jdp0iR4eXlJz+nn54cffvhBZX8xMTFo1qwZrKysYGFhgcDAQOzYsUOlT+Fh3n379knvl7W1NT7//HPk5OQgNTUVnTt3RunSpeHs7IzIyEjk5eVJjy8c/p8xYwa+//57VKhQAebm5mjcuDH++ecf5OXlYcSIEXBxcYFSqUSHDh2QlpamkmH9+vUICgqCs7Oz9F0bMWIEcnJypD7du3eXPhOFQiHdCkcFhBD46aefpM/BxsYGnTp1wpUrV1Seq0KFCujevbva5/jiYY3Cw14rV67E0KFDUbZsWZiamuLy5ct4+PAhIiMj4e7uDjMzM9ja2qJOnTpYu3at2n6fN378+CL/cyr8DJ4f4Sj8GYyKikKtWrVgbm6OKlWqYOnSpSqPfdnhuWXLlsHLy0v6Pq9YsQLdu3dHhQoV/vOxhZ/psmXLVNpPnTqFtm3bwtbWFmZmZqhZsyY2bNig0ud135vC92D37t3o0aMHbG1tYWlpiTZt2qh9hrt370a7du1Qrlw5mJmZwdPTE/369cPdu3dV+hW+36dPn0anTp1gY2ODihUrvvT5P/nkEwBAkyZNpO/XsmXL8N1338HIyAjJyclqj+vZsyfKlCmDx48fA/i/z23Lli3w8/ODmZkZPDw8MG/ePLXHZmdnS+9V4e/jQYMGqXzvX0YIgenTp8PNzQ1mZmaoVasWdu3a9Z+PK/Rfn2V8fDwUCgV++eUXtcfu2rULCoVC5Xfc879DTUxMUKVKFbXXXPh9W7t2LUaPHg0XFxdYW1ujefPmuHjxotrz7NmzB82aNYO1tTUsLCxQv3597N27V6VP4WeckJCATz75BEqlEra2thgyZAiePn2KixcvomXLlrCyskKFChUwffp0tefR9HMo/L9q5cqVqFq1KiwsLFC9enXp/8XCPN988w0AwN3dXfoeaXX4XFCx+PXXXwUAcfLkSfHDDz8IAGLv3r3S9kaNGolq1apJ9wsKCkRwcLAwMjISY8aMEdHR0WLmzJnC0tJS1KxZUzx+/Fjq6+bmJsqVKyd8fHzE2rVrxc6dO4W/v78wNjYWY8eOFfXr1xebN28WW7ZsEZUrVxaOjo7i4cOH0uPDw8OFiYmJKF++vJg8ebKIjo4W48ePF0ZGRiIkJETldQAQZcuWFX5+fmLNmjVi37594ty5c+Lvv/8WSqVS+Pr6ihUrVojo6GgxdOhQYWBgIMaPH//K96agoEA0adJEmJqaSs8/btw44eHhIQCIcePGSX3f5HmEECIsLEwoFArRu3dvsXXrVrFr1y4xefJk8cMPP0h9+vbtKwCIAQMGiKioKLFw4UJhb28vXF1dxZ07d1Q+szJlyohKlSqJhQsXit27d4v+/fsLAGL58uVSv6lTpwpDQ0Mxbtw4sXfvXhEVFSXmzp2rkvfAgQPC2NhY1K5dW6xfv178/vvvIigoSCgUCrFu3TqpX+H3yN3dXQwdOlRER0eL77//XhgaGorPPvtM1KpVS0yaNEns3r1bDB8+XAAQs2bNkh6flJQkAAg3NzfRpk0bsX37drFq1Srh6OgoKleuLMLCwkTPnj3Frl27xMKFC0WpUqVEmzZtVN7D7777TsyZM0fs2LFDHDhwQCxcuFC4u7uLJk2aSH0uX74sOnXqJACIo0ePSrfC722fPn2EsbGxGDp0qIiKihJr1qwRVapUEY6OjiI1NVXaj5ubmwgPD1f7HBs1aiQaNWok3d+/f7/03ezUqZP4448/xPbt28W9e/dEv379hIWFhZg9e7bYv3+/2L59u5g2bZqYP3/+K78r48aNE0X9Siz8DJKSklRylitXTnh7e4sVK1aIP//8U3zyyScCgDh48KBazv3796vtr127dmLbtm1i1apVwtPTU7i6ugo3N7dXPlaI//tMf/31V6lt3759wsTERDRo0ECsX79eREVFie7du6v1e933pjCzq6ur9H1ZvHixcHBwEK6uriIjI0Pqu2DBAjF16lTxxx9/iIMHD4rly5eL6tWrCy8vL/HkyRO199vNzU0MHz5c7N69W/z+++9FPn9aWpqYMmWKACD+97//Sd+vtLQ0cfv2bWFqaipGjx6t8ph79+4Jc3Nz8c0330htbm5uomzZsqJ8+fJi6dKlYufOnaJr164CgJgxY4bULycnR9SoUUPY2dmJ2bNniz179ogffvhBKJVK0bRpU1FQUPDK96vwtfXq1Ut6r8qWLSucnJxUvsdF0fSzrFmzpqhfv77a4zt37iwcHBxEXl6eEKLo36GDBw8WCoVCjBkzRnpc4fetQoUKomvXrmLHjh1i7dq1onz58qJSpUri6dOnUt+VK1cKhUIh2rdvLzZv3iy2bdsmQkJChKGhodizZ4/a++Dl5SW+++47sXv3bjFs2DDp922VKlXEvHnzxO7du0WPHj0EALFp06bX+hwKs9etW1ds2LBB7Ny5UzRu3FgYGRmJf//9VwghRHJyshg4cKAAIDZv3ix9j7Kysl75mTyPRVMxeb5oys3NFR4eHqJOnTrSh/xi0RQVFSUAiOnTp6vsZ/369QKAWLx4sdTm5uYmzM3NxY0bN6S2uLg4AUA4OzuLnJwcqf33338XAMQff/whtYWHhwsAKoWDEEJMnjxZABAxMTFSGwChVCpFenq6St/g4GBRrlw5tS/bgAEDhJmZmVr/5+3ateuVz/980fQmz/PXX38JAGq/TJ+XmJgoAIj+/furtB8/flwAEKNGjZLaGjVqJACI48ePq/T19vYWwcHB0v2QkBBRo0aNlz6nEELUq1dPODg4iPv370ttT58+FT4+PqJcuXLS96TwezRw4ECVx7dv314AELNnz1Zpr1GjhqhVq5Z0v/A
2024-02-11 11:47:58 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Nombre de consommateurs uniques par type d'évènement\n",
"\n",
"event_counts = customer.groupby('name_event_types')['customer_id'].nunique()\n",
"\n",
"event_counts.plot(kind='bar')\n",
"plt.xlabel(\"Type d'évènement\")\n",
"plt.ylabel('Nombre de consommateurs uniques')\n",
"plt.title(\"Nombre de consommateurs uniques par type d'évènement\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 23,
2024-02-11 11:47:58 +01:00
"id": "bc65a711-d172-4839-b487-3047280fc3a6",
"metadata": {},
"outputs": [
{
"data": {
2024-02-13 23:41:13 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtIAAAJICAYAAABMlwOPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACqtUlEQVR4nOzdd1xT1/8/8FdYYQgRRUAcuHGAWxFtRavgwj1asQhq0bqQonXUuhVn1dbd1r1w103BhaKiOFCxzn5cKIgiWzbn90d/3K8RB4lgGK/n48FDc+47yTs3J5c3J+eeKxNCCBARERERkUq0NJ0AEREREVFRxEKaiIiIiEgNLKSJiIiIiNTAQpqIiIiISA0spImIiIiI1MBCmoiIiIhIDSykiYiIiIjUwEKaiIiIiEgNLKSJiIiIiNTAQpqIiAqVpKQk1K5dG3379kV2dram06FPlB/vZ0JCAho2bIg+ffrkc3ZEn4aFNBV6GzZsgEwmg76+Ph49epRre5s2bWBra6uBzAAPDw+UKlVKI889ffp0yGSyj/60adPmo4917tw5TJ8+HXFxcZ+cj7o8PDxQpUoVte//tiNHjmD69Onv3FalShV4eHio9HgPHz6ETCbDokWLPj05ANu2bcPSpUvz5bEKQkF8rnx9ffHXX399NM7T0xMWFhbYsmULtLSK/q8pDw+PPH1WVe2TmvCuz3mVKlXe+1kD8uf9NDExwZEjRxAaGoolS5ao9RjFxbNnzzB9+nSEhYVpOhUCoKPpBIjyKi0tDT///DM2b96s6VQKhe+++w4dO3aUbkdGRqJXr14YPXo0XF1dpXYTE5OPPta5c+cwY8YMeHh4oHTp0gWR7md35MgRrFix4p2/4Pft25en/VKQtm3bhvDwcHh7e2s0j8/J19cXffr0QY8ePd4bs2LFCly/fh1nz56FXC7/fMkVoClTpuD777+Xbl+5cgUjR46Er68v2rZtK7WXK1dOE+kVqPx8P62srHD06FG0bdsW9vb2aNmyZT5lWbQ8e/YMM2bMQJUqVdCwYUNNp1PisZCmIqNjx47Ytm0bxo0bhwYNGmg6nU8mhEBqaioMDAzUun/FihVRsWJF6fbDhw8BAJUrV0aLFi3yI8Viq1GjRppOgd5j5MiRGDlypKbTUEtGRgZkMhl0dJR/tVavXh3Vq1eXbqempgIAatasWew/q/n9ftatWxfPnz/Pt8cj+lRF/zszKjHGjx+PsmXLYsKECR+NTU1NxaRJk1C1alXo6emhQoUKGDlyZK6pC1WqVIGLiwsOHTqERo0awcDAAHXq1MGhQ4cA/DetpE6dOjAyMkLz5s1x6dKldz7fzZs30a5dOxgZGaFcuXIYNWoUXr9+rRQjk8kwatQorF69GnXq1IFcLsfGjRsBAPfu3YOrqyvMzc0hl8tRp04drFixQo29lNuBAwfg4OAAQ0NDGBsbw8nJCefPn5e2T58+HT/++CMAoGrVqtLXzKdOnQIA7NixA87Ozihfvry0fyZOnIjk5GS1c9qwYQNsbGyk17pp06Z3xqWnp2P27NmoXbs25HI5ypUrh0GDBuHFixcffHwPDw9p/7351XnOHxvvmtoRFxeHsWPHolq1apDL5TA3N0fnzp1x+/bt9z5PRkYG3N3dUapUKanPCCGwcuVKNGzYEAYGBjA1NUWfPn3wv//9T7pfmzZtcPjwYTx69EgpvxyrVq1CgwYNUKpUKRgbG6N27dr46aefPviaAWDGjBmwt7dHmTJlYGJigsaNG2Pt2rUQQuSK3bZtGxwcHFCqVCmUKlUKDRs2xNq1a3PFhYaG4ssvv4ShoSGqVauGefPm5ZrnmpCQgHHjxil93ry9vZX6iEwmQ3JyMjZu3PjOaUdRUVEYNmwYKlasCD09PVStWhUzZsxAZmam0nOps29ypuUsWLAAc+bMQeXKlaGvr4+mTZvi+PHjSrH379/HoEGDULNmTRgaGqJChQro2rUrbty4oRR36tQpyGQybN68GWPHjkWFChUgl8tx//79D+byLmfOnIFMJsP27dtzbdu0aRNkMhlCQ0MB/N90srwcc/LSFz/k8OHDaNiwIeRyOapWrarStKaPvZ8ZGRkwNzeHm5tbrvvGxcXBwMAAPj4+UtvbfczKygpeXl5ISkpSum/OcXbz5s2oU6cODA0N0aBBA+nz+aa8HHdz3udt27ZhwoQJKF++PEqVKoWuXbvi+fPnSExMxNChQ2FmZgYzMzMMGjQoV055fR9yplN96DN36tQpNGvWDAAwaNAg6bOU883b//73P3zzzTewsrKCXC6HhYUF2rVrx2kgBUkQFXLr168XAERoaKj49ddfBQBx/Phxabujo6OoV6+edDs7O1t06NBB6OjoiClTpoiAgACxaNEiYWRkJBo1aiRSU1OlWGtra1GxYkVha2srtm/fLo4cOSLs7e2Frq6umDp1qmjVqpXYu3ev2Ldvn6hVq5awsLAQr1+/lu7v7u4u9PT0ROXKlcWcOXNEQECAmD59utDR0REuLi5KrwOAqFChgqhfv77Ytm2bOHHihAgPDxc3b94UCoVC2NnZiU2bNomAgAAxduxYoaWlJaZPn57n/fTgwQMBQCxcuFBq27p1qwAgnJ2dxV9//SV27NghmjRpIvT09MSZM2eEEEI8efJEjB49WgAQe/fuFefPnxfnz58X8fHxQgghZs2aJZYsWSIOHz4sTp06JVavXi2qVq0q2rZtq/T806ZNE3k5pOS8n927dxcHDx4UW7ZsETVq1BCVKlUS1tbWUlxWVpbo2LGjMDIyEjNmzBCBgYHizz//FBUqVBB169ZVeh/edv/+fdGnTx8BQHo958+fl957a2tr4e7uLsUnJCSIevXqCSMjIzFz5kzx999/iz179ogxY8aIEydOvHP/xsbGirZt2wpLS0tx6dIl6bE8PT2Frq6uGDt2rPD39xfbtm0TtWvXFhYWFiIqKkoIIcTNmzdFq1athKWlpVJ+Qgixfft2AUCMHj1aBAQEiGPHjonVq1cLLy+vj+5bDw8PsXbtWhEYGCgCAwPFrFmzhIGBgZgxY4ZS3JQpUwQA0atXL7Fr1y4REBAgFi9eLKZMmSLFODo6irJly4qaNWuK1atXi8DAQDFixAgBQGzcuFGKS05OFg0bNhRmZmZi8eLF4tixY+LXX38VCoVCfPXVVyI7O1sIIcT58+eFgYGB6Ny5s/R6b968KYQQIjIyUnr/16xZI44dOyZmzZol5HK58PDwkJ5L3X2T895VqlRJfPHFF2LPnj1i165dolmzZkJXV1ecO3dOig0KChJjx44Vu3fvFkFBQWLfvn2iR48ewsDAQNy+fVuKO3nypPSZ7tOnjzhw4IA4dOiQiImJ+ej7lHPfXbt2SW2NGjUSrVq1yhXbrFkz0axZM+m2KsecvPTF9zl27JjQ1tYWX3zxhdi7d6+0vypXrvzRz3le388ffvhBGBgYSMeaHCtXrhQAxPXr14UQ7+5jS5YsESYmJsLR0VFkZWVJ9wUgqlSpIpo3by527twpjhw5Itq0aSN0dHTEv//+K8Xl9bib815ZW1sLDw8P4e/vL1avXi1KlSol2rZtK5ycnMS4ceNEQECAmD9/vtDW1hajR49W633Iy2cuPj5eOob+/PPP0mfpyZMnQgghbGxsRI0aNcTmzZtFUFCQ2LNnjxg7dqw4efLkB98zUh8LaSr03iyk09LSRLVq1UTTpk2lX9BvF9L+/v4CgFiwYIHS4+zYsUMAEL///rvUZm1tLQwMDERERITUFhYWJgCI8uXLi+TkZKn9r7/+EgDEgQMHpDZ3d3cBQPz6669KzzVnzhwBQAQHB0ttAIRCoRCvXr1Siu3QoYOoWLFirl8mo0aNEvr6+rni3+ftQi8rK0tYWVkJOzs7pV80iYmJwtzcXLRs2VJqW7hwoQAgHjx48MHnyM7OFhkZGSIoKEgAENeuXZO25aWQzsmpcePG0vsnhBAPHz4Uurq6SoV0TtG0Z88epccIDQ0VAMTKlSs/+FwjR458bz5vF9IzZ84UAERgYOB7H+/N/fvgwQNRt25dUbduXfHw4UMp5vz58wKA+OWXX5T
2024-02-11 11:47:58 +01:00
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Nombre Total de tickets achetés par Type d'évènements\n",
"\n",
"total_tickets_by_event = customer.groupby('name_event_types')['nb_tickets'].sum()\n",
"\n",
"total_tickets_by_event.plot(kind='bar', figsize=(8, 5))\n",
"plt.xlabel(\"Type d'évènements\")\n",
"plt.ylabel('Nombre Total de tickets achetés')\n",
"plt.title(\"Nombre Total de tickets achetés par Type d'évènements\")\n",
"plt.xticks(rotation=45)\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 24,
2024-02-11 11:47:58 +01:00
"id": "c95cc35c-abfc-47c7-9b8a-ac69bfd60dd8",
"metadata": {},
"outputs": [
{
"data": {
2024-02-13 23:41:13 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAwsAAAJgCAYAAADF4v+XAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACNvklEQVR4nOzdd1hT1/8H8HfYIENQQERkSB0IuFAL1j1x1lE3DrBWrcXdOqq4tdY6W7TWgZPi3nuL4ldBBBfWgWIV3IiioJDz+8MfqTEJgkIu4Pv1PDwPOfcmeecmuckn59xzZUIIASIiIiIiovfoSB2AiIiIiIgKJhYLRERERESkFosFIiIiIiJSi8UCERERERGpxWKBiIiIiIjUYrFARERERERqsVggIiIiIiK1WCwQEREREZFaLBaIiIiIiEgtFgtERIXY3bt3YWNjgxEjRkgdhYgoz/To0QMVKlTAo0ePPvo2/P39Ua5cOSQnJ+ddsM9QroqFkJAQyGQyGBkZ4fbt2yrLGzRoAHd39zwLlxt9+vSBqampJPf9ITKZDBMnTszT20xPT8fvv/+Or776CpaWljAwMIC9vT06d+6MY8eO5el9FVYNGjRAgwYNPvl23rx5A1tbW3z55Zca15HL5Shbtiw8PT0/+f7UCQ4ORkhISL7c9oc4OTlBJpNp3JarVq2CTCaDTCbD0aNHtZpNSi9fvsTEiRPVPuasfeWtW7cUbX369IGTk5PSek5OTujTp89HZ8jIyEDXrl3RqFEjzJ49+6NvR9s+9XF/LO43tSu79wjlnJT7/7xw9OhRlc+HPn36ZPv5vHjxYhw6dAh79+5FyZIlP/q+lyxZgvLly0uyvylopk+fjq1bt37UdT+qZyE9PR0///zzR90hfbpHjx6hTp06GD58ONzd3RESEoJDhw7ht99+g66uLho3boyYmBipYxYZ+vr68PPzw//+9z9cvnxZ7ToHDx7EnTt3EBAQkC8ZpP6wMDMzw/Hjx3Hjxg2VZcuXL4e5ubkEqaT18uVLTJo0Se0XoVatWiEiIgJ2dnbZ3saWLVswfvz4j84wZswY6OnpKQo20oz7Te3L7j1COSf1/l/bzp07h/Hjx2P37t1wdnb+pNvS09PDhg0bcOfOHfz66695lLBw+pRiQe9jrtSiRQusW7cOI0eORJUqVT7qjgsSIQTS0tJgbGwsdZQc6dWrF2JiYrBv3z40atRIaVnXrl0xfPhwWFpaSpSuaAoICMBvv/2G5cuXq/0Fd/ny5TAwMEDPnj0lSJf/vvrqK1y4cAHLly/HtGnTFO03btzA8ePH0a9fP/z1118SJixYrK2tYW1t/cH1qlWr9kn3U1A//DIzM5GRkQFDQ0Opoyhwv/l5evnyJUxMTKSOka3C9h0kv1WvXh0PHz7Ms9szNTVFVFRUnt3e5+ijehZ+/PFHlChRAj/99NMH101LS8OYMWPg7Oys6PL9/vvvVcaPOTk5oXXr1ti5cyeqVasGY2NjVKpUCTt37gTwtlu/UqVKKFasGGrVqoXIyEi193fp0iU0btwYxYoVg7W1NQYPHoyXL18qrSOTyTB48GAsXrwYlSpVgqGhIVauXAkAuHbtGrp37w4bGxsYGhqiUqVK+OOPP3K0XVJSUvDtt9+iRIkSMDU1RYsWLfDPP/+oXfdj7ycqKgp79uxBQECAygdelpo1a6Js2bIAgIcPH2LQoEFwc3ODqakpbGxs0KhRI5w4cULpOrdu3YJMJsPs2bMxZ84cODs7w9TUFN7e3jh9+rTSupGRkejatSucnJxgbGwMJycndOvWTWVo2sSJE9X+2vn+EI3w8HDo6+tj5MiRatdbtmxZtttECIFZs2bB0dERRkZGqF69Ovbs2aN23ZSUFIwcOVLp9Th06FCkpqZmex+VKlWCt7c3Vq9ejYyMDKVlycnJ2LZtG9q1a4cSJUootlHbtm1hZWUFIyMjVKtWDevXr1f7+I4cOYKBAweiZMmSKFGiBDp06IB79+4p1nNycsKlS5dw7NgxxXCfd4ez5PQxbdiwAbVr14aFhQVMTEzg4uICf3//bB93Fh0dHfTq1QsrV66EXC5XtC9fvhwODg5o0qSJ2utt374d3t7eMDExgZmZGZo2bYqIiAjF8hMnTkAmkyE0NFTlulm/lp89e1bRlpfbVRNNw9feHUZ069YtRTEwadIkxfOS1dWtbhiSOu8Px5HL5Zg6dSoqVKgAY2NjFC9eHJ6enpg/f77S9T5lP5W1//vzzz9Rvnx5GBoaws3NDX///bfSerndd8yaNQtTp06Fs7MzDA0NceTIkRzlyZKfr+Pc7jcB4OLFi2jXrh0sLS1hZGSEqlWrKj4nsmQNrwgNDcW4ceNQunRpmJubo0mTJrh69arSutHR0WjdurXiOStdujRatWqFf//9V7GOtj4vs4btxsXFoXnz5ihWrBjs7Owwc+ZMAMDp06fx1VdfoVixYihfvrzK487Ja+ND7xEgZ69jTe8ldUNbsoZCHz9+HD4+PjAxMVG8Ng4fPowGDRqgRIkSMDY2RtmyZdGxY0eV7wfvy9rWW7ZsgaenJ4yMjODi4oIFCxYorZeWloYRI0agatWqsLCwgJWVFby9vbFt2zaV28zuO4i6+1e3/3/x4gWKFy+O7777TuU6t27dgq6uruLHhKxteODAAfTt2xdWVlYoVqwY2rRpg5s3b6pc/+DBg2jcuDHMzc1hYmKCOnXq4NChQ9lupyxxcXFo0aIFTExMULJkSQwYMADPnz/P0XVfv36NqVOnomLFijA0NIS1tTX69u2rVDh8/fXXcHR0VPocylK7dm1Ur15dcVkIgeDgYFStWlWxP+3QoQOuX7+udL2s183Zs2dRt25dxX5l5syZKveT0/1U1nO8YsUKxf7cy8sLp0+fhhACv/76q+J7VqNGjVQyATl7HrK+Z126dAndunWDhYUFbG1t4e/vj2fPninlSU1NxcqVKxWvo1wN0xa5sGLFCgFAnD17VsyfP18AEIcOHVIsr1+/vqhcubLislwuF82bNxd6enpi/PjxYv/+/WL27NmiWLFiolq1aiItLU2xrqOjoyhTpoxwd3cXoaGhYvfu3aJ27dpCX19fTJgwQdSpU0ds3rxZbNmyRZQvX17Y2tqKly9fKq7fu3dvYWBgIMqWLSumTZsm9u/fLyZOnCj09PRE69atlR4HAGFvby88PT3FunXrxOHDh8XFixfFpUuXhIWFhfDw8BCrVq0S+/fvFyNGjBA6Ojpi4sSJ2W4buVwuGjZsKAwNDRX3HxQUJFxcXAQAERQUpFj3U+5n+vTpAoDYs2dPtutliYuLEwMHDhR///23OHr0qNi5c6cICAgQOjo64siRI4r14uPjBQDh5OQkWrRoIbZu3Sq2bt0qPDw8hKWlpUhOTlasu2HDBjFhwgSxZcsWcezYMfH333+L+vXrC2tra/Hw4UPFekFBQULdSyzrdRQfH69omzlzpgAgtm3bJoQQ4uLFi8LExET07Nnzg48x634CAgLEnj17xJIlS4S9vb0oVaqUqF+/vmK91NRUUbVqVVGyZEkxZ84ccfDgQTF//nxhYWEhGjVqJORyebb3s3TpUgFAbN26Van9jz/+EADE3r17hRBCHD58WBgYGIi6deuKsLAwsXfvXtGnTx8BQKxYsUJlO7i4uIgffvhB7Nu3TyxdulRYWlqKhg0bKtY7d+6ccHFxEdWqVRMREREiIiJCnDt3LleP6dSpU0Imk4muXbuK3bt3i8OHD4sVK1YIPz+/D25fR0dH0apVK3H9+nUhk8nE7t27hRBCZGRkCHt7ezFhwgSxYcMGAUDpNbV27VoBQDRr1kxs3bpVhIWFiRo1aggDAwNx4sQJxXrVqlUTderUUbnfmjVripo1ayou5/V21aR+/fpKr5ssvXv3Fo6OjkIIIdLS0sTevXsVr7us5+X69etKGd59jb97/Xe3be/evRWXZ8yYIXR1dUVQUJA4dOiQ2Lt3r5g3b57SfuFT9h9CvN3/OTg4CDc3NxEaGiq2b98uWrRoIQCIDRs2KNbL7b7D3t5eNGzYUGzcuFHs379f6bG/7/3Hnd+v44/
2024-02-11 11:47:58 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Nombre de Canaux de Ventes Moyen utilisé par les Consommateurs par type d'évènement\n",
"\n",
"avg_supp_event = customer.groupby('name_event_types')['nb_suppliers'].mean()\n",
"avg_supp_event.plot(kind='bar')\n",
"plt.xlabel(\"Type d'évènement\")\n",
"plt.ylabel('Nombre de Canaux de Ventes Moyen')\n",
"plt.title(\"Nombre de Canaux de Ventes Moyen utilisé par les Consommateurs par type d'évènement\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 25,
2024-02-11 11:47:58 +01:00
"id": "49d5fd2d-9bc1-43ac-9270-1efd73759854",
"metadata": {},
"outputs": [
{
"data": {
2024-02-13 23:41:13 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtIAAAJICAYAAABMlwOPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyfElEQVR4nOzdd1hT1/8H8HcYYYnIEBAH4kJR3IpiKzhwUupuxSIutGpFqtbRqjhR0aqtVsVtXVj3LAWtoqi4927rFkSRIcoM5/eHP+7XCCqJQQK+X8/j0+bck+Sd5HLz4XDuuTIhhAAREREREalEp7ADEBEREREVRSykiYiIiIjUwEKaiIiIiEgNLKSJiIiIiNTAQpqIiIiISA0spImIiIiI1MBCmoiIiIhIDSykiYiIiIjUwEKaiIiIiEgNLKSJiIqBlJQUVK9eHd27d0d2dnZhxyEqth4+fAhra2uMHDlS7cd48OABypcv/0GPQdqBhXQxtXr1ashkMhgaGuLu3bu5tru7u6NWrVqFkAzo06cPSpQoUSjPPWnSJMhksvf+c3d3f+9jHTt2DJMmTUJiYuIH51FXnz59ULFiRbXv/6Z9+/Zh0qRJeW6rWLEi+vTpo9Lj3blzBzKZDHPmzPnwcAA2bNiA+fPna+SxCkJB/FwFBQVhx44d7+3n5+cHGxsbrFu3Djo6Rf/Q/iHHiatXr2LSpEm4c+eOZkMVkJcvX2LSpEk4dOhQYUfJF3d393wdR992LNEmeR1DZTIZVq9enWf/rKwsfP3112jZsuUHHdfKlSuHP//8EytXrsS2bdvUfpzioKj9vL5Jr7ADUMFKT0/H+PHjsXbt2sKOohUGDBiAdu3aSbdjYmLQpUsXDBs2DN7e3lJ7yZIl3/tYx44dw+TJk9GnTx+UKlWqIOJ+dPv27cNvv/2W5xfg9u3b8/W+FKQNGzbg8uXLCAgIKNQcH1NQUBC6deuGTp06vbXPb7/9hosXL+Lo0aMwMDD4eOG01NWrVzF58mS4u7tr9BfNgvLy5UtMnjwZAPL1S3xhW7RoEZKTk6Xbe/fuxbRp07Bq1SpUr15dai9XrlxhxCtQ48aNg56eHn7//fcPGgQBgFq1amH79u3o0aMHateujSpVqmgoZdFS1H5e38RCuphr164dNmzYgFGjRqFOnTqFHeeDCSGQlpYGIyMjte5frlw5pYN7zm/AFSpUQJMmTTQRsdiqV69eYUegtxg6dCiGDh1a2DFUlpqaqvbPcmF4+fIljI2NCzvGR5OamgpDQ8NcBaOTk5PS7evXrwN4VRg2bNjwo+UrDLNnz9bo47m7uyMuLk6jj0kfV9H/+x+90+jRo2FpaYkxY8a8t29aWhrGjRsHBwcHyOVylC1bFkOHDs01daFixYrw9PTEnj17UK9ePRgZGaFGjRrYs2cPgFfTSmrUqAETExM0btwYp0+fzvP5rly5glatWsHExASlS5fGd999h5cvXyr1kclk+O6777BkyRLUqFEDBgYGWLNmDQDg1q1b8Pb2hrW1NQwMDFCjRg389ttvarxLue3atQtNmzaFsbExTE1N4eHhgePHj0vbJ02ahB9++AEA4ODgIP0pM+dPs5s2bUKbNm1QpkwZ6f0ZO3YsXrx4oXam1atXw9HRUXqtv//+e579MjIyMG3aNFSvXh0GBgYoXbo0+vbtiydPnrzz8fv06SO9f6//eTbnl428pnYkJiZi5MiRqFSpEgwMDGBtbY0OHTpIX6x5yczMhK+vL0qUKCHtM0IILFq0CHXr1oWRkRHMzc3RrVs3/Pfff9L93N3dsXfvXty9e1cpX47FixejTp06KFGiBExNTVG9enX8+OOP73zNADB58mS4uLjAwsICJUuWRP369bFixQoIIXL13bBhA5o2bYoSJUqgRIkSqFu3LlasWJGr36lTp/D555/D2NgYlSpVwsyZM3PNW05OTsaoUaOUft4CAgKU9hGZTIYXL15gzZo1eU47io2NxaBBg1CuXDnI5XI4ODhg8uTJyMrKUnoudd+b993vbVOTcqaWvf6n2pzjxrZt21CvXj0YGhpKo7D5lfMYYWFhqF+/PoyMjFC9enWsXLlS6bm7d+8OAGjRooX0vr3+p/r9+/ejVatWKFmyJIyNjdGsWTMcOHBA6blyXtvZs2fRrVs3mJubo3LlyvnOkeN9n9GdO3dQunRpAK/2xZy875pGdejQIchkMqxbtw4jRoyAra0tjIyM4ObmhnPnzin1PX36NL7++mtUrFgRRkZGqFixInr27Jlryl/OZxYeHo5+/fqhdOnSMDY2Rnp6+ns+ldzWrl0LmUymdMzMMWXKFOjr6+PRo0cA/jcd6siRI2jSpAmMjIxQtmxZTJgwAQqFQum+6h7bXn+N+TmG5uV93zdPnjyBXC7HhAkTct33+vXrkMlk+PXXX6W2N/eLihUrIjAwUOln9/WpcXPnzoWDgwNKlCiBpk2bIjo6OtfznD59Gl5eXrCwsIChoSHq1auHP/74I9d7IJPJ8Pfff8PPzw+WlpYoWbIkevfujRcvXiA2NhY9evRAqVKlUKZMGYwaNQqZmZlKj5Hfz0ETP6/nzp2Dp6en9L7b2dmhY8eOePDgwfs+so9HULG0atUqAUCcOnVK/PLLLwKAOHDggLTdzc1N1KxZU7qdnZ0t2rZtK/T09MSECRNEeHi4mDNnjjAxMRH16tUTaWlpUl97e3tRrlw5UatWLbFx40axb98+4eLiIvT19cXEiRNFs2bNxLZt28T27dtFtWrVhI2NjXj58qV0f19fXyGXy0WFChXE9OnTRXh4uJg0aZLQ09MTnp6eSq8DgChbtqyoXbu22LBhg/j777/F5cuXxZUrV4SZmZlwdnYWv//+uwgPDxcjR44UOjo6YtKkSfl+n27fvi0AiNmzZ0tt69evFwBEmzZtxI4dO8SmTZtEgwYNhFwuF0eOHBFCCHH//n0xbNgwAUBs27ZNHD9+XBw/flwkJSUJIYSYOnWqmDdvnti7d684dOiQWLJkiXBwcBAtWrRQev7AwECRnx/DnM/zyy+/FLt37xbr1q0TVapUEeXLlxf29vZSP4VCIdq1aydMTEzE5MmTRUREhFi+fLkoW7ascHJyUvoc3vTPP/+Ibt26CQDS6zl+/Lj02dvb2wtfX1+pf3JysqhZs6YwMTERU6ZMEX/99ZfYunWrGD58uPj777/zfH8TEhJEixYthK2trTh9+rT0WH5+fkJfX1+MHDlShIWFiQ0bNojq1asLGxsbERsbK4QQ4sqVK6JZs2bC1tZWKZ8QQmzcuFEAEMOGDRPh4eFi//79YsmSJcLf3/+9722fPn3EihUrREREhIiIiBBTp04VRkZGYvLkyUr9JkyYIACILl26iM2bN4vw8HAxd+5cMWHCBKmPm5ubsLS0FFWrVhVLliwRERERYsiQIQKAWLNmjdTvxYsXom7dusLKykrMnTtX7N+/X/zyyy/CzMxMtGzZUmRnZwshhDh+/LgwMjISHTp0kF7vlStXhBBCxMTESJ9/SEiI2L9/v5g6daowMDAQffr0kZ5L3fcmP/d72/6bs7/evn1barO3txdlypQRlSpVEitXrhQHDx4UJ0+efOvz+/r6ChMTE6W2nGOPk5OT+P3338Vff/0lunfvLgCIyMhIIYQQcXFxIigoSAAQv/32m/S+xcXFCSGEWLt2rZDJZKJTp05i27ZtYvfu3cLT01Po6uqK/fv353pt9vb2YsyYMSIiIkLs2LEj3zny+xmlpaWJsLAwAUD0799fyvvPP/+89b05ePCgACDKly+f65hQsmRJ8e+//0p9N2/eLCZOnCi2b98uIiMjRWhoqHBzcxOlS5cWT548yfWZlS1bVgwcOFD8+eefYsuWLSIrK+utOd6876lTp4QQQqSnpwtbW1vRq1cvpX6ZmZnCzs5OdO/eXWrL+Zmxs7MTv/76q/jrr7+Ev7+/ACCGDh0q9fuQY9vrGd93DM1Lfr9vOnfuLMqXLy8UCoXS/UePHi3kcrl4+vSpECLv/WLy5MlCLpcLHx8f6X45x8+KFSuKdu3aiR07dogdO3YIZ2dnYW5uLhITE6W
2024-02-11 11:47:58 +01:00
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Nombre Total de tickets achetés sur Internet par Type d'évènements\n",
"\n",
"nb_tickets_internet = customer.groupby('name_event_types')['nb_tickets_internet'].sum()\n",
"nb_tickets_internet.plot(kind='bar', figsize=(8, 5))\n",
"plt.xlabel(\"Type d'évènements\")\n",
"plt.ylabel('Nombre Total de tickets achetés sur Internet')\n",
"plt.title(\"Nombre Total de tickets achetés sur Internet par Type d'évènements\")\n",
"plt.xticks(rotation=45)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc071992-cf4d-4b9f-9c3b-3f0e98e20eff",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "4f9561a9-6a94-434e-b8e7-9b708f5b5529",
"metadata": {},
"source": [
"# 3 - Caractéristiques Démographiques (peu exploitable)"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 26,
2024-02-11 11:47:58 +01:00
"id": "e50e2583-4b8f-478e-87ac-591dde200af8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'birthdate', 'street_id', 'is_partner', 'gender',\n",
" 'is_email_true', 'opt_in', 'structure_id', 'profession', 'language',\n",
" 'mcp_contact_id', 'last_buying_date', 'max_price', 'ticket_sum',\n",
" 'average_price', 'fidelity', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'age', 'tenant_id',\n",
2024-02-13 23:41:13 +01:00
" 'event_type_id', 'nb_tickets', 'nb_purchases', 'total_amount',\n",
" 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n",
" 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
" 'name_event_types', 'avg_amount', 'nb_campaigns', 'nb_campaigns_opened',\n",
" 'time_to_open'],\n",
2024-02-11 11:47:58 +01:00
" dtype='object')"
]
},
2024-02-19 23:11:28 +01:00
"execution_count": 26,
2024-02-11 11:47:58 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer.columns"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 27,
2024-02-11 11:47:58 +01:00
"id": "c724a315-9fe8-4874-be8f-a8115b17b5e2",
"metadata": {},
"outputs": [],
"source": [
"def percent_of_na(df, column):\n",
" na_percentage = df[column].isna().mean() * 100\n",
" non_na_percentage = 100 - na_percentage\n",
" \n",
" labels = ['Valeurs Manquantes', 'Non-Valeurs Manquantes']\n",
" sizes = [na_percentage, non_na_percentage]\n",
" colors = ['#ff9999','#66b3ff']\n",
" explode = (0.1, 0)\n",
"\n",
" plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)\n",
" plt.axis('equal') \n",
" plt.title('Pourcentage de Valeurs Manquantes : {}'.format(column))\n",
" plt.show()"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 28,
2024-02-11 11:47:58 +01:00
"id": "58af5dcb-673e-4f4d-ad5c-f66ce1e8a22c",
"metadata": {},
"outputs": [
{
"data": {
2024-02-13 23:41:13 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAGZCAYAAACt2rSnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABeR0lEQVR4nO3dd3wUdf7H8demVyCBFAgJPfSQIL2HckGKCqKcIl1PBdspotypoGLDH5az4akU9Q48FVBQEKQJUpVu6FU6AUIJhLT5/TEmZkkC6ZPdvJ+PRx6ws7Mzn91sNu985zufsRmGYSAiIiIiheJidQEiIiIijkxhSkRERKQIFKZEREREikBhSkRERKQIFKZEREREikBhSkRERKQIFKZEREREikBhSkRERKQIFKZEREREikBhyglNnz4dm82W9eXm5kb16tUZPnw4R48etbq8UvXyyy8zd+5cq8sotIMHD2Kz2Zg+fXqRt/XNN99gs9mYMmVKnussXrwYm83GG2+8ke/tDhs2jJo1axa5vtK2fPnyrJ+RvF7frl27YrPZHPL5lYTVq1czYcIEEhMTrS6lTFmyZAktWrTA19cXm81myWeOo/4cOguFKSc2bdo01qxZw+LFi7nvvvuYOXMmHTt2JCkpyerSSo2jh6ni1Lt3b0JDQ5k6dWqe60ybNg13d3cGDx5cipVZy9/fn08++STH8gMHDrB8+XIqVKhgQVVl0+rVq3n++ecVprIxDIM777wTd3d3vv32W9asWUPnzp1LvY5nn32WOXPmlPp+xaQw5cSaNGlCmzZtiI2NZfz48YwdO5YDBw6Uari4cuUKuvxj2eDm5saQIUPYsGED27dvz3F/YmIic+bM4ZZbbiEoKMiCCgsmNTWVtLS0Im9n4MCBrFq1ij179tgtnzp1KmFhYbRv377I+xDHYRgGV65cyff6x44d4+zZs/Tr149u3brRpk0bAgICSrDC3NWpU4eYmJhS36+YFKbKkTZt2gBw6NAhAJKTkxk3bhy1atXCw8ODsLAwRo8eneOvTpvNxoQJE3Jsr2bNmgwbNizrdubhxUWLFjFixAiCgoLw8fHh6tWrAPz3v/+lbdu2+Pn54efnR3R0dI4RgR9//JFu3bpRoUIFfHx8aN++PUuWLLFbZ8KECdhsNn777TfuuusuKlasSEhICCNGjOD8+fN2dSclJTFjxoyswzldunQB4PTp04waNYpGjRrh5+dHcHAwXbt2ZeXKlTme55EjRxgwYAD+/v5UqlSJQYMGsWHDhlwPD/3yyy/ccsstBAYG4uXlRUxMDP/73//y/J5kd+zYMe688078/f2pWLEiAwcO5MSJE7muW9j9jBw5EjBHoK41c+ZMkpOTGTFiBADvvfcenTp1Ijg4GF9fX5o2bcqkSZNITU294X4Mw+D9998nOjoab29vAgICGDBgAPv377db79r3UKYuXbpkfa/gz0Nyn332GU888QRhYWF4enqyd+9eLl++zJgxY6hVqxZeXl4EBgbSokULZs6cecM6AXr06EF4eLjdiF1GRgYzZsxg6NChuLjk/JjM72vTpUsXmjRpwoYNG+jYsSM+Pj7Url2bV199lYyMDLt1d+7cSc+ePfHx8aFKlSo88MADzJs3D5vNxvLlywv8miUnJ/PEE08QHR1NxYoVCQwMpG3btnzzzTc5Hmuz2XjooYf47LPPaNiwIT4+PjRr1oz58+dnrTNhwgSefPJJAGrVqpX1M5W9ti+++IK2bdvi6+uLn58fcXFxbNq0yW5f+/fv569//SvVqlXD09OTkJAQunXrxubNm3PUVVjDhg3Dz8+P3377jW7duuHr60tQUBAPPfQQly9fzvW5T5kyhYYNG+Lp6cmMGTMAWLVqFd26dcPf3x8fHx/atWvHd999Z/eaVK9eHYCnnnoqxyHhPXv2cPfddxMcHIynpycNGzbkvffes9t/RkYGEydOpH79+nh7e1OpUiWioqJ4++23s9Y5ffo0f/vb3wgPD8fT05OgoCDat2/Pjz/+aPecrz3Ml9/P+Jo1a9KnTx8WLlxI8+bN8fb2pkGDBtcdxRZ7blYXIKVn7969AAQFBWEYBrfddhtLlixh3LhxdOzYka1btzJ+/HjWrFnDmjVr8PT0LNR+RowYQe/evfnss89ISkrC3d2d5557jhdffJH+/fvzxBNPULFiRbZv354V7AA+//xzhgwZwq233sqMGTNwd3fnww8/JC4ujh9++IFu3brZ7ef2229n4MCBjBw5km3btjFu3DiArA+ANWvW0LVrV2JjY3n22WcBsg7ZnD17FoDx48cTGhrKpUuXmDNnDl26dGHJkiVZv5SSkpKIjY3l7NmzvPbaa9StW5eFCxcycODAHM972bJl9OzZk9atWzNlyhQqVqzIrFmzGDhwIJcvX871F2CmK1eu0L17d44dO8Yrr7xCZGQk3333XbHvJzIykg4dOvD555/z6quv4u7unnXftGnTCAsLIy4uDoB9+/Zx9913Z30Qb9myhZdeeomdO3fe8EP2/vvvZ/r06TzyyCO89tprnD17lhdeeIF27dqxZcsWQkJCrvv4vIwbN462bdsyZcoUXFxcCA4O5vHHH+ezzz5j4sSJxMTEkJSUxPbt2zlz5ky+tuni4sKwYcP45JNPmDhxIq6urixatIgjR44wfPhwHn300RyPKchrc+LECQYNGsQTTzzB+PHjmTNnDuPGjaNatWoMGTIEgJMnT9K5c2fc3d15//33CQkJ4T//+Q8PPfRQoV4ngKtXr3L27FnGjBlDWFgYKSkp/Pjjj/Tv359p06Zl7TvTd999x4YNG3jhhRfw8/Nj0qRJ9OvXj127dlG7dm3uvfdezp49yzvvvMPs2bOpWrUqAI0aNQLMQ+rPPPMMw4cP55lnniElJYXXX3+djh07sn79+qz1evXqRXp6OpMmTSIiIoKEhARWr159w0OHBw8epFatWgwdOjRfcwhTU1Pp1asX999/P08//TSrV69m4sSJHDp0iHnz5tmtO3fuXFauXMlzzz1HaGgowcHBrFixgh49ehAVFcUnn3yCp6cn77//Pn379mXmzJkMHDiQe++9l2bNmtG/f38efvhh7r777qzPzfj4eNq1a0dERASTJ08mNDSUH374gUceeYSEhATGjx8PwKRJk5gwYQLPPPMMnTp1IjU1lZ07d9q9HoMHD2bjxo289NJLREZGkpiYyMaNG6/7Hi/oZ/yWLVt44oknePrppwkJCeHjjz9m5MiR1K1bl06dOt3w9S73DHE606ZNMwBj7dq1RmpqqnHx4kVj/vz5RlBQkOHv72+cOHHCWLhwoQEYkyZNsnvsF198YQDGv//976xlgDF+/Pgc+6lRo4YxdOjQHPsdMmSI3Xr79+83XF1djUGDBuVZc1JSkhEYGGj07dvXbnl6errRrFkzo1WrVlnLxo8fn2vto0aNMry8vIyMjIysZb6+vnY15iUtLc1ITU01unXrZvTr1y9r+XvvvWcAxoIFC+zWv//++w3AmDZtWtayBg0aGDExMUZqaqrdun369DGqVq1qpKen57n/Dz74wACMb775xm75fffdV6z7MYw/v0+zZ8/OWrZ9+3YDMP75z3/m+pj09HQjNTXV+PTTTw1XV1fj7NmzWfcNHTrUqFGjRtbtNWvWGIAxefJku238/vvvhre3tzF27NisZde+hzJ17tzZ6Ny5c9btZcuWGYDRqVOnHOs2adLEuO222677nHOTuc0vv/zS2L9/v2Gz2Yz58+cbhmEYd9xxh9GlSxfDMAyjd+/eds/vWtd7bTp37mwAxrp16+we06hRIyMuLi7r9lNPPWXYbDZj8+bNduv16NHDAIxly5ZlLcvva3atzPf4yJEjjZiYGLv7ACMkJMS4cOFC1rITJ04YLi4uxiuvvJK17PXXXzcA48CBA3aPP3z4sOHm5mY8/PDDdssvXrxohIaGGnfeeadhGIaRkJBgAMZbb72VZ515OXjwoOHq6mqMGDHihusOHTrUAIy3337bbvlLL71kAMaqVauylgFGxYoV7b5vhmEYbdq0MYKDg42LFy9mLUtLSzOaNGliVK9
2024-02-11 11:47:58 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"percent_of_na(customer, 'profession')"
]
},
{
"cell_type": "code",
2024-02-19 23:11:28 +01:00
"execution_count": 29,
2024-02-11 11:47:58 +01:00
"id": "cc3437f7-8b36-4398-9da6-ff15e8e4c8d7",
"metadata": {},
"outputs": [
{
"data": {
2024-02-13 23:41:13 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAm8AAAGZCAYAAADfFdYRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABbwUlEQVR4nO3dd1gUV9sG8HtZll6kN+kKdgG7KPZgjzXGGLEl0WhiisbyJpaoiRGj6VHfvIotscX2xRa7SZSIJVawImIBREBFOux8f4xsXJqLArPl/l3XXsnOzs48s+Dh3jlzzsgEQRBARERERDrBSOoCiIiIiEhzDG9EREREOoThjYiIiEiHMLwRERER6RCGNyIiIiIdwvBGREREpEMY3oiIiIh0CMMbERERkQ5heCMiIiLSIQxvVKGVK1dCJpOpHsbGxqhduzZGjRqFO3fuSF1ejfr888+xbds2qct4bgkJCZDJZFi5cuULb2v79u2QyWRYunRpuevs27cPMpkMixcv1ni7I0eOhI+PzwvXV9MOHz6s+jdS3ufbuXNnyGQynTy+6nDs2DHMnj0bDx48kLqUCs2ePRsymUzqMojUMLyRRqKiohAdHY19+/bhzTffxLp169C+fXtkZWVJXVqN0fXwVpV69eoFV1dXrFixotx1oqKioFAoMHz48BqsTFrW1tZYvnx5qeU3btzA4cOHYWNjI0FV2unYsWP49NNPtT68EWkjhjfSSKNGjdC6dWt06tQJs2bNwpQpU3Djxo0aDTM5OTngrXi1g7GxMSIiInDixAlcuHCh1OsPHjzA1q1b0bdvXzg5OUlQYeUUFBSgsLDwhbczZMgQ/PXXX7h69ara8hUrVsDDwwOhoaEvvA8iIoY3ei6tW7cGANy8eRMAkJubi+nTp8PX1xcmJibw8PDAhAkTSn2rlslkmD17dqnt+fj4YOTIkarnxd21e/fuxejRo+Hk5AQLCwvk5eUBAH755Re0adMGVlZWsLKyQlBQUKkzHvv370eXLl1gY2MDCwsLhIaG4sCBA2rrFHeJXLx4EUOHDoWtrS1cXFwwevRoPHz4UK3urKwsrFq1StU91rFjRwBAamoqxo8fjwYNGsDKygrOzs7o3Lkz/vzzz1LHefv2bQwaNAjW1taoVasWhg0bhhMnTpTZ3Xby5En07dsX9vb2MDMzQ3BwMDZu3Fjuz+Rpd+/exSuvvAJra2vY2tpiyJAhSE5OLnPd593PmDFjAIhn2Epat24dcnNzMXr0aADADz/8gLCwMDg7O8PS0hKNGzdGZGQkCgoKnrkfQRDw448/IigoCObm5rCzs8OgQYMQHx+vtl7J36FiHTt2VP2sgH+7ONesWYNJkybBw8MDpqamuHbtGrKzszF58mT4+vrCzMwM9vb2aN68OdatW/fMOgGgW7du8PT0VDsjqVQqsWrVKowYMQJGRqWbXE0/m44dO6JRo0Y4ceIE2rdvDwsLC/j5+eGLL76AUqlUW/fSpUvo3r07LCws4OjoiHHjxuG3336DTCbD4cOHK/2Z5ebmYtKkSQgKCoKtrS3s7e3Rpk0bbN++vdR7ZTIZ3nnnHaxZswb169eHhYUFmjZtih07dqjWmT17Nj766CMAgK+vr+rf1NO1bdiwAW3atIGlpSWsrKwQHh6Of/75R21f8fHxePXVV+Hu7g5TU1O4uLigS5cuOHPmTKm6qtKGDRvw0ksvwc3NDebm5qhfvz6mTZtWqidi5MiRsLKywrVr19CzZ09YWVnB09MTkyZNUrVlxTRtG0r+bJ7eV8ku+U8//RStWrWCvb09bGxsEBISguXLl5f6EpyXl4dJkybB1dUVFhYWCAsLw6lTp8r8/UhOTsbYsWNRu3ZtmJiYwNfXF59++mmVfPkhzRlLXQDppmvXrgEAnJycIAgC+vXrhwMHDmD69Olo3749zp07h1mzZiE6OhrR0dEwNTV9rv2MHj0avXr1wpo1a5CVlQWFQoGZM2di7ty5GDBgACZNmgRbW1tcuHBBFSQBYO3atYiIiMDLL7+MVatWQaFQYNmyZQgPD8fvv/+OLl26qO1n4MCBGDJkCMaMGYPz589j+vTpAKD6IxwdHY3OnTujU6dOmDFjBgCousDS09MBALNmzYKrqyseP36MrVu3omPHjjhw4ICqoc3KykKnTp2Qnp6OBQsWoE6dOtizZw+GDBlS6rgPHTqE7t27o1WrVli6dClsbW2xfv16DBkyBNnZ2WX+wS2Wk5ODrl274u7du5g/fz4CAgKwc+fOKt9PQEAA2rVrh7Vr1+KLL76AQqFQvRYVFQUPDw+Eh4cDAK5fv47XXntNFe7Pnj2Lzz77DJcuXaqw6xUAxo4di5UrV2LixIlYsGAB0tPTMWfOHLRt2xZnz56Fi4tLhe8vz/Tp09GmTRssXboURkZGcHZ2xocffog1a9Zg3rx5CA4ORlZWFi5cuIC0tDSNtmlkZISRI0di+fLlmDdvHuRyOfbu3Yvbt29j1KhReO+990q9pzKfTXJyMoYNG4ZJkyZh1qxZ2Lp1K6ZPnw53d3dEREQAAFJSUtChQwcoFAr8+OOPcHFxwc8//4x33nnnuT4nQPzjnp6ejsmTJ8PDwwP5+fnYv38/BgwYgKioKNW+i+3cuRMnTpzAnDlzYGVlhcjISPTv3x+XL1+Gn58f3njjDaSnp+O7777Dli1b4ObmBgBo0KABAPEShU8++QSjRo3CJ598gvz8fCxcuBDt27dHTEyMar2ePXuiqKgIkZGR8PLywv3793Hs2LFndsUmJCTA19cXI0aMeK5rQK9evYqePXvi/fffh6WlJS5duoQFCxYgJiYGBw8eVFu3oKAAffv2xZgxYzBp0iT88ccfmDt3LmxtbTFz5kwAlWsbKiMhIQFjx46Fl5cXAODvv//Gu+++izt37qj2DQCjRo3Chg0bMGXKFHTu3BmxsbHo378/Hj16pLa95ORktGzZEkZGRpg5cyb8/f0RHR2NefPmISEhocwvclRNBKIKREVFCQCEv//+WygoKBAyMzOFHTt2CE5OToK1tbWQnJws7NmzRwAgREZGqr13w4YNAgDhv//9r2oZAGHWrFml9uPt7S2MGDGi1H4jIiLU1ouPjxfkcrkwbNiwcmvOysoS7O3thT59+qgtLyoqEpo2bSq0bNlStWzWrFll1j5+/HjBzMxMUCqVqmWWlpZqNZansLBQKCgoELp06SL0799ftfyHH34QAAi7d+9WW3/s2LECACEqKkq1rF69ekJwcLBQUFCgtm7v3r0FNzc3oaioqNz9L1myRAAgbN++XW35m2++WaX7EYR/f05btmxRLbtw4YIAQPj444/LfE9RUZFQUFAgrF69WpDL5UJ6errqtREjRgje3t6q59HR0QIAYdGiRWrbuHXrlmBubi5MmTJFtazk71CxDh06CB06dFA9P3TokABACAsLK7Vuo0aNhH79+lV4zGUp3uamTZuE+Ph4QSaTCTt27BAEQRAGDx4sdOzYURAEQejVq5fa8ZVU0WfToUMHAYBw/Phxtfc0aNBACA8PVz2fOnWqIJPJhDNnzqit161bNwGAcOjQIdUyTT+zkop/x8eMGSMEBwervQZAcHFxER49eqRalpycLBgZGQnz589XLVu4cKEAQLhx44ba+xMTEwVjY2Ph3XffVVuemZkpuLq6Cq+88oogCIJw//59AYDw9ddfl1tneRISEgS5XC6MHj36mesWtxHlUSqVQkFBgXDkyBEBgHD27FnVayNGjBAACBs3blR7T8+ePYXAwEDV88q0DeX9bEr+2ymp+Hdrzpw5goODg6ptu3jxogBAmDp1qtr669atEwCo/X6MHTtWsLKyEm7evKm27pdffikAEC5evFju/qlqsduUNNK6dWsoFApYW1ujd+/ecHV1xe7du+Hi4qL6plnyLM3gwYNhaWlZqquyMgYOHKj2fN++fSgqKsKECRPKfc+xY8eQnp6OESNGoLCwUPVQKpXo3r07Tpw4Uap7o2/fvmrPmzRpgtzcXNy7d0+jOpcuXYqQkBCYmZnB2NgYCoUCBw4cQFxcnGqdI0eOwNraGt27d1d779ChQ9W
2024-02-11 11:47:58 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"percent_of_na(customer, 'language')"
]
},
2024-02-10 22:05:09 +01:00
{
"cell_type": "code",
"execution_count": null,
2024-02-11 11:47:58 +01:00
"id": "c34164d2-5ab2-4923-a165-30dc5c070233",
2024-02-10 22:05:09 +01:00
"metadata": {},
"outputs": [],
"source": []
2024-02-10 19:37:38 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-02-13 18:45:33 +01:00
"version": "3.11.6"
2024-02-10 19:37:38 +01:00
}
},
"nbformat": 4,
"nbformat_minor": 5
}