BDC-team-1/1_Descriptive_Statistics_Museum.ipynb

3599 lines
557 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"id": "3f41343f-7205-41d9-89dd-88039e301413",
"metadata": {},
"source": [
"# Statistiques descriptives"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "abfaf341-7b35-4407-9133-d21336c04027",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.dates as mdates\n",
"from datetime import datetime, date, timedelta\n",
"from dateutil.relativedelta import relativedelta\n",
"import warnings"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7fb72fa3-7940-496f-ac78-c2837f65eefa",
"metadata": {},
"outputs": [],
"source": [
"# S3 filesystem handle; the endpoint host is read from the AWS_S3_ENDPOINT environment variable\n",
"S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
"fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c34e13f4-e043-43d6-ba8c-2e13d008647c",
"metadata": {},
"outputs": [],
"source": [
"# Import cleaning and merge functions\n",
"# The script is executed in the notebook namespace; the context manager\n",
"# closes the file handle as soon as its source has been read.\n",
"with open('0_KPI_functions.py') as kpi_file:\n",
"    exec(kpi_file.read())\n",
"\n",
"# Useful functions :\n",
"# display_databases(directory_path, file_name = ['customerplus_cleaned', 'target_information', 'campaigns_information', 'products_purchased_reduced'], datetime_col = None)\n",
"# campaigns_kpi_function(campaigns_information = None)\n",
"# tickets_kpi_function(tickets_information = None)\n",
"# customerplus_kpi_function(customerplus_clean = None)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "c60505f4-b95b-4c61-b842-26b27af7e280",
"metadata": {},
"outputs": [],
"source": [
"# Show every column when a DataFrame is displayed (no column limit)\n",
"pd.options.display.max_columns = None"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "aaffd291-2c88-44c8-a951-0ef1f8369ba3",
"metadata": {},
"outputs": [],
"source": [
"# Additional loader for the raw CSV tables stored in the bdc2324-data bucket\n",
"def load_dataset_2(directory_path, file_name):\n",
"    \"\"\"\n",
"    Read one CSV table through the notebook-level `fs` filesystem object.\n",
"\n",
"    The file is looked up at\n",
"    `bdc2324-data/<directory_path>/<directory_path><file_name>.csv`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str\n",
"        Folder name; it is also used as the prefix of the file name.\n",
"    file_name : str\n",
"        Table name, without the prefix and without the `.csv` extension.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The parsed table, without its `identifier` column when it has one.\n",
"    \"\"\"\n",
"    csv_key = \"/\".join([\"bdc2324-data\", directory_path, directory_path + file_name + \".csv\"])\n",
"    with fs.open(csv_key, mode=\"rb\") as csv_stream:\n",
"        table = pd.read_csv(csv_stream, sep=\",\")\n",
"\n",
"    # Nothing to remove when the table has no `identifier` column\n",
"    if 'identifier' not in table.columns:\n",
"        return table\n",
"    return table.drop(columns = 'identifier')"
]
},
{
"cell_type": "markdown",
"id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a",
"metadata": {},
"source": [
"# 0 - Spécificité de la company 101"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "f8a8dedc-2f67-407c-9bbf-f70d236fc783",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>sent_at</th>\n",
" <th>software</th>\n",
" <th>satisfaction</th>\n",
" <th>extra_field</th>\n",
" <th>customer_id</th>\n",
" <th>contribution_site_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>27228546</td>\n",
" <td>2023-10-07 14:00:28.593000+02:00</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>NaN</td>\n",
" <td>17622337</td>\n",
" <td>1</td>\n",
" <td>2023-10-08 04:47:36.310325+02:00</td>\n",
" <td>2023-10-08 04:47:36.310325+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13182836</td>\n",
" <td>2023-01-01 15:18:53.571000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2023-01-02 03:55:55.685969+01:00</td>\n",
" <td>2023-01-02 03:55:55.685969+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12150807</td>\n",
" <td>2022-12-09 19:54:44.599000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2022-12-10 03:59:59.208896+01:00</td>\n",
" <td>2022-12-10 03:59:59.208896+01:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>403</td>\n",
" <td>2021-11-19 14:32:00.627000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2022-05-13 10:59:25.378565+02:00</td>\n",
" <td>2022-05-13 10:59:25.378565+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>27228547</td>\n",
" <td>2023-10-07 14:00:16.811000+02:00</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" <td>17622338</td>\n",
" <td>1</td>\n",
" <td>2023-10-08 04:47:36.312124+02:00</td>\n",
" <td>2023-10-08 04:47:36.312124+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78853</th>\n",
" <td>18078312</td>\n",
" <td>2022-02-15 15:48:13.102000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1289</td>\n",
" <td>2023-04-08 03:50:13.854821+02:00</td>\n",
" <td>2023-04-08 03:50:13.854821+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78854</th>\n",
" <td>18078313</td>\n",
" <td>2022-02-15 15:43:40.708000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1289</td>\n",
" <td>2023-04-08 03:50:13.855243+02:00</td>\n",
" <td>2023-04-08 03:50:13.855243+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78855</th>\n",
" <td>18078314</td>\n",
" <td>2022-02-15 15:43:38.814000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1289</td>\n",
" <td>2023-04-08 03:50:13.855649+02:00</td>\n",
" <td>2023-04-08 03:50:13.855649+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78856</th>\n",
" <td>18078315</td>\n",
" <td>2022-02-15 15:42:31.720000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1289</td>\n",
" <td>2023-04-08 03:50:13.856084+02:00</td>\n",
" <td>2023-04-08 03:50:13.856084+02:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78857</th>\n",
" <td>18078316</td>\n",
" <td>2022-02-15 15:27:27.722000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1289</td>\n",
" <td>2023-04-08 03:50:13.856544+02:00</td>\n",
" <td>2023-04-08 03:50:13.856544+02:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>78858 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" id sent_at software satisfaction \\\n",
"0 27228546 2023-10-07 14:00:28.593000+02:00 NaN 9.0 \n",
"1 13182836 2023-01-01 15:18:53.571000+01:00 NaN NaN \n",
"2 12150807 2022-12-09 19:54:44.599000+01:00 NaN NaN \n",
"3 403 2021-11-19 14:32:00.627000+01:00 NaN NaN \n",
"4 27228547 2023-10-07 14:00:16.811000+02:00 NaN 7.0 \n",
"... ... ... ... ... \n",
"78853 18078312 2022-02-15 15:48:13.102000+01:00 NaN NaN \n",
"78854 18078313 2022-02-15 15:43:40.708000+01:00 NaN NaN \n",
"78855 18078314 2022-02-15 15:43:38.814000+01:00 NaN NaN \n",
"78856 18078315 2022-02-15 15:42:31.720000+01:00 NaN NaN \n",
"78857 18078316 2022-02-15 15:27:27.722000+01:00 NaN 7.0 \n",
"\n",
" extra_field customer_id contribution_site_id \\\n",
"0 NaN 17622337 1 \n",
"1 NaN 1 1 \n",
"2 NaN 1 2 \n",
"3 NaN 1 1 \n",
"4 NaN 17622338 1 \n",
"... ... ... ... \n",
"78853 NaN 1 1289 \n",
"78854 NaN 1 1289 \n",
"78855 NaN 1 1289 \n",
"78856 NaN 1 1289 \n",
"78857 NaN 1 1289 \n",
"\n",
" created_at updated_at \n",
"0 2023-10-08 04:47:36.310325+02:00 2023-10-08 04:47:36.310325+02:00 \n",
"1 2023-01-02 03:55:55.685969+01:00 2023-01-02 03:55:55.685969+01:00 \n",
"2 2022-12-10 03:59:59.208896+01:00 2022-12-10 03:59:59.208896+01:00 \n",
"3 2022-05-13 10:59:25.378565+02:00 2022-05-13 10:59:25.378565+02:00 \n",
"4 2023-10-08 04:47:36.312124+02:00 2023-10-08 04:47:36.312124+02:00 \n",
"... ... ... \n",
"78853 2023-04-08 03:50:13.854821+02:00 2023-04-08 03:50:13.854821+02:00 \n",
"78854 2023-04-08 03:50:13.855243+02:00 2023-04-08 03:50:13.855243+02:00 \n",
"78855 2023-04-08 03:50:13.855649+02:00 2023-04-08 03:50:13.855649+02:00 \n",
"78856 2023-04-08 03:50:13.856084+02:00 2023-04-08 03:50:13.856084+02:00 \n",
"78857 2023-04-08 03:50:13.856544+02:00 2023-04-08 03:50:13.856544+02:00 \n",
"\n",
"[78858 rows x 9 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Raw `contributions` table of company 101\n",
"company_number = '101'\n",
"facilities = load_dataset_2(directory_path=company_number, file_name=\"contributions\")\n",
"\n",
"# Bare last expression: rich display of the frame\n",
"facilities"
]
},
{
"cell_type": "markdown",
"id": "45d5261f-4d46-49cb-8582-dd2121122b05",
"metadata": {},
"source": [
"# 1 - Comportement d'achat"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "8917cc1b-4728-460c-8432-a633de7f039b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_1/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_2/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_3/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_4/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
"<string>:13: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_101/products_purchased_reduced_1.csv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
]
}
],
"source": [
"# Load the reduced purchases table of each company into a global named df<company>_tickets\n",
"for company_number in ('1', '2', '3', '4', '101'):\n",
"    nom_dataframe = f\"df{company_number}_tickets\"\n",
"    globals()[nom_dataframe] = display_databases(\n",
"        company_number,\n",
"        file_name='products_purchased_reduced',\n",
"        datetime_col=['purchase_date'],\n",
"    )\n",
"\n",
"    # A second purchases file is loaded for company 101 only\n",
"    if company_number != \"101\":\n",
"        continue\n",
"    df101_tickets_1 = display_databases(\n",
"        company_number,\n",
"        file_name='products_purchased_reduced_1',\n",
"        datetime_col=['purchase_date'],\n",
"    )"
]
},
{
"cell_type": "markdown",
"id": "3479960c-0d23-45f1-8fff-d87395205731",
"metadata": {},
"source": [
"## Outlier"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "9376af51-4320-44b6-8f30-1e1234371556",
"metadata": {},
"outputs": [],
"source": [
"def outlier_detection(directory_path = \"1\", coupure = 1):\n",
"    \"\"\"\n",
"    Plot how much of a company's total purchase amount comes from its top customers.\n",
"\n",
"    The tickets table is read from the notebook global `df<directory_path>_tickets`\n",
"    and passed through `tickets_kpi_function`; for company \"101\" the KPIs of the\n",
"    second table `df101_tickets_1` are appended. `total_amount` is then summed per\n",
"    `customer_id`, and a pie chart shows the `coupure` largest customers next to a\n",
"    single 'Autre' slice holding the sum of all remaining customers.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory_path : str\n",
"        Company identifier used to build the name of the global tickets frame.\n",
"    coupure : int\n",
"        Number of largest customers drawn as their own slice.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is displayed with `plt.show()`.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If the global `df<directory_path>_tickets` does not exist.\n",
"    \"\"\"\n",
"    # NOTE(review): assumes tickets_kpi_function returns a frame holding the\n",
"    # `customer_id` and `total_amount` columns - they are the only ones used here.\n",
"    nom_dataframe = 'df' + directory_path + '_tickets'\n",
"    df_tickets_kpi = tickets_kpi_function(globals()[nom_dataframe].copy())\n",
"\n",
"    if directory_path == \"101\":\n",
"        # Company 101 comes with a second tickets table: append its KPIs\n",
"        df_tickets_kpi_1 = tickets_kpi_function(df101_tickets_1.copy())\n",
"        df_tickets_kpi = pd.concat([df_tickets_kpi, df_tickets_kpi_1])\n",
"\n",
"    # Total amount per customer, largest first\n",
"    df_circulaire = (\n",
"        df_tickets_kpi.groupby('customer_id')['total_amount']\n",
"        .sum()\n",
"        .sort_values(ascending=False)\n",
"    )\n",
"\n",
"    # .iloc keeps the cut positional whatever the dtype of the customer_id index\n",
"    top = df_circulaire.iloc[:coupure]\n",
"    rest_sum = df_circulaire.iloc[coupure:].sum()\n",
"\n",
"    # The `coupure` largest customers plus one 'Autre' entry for everybody else\n",
"    new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
"\n",
"    # Percentages are computed by matplotlib (autopct), so no share column is needed\n",
"    plt.figure(figsize=(3, 3))\n",
"    plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
"    plt.axis('equal')  # keep the pie circular\n",
"    plt.title('Répartition des montants totaux')\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "73211efc-b79f-4235-a250-c0699ea277bf",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEWCAYAAAAtl/EzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABA5UlEQVR4nO3dd3QUVd8H8O/2lrbpvUISIAm9SBGDQgRCUYgCghSRDgJK0Ucpj2LhRcVHFEEjiKIiKl0wSJcaeklCSEgvpPdssuW+f8SsLElI283MbO7nHA5nd2dnfjs7+82UO/fyCCEEFEVRDOMzXQBFURRAw4iiKJagYURRFCvQMKIoihVoGFEUxQo0jCiKYgUaRhRFsQINI4qiWIGGEUVRrNBmYXTz5k3I5XJ8/vnnbbVIiqI4pFlhtH37dvB4PP0/oVAIFxcXTJgwAffu3WvwfaWlpRg/fjwWLlyIhQsXtrro1vjjjz+wZs2ael/z9vbGtGnT9I8zMzOxZs0aXL9+vc60a9asAY/HM02RLcTj8Rr8bO1FTEwM1qxZg+Tk5DZZ3vvvv4+9e/cabX7GqP9x2zirkWbYtm0bAUC2bdtGzp8/T06cOEHee+89IpPJiKOjIykoKKj3fREREeSll14iOp2uOYszifnz55OGPvbVq1dJQkKC/nF0dLT+8z4qLS2NnD9/3lRltggAsnr1aqbLYNTu3bsJAHLixIk2WZ5CoSBTp0412vyMUf/jtnE2E7YkwIKCgtCrVy8AwFNPPQWtVovVq1dj7969mD59ep3pf/nllxZGpfFUVFRALpc/dpru3bs3eX7u7u5wd3dvbVkURdVqTnLV7hlFR0cbPH/o0CECgHzwwQcGz0dHR5NRo0YRpVJJJBIJ6datG9m1a1e984yKiiLTpk0jSqWSyOVyEh4eThITEw2mjYqKIqNHjyZubm5EIpEQPz8/MmvWLJKbm2sw3erVqwkAcuXKFTJu3DhiY2NDnJ2dydSpUwmAOv+SkpIIIYR4eXnp/8qdOHGi3mlr9zxql/EwrVZLPvroIxIQEEDEYjFxcHAgU6ZMIWlpaQbTDR48mHTp0oVcunSJDBw4kMhkMuLj40M++OADotVqG/0eiouLycyZM4mtrS1RKBQkLCyM3L17t949o/j4eDJx4kTi4OBAxGIxCQwMJJs2bapT97vvvkv8/f2JVCol1tbWJDg4mGzcuPGxddSuo507d5Lly5cTZ2dnolAoSHh4OMnOziYlJSXk1VdfJXZ2dsTOzo5MmzaNlJaWGsyjsrKSrFy5knh7exORSERcXV3JvHnzSGFhocF0Xl5eZOTIkeTw4cOke/fuRCqVkoCAABIZGamfpnZbevRf7Z5tc7ef27dvkwkTJhArKyvi6OhIpk+fToqKivTT1beswYMHE0IIKS8vJ6+//jrx9vYmEomEKJVK0rNnT/Ljjz82uD4bq58QQiIjI0lISIh+nmPHjiUxMTH61xvbxjdt2kQGDRpEHBwciFwuJ0FBQeSjjz4i1dXVddZ3fXt8gwcP1n9GQgiZPXs2kUgk5PLly/rntFotGTJkCHF0dCSZmZkNft5HGSWMNm3aRACQ3377Tf/c8ePHiVgsJoMGDSK7du0iR44cIdOmTauzcmvn6eHhQWbMmEEOHz5Mtm7dShwdHYmHh4fBRrl582bywQcfkP3795NTp06R7777jnTt2pUEBAQYrMzajcnLy4usWLGCHD16lOzdu5ckJCSQ8ePHEwDk/Pnz+n8qlYoQYvgFFBcX62t7++239dPWBkt9YTRr1iwCgCxYsIAcOXKEfPXVV8TBwYF4eHgYbPCDBw8mdnZ2pGPHjuSrr74iR48eJfPmzSMAyHfffffY70Cn05HQ0FAikUjIunXrSFRUFFm9ejXx9fWtE0Z37tzRB8uOHTtIVFQUef311wmfzydr1qzRT/fBBx8QgUBAVq9eTY4dO0aOHDlCNm7caDBNfWrDyMvLi0ybNk
3/mS0sLEhoaCgZOnQoeeONN0hUVBT56KOPiEAgIAsXLjT4LGFhYUQoFJJ33nmHREVFkQ0bNhCFQkG6d++u/15qvxt3d3fSuXNnsmPHDvLnn3+SiIgIAoCcOnWKEEJITk4Oef/99wkA8sUXX+i/s5ycnBZtPwEBAWTVqlXk6NGj5JNPPiESiYRMnz5dP9358+eJTCYjI0aM0C/rzp07hJCaH6lcLieffPIJOXHiBDl48CD58MMPyeeff97g+mys/trXJk6cSA4dOkR27NhBfH19ibW1NYmPjyeEkEa38SVLlpDNmzeTI0eOkOPHj5NPP/2U2NvbG3yu2vXdlDCqrKwk3bp1I76+vvrf6qpVqwifzydRUVENftb6tCiMLly4QNRqNSktLSVHjhwhzs7O5MknnyRqtVo/bWBgIOnevbvBc4QQEh4eTlxcXPR7ALXzfO655wymO3v2LAFA3nvvvXpr0el0RK1Wk5SUFAKA7Nu3T/9a7ca0atWqOu973PH0o1/A484ZPRpGsbGxBACZN2+ewXQXL14kAMhbb72lf27w4MEEALl48aLBtJ07dyZhYWH11lbr8OHDBAD57LPPDJ5ft25dnTAKCwsj7u7upLi42GDaBQsWEKlUqj/HFx4eTrp16/bY5danNoxGjRpl8PzixYsJALJo0SKD58eOHUtsbW31j48cOUIAkPXr1xtMt2vXLgKAbN26Vf+cl5cXkUqlJCUlRf9cZWUlsbW1JbNnz9Y/19RzLk3Zfh6ta968eUQqlRqc+2zonFFQUBAZO3bsY2uoT0P1FxYW6oPvYampqUQikZBJkybpn2vqOSOtVkvUajXZsWMHEQgEBud8mxpGhBBy7949YmVlRcaOHUv++usvwufzydtvv934h31Eiy7t9+vXDyKRCJaWlnj22WehVCqxb98+CIU1p6ASEhIQFxeHl156CQCg0Wj0/0aMGIGsrCzcvXvXYJ6109bq378/vLy8cOLECf1zOTk5mDNnDjw8PCAUCiESieDl5QUAiI2NrVPnuHHjWvLxWqS2zoevxgFAnz590KlTJxw7dszgeWdnZ/Tp08fguZCQEKSkpDRpOY+ur0mTJhk8VqlUOHbsGJ577jnI5fI634FKpcKFCxf0Nd64cQPz5s3Dn3/+iZKSkqZ96H+Eh4cbPO7UqRMAYOTIkXWeLygoQFlZGQDg+PHjAOqus4iICCgUijrrrFu3bvD09NQ/lkql8Pf3b3Sd1Wru9jN69GiDxyEhIVCpVMjJyWl0WX369MHhw4excuVKnDx5EpWVlU2qsSHnz59HZWVlnXXl4eGBIUOG1FlXDbl27RpGjx4NOzs7CAQCiEQivPzyy9BqtYiPj29RbR06dMDXX3+NvXv3Ijw8HIMGDWrR1bwWhdGOHTsQHR2N48ePY/bs2YiNjcXEiRP1rz948AAA8MYbb0AkEhn8mzdvHgAgLy/PYJ7Ozs51luPs7Iz8/HwAgE6nw7Bhw/D7779j+fLlOHbsGC5duqT/QdX3Zbu4uLTk47VIbZ31LdPV1VX/ei07O7s600kkkkY32vz8fAiFwjrvf3T95efnQ6PR4PPPP6/zHYwYMQLAv9/Bm2++iQ0bNuDChQsYPnw47Ozs8PTTT+Py5cuNfOoatra2Bo/FYvFjn1epVAafxcHBwWA6Ho9n8N3Xauk6A1q2/Ty6PIlE0uC0j/rf//6HFStWYO/evQgNDYWtrS3Gjh372CYwj9Pc7as+qampGDRoEDIyMvDZZ5/hzJkziI6OxhdffAGgaZ+rISNHjoSTkxNUKhWWLl0KgUDQ7Hm06Gpap06d9FfTQkNDodVq8c033+DXX3/F+PHjYW9vD6BmI3/++efrnUdAQIDB4+zs7DrTZGdno0OHDgCA27dv48aNG9i+fTumTp2qnyYhIaHBOtuyHVDthpuVlVXnKltmZqZ+nRhjORqNBvn5+QY/lkfXn1KphEAgwJQpUzB//vx65+Xj4wMAEAqFWLp0KZYuXY
qioiL89ddfeOuttxAWFoa0tLRGr0K29rPk5uYaBBIhBNnZ2ejdu7fRltWS7ac1FAoF1q5di7Vr1+LBgwf6vaRRo0Y
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Company 1: the two largest customers against all the others\n",
"outlier_detection(\"1\", coupure=2)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "5c8e9bb7-a403-4898-b40b-47aa37237bc6",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>58201</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2020-09-03 13:11:25.569167+02:00</td>\n",
" <td>2023-03-04 13:27:42.761679+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-08 03:20:07</td>\n",
" <td>45.0</td>\n",
" <td>1254775</td>\n",
" <td>7.030122</td>\n",
" <td>330831</td>\n",
" <td>-67.790969</td>\n",
" <td>13.75153</td>\n",
" <td>1.956087</td>\n",
" <td>8821221.5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>641472</td>\n",
" <td>2013-06-10 12:37:58+02:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1311</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email street_id \\\n",
"58201 1 NaN NaN NaN NaN 2 \n",
"\n",
" created_at updated_at \\\n",
"58201 2020-09-03 13:11:25.569167+02:00 2023-03-04 13:27:42.761679+01:00 \n",
"\n",
" civility is_partner extra deleted_at reference gender \\\n",
"58201 NaN False NaN NaN NaN 2 \n",
"\n",
" is_email_true extra_field opt_in structure_id note profession \\\n",
"58201 True NaN False NaN NaN NaN \n",
"\n",
" language mcp_contact_id need_reload last_buying_date max_price \\\n",
"58201 NaN NaN False 2023-11-08 03:20:07 45.0 \n",
"\n",
" ticket_sum average_price fidelity average_purchase_delay \\\n",
"58201 1254775 7.030122 330831 -67.790969 \n",
"\n",
" average_price_basket average_ticket_basket total_price \\\n",
"58201 13.75153 1.956087 8821221.5 \n",
"\n",
" preferred_category preferred_supplier preferred_formula \\\n",
"58201 NaN NaN NaN \n",
"\n",
" purchase_count first_buying_date last_visiting_date zipcode \\\n",
"58201 641472 2013-06-10 12:37:58+02:00 NaN NaN \n",
"\n",
" country age tenant_id \n",
"58201 fr NaN 1311 "
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Row(s) of company 1's customersplus table whose id is 1\n",
"df = load_dataset_2(directory_path='1', file_name='customersplus')\n",
"df.loc[df['id'].eq(1)]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "4455b6b9-8395-47ea-b976-d98a2d3c782c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUUAAAESCAYAAABq/8cSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABEYklEQVR4nO3dd3gU5doG8Ht7y6Zseg+hhECAhCAICAGkd6U3KYoFhYOoNJWmHJSjHPzwSFEEUfRYDyhNIiAdCaRAaAkQkpAC6aRtsuX9/ohZsilkEzaZze7zu65csDPvzNw7u3ky5Z0ZHmOMgRBCCACAz3UAQgixJFQUCSGkCiqKhBBSBRVFQgipgooiIYRUQUWREEKqoKJICCFVUFEkhJAqqCgSQkgVLb4oXrp0CXK5HJs2beI6CiHEClhEUdy5cyd4PJ7hRygUwtPTE5MnT0ZiYmKd0xUWFmL8+PGYP38+5s+f34yJazpw4ABWrVpV67iAgADMmjXL8Do9PR2rVq1CbGxsjbarVq0Cj8drmpCNxOPx6nxvtuLq1atYtWoV7ty50yzL++c//4k9e/aYbX7myP+o77hVYRZgx44dDADbsWMHO3v2LDt27Bh7//33mUwmY25ubiw3N7fW6SZMmMCmTZvG9Hp9Myeu6dVXX2V1rc7o6Gh28+ZNw+uoqCjD+60uNTWVnT17tqliNgoAtnLlSq5jcOrHH39kANixY8eaZXkKhYLNnDnTbPMzR/5HfcetiZDDelxDSEgIunXrBgDo168fdDodVq5ciT179mD27Nk12v/www/NHbGGkpISyOXyR7YJCwszeX4+Pj7w8fF53FiEkMbiuioz9nBLMSoqymj4/v37GQC2bt06o+FRUVFs1KhRzMnJiUkkEhYaGsq+//77Wud5+PBhNmvWLObk5MTkcjkbOXIku3XrllHbw4cPs9GjRzNvb28mkUhY69at2YsvvsiysrKM2q1cuZIBYBcvXmTjxo1jjo6OzMPDg82cOZMBqPGTlJTEGGPM39/f8Ff/2LFjtbat3BKrXEZVOp2OffjhhywoKIiJxWLm6urKZsyYwVJTU43aRUREsI4dO7Lz58+zp556islkMtaqVSu2bt06ptPp6v0cCgoK2AsvvMBUKhVTKBRsyJAh7MaNG7VuKSYkJLApU6YwV1dXJhaLWfv27dmnn35aI/d7773H2rVrx6RSKXNwcGCdOnViGzdufGSOynW0e/dutnjxYubh4cEUCgUbOXIky8zMZA8ePGBz585lzs7OzNnZmc2aNYsVFhYazaO0tJQtXbqUBQQEMJFIxLy8vNi8efNYXl6eUTt/f382YsQIdvDgQRYWFsakUikLCgpi27dvN7Sp/C5V/6nc0m/o9yc+Pp5NnjyZ2dvbMzc3NzZ79myWn59vaFfbsiIiIhhjjBUXF7M33niDBQQEMIlEwpycnFh4eDj79ttv61yf9eVnjLHt27ezzp07G+Y5duxYdvXqVcP4+r7jn376KevTpw9zdXVlcrmchYSEsA8//JCVl5fXWN+1bQFHREQY3iNjjL300ktMIpGwCxcuGIbpdDo2YMAA5ubmxtLT0+t8v4/Loovip59+ygCwn3/+2TDs6NGjTCwWsz59+rDvv/+eHTp0iM2aNavGh1w5T19fXzZnzhx28OBBtm3bNubm5sZ8fX2Nfjk2b97M1q1bx3799Vd2/Phx9tVXX7EuXbqwoKAgow+18kvt7+/PlixZwiIjI9mePXvYzZs32fjx4xkAdvbsWcOPWq1mjBl/EQoKCgzZ3nnnHUPbygJXW1F88cUXGQD22muvsUOHDrEtW7YwV1dX5uvra/SLFxERwZydnVnbtm3Zli1bWGRkJJs3bx4DwL766qtHfgZ6vZ7179+fSSQStnbtWnb48GG2cuVKFhgYWKMoXrlyxVDgdu3axQ4fPszeeOMNxufz2apVqwzt1q1bxwQCAVu5ciU7cuQIO3ToENu4caNRm9
pUFkV/f382a9Ysw3u2s7Nj/fv3Z4MGDWJvvvkmO3z4MPvwww+ZQCBg8+fPN3ovQ4YMYUKhkL377rvs8OHD7KOPPmIKhYKFhYUZPpfKz8bHx4d16NCB7dq1i/3+++9swoQJDAA7fvw4Y4yx+/fvs3/+858MAPvPf/5j+Mzu37/fqO9PUFAQW7FiBYuMjGQbNmxgEomEzZ4929Du7NmzTCaTseHDhxuWdeXKFcZYRbGQy+Vsw4YN7NixY2zfvn3sgw8+YJs2bapzfdaXv3LclClT2P79+9muXbtYYGAgc3BwYAkJCYwxVu93/PXXX2ebN29mhw4dYkePHmX//ve/mYuLi9H7qlzfphTF0tJSFhoaygIDAw2/qytWrGB8Pp8dPny4zvdqDhZVFM+dO8c0Gg0rLCxkhw4dYh4eHqxv375Mo9EY2rZv356FhYUZDWOMsZEjRzJPT0/DFlHlPJ955hmjdqdPn2YA2Pvvv19rFr1ezzQaDUtOTmYA2N69ew3jKr/UK1asqDHdo463VP8iPOqYYvWieO3aNQaAzZs3z6jdX3/9xQCw5cuXG4ZFREQwAOyvv/4yatuhQwc2ZMiQWrNVOnjwIAPAPvnkE6Pha9eurVEUhwwZwnx8fFhBQYFR29dee41JpVLDMeCRI0ey0NDQRy63NpVFcdSoUUbDFy5cyACwBQsWGA0fO3YsU6lUhteHDh1iANj69euN2n3//fcMANu2bZthmL+/P5NKpSw5OdkwrLS0lKlUKvbSSy8Zhpl6TM6U70/1XPPmzWNSqdTo2HhdxxRDQkLY2LFjH5mhNnXlz8vLMxTgqlJSUphEImFTp041DDP1mKJOp2MajYbt2rWLCQQCo3MCphZFxhhLTExk9vb2bOzYseyPP/5gfD6fvfPOO/W/2cdkEWefKz355JMQiURQKpUYOnQonJycsHfvXgiFFYc+b968ievXr2PatGkAAK1Wa/gZPnw4MjIycOPGDaN5Vrat1KtXL/j7++PYsWOGYffv38fLL78MX19fCIVCiEQi+Pv7AwCuXbtWI+e4cePM+r4fpTJn1bPXANC9e3cEBwfjyJEjRsM9PDzQvXt3o2GdO3dGcnKyScupvr6mTp1q9FqtVuPIkSN45plnIJfLa3wGarUa586dM2SMi4vDvHnz8Pvvv+PBgwemvem/jRw50uh1cHAwAGDEiBE1hufm5qKoqAgAcPToUQA119mECROgUChqrLPQ0FD4+fkZXkulUrRr167edVapod+f0aNHG73u3Lkz1Go17t+/X++yunfvjoMHD2Lp0qX4888/UVpaalLGupw9exalpaU11pWvry8GDBhQY13VJSYmBqNHj4azszMEAgFEIhGee+456HQ6JCQkNCpbmzZt8Pnnn2PPnj0YOXIk+vTp0yxnvy2qKO7atQtRUVE4evQoXnrpJVy7dg1TpkwxjL937x4A4M0334RIJDL6mTdvHgAgOzvbaJ4eHh41luPh4YGcnBwAgF6vx+DBg/HLL79g8eLFOHLkCM6fP2/4xa7tS+fp6WmeN2yCypy1LdPLy8swvpKzs3ONdhKJpN5fnpycHAiFwhrTV19/OTk50Gq12LRpU43PYPjw4QAefgbLli3DRx99hHPnzmHYsGFwdnbG008/jQsXLtTzriuoVCqj12Kx+JHD1Wq10XtxdXU1asfj8Yw++0qNXWdA474/1ZcnkUjqbFvd//3f/2HJkiXYs2cP+vfvD5VKhbFjxz6y69qjNPT7VZuUlBT06dMHaWlp+OSTT3Dy5ElERUXhP//5DwDT3lddRowYAXd3d6jVaixatAgCgaDR8zKVRZ19Dg4ONpx97t+/P3Q6Hb744gv89NNPGD9+PFxcXABU/LI9++yztc4jKCjI6HVmZmaNNpmZmWjTpg0AID4+HnFxcdi5cydmzpxpaHPz5s06czZnP8LKX6CMjIwaZ6XT09MN68Qcy9FqtcjJyTH6pa2+/pycnCAQCDBjxgy8+uqrtc
6rVatWAAChUIhFixZh0aJFyM/Pxx9//IHly5djyJAhSE1Nrfes/eO+l6ysLKPCyBhDZmYmnnjiCbMtqzHfn8ehUCi
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Company 2: the two largest customers against all the others\n",
"outlier_detection(\"2\", coupure=2)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "ee16cf31-18e1-4803-b003-ba1d1a3fc333",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>170246</th>\n",
" <td>12184</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3564</td>\n",
" <td>2023-10-12 12:25:15.438714+02:00</td>\n",
" <td>2023-11-09 05:14:01.944407+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>1275.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-08 19:17:50.565000</td>\n",
" <td>75.0</td>\n",
" <td>512831</td>\n",
" <td>12.645438</td>\n",
" <td>197358</td>\n",
" <td>0.0</td>\n",
" <td>31.719577</td>\n",
" <td>2.508381</td>\n",
" <td>6484972.4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>204447</td>\n",
" <td>2020-08-28 08:55:55.710000+02:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email street_id \\\n",
"170246 12184 NaN NaN NaN NaN 3564 \n",
"\n",
" created_at updated_at \\\n",
"170246 2023-10-12 12:25:15.438714+02:00 2023-11-09 05:14:01.944407+01:00 \n",
"\n",
" civility is_partner extra deleted_at reference gender \\\n",
"170246 NaN False NaN NaN NaN 2 \n",
"\n",
" is_email_true extra_field opt_in structure_id note profession \\\n",
"170246 True NaN False 1275.0 NaN NaN \n",
"\n",
" language mcp_contact_id need_reload last_buying_date \\\n",
"170246 NaN NaN False 2023-11-08 19:17:50.565000 \n",
"\n",
" max_price ticket_sum average_price fidelity \\\n",
"170246 75.0 512831 12.645438 197358 \n",
"\n",
" average_purchase_delay average_price_basket average_ticket_basket \\\n",
"170246 0.0 31.719577 2.508381 \n",
"\n",
" total_price preferred_category preferred_supplier \\\n",
"170246 6484972.4 NaN NaN \n",
"\n",
" preferred_formula purchase_count first_buying_date \\\n",
"170246 NaN 204447 2020-08-28 08:55:55.710000+02:00 \n",
"\n",
" last_visiting_date zipcode country age tenant_id \n",
"170246 NaN NaN NaN NaN 1879 "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Row(s) of company 2's customersplus table whose id is 12184\n",
"df = load_dataset_2(directory_path='2', file_name='customersplus')\n",
"df.loc[df['id'].eq(12184)]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "4073c986-3e2c-4945-8601-220fea747c9c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>102639</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>email1</td>\n",
" <td>1</td>\n",
" <td>2023-07-20 17:16:27.062822+02:00</td>\n",
" <td>2023-07-20 17:16:27.074952+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>224453</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>firstname2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2023-07-21 10:18:44.502496+02:00</td>\n",
" <td>2023-07-21 10:18:44.502496+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>josef</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>ch</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>103013</th>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>firstname3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>2023-07-21 10:18:44.503913+02:00</td>\n",
" <td>2023-07-21 10:18:44.503913+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>dominic</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>ch</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138386</th>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>firstname4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>2023-07-21 10:18:44.504404+02:00</td>\n",
" <td>2023-07-21 10:18:44.504404+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>abigail</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>ch</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>190087</th>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>firstname5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>2023-07-21 10:18:44.504841+02:00</td>\n",
" <td>2023-07-21 10:18:44.504841+02:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>sophia</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>ch</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101868</th>\n",
" <td>601387</td>\n",
" <td>lastname601387</td>\n",
" <td>firstname601387</td>\n",
" <td>NaN</td>\n",
" <td>email601387</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.358715+01:00</td>\n",
" <td>2023-11-09 05:13:57.358715+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>de</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>205168</th>\n",
" <td>601388</td>\n",
" <td>lastname601388</td>\n",
" <td>firstname601388</td>\n",
" <td>NaN</td>\n",
" <td>email601388</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.359234+01:00</td>\n",
" <td>2023-11-09 05:13:57.359234+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>de</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-09 00:25:24.716000</td>\n",
" <td>15.0</td>\n",
" <td>2</td>\n",
" <td>14.0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>28.0</td>\n",
" <td>2.0</td>\n",
" <td>28.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2023-11-09 00:25:24.716000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67641</th>\n",
" <td>601389</td>\n",
" <td>lastname601389</td>\n",
" <td>firstname601389</td>\n",
" <td>NaN</td>\n",
" <td>email601389</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.360373+01:00</td>\n",
" <td>2023-11-09 05:13:57.360373+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>de</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-09 00:28:07.511000</td>\n",
" <td>15.0</td>\n",
" <td>2</td>\n",
" <td>15.0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>30.0</td>\n",
" <td>2.0</td>\n",
" <td>30.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2023-11-09 00:28:07.511000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67639</th>\n",
" <td>601390</td>\n",
" <td>lastname601390</td>\n",
" <td>firstname601390</td>\n",
" <td>NaN</td>\n",
" <td>email601390</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.360903+01:00</td>\n",
" <td>2023-11-09 05:13:57.360903+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>256450</th>\n",
" <td>601391</td>\n",
" <td>lastname601391</td>\n",
" <td>firstname601391</td>\n",
" <td>NaN</td>\n",
" <td>email601391</td>\n",
" <td>3550</td>\n",
" <td>2023-11-09 05:13:57.361432+01:00</td>\n",
" <td>2023-11-09 05:14:18.906054+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-09 00:36:41.172000</td>\n",
" <td>15.0</td>\n",
" <td>2</td>\n",
" <td>15.0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>30.0</td>\n",
" <td>2.0</td>\n",
" <td>30.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2023-11-09 00:36:41.172000+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1879</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>275622 rows × 42 columns</p>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email \\\n",
"102639 1 NaN NaN NaN email1 \n",
"224453 2 NaN firstname2 NaN NaN \n",
"103013 3 NaN firstname3 NaN NaN \n",
"138386 4 NaN firstname4 NaN NaN \n",
"190087 5 NaN firstname5 NaN NaN \n",
"... ... ... ... ... ... \n",
"101868 601387 lastname601387 firstname601387 NaN email601387 \n",
"205168 601388 lastname601388 firstname601388 NaN email601388 \n",
"67641 601389 lastname601389 firstname601389 NaN email601389 \n",
"67639 601390 lastname601390 firstname601390 NaN email601390 \n",
"256450 601391 lastname601391 firstname601391 NaN email601391 \n",
"\n",
" street_id created_at \\\n",
"102639 1 2023-07-20 17:16:27.062822+02:00 \n",
"224453 2 2023-07-21 10:18:44.502496+02:00 \n",
"103013 3 2023-07-21 10:18:44.503913+02:00 \n",
"138386 3 2023-07-21 10:18:44.504404+02:00 \n",
"190087 3 2023-07-21 10:18:44.504841+02:00 \n",
"... ... ... \n",
"101868 3550 2023-11-09 05:13:57.358715+01:00 \n",
"205168 3550 2023-11-09 05:13:57.359234+01:00 \n",
"67641 3550 2023-11-09 05:13:57.360373+01:00 \n",
"67639 3550 2023-11-09 05:13:57.360903+01:00 \n",
"256450 3550 2023-11-09 05:13:57.361432+01:00 \n",
"\n",
" updated_at civility is_partner extra \\\n",
"102639 2023-07-20 17:16:27.074952+02:00 NaN False NaN \n",
"224453 2023-07-21 10:18:44.502496+02:00 NaN False NaN \n",
"103013 2023-07-21 10:18:44.503913+02:00 NaN False NaN \n",
"138386 2023-07-21 10:18:44.504404+02:00 NaN False NaN \n",
"190087 2023-07-21 10:18:44.504841+02:00 NaN False NaN \n",
"... ... ... ... ... \n",
"101868 2023-11-09 05:13:57.358715+01:00 NaN False NaN \n",
"205168 2023-11-09 05:13:57.359234+01:00 NaN False NaN \n",
"67641 2023-11-09 05:13:57.360373+01:00 NaN False NaN \n",
"67639 2023-11-09 05:13:57.360903+01:00 NaN False NaN \n",
"256450 2023-11-09 05:14:18.906054+01:00 NaN False NaN \n",
"\n",
" deleted_at reference gender is_email_true extra_field opt_in \\\n",
"102639 NaN NaN 2 True NaN False \n",
"224453 NaN NaN 1 True NaN False \n",
"103013 NaN NaN 2 True NaN False \n",
"138386 NaN NaN 2 True NaN False \n",
"190087 NaN NaN 1 True NaN False \n",
"... ... ... ... ... ... ... \n",
"101868 NaN NaN 2 True NaN False \n",
"205168 NaN NaN 2 True NaN False \n",
"67641 NaN NaN 2 True NaN False \n",
"67639 NaN NaN 0 True NaN False \n",
"256450 NaN NaN 2 True NaN False \n",
"\n",
" structure_id note profession language mcp_contact_id need_reload \\\n",
"102639 NaN NaN NaN NaN 1.0 False \n",
"224453 NaN NaN NaN josef NaN False \n",
"103013 NaN NaN NaN dominic NaN False \n",
"138386 NaN NaN NaN abigail NaN False \n",
"190087 NaN NaN NaN sophia NaN False \n",
"... ... ... ... ... ... ... \n",
"101868 NaN NaN NaN de NaN False \n",
"205168 NaN NaN NaN de NaN False \n",
"67641 NaN NaN NaN de NaN False \n",
"67639 NaN NaN NaN NaN NaN False \n",
"256450 NaN NaN NaN NaN NaN False \n",
"\n",
" last_buying_date max_price ticket_sum average_price \\\n",
"102639 NaN NaN 0 NaN \n",
"224453 NaN NaN 0 NaN \n",
"103013 NaN NaN 0 NaN \n",
"138386 NaN NaN 0 NaN \n",
"190087 NaN NaN 0 NaN \n",
"... ... ... ... ... \n",
"101868 NaN NaN 0 NaN \n",
"205168 2023-11-09 00:25:24.716000 15.0 2 14.0 \n",
"67641 2023-11-09 00:28:07.511000 15.0 2 15.0 \n",
"67639 NaN NaN 0 NaN \n",
"256450 2023-11-09 00:36:41.172000 15.0 2 15.0 \n",
"\n",
" fidelity average_purchase_delay average_price_basket \\\n",
"102639 0 NaN NaN \n",
"224453 0 NaN NaN \n",
"103013 0 NaN NaN \n",
"138386 0 NaN NaN \n",
"190087 0 NaN NaN \n",
"... ... ... ... \n",
"101868 0 NaN NaN \n",
"205168 1 0.0 28.0 \n",
"67641 1 0.0 30.0 \n",
"67639 0 NaN NaN \n",
"256450 1 0.0 30.0 \n",
"\n",
" average_ticket_basket total_price preferred_category \\\n",
"102639 NaN 0.0 NaN \n",
"224453 NaN 0.0 NaN \n",
"103013 NaN 0.0 NaN \n",
"138386 NaN 0.0 NaN \n",
"190087 NaN 0.0 NaN \n",
"... ... ... ... \n",
"101868 NaN 0.0 NaN \n",
"205168 2.0 28.0 NaN \n",
"67641 2.0 30.0 NaN \n",
"67639 NaN 0.0 NaN \n",
"256450 2.0 30.0 NaN \n",
"\n",
" preferred_supplier preferred_formula purchase_count \\\n",
"102639 NaN NaN 0 \n",
"224453 NaN NaN 0 \n",
"103013 NaN NaN 0 \n",
"138386 NaN NaN 0 \n",
"190087 NaN NaN 0 \n",
"... ... ... ... \n",
"101868 NaN NaN 0 \n",
"205168 NaN NaN 1 \n",
"67641 NaN NaN 1 \n",
"67639 NaN NaN 0 \n",
"256450 NaN NaN 1 \n",
"\n",
" first_buying_date last_visiting_date zipcode country \\\n",
"102639 NaN NaN NaN fr \n",
"224453 NaN NaN NaN ch \n",
"103013 NaN NaN NaN ch \n",
"138386 NaN NaN NaN ch \n",
"190087 NaN NaN NaN ch \n",
"... ... ... ... ... \n",
"101868 NaN NaN NaN NaN \n",
"205168 2023-11-09 00:25:24.716000+01:00 NaN NaN NaN \n",
"67641 2023-11-09 00:28:07.511000+01:00 NaN NaN NaN \n",
"67639 NaN NaN NaN NaN \n",
"256450 2023-11-09 00:36:41.172000+01:00 NaN NaN NaN \n",
"\n",
" age tenant_id \n",
"102639 NaN 1879 \n",
"224453 NaN 1879 \n",
"103013 NaN 1879 \n",
"138386 NaN 1879 \n",
"190087 NaN 1879 \n",
"... ... ... \n",
"101868 NaN 1879 \n",
"205168 NaN 1879 \n",
"67641 NaN 1879 \n",
"67639 NaN 1879 \n",
"256450 NaN 1879 \n",
"\n",
"[275622 rows x 42 columns]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values(by = 'id')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "475030ad-6a69-4c91-9cd6-943a0edeaf01",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_3/products_purchased_reduced.csv\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATwAAAEQCAYAAAAta8hLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+3klEQVR4nO3dd3gU5doG8Ht7Sd8lyaY3QgIJIZGAihRBaRIEBTyiYABBkCKICtgoR4pwFFH0iCgaEDgfogI2MEjvndBCAiGEQHqvm23v90fMypKEFJLMlud3XblgZ2dnnpmd3HmnvcNjjDEQQogN4HNdACGEtBUKPEKIzaDAI4TYDAo8QojNoMAjhNgMCjxCiM2gwCOE2AwKPEKIzaDAI4TYjDYLvAsXLkAul2P16tVtNUtCCDHRpMCLi4sDj8cz/giFQnh4eOD555/HtWvX6v1caWkpRo4ciRkzZmDGjBkPXPSD+OOPP7Bw4cI63/P398e4ceOMrzMyMrBw4UKcP3++1rgLFy4Ej8drnSKbicfj1btstuLKlStYuHAhbt682SbzW7p0KbZv395i02uJ+u+3jds81gTfffcdA8C+++47duzYMbZv3z62ePFiJpPJmJubGysoKKjzc6NGjWIvvvgiMxgMTZldq5g2bRqrb7HPnj3Lrl+/bnx96tQp4/LeKz09nR07dqy1ymwWAGzBggVcl8GprVu3MgBs3759bTI/Ozs7Fhsb22LTa4n677eN2zphc0IyPDwc0dHRAIDHH38cer0eCxYswPbt2zF+/Pha4//www/NjOOWU1FRAblcft9xoqKiGj09b29veHt7P2hZhJC21JR0rGnhnTp1ymT477//zgCwZcuWmQw/deoUGzp0KHNxcWESiYRFRkayLVu21DnN+Ph4Nm7cOObi4sLkcjmLiYlhKSkpJuPGx8ezp59+mnl5eTGJRMKCgoLYK6+8wnJzc03GW7BgAQPAzpw5w0aMGMGcnZ2ZSqVisbGxDECtn9TUVMYYY35+fsa/1vv27atz3JoWVM087qbX69ny5ctZSEgIE4vFzNXVlY0dO5alp6ebjNenTx8WFhbGTp48yXr27MlkMhkLCAhgy5YtY3q9vsHvobi4mE2cOJEpFApmZ2fHBg4cyJKSkups4SUnJ7PRo0czV1dXJhaLWWhoKPv8889r1f3BBx+wDh06MKlUypycnFjnzp3ZqlWr7ltHzTratGkTmzNnDlOpVMzOzo7FxMSwrKwsVlJSwiZNmsSUSiVTKpVs3LhxrLS01GQalZWVbN68eczf35+JRCLm6enJpk6dygoLC03G8/PzY0OGDGE7d+5kUVFRTCqVspCQELZu3TrjODXb0r0/NS30pm4/ly5dYs8//zxzdHRkbm5ubPz48ayoqMg4Xl3z6tOnD2OMsfLycvbGG28wf39/JpFImIuLC+vatSvbvHlzveuzofoZY2zdunUsIiLCOM3hw4ezK1euGN9vaBv//PPPWa9evZirqyuTy+UsPDycLV++nGk0mlrru66Wa58+fYzLyBhjkydPZhKJhJ0+fdo4TK/Xs379+jE3NzeWkZFR7/JyoUUC7/PPP2cA2E8//WQctnfvXiYWi1mvXr3Yli1b2K5du9i4ceNqfYE10/Tx8WETJkxgO3fuZGvXrmVubm7Mx8fHZMP/8ssv2bJly9gvv/zCDhw4wNavX8+6dOnCQkJCTL6wmg3Wz8+PzZ07l+3evZtt376dXb9+nY0cOZIBYMeOHTP+qNVqxpjpl1xcXGys7b333jOOWxNedQXeK6+8wgCw6dOns127drE1a9YwV1dX5uPjY/JL1adPH6ZUKllwcDBbs2YN2717N5s6dSoDwNavX3/f78BgMLC+ffsyiUTClixZwuLj49mCBQtYYGBgrcC7fPmyMbw2bNjA4uPj2RtvvMH4fD5buHChcbxly5YxgUDAFixYwPbs2cN27drFVq1aZTJOXWoCz8/Pj40bN8
64zPb29qxv376sf//+7M0332Tx8fFs+fLlTCAQsBkzZpgsy8CBA5lQKGTvv/8+i4+PZx999BGzs7NjUVFRxu+l5rvx9vZmnTp1Yhs2bGB//vknGzVqFAPADhw4wBhjLCcnhy1dupQBYF988YXxO8vJyWnW9hMSEsLmz5/Pdu/ezVauXMkkEgkbP368cbxjx44xmUzGnnrqKeO8Ll++zBirDgK5XM5WrlzJ9u3bx3777Tf24YcfstWrV9e7Phuqv+a90aNHs99//51t2LCBBQYGMicnJ5acnMwYYw1u46+//jr78ssv2a5du9jevXvZJ598wtq1a2eyXDXruzGBV1lZySIjI1lgYKDxd3X+/PmMz+ez+Pj4epeVK80KvOPHjzOtVstKS0vZrl27mEqlYr1792ZardY4bmhoKIuKijIZxhhjMTExzMPDw9iSqZnmM888YzLekSNHGAC2ePHiOmsxGAxMq9WytLQ0BoDt2LHD+F7NBjt//vxan7vf8Y17v+T7HcO7N/ASExMZADZ16lST8U6cOMEAsHfeecc4rE+fPgwAO3HihMm4nTp1YgMHDqyztho7d+5kANinn35qMnzJkiW1Am/gwIHM29ubFRcXm4w7ffp0JpVKjcdcY2JiWGRk5H3nW5eawBs6dKjJ8FmzZjEA7LXXXjMZPnz4cKZQKIyvd+3axQCwFStWmIy3ZcsWBoCtXbvWOMzPz49JpVKWlpZmHFZZWckUCgWbPHmycVhjj4E1Zvu5t66pU6cyqVRqciy6vmN44eHhbPjw4fetoS711V9YWGgM17vdunWLSSQS9sILLxiHNfYYnl6vZ1qtlm3YsIEJBAKTY/CNDTzGGLt27RpzdHRkw4cPZ3/99Rfj8/nsvffea3hhOdCsy1IeeeQRiEQiODg4YNCgQXBxccGOHTsgFFYfErx+/TquXr2KF198EQCg0+mMP0899RQyMzORlJRkMs2acWv06NEDfn5+2Ldvn3FYTk4OpkyZAh8fHwiFQohEIvj5+QEAEhMTa9U5YsSI5ixes9TUefdZXgDo3r07OnbsiD179pgMV6lU6N69u8mwiIgIpKWlNWo+966vF154weS1Wq3Gnj178Mwzz0Aul9f6DtRqNY4fP26sMSEhAVOnTsWff/6JkpKSxi3032JiYkxed+zYEQAwZMiQWsMLCgpQVlYGANi7dy+A2uts1KhRsLOzq7XOIiMj4evra3wtlUrRoUOHBtdZjaZuP08//bTJ64iICKjVauTk5DQ4r+7du2Pnzp2YN28e9u/fj8rKykbVWJ9jx46hsrKy1rry8fFBv379aq2r+pw7dw5PP/00lEolBAIBRCIRXnrpJej1eiQnJzertvbt2+Prr7/G9u3bERMTg169epntWeJmBd6GDRtw6tQp7N27F5MnT0ZiYiJGjx5tfD87OxsA8Oabb0IkEpn8TJ06FQCQl5dnMk2VSlVrPiqVCvn5+QAAg8GAAQMG4Oeff8acOXOwZ88enDx50vhLW9cG5eHh0ZzFa5aaOuuap6enp/H9GkqlstZ4EomkwV+M/Px8CIXCWp+/d/3l5+dDp9Nh9erVtb6Dp556CsA/38Hbb7+Njz76CMePH8fgwYOhVCrxxBNP4PTp0w0sdTWFQmHyWiwW33e4Wq02WRZXV1eT8Xg8nsl3X6O56wxo3vZz7/wkEkm9497rs88+w9y5c7F9+3b07dsXCoUCw4cPv+/lW/fT1O2rLrdu3UKvXr1w584dfPrppzh06BBOnTqFL774AkDjlqs+Q4YMgbu7O9RqNWbPng2BQNDsabWmZp2l7dixo/Esbd++faHX6/HNN9/gxx9/xMiRI9GuXTsA1b9Izz77bJ3TCAkJMXmdlZVVa5ysrCy0b98eAHDp0iUkJCQgLi4OsbGxxnGuX79eb51teZ1czS9HZmZmrbO3GRkZxnXSEvPR6XTIz883+YW8d/25uLhAIBBg7NixmDZtWp3TCggIAAAIhULMnj0bs2
fPRlFREf766y+88847GDhwINLT0xs8u/2gy5Kbm2sSeowxZGVloVu3bi02r+ZsPw/Czs4OixYtwqJFi5CdnW1s7Q0
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"outlier_detection(directory_path = \"3\", coupure = 2)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "b64d04db-1c3f-4538-9d05-8f7d62c7c046",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>105720</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1961-12-04</td>\n",
" <td>NaN</td>\n",
" <td>91159</td>\n",
" <td>2021-03-02 15:35:40.452065+01:00</td>\n",
" <td>2023-11-09 01:31:07.539604+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>19715.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-06 16:57:19</td>\n",
" <td>7500.0</td>\n",
" <td>2297716</td>\n",
" <td>10.152196</td>\n",
" <td>14917</td>\n",
" <td>-39771.165147</td>\n",
" <td>27.514811</td>\n",
" <td>2.710232</td>\n",
" <td>2.332686e+07</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>847793</td>\n",
" <td>2016-01-01 10:23:36+01:00</td>\n",
" <td>2023-11-06 17:12:00</td>\n",
" <td>13090</td>\n",
" <td>fr</td>\n",
" <td>61.0</td>\n",
" <td>1512</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email street_id \\\n",
"105720 1 NaN NaN 1961-12-04 NaN 91159 \n",
"\n",
" created_at updated_at \\\n",
"105720 2021-03-02 15:35:40.452065+01:00 2023-11-09 01:31:07.539604+01:00 \n",
"\n",
" civility is_partner extra deleted_at reference gender \\\n",
"105720 NaN False NaN NaN NaN 2 \n",
"\n",
" is_email_true extra_field opt_in structure_id note profession \\\n",
"105720 False NaN False 19715.0 NaN NaN \n",
"\n",
" language mcp_contact_id need_reload last_buying_date max_price \\\n",
"105720 NaN NaN False 2023-11-06 16:57:19 7500.0 \n",
"\n",
" ticket_sum average_price fidelity average_purchase_delay \\\n",
"105720 2297716 10.152196 14917 -39771.165147 \n",
"\n",
" average_price_basket average_ticket_basket total_price \\\n",
"105720 27.514811 2.710232 2.332686e+07 \n",
"\n",
" preferred_category preferred_supplier preferred_formula \\\n",
"105720 NaN NaN NaN \n",
"\n",
" purchase_count first_buying_date last_visiting_date \\\n",
"105720 847793 2016-01-01 10:23:36+01:00 2023-11-06 17:12:00 \n",
"\n",
" zipcode country age tenant_id \n",
"105720 13090 fr 61.0 1512 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = load_dataset_2('3', 'customersplus')\n",
"df[df['id'] == 1]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "1d817bee-3ded-4066-9f91-6cf095591b0e",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path : projet-bdc2324-team1/0_Input/Company_4/products_purchased_reduced.csv\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEQCAYAAAD7zhIuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAUklEQVR4nO3dd3QUVRsG8Gd7S++9h4SSQGjSIdKRpoCKgIAgSFNsiChNQQTxAwWlC9IUC0VEQgldQAIEAqR30nvbZLPtfn9gVpb0ZDezm9zfOTmcnZ3MvDs7eZhy514WIYSAoiiKYWymC6AoigJoGFEUZSBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBaLIwiIiIgFouxZcuWllolRVFGpFFhtG/fPrBYLM0Pl8uFo6MjXn31VcTFxdX6e6WlpZg4cSIWLVqERYsWNbvo5vjrr7+watWqGt/z8PDAjBkzNK8zMjKwatUq3Lt3r9q8q1atAovF0k+RTcRisWr9bG1FZGQkVq1aheTk5BZZ3xdffIHjx4/rbHm6qL+ufdygkUbYu3cvAUD27t1Lbty4QS5evEjWrFlDRCIRsbOzIwUFBTX+3qRJk8iUKVOIWq1uzOr0YsGCBaS2j3337l0SHx+veR0WFqb5vM96/PgxuXHjhr7KbBIAZOXKlUyXwahff/2VACAXL15skfVJJBIyffp0nS1PF/XXtY8bMm5TAqxTp07o3r07AGDQoEFQqVRYuXIljh8/jpkzZ1ab/5dffmliVOpOeXk5xGJxnfMEBQU1eHkuLi5wcXFpblkURVVpTHJVHRmFhYVpTT916hQBQNatW6c1PSwsjIwZM4ZYWloSgUBAunTpQo4cOVLjMs+ePUtmzJhBLC0tiVgsJqNHjyYJCQla8549e5aMHTuWODs7E4FAQLy9vcmcOXNIbm6u1nwrV64kAMidO3fIhAkTiIWFBXFwcCDTp08nAKr9JCUlEUIIcXd31/wvd/HixRrnrTryqFrH01QqFVm/fj3x8/MjfD6f2NrakmnTppHHjx9rzTdw4EDSsWNHcuvWLdKvXz8iEomIp6cnWbduHVGpVPV+D8XFxWT27NnEysqKSCQSMnz4cBITE1PjkVFsbCyZPHkysbW1JXw+n/j7+5OtW7dWq/vzzz8n7dq1I0KhkJibm5OAgACyefPmOuuo2kaHDh0iS5YsIQ4ODkQikZDRo0eTrKwsUlJSQt58801ibW1NrK2tyYwZM0hpaanWMioqKsjSpUuJh4cH4fF4xMnJicyfP58UFhZqzefu7k5eeOEFcvr0aRIUFESEQiHx8/Mje/bs0cxTtS89+1N1ZNvY/efhw4fk1VdfJWZmZsTOzo7MnDmTFBUVaearaV0DBw4khBAilUrJ+++/Tzw8PIhAICCWlpakW7du5PDhw7Vuz/rqJ4SQPXv2kMDAQM0yx48fTyIjIzXv17ePb926lfTv35/Y2toSsVhMOnXqRNavX0/kcnm17V3TEd/AgQM1n5EQQubOnUsEAgG5ffu2ZppKpSLPP/88sbOzIxkZGbV+3mfpJIy2bt1KAJDff/9dM+3ChQuEz+eT/v37kyNHjpCQkBAyY8aMahu3apmurq7kjTfeIKdPnyY7d+4kdnZ2xNXVVWun3LZtG1m3bh35448/yOXLl8mPP/5IOnfuTPz8/LQ2ZtXO5O7uTj766CNy7tw5cvz4cRIfH08mTpxIAJAbN25ofmQyGSFE+wsoLi7W1Pbpp59q5q0KlprCaM6cOQQAWbhwIQkJCSHbt28ntra2xNXVVWuHHzhwILG2tia+vr5k+/bt5Ny5c2T+/PkEAPnxxx/r/A7UajUJDg4mAoGArF27lpw9e5asXLmSeHl5VQujR48eaYJl//795OzZs+T9998nbDabrFq1SjPfunXrCIfDIStXriShoaEkJCSEbN68WWuemlSFkbu7O5kxY4bmM5uYmJ
Dg4GAydOhQ8sEHH5CzZ8+S9evXEw6HQxYtWqT1WYYPH064XC5Zvnw5OXv2LNm4cSORSCQkKChI871UfTcuLi6kQ4cOZP/+/eTMmTNk0qRJBAC5fPkyIYSQnJwc8sUXXxAA5LvvvtN8Zzk5OU3af/z8/MiKFSvIuXPnyP/+9z8iEAjIzJkzNfPduHGDiEQiMmrUKM26Hj16RAh58kcqFovJ//73P3Lx4kXy559/ki+//JJs2bKl1u1ZX/1V702ePJmcOnWK7N+/n3h5eRFzc3MSGxtLCCH17uPvvvsu2bZtGwkJCSEXLlwgmzZtIjY2Nlqfq2p7NySMKioqSJcuXYiXl5fmb3XFihWEzWaTs2fP1vpZa9KkMLp58yZRKBSktLSUhISEEAcHBzJgwACiUCg08/r7+5OgoCCtaYQQMnr0aOLo6Kg5Aqha5osvvqg1399//00AkDVr1tRYi1qtJgqFgqSkpBAA5MSJE5r3qnamFStWVPu9us6nn/0C6rpm9GwYRUVFEQBk/vz5WvP9888/BABZtmyZZtrAgQMJAPLPP/9ozduhQwcyfPjwGmurcvr0aQKAfPPNN1rT165dWy2Mhg8fTlxcXEhxcbHWvAsXLiRCoVBzjW/06NGkS5cuda63JlVhNGbMGK3pixcvJgDI22+/rTV9/PjxxMrKSvM6JCSEACAbNmzQmu/IkSMEANm5c6dmmru7OxEKhSQlJUUzraKiglhZWZG5c+dqpjX0mktD9p9n65o/fz4RCoVa1z5ru2bUqVMnMn78+DprqElt9RcWFmqC72mpqalEIBCQ1157TTOtodeMVCoVUSgUZP/+/YTD4Whd821oGBFCSFxcHDEzMyPjx48n58+fJ2w2m3z66af1f9hnNOnWfq9evcDj8WBqaooRI0bA0tISJ06cAJf75BJUfHw8oqOjMWXKFACAUqnU/IwaNQqZmZmIiYnRWmbVvFX69OkDd3d3XLx4UTMtJycHb731FlxdXcHlcsHj8eDu7g4AiIqKqlbnhAkTmvLxmqSqzqfvxgFAz5490b59e4SGhmpNd3BwQM+ePbWmBQYGIiUlpUHreXZ7vfbaa1qvZTIZQkND8eKLL0IsFlf7DmQyGW7evKmp8f79+5g/fz7OnDmDkpKShn3of40ePVrrdfv27QEAL7zwQrXpBQUFKCsrAwBcuHABQPVtNmnSJEgkkmrbrEuXLnBzc9O8FgqFaNeuXb3brEpj95+xY8dqvQ4MDIRMJkNOTk696+rZsydOnz6NpUuX4tKlS6ioqGhQjbW5ceMGKioqqm0rV1dXPP/889W2VW3Cw8MxduxYWFtbg8PhgMfj4fXXX4dKpUJsbGyTavPx8cGuXbtw/PhxjB49Gv3792/S3bwmhdH+/fsRFhaGCxcuYO7cuYiKisLkyZM172dnZwMAPvjgA/B4PK2f+fPnAwDy8vK0lung4FBtPQ4ODsjPzwcAqNVqDBs2DEePHsWSJUsQGhqKW7duaf6gavqyHR0dm/LxmqSqzprW6eTkpHm/irW1dbX5BAJBvTttfn4+uFxutd9/dvvl5+dDqVRiy5Yt1b6DUaNGAfjvO/j444+xceNG3Lx5EyNHjoS1tTUGDx6M27dv1/Opn7CystJ6zefz65wuk8m0Poutra3WfCwWS+u7r9LUbQY0bf95dn0CgaDWeZ/17bff4qOPPsLx48cRHBwMKysrjB8/vs4mMHVp7P5Vk9TUVPTv3x/p6en45ptvcPXqVYSFheG7774D0LDPVZsXXngB9vb2kMlkeO+998DhcBq9jCbdTWvfvr3mblpwcDBUKhV2796N3377DRMnToSNjQ2AJzv5Sy+9VOMy/Pz8tF5nZWVVmycrKws+Pj4AgIcPH+L+/fvYt28fpk+frpknPj6+1jpbsh1Q1Y6bmZlZ7S5bRkaGZpvoYj1KpRL5+flafyzPbj9LS0twOBxMmzYNCxYsqHFZnp6eAAAul4v33nsP7733HoqKinD+/H
ksW7YMw4cPx+PHj+u9C9ncz5Kbm6sVSIQQZGVloUePHjpbV1P2n+aQSCRYvXo1Vq9ejezsbM1R0pgxYxAdHd3o5T2
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"outlier_detection(directory_path = \"4\", coupure = 2)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "4cc07982-1070-439b-a579-fd3f351778b3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>lastname</th>\n",
" <th>firstname</th>\n",
" <th>birthdate</th>\n",
" <th>email</th>\n",
" <th>street_id</th>\n",
" <th>created_at</th>\n",
" <th>updated_at</th>\n",
" <th>civility</th>\n",
" <th>is_partner</th>\n",
" <th>extra</th>\n",
" <th>deleted_at</th>\n",
" <th>reference</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>extra_field</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>note</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>need_reload</th>\n",
" <th>last_buying_date</th>\n",
" <th>max_price</th>\n",
" <th>ticket_sum</th>\n",
" <th>average_price</th>\n",
" <th>fidelity</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>total_price</th>\n",
" <th>preferred_category</th>\n",
" <th>preferred_supplier</th>\n",
" <th>preferred_formula</th>\n",
" <th>purchase_count</th>\n",
" <th>first_buying_date</th>\n",
" <th>last_visiting_date</th>\n",
" <th>zipcode</th>\n",
" <th>country</th>\n",
" <th>age</th>\n",
" <th>tenant_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>300754</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2020-09-25 19:09:07.669208+02:00</td>\n",
" <td>2021-11-30 02:07:28.120188+01:00</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2023-11-07 16:33:09</td>\n",
" <td>360.0</td>\n",
" <td>1237224</td>\n",
" <td>6.056248</td>\n",
" <td>236850</td>\n",
" <td>0.015528</td>\n",
" <td>13.493612</td>\n",
" <td>2.228048</td>\n",
" <td>7492935.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>555295</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1342</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id lastname firstname birthdate email street_id \\\n",
"300754 2 NaN NaN NaN NaN 2 \n",
"\n",
" created_at updated_at \\\n",
"300754 2020-09-25 19:09:07.669208+02:00 2021-11-30 02:07:28.120188+01:00 \n",
"\n",
" civility is_partner extra deleted_at reference gender \\\n",
"300754 NaN False NaN NaN NaN 2 \n",
"\n",
" is_email_true extra_field opt_in structure_id note profession \\\n",
"300754 False NaN False NaN NaN NaN \n",
"\n",
" language mcp_contact_id need_reload last_buying_date max_price \\\n",
"300754 NaN NaN False 2023-11-07 16:33:09 360.0 \n",
"\n",
" ticket_sum average_price fidelity average_purchase_delay \\\n",
"300754 1237224 6.056248 236850 0.015528 \n",
"\n",
" average_price_basket average_ticket_basket total_price \\\n",
"300754 13.493612 2.228048 7492935.0 \n",
"\n",
" preferred_category preferred_supplier preferred_formula \\\n",
"300754 NaN NaN NaN \n",
"\n",
" purchase_count first_buying_date last_visiting_date zipcode \\\n",
"300754 555295 1901-01-01 00:09:21+00:09 NaN NaN \n",
"\n",
" country age tenant_id \n",
"300754 NaN NaN 1342 "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = load_dataset_2('4', 'customersplus')\n",
"df[df['id'] == 2]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "f74a9e62-a0f7-41cf-9834-78a99204547c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATEAAAEQCAYAAADYlUP7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBNklEQVR4nO3dd3gUVdsG8Ht7S9s00isQWkIiSJUSBAIYmoAUQToKCKIoIK+0V3xRPgsISpEAgqioIAIKhI5IgFASSigJpJFOeja72Xa+P2IWlvSQZHZ2z++6csHOnp15ZndyZ+bM2RkOIYSAoiiKpbhMF0BRFPU8aIhRFMVqNMQoimI1GmIURbEaDTGKoliNhhhFUaxGQ4yiKFajIUZRFKvREKMoitWaLcRu3LgBqVSKDRs2NNciKYqyAPUKsZ07d4LD4Rh++Hw+XF1dMW7cOMTHx1f7uuLiYowePRrz5s3DvHnznrvo5/HXX39h5cqVVT7n4+ODKVOmGB6np6dj5cqViImJqdR25cqV4HA4TVNkA3E4nGrXzVLExcVh5cqVSEpKapbl/e9//8OBAwcabX6NUX9N27hZIvWwY8cOAoDs2LGDREVFkdOnT5PVq1cTiURCnJ2dSV5eXpWvGzNmDHn99deJXq+vz+KaxNy5c0l1q33t2jWSkJBgeBwdHW1Y32elpqaSqKiopiqzQQCQFStWMF0Go3799VcCgJw+fbpZlieTycjkyZMbbX6NUX9N27g54jck+Dp06IDOnTsDAPr27QudTocVK1bgwIEDmDp1aqX2v/zySwMjtvGUlpZCKpXW2CYkJKTO8/Pw8ICHh8fzlkVR1POqT+JV7IlFR0cbTf/zzz8JALJmzRqj6dHR0WTo0KFELpcTkUhEgoODyd69e6ucZ2RkJJkyZQqRy+VEKpWS8PBw8uDBA6O2kZGRZNiwYcTd3Z2IRCLi7+9PZs2aRXJycozarVixggAgV69eJaNGjSJ2dnbExcWFTJ48mQCo9JOYmEgIIcTb29vwV/X06dNVtq3Y06lYxtN0Oh357LPPSEBAABEKhcTJyYlMmjSJpKamGrXr06cPad++Pbl8+TJ56aWXiEQiIb6+vmTNmjVEp9PV+jkUFhaSGTNmEHt7eyKTyUhYWBi5d+9elXti9+/fJ+PHjydOTk5EKBSSNm3akI0bN1aq++OPPyatW7cmYrGY2NraksDAQLJu3boa66h4j/bs2UMWLVpEXFxciEwmI+Hh4SQzM5MUFRWRmTNnEgcHB+Lg4ECmTJlCiouLjeahVCrJkiVLiI+PDxEIBMTNzY3MmTOH5OfnG7Xz9vYmr7zyCjly5AgJCQkhYrGYBAQEkIiICEObim3p2Z+KPen6bj+3bt0i48aNIzY2NsTZ2ZlMnTqVFBQUGNpVtaw+ffoQQghRKBRk4cKFxMfHh4hEIiKXy0mnTp3Ijz/+WO37WVv9hBASERFBgoKCDPMcMWIEiYuLMzxf2za+ceNG0qtXL+Lk5ESkUinp0KED+eyzz4hara70fle1h9mnTx/DOhJCyJtvvklEIhG5cuWKYZpOpyP9+vUjzs7OJD09vdr1bSyNEmIbN24kAMi+ffsM006dOkWEQiHp1asX2bt3Lzl69CiZMmVKpQ+lYp6enp5k2rRp5MiRI2Tr1q3E2dmZeHp6Gm3MmzZtImvWrCEHDx4kZ8+eJd9//z3p2LEjCQgIMPoQKjZCb29vsnjxYnL8+HFy4MABkpCQQEaPHk0AkKioKMOPSqUihBh/cIWFhYbaPvroI0PbikCqKsRmzZpFAJC3336bHD16lGzevJk4OTkRT09Po1+UPn36EAcHB9KqVSuyefNmcvz4cTJnzhwCgHz//fc1fgZ6vZ6EhoYSkUhEPvnkExIZGUlWrFhB/Pz8KoXY7du3DYG0a9cuEhkZSRYuXEi4XC5ZuXKlod2aNWsIj8cjK1asICdPniRHjx4l69atM2pTlYoQ8/b2JlOmTD
Gss5WVFQkNDSUDBgwg77//PomMjCSfffYZ4fF4ZN68eUbrEhYWRvh8Plm2bBmJjIwkn3/+OZHJZCQkJMTwuVR8Nh4eHqRdu3Zk165d5NixY2TMmDEEADl79iwhhJDs7Gzyv//9jwAg33zzjeEzy87ObtD2ExAQQJYvX06OHz9OvvzySyISicjUqVMN7aKioohEIiFDhgwxLOv27duEkPJfbqlUSr788kty+vRpcvjwYfLpp5+SDRs2VPt+1lZ/xXPjx48nf/75J9m1axfx8/Mjtra25P79+4QQUus2/u6775JNmzaRo0ePklOnTpGvvvqKODo6Gq1XxftdlxBTKpUkODiY+Pn5GX5Xly9fTrhcLomMjKx2XRtTg0Ls4sWLRKPRkOLiYnL06FHi4uJCevfuTTQajaFtmzZtSEhIiNE0QggJDw8nrq6uhj2OinmOHDnSqN0///xDAJDVq1dXWYterycajYYkJycTAOSPP/4wPFexES5fvrzS62rqL3j2g6upT+zZELtz5w4BQObMmWPU7tKlSwQAWbp0qWFanz59CABy6dIlo7bt2rUjYWFhVdZW4ciRIwQAWb9+vdH0Tz75pFKIhYWFEQ8PD1JYWGjU9u233yZisdjQhxkeHk6Cg4NrXG5VKkJs6NChRtMXLFhAAJD58+cbTR8xYgSxt7c3PD569CgBQNauXWvUbu/evQQA2bp1q2Gat7c3EYvFJDk52TBNqVQSe3t78uabbxqm1bVPqS7bz7N1zZkzh4jFYqO+3er6xDp06EBGjBhRYw1Vqa7+/Px8Q2A+LSUlhYhEIjJhwgTDtLr2iel0OqLRaMiuXbsIj8cz6tOua4gRQkh8fDyxsbEhI0aMICdOnCBcLpd89NFHta9sI2nQEItu3bpBIBDA2toagwYNglwuxx9//AE+v7yLLSEhAXfv3sXrr78OANBqtYafIUOGICMjA/fu3TOaZ0XbCj169IC3tzdOnz5tmJadnY233noLnp6e4PP5EAgE8Pb2BgDcuXOnUp2jRo1qyOo1SEWdT5/dBIAuXbqgbdu2OHnypNF0FxcXdOnSxWhaUFAQkpOT67ScZ9+vCRMmGD1WqVQ4efIkRo4cCalUWukzUKlUuHjxoqHG2NhYzJkzB8eOHUNRUVHdVvpf4eHhRo/btm0LAHjllVcqTc/Ly0NJSQkA4NSpUwAqv2djxoyBTCar9J4FBwfDy8vL8FgsFqN169a1vmcV6rv9DBs2zOhxUFAQVCoVsrOza11Wly5dcOTIESxZsgRnzpyBUqmsU43ViYqKglKprPReeXp6ol+/fpXeq+pcv34dw4YNg4ODA3g8HgQCAd544w3odDrcv3+/QbW1bNkS3333HQ4cOIDw8HD06tWrWc+ONijEdu3ahejoaJw6dQpvvvkm7ty5g/Hjxxuez8rKAgC8//77EAgERj9z5swBADx+/Nhoni4uLpWW4+LigtzcXACAXq/HwIEDsX//fixatAgnT57E5cuXDb+IVW0krq6uDVm9Bqmos6plurm5GZ6v4ODgUKmdSCSqdWPPzc0Fn8+v9Ppn37/c3FxotVps2LCh0mcwZMgQAE8+gw8//BCff/45Ll68iMGDB8PBwQEvv/wyrly5Ustal7O3tzd6LBQKa5yuUqmM1sXJycmoHYfDMfrsKzT0PQMatv08uzyRSFRt22d9/fXXWLx4MQ4cOIDQ0FDY29tjxIgRNQ5Fqkl9t6+qpKSkoFevXkhLS8P69evx999/Izo6Gt988w2Auq1XdV555RW0aNECKpUK7733Hng8XoPnVV8NOjvZtm1bw9nJ0NBQ6HQ6bNu2Db/99htGjx4NR0dHAOW/HK+++mqV8wgICDB6nJmZWalNZmYmWrZsCQC4desWYmNjsXPnTkyePNnQJiEhodo6m3McV8UGn5GRUemsZXp6uuE9aYzlaLVa5ObmGv2SPfv+yeVy8Hg8TJo0CXPnzq1yXr6+vgAAPp+P9957D++99x4KCg
pw4sQJLF26FGFhYUhNTa31rO7zrktOTo5RkBFCkJmZiRdffLHRltWQ7ed5yGQyrFq1CqtWrUJWVpZhr2zo0KG4e/d
"text/plain": [
"<Figure size 300x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"outlier_detection(directory_path = \"101\", coupure = 2)"
]
},
{
"cell_type": "markdown",
"id": "dbebfa92-310a-417b-a7fa-36ac3593db06",
"metadata": {},
"source": [
"## Evolution des commandes"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "06137694-7f50-47ba-8749-68471ececc1e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_448/3643128924.py:11: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n",
"/tmp/ipykernel_448/3643128924.py:19: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = ['sent_at'], date_parser=custom_date_parser)\n"
]
}
],
"source": [
"# Working data for company 1: a copy of the tickets frame already loaded in the\n",
"# kernel under the name df1_tickets, plus the company's campaign history.\n",
"company_number = \"1\"\n",
"nom_dataframe = f\"df{company_number}_tickets\"\n",
"purchases = globals()[nom_dataframe].copy()\n",
"\n",
"# 'sent_at' is handed over as the datetime column of the campaigns file\n",
"campaigns = display_databases(company_number, 'campaigns_information', ['sent_at'])\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e6b962d4-1a30-4133-ac0f-359f7afef42c",
"metadata": {},
"outputs": [],
"source": [
"# First purchase of each customer, and the month it falls in\n",
"purchase_min = (\n",
"    purchases.groupby('customer_id')['purchase_date']\n",
"    .min()\n",
"    .rename('first_purchase_event')\n",
"    .reset_index()\n",
"    .assign(\n",
"        first_purchase_event=lambda d: pd.to_datetime(d['first_purchase_event']),\n",
"        # formatting as 'YYYY-MM' and parsing back yields the first day of that month\n",
"        first_purchase_month=lambda d: pd.to_datetime(d['first_purchase_event'].dt.strftime('%Y-%m')),\n",
"    )\n",
")\n",
"\n",
"# First e-mail received by each customer, and the month it falls in\n",
"first_mail_received = (\n",
"    campaigns.groupby('customer_id')['sent_at']\n",
"    .min()\n",
"    .rename('first_email_reception')\n",
"    .reset_index()\n",
"    .assign(\n",
"        first_email_reception=lambda d: pd.to_datetime(d['first_email_reception']),\n",
"        first_email_month=lambda d: pd.to_datetime(d['first_email_reception'].dt.strftime('%Y-%m')),\n",
"    )\n",
")\n",
"\n",
"# Outer merge: keep customers who only purchased as well as those who only received e-mails\n",
"known_customer = purchase_min[['customer_id', 'first_purchase_month']].merge(\n",
"    first_mail_received[['customer_id', 'first_email_month']],\n",
"    on='customer_id',\n",
"    how='outer',\n",
")\n",
"\n",
"# A customer is considered known from the earlier of the two months\n",
"first_contact_month = known_customer[['first_email_month', 'first_purchase_month']].min(axis=1)\n",
"known_customer['known_date'] = pd.to_datetime(first_contact_month, utc=True, format='ISO8601')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "9c56e5ac-cbf4-4343-80ba-be2ab8b60eab",
"metadata": {},
"outputs": [],
"source": [
"# One row per distinct purchase, with the date from which its customer is known\n",
"distinct_purchases = purchases[['customer_id', 'purchase_id', 'purchase_date']].drop_duplicates()\n",
"purchases_count = (\n",
"    distinct_purchases\n",
"    .merge(known_customer[['customer_id', 'known_date']], on='customer_id', how='inner')\n",
"    .assign(\n",
"        # known customer = purchase made more than one month after known_date\n",
"        is_customer_known=lambda d: d['purchase_date'] > d['known_date'] + pd.DateOffset(months=1),\n",
"        purchase_date_month=lambda d: pd.to_datetime(d['purchase_date'].dt.strftime('%Y-%m')),\n",
"    )\n",
"    # customer_id 1 is left out -- presumably a placeholder account that pools\n",
"    # anonymous sales; TODO confirm against the cleaning functions\n",
"    .loc[lambda d: d['customer_id'] != 1]\n",
")\n",
"\n",
"# Monthly figures split by customer type: purchase count, then distinct customers\n",
"monthly_groups = purchases_count.groupby(['purchase_date_month', 'is_customer_known'])\n",
"nb_purchases_graph = monthly_groups['purchase_id'].count().rename('nb_purchases').reset_index()\n",
"nb_purchases_graph_2 = monthly_groups['customer_id'].nunique().rename('nb_new_customer').reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8c1aed44-03d3-49f9-b96c-b06a0df03dde",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHFCAYAAAAT5Oa6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABezElEQVR4nO3deVxV1f4//teRGYQjg0zKYE6pgKaUoJkoyBA4lwNGKqblGKkfk8ordk3TcrqaXjUHUhErxesUaip6EQdEcSQzL04J4gAHQWbW7w+/7J9HDsrBw3hez8djP+Ks/d57vxds483aa+8tE0IIEBEREWmxRrWdABEREVFtY0FEREREWo8FEREREWk9FkRERESk9VgQERERkdZjQURERERajwURERERaT0WRERERKT1WBARERGR1mNBRKRhGzduhEwmg6GhIW7evFluvZeXF1xcXGohM2DUqFFo3LhxrRz7ZWQyGSIiIqTPZd/H6hIREQGZTIYHDx5obJ9RUVFYunSpxvanrlGjRkEmk8HU1BQ5OTnl1t+8eRONGjWq8Ht95swZlfsNCgqCs7Nzufbs7Gx88803cHd3h5mZGQwMDODs7IzQ0FCcPXu23P4rWuLi4qRYZ2dnyGQyeHl5qczlp59+Urkd0avSre0EiBqqgoICfPXVV9i0aVNtp0I1JCoqCpcuXUJYWFit5aCnp4fi4mJs27YNY8aMUVq3YcMGmJqaIjs7+5WPc/36dfj6+iIjIwOffPIJ5syZg8aNG+PGjRv4+eef0aVLF2RlZUEulysd//XXXy+3r/bt2yt9NjU1xbFjx3D9+nW0bNlSad369ethZmamkT4QPYsjRETVxN/fH1FRUTh//nxtp6IRQgjk5eXVdhr0Evr6+hgwYADWr1+v1C6EwMaNGzF06NBXPkZJSQkGDhyIBw8e4MSJE/juu+8QGBiInj17YuTIkdi7dy/27dsHPT09pe1cXFzg4eFRbjEzM1OKe/vtt9GsWbNyfbh+/TqOHTumkT4QPY8FEVE1mTFjBiwtLfH555+/NDY/Px/h4eFo0aIF9PX10axZM0ycOBFZWVlKcc7OzggKCsKePXvwxhtvwMjICO3atcOePXsAPL000a5dO5iYmOCtt96q8BLI5cuX4e3tDRMTEzRt2hSTJk3CkydPlGJkMhkmTZqEf//732jXrh0MDAwQGRkJALh27RqCg4NhbW0NAwMDtGvXDj/88EOlvi/Z2dkYO3YsLC0t0bhxY/j7++PPP/+s1Lbbtm2Dr68v7OzspL7PnDkTubm55WJPnTqFvn37wtLSEoaGhmjZsqXKkZt79+5h+PDhkMvlsLGxQWhoKBQKhVLMDz/8gHfeeQfW1tYwMTGBq6srFi5ciKKiIinGy8sLe/fuxc2bN5UuB5VZtWoVOnbsiMaNG8PU1BSvv/46vvjii0r1W12hoaFISEjA1atXpbbff/8dN2/exOjRo195/zt37sTFixcRHh5e4eXfgIAAGBsbV2n/jRo1wocffojIyEiUlpZK7evXr4eDgwN8fHyqtF+iF2FBRFRNTE1N8dVXX2H//v04fPhwhXFCCAwYMADff/89QkJCsHfvXkydOhWRkZHo3bs3CgoKlOLPnz+P8PBwfP7559ixYwfkcjkGDRqE2bNn48cff8S8efOwZcsWKBQKBAUFlRvVKSoqwrvvvgtvb2/s3LkTkyZNwurVq1X+1b1z506sWrUK//jHP7B//3706NEDV65cwZtvvolLly5h0aJF2LNnDwIDAzFlyhTMmTPnhd+Tsr5u2rQJ06ZNQ0xMDDw8PBAQEFAudtSoURBCKLVdu3YN7777LtatW4fY2FiEhYXh559/Rt++fZXiynK9desWFi9ejN9++w1fffUV7t27V+44gwcPRps2bbB9+3bMnDkTUVFR+Oyzz5Rirl+/juDgYGzatAl79uzBmDFj8N133+Hjjz+WYlauXInu3bvD1tYWJ06ckB
YAiI6OxoQJE9CzZ0/ExMRg586d+Oyzz1QWcprg4+MDJycnpRGWdevW4Z133kHr1q1fef8HDhwAAAwYMECt7UpKSlBcXKy0lJSUqIwNDQ3F3bt3sX//fmnbyMhIjBo1Co0a8VcXVQNBRBq1YcMGAUAkJiaKgoIC8dprrwl3d3dRWloqhBCiZ8+eokOHDlJ8bGysACAWLlyotJ9t27YJAGLNmjVSm5OTkzAyMhJ37tyR2pKTkwUAYWdnJ3Jzc6X2nTt3CgBi165dUtvIkSMFALFs2TKlY33zzTcCgIiPj5faAAi5XC4ePXqkFOvn5yeaN28uFAqFUvukSZOEoaFhufhn/fbbby88/uzZsyvc9nmlpaWiqKhIHD16VAAQ58+fl9a1bNlStGzZUuTl5VW4/ezZs1V+3ydMmCAMDQ2ln9fzSkpKRFFRkfjpp5+Ejo6OUn8DAwOFk5NTuW0mTZokmjRpUum+VdXIkSOFiYmJEOJp/2xtbUVRUZF4+PChMDAwEBs3bhT3798v971+9pxV5fl++fv7CwAiPz+/UnmV7V/VoqOjoxTr5OQkAgMDhRBP/6289957Qggh9u7dK2QymUhNTRW//PKLACCOHDlSye8M0cuxzCaqRvr6+pg7dy7OnDmDn3/+WWVM2ejRqFGjlNrff/99mJiY4NChQ0rtnTp1QrNmzaTP7dq1A/D0ks2zlyjK2lXd6TZixAilz8HBwQCAI0eOKLX37t0b5ubm0uf8/HwcOnQIAwcOhLGxsdJf+u+++y7y8/Nx8uRJlf18dv8VHf9l/ve//yE4OBi2trbQ0dGBnp4eevbsCQBISUkBAPz555+4fv06xowZA0NDw5fus1+/fkqf3dzckJ+fj4yMDKnt3Llz6NevHywtLaXjfvjhhygpKanU5b633noLWVlZGD58OP7zn/9U+s6250dUnr189DKjR4/GvXv38Ntvv2HLli3Q19fH+++/X+ntq8NPP/2ExMREpeXUqVMVxoeGhmLXrl14+PAh1q1bh169eqm8241IE1gQEVWzYcOGoXPnzvjyyy+V5pyUefjwIXR1ddG0aVOldplMBltbWzx8+FCp3cLCQumzvr7+C9vz8/OV2nV1dWFpaanUZmtrK+XyLDs7u3K5FhcXY/ny5dDT01Na3n33XQB44S/7sr5WdPwXycnJQY8ePXDq1CnMnTsXcXFxSExMxI4dOwBAujR4//59AEDz5s1fuk8A5XIxMDBQ2t+tW7fQo0cP/P3331i2bBn++9//IjExUZozVZmJ5iEhIVi/fj1u3ryJwYMHw9raGl27dsXBgwdfuJ23t7fS9zg0NLRSfQIAJycneHt7Y/369Vi/fj2GDRtW4ZweXd2nNxxXdPmquLhYaYK0o6MjACA1NbXS+QBPi3R3d3elpUuXLhXGv/feezA0NMSSJUuwe/fucnfNEWkSb7snqmYymQwLFixAnz59sGbNmnLrLS0tUVxcjPv37ysVRUIIpKen480339RoPsXFxXj48KFSIZCeni7l8nzuzzI3N4eOjg5CQkIwceJElftv0aJFhccu62tFx3+Rw4cP4+7du4iLi5NGhQCUm3he9j28c+fOS/dZGTt37kRubi527NgBJycnqT05OVmt/YwePRqjR49Gbm4ujh07htmzZyMoKAh//vmn0n6ftXr1ajx+/Fj6bGVlpdYxQ0ND8cEHH6C0tBSrVq2qMM7GxgYA8Pfff6tc//fff0sxAODn54c1a9Zg586dmDlzplo5qcPY2BjDhg3D/PnzYWZmhkGDBlXbsYg4QkRUA3x8fNCnTx98/fXX5R6Y5+3tDQDYvHmzUvv27duRm5srrdekLVu2KH2OiooCgAofhlfG2NgYvXr1wrlz5+Dm5lbur313d/dyRdWzevXq9cLjv0hZcVY2glNm9erVSp/btGmDli1bYv369eUmpFeFquMKIbB27dpysQYGBi8dMTIxMUFAQAC+/PJLFBYW4vLlyxXGtm3bVul7q+
7looEDB2LgwIEIDQ2Fh4dHhXEeHh5o3Lgxtm3bVm7dlStXcPnyZaU7u/r37w9XV1fMnz8fly5dUrnP/fv3l7tzsSr
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Stacked bar chart: number of purchases per month, new vs. already-known customers\n",
"purchases_graph = nb_purchases_graph\n",
"\n",
"purchases_graph_used = purchases_graph[purchases_graph[\"purchase_date_month\"] >= datetime(2021,3,1)]\n",
"\n",
"# Put both customer types on one common month index. A month in which one of\n",
"# the two groups has no row gets 0, so each height is stacked on the bar of its\n",
"# own month instead of being paired with the other group's rows by position.\n",
"by_month = purchases_graph_used.set_index(\"purchase_date_month\")\n",
"known_mask = by_month[\"is_customer_known\"].to_numpy(dtype=bool)\n",
"months = by_month.index.unique().sort_values()\n",
"nb_new = by_month.loc[~known_mask, \"nb_purchases\"].reindex(months, fill_value=0)\n",
"nb_known = by_month.loc[known_mask, \"nb_purchases\"].reindex(months, fill_value=0)\n",
"\n",
"# Bars: new customers at the bottom, known customers stacked on top\n",
"plt.bar(months, nb_new, width=12, label = \"Nouveau client\")\n",
"plt.bar(months, nb_known, bottom = nb_new, width=12, label = \"Ancien client\")\n",
"\n",
"# Show x ticks as abbreviated month + two-digit year\n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))\n",
"\n",
"# Titles and axis labels\n",
"plt.xlabel('Mois')\n",
"plt.ylabel(\"Nombre d'achats\")\n",
"plt.title(\"Nombre d'achats - MUCEM\")\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d312276c-4c46-4d29-b6d6-ed110f59890d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoIAAAHGCAYAAADg0eryAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB5XElEQVR4nO3dd1gU1/s28Hul1xWkK82GDexRNAp2VLAmdgR7YotRYzTGiIk91mBi1Ngb6jdqrCgWMCooqNhjjMEOYqQp0jnvH77Mz6XJwi6g3J/r2kv2zNmZ58ye3X08M2dGJoQQICIiIqIKp1JZB0BEREREZYOJIBEREVEFxUSQiIiIqIJiIkhERERUQTERJCIiIqqgmAgSERERVVBMBImIiIgqKCaCRERERBUUE0EiIiKiCoqJIBG9t7755htYWlri3r17ZR0KEf1/x44dg5aWFvbv31/WoVARlCgR3LRpE2QyGXR1dfHgwYM8y93d3dGgQYOSbKLYfH19YWhoWCbbfheZTAY/P79S3aa7uzvc3d1LPY4jR46UelvV7ZdffsGmTZvKOowPnp+fH2QyWYHLjx49ilWrVuHQoUOoUaNGKUZWuKdPn8LPzw+RkZF5lr2rTQVR5XfprVu34Ofnh/v376tkfeVN7u+14OBgyGSyYrfX398fNWvWhLa2NmQyGRISEgAA3377Lezs7KCpqYnKlSuXOO7CuLu7w9fXV63bUJXHjx9jyJAhWLlyJXr16lWsddy/fx8ymYzfs8WQk5cpQyUjgmlpafj2229VsSoqRaGhoRg5cqRat3HkyBHMmTNHrdsobUwEy96jR48wbNgw7Nq1C82bNy/rcBQ8ffoUc+bMyTcRHDlyJEJDQ0s/qLfcunULc+bM+WATQVWKjIzExIkT0a5dO5w6dQqhoaEwMjLCH3/8gXnz5mHo0KEICQnBiRMnyjrUciEzMxP9+/fH6NGjMXbs2LIOh4pIUxUr8fDwwI4dOzB16lQ0bNhQFassU0IIpKamQk9Pr6xDUauWLVuWdQhExWJra4uYmJiyDkNp1apVQ7Vq1co6jPdaRkYGZDIZNDVV8vNVqJs3bwIARo0ahY8++kgqv3HjBgBg4sSJsLCwKHQdKSkpH/xvSQ5NTU2cO3eurMNQudevX0NfX7+sw1AblYwITps2DVWqVMHXX3/9zrqpqamYMWMGHB0doa2tjapVq2LcuHHScHsOBwcHeHp64tChQ2jcuDH09PRQt25dHDp0CMCb4c+6devCwMAAH330ESIiIvLd3s2bN9GhQwcYGBjA3Nwc48ePx+vXrxXqyGQyjB8/Hr/++ivq1q0LHR0dbN68GQBw9+5dDBo0CBYWFtDR0UHdunXx888/F2m/JCUlYdSoUahSpQoMDQ3h4eGBv//+O9+6JdlOdnY2/P390ahRI+jp6aFy5cpo2bIlDhw4UOjr8js0HBMTgzFjxqBatWrQ1taGo6Mj5syZg8zMTKlOzrD9kiVLsGzZMjg6OsLQ0BCurq4ICwuT6vn6+kptkMlk0qOwkYigoCD07NkT1apVg66uLmrWrIkxY8bgv//+k+r8+eefkMlk2LlzZ57Xb9myBTKZDOHh4QCAiIgIDBgwAA4ODtDT04ODgwMGDhyY51SGnOH006dP4/PPP4eZmRmqVKmCPn364OnTp1I9BwcH3Lx5EyEhIVJ7HBwcCt3PRXl/srOzsXjxYtSpUwc6OjqwsLDA0KFD8fjxY4V15RwiDA0NRatWraQ2bdy4EQBw+PBhNGnSBPr6+nB2dkZgYKDC63MOTV67dg2ffvop5HI5TE1NMXnyZGRmZuLOnTvw8PCAkZERHBwcsHjxYoXXp6amYsqUKWjUqJH0WldXV/zxxx952p3zudq6dSvq1q0LfX19NGzYUPoMv+3w4cNo1KgRdHR04OjoiCVLluS7L4UQ+OWXX6R9aWJigk8++QT//vtvoe8B8KY/5vde5Xe4VpnY3xYcHCyNUA
4bNkzqIzmfs4IODe/YsQOurq4wNDSEoaEhGjVqhPXr1xe6rX379kFfXx8jR46UPp8RERHo0aMHTE1Noauri8aNG2P37t3SazZt2oRPP/0UANCuXTspvsJGuHNivnLlCvr06QNjY2PI5XIMGTIEz58/V6hb1H7s4OCQ76HO3Kew5BzW3bp1K6ZMmYKqVatCR0cH//zzT6H7pig2bNiAhg0bQldXF6ampujduzdu376tEMuQIUMAAC1atIBMJpP6UM4RMEtLS4X3N+d3a+/evWjcuDF0dXWlIyJF+W4tqpz9smPHDnz99dewtraGoaEhvLy88OzZM7x8+RKjR4+GmZkZzMzMMGzYMLx69Up6fWGHXnP/Ljx//hyjR4+Gra0tdHR0YG5ujtatW+cZBT1x4gQ6dOgAY2Nj6Ovro3Xr1jh58qRCnX/++QfDhg1DrVq1oK+vj6pVq8LLywvXr18vUrtL8juZkJCAESNGwNTUFIaGhujevTv+/fffPO3N6e+XL1/GJ598AhMTE+nUk6LmLwWddpW73+f87gQFBWHYsGEwNTWFgYEBvLy88v1OK8o+LhZRAhs3bhQARHh4uFi5cqUAIE6ePCktd3NzE/Xr15eeZ2dniy5dughNTU0xa9Yscfz4cbFkyRJhYGAgGjduLFJTU6W69vb2olq1aqJBgwZi586d4siRI6JFixZCS0tLfPfdd6J169Zi7969Yt++faJ27drC0tJSvH79Wnq9j4+P0NbWFnZ2dmLevHni+PHjws/PT2hqagpPT0+FdgAQVatWFS4uLmLHjh3i1KlT4saNG+LmzZtCLpcLZ2dnsWXLFnH8+HExZcoUUalSJeHn51fovsnOzhbt2rUTOjo60vZnz54tqlevLgCI2bNnS3VLsh0hhPD29hYymUyMHDlS/PHHH+Lo0aNi3rx5YuXKlQrvhZubW552vx1HdHS0sLW1Ffb29mLNmjXixIkT4ocffhA6OjrC19dXqhcVFSUACAcHB+Hh4SH2798v9u/fL5ydnYWJiYlISEgQQgjxzz//iE8++UQAEKGhodLj7fc5t9WrV4sFCxaIAwcOiJCQELF582bRsGFD4eTkJNLT06V6jRs3Fq1bt87z+ubNm4vmzZtLz/fs2SO+++47sW/fPhESEiICAgKEm5ubMDc3F8+fP5fq5fTl6tWriwkTJohjx46J3377TZiYmIh27dpJ9S5fviyqV68uGjduLLXn8uXLhbw7RXt/Ro8eLQCI8ePHi8DAQPHrr78Kc3NzYWtrqxCnm5ubqFKlinBychLr168Xx44dE56engKAmDNnjnB2dpY+Ly1bthQ6OjriyZMn0utnz54tAAgnJyfxww8/iKCgIDFt2jRp23Xq1BE//fSTCAoKEsOGDRMAxO+//y69PiEhQfj6+oqtW7eKU6dOicDAQDF16lRRqVIlsXnzZoV25/SRjz76SOzevVscOXJEuLu7C01NTXHv3j2p3okTJ4SGhob4+OOPxd69e8WePXtE8+bNhZ2dncj9FTVq1CihpaUlpkyZIgIDA8WOHTtEnTp1hKWlpYiJiSn0ffDx8RH29vZ5ynP2SXFizy0xMVHqS99++63URx49elTgtmbNmiUAiD59+og9e/aI48ePi2XLlolZs2ZJdXJ/ly5btkxoaGiIH374QSo7deqU0NbWFm3atBG7du0SgYGBwtfXVwAQGzduFEIIERsbK+bPny8AiJ9//lmKLzY2tsA25cRsb28vvvrqK3Hs2DGxbNky6Xv77c9lUfuxvb298PHxybOt3N9Tp0+flr6fP/nkE3HgwAFx6NAh8eLFiwLjzf29lp+cfTBw4EBx+PBhsWXLFlG9enUhl8vF33//LYR487387bffSvsvNDRU/PPPP+Ly5ctixIgRAoAIDAxUeH/t7e2FtbW1qF69utiwYYM4ffq0uHjxYpG/W4sqZ7/Y29sLX19faV8bGhqKdu3aiU6dOompU6eK48ePi0WLFgkNDQ0xYc
IE6fU53+E5/aKw/delSxdhbm4u1q5dK4KDg8X+/fvFd999JwICAqQ6W7duFTKZTPTq1Uvs3btXHDx4UHh6egoNDQ1
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Stacked bar chart: number of distinct purchasing customers per month,\n",
"# new vs. already-known customers\n",
"purchases_graph = nb_purchases_graph_2\n",
"\n",
"purchases_graph_used = purchases_graph[purchases_graph[\"purchase_date_month\"] >= datetime(2021,4,1)]\n",
"\n",
"# Put both customer types on one common month index. A month in which one of\n",
"# the two groups has no row gets 0, so each height is stacked on the bar of its\n",
"# own month instead of being paired with the other group's rows by position.\n",
"by_month = purchases_graph_used.set_index(\"purchase_date_month\")\n",
"known_mask = by_month[\"is_customer_known\"].to_numpy(dtype=bool)\n",
"months = by_month.index.unique().sort_values()\n",
"nb_new = by_month.loc[~known_mask, \"nb_new_customer\"].reindex(months, fill_value=0)\n",
"nb_known = by_month.loc[known_mask, \"nb_new_customer\"].reindex(months, fill_value=0)\n",
"\n",
"# Bars: new customers at the bottom, known customers stacked on top\n",
"plt.bar(months, nb_new, width=12, label = \"Nouveau client\")\n",
"plt.bar(months, nb_known, bottom = nb_new, width=12, label = \"Ancien client\")\n",
"\n",
"# Show x ticks as abbreviated month + two-digit year\n",
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))\n",
"\n",
"# Titles and axis labels\n",
"plt.xlabel('Mois')\n",
"plt.ylabel(\"Nombre de client ayant commandé\")\n",
"plt.title(\"Nombre de client ayant commandé un ticket pour l'offre 'muséale groupe'\")\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "82895dfc-e5ca-4be0-af24-93c1be8f6248",
"metadata": {},
"source": [
"### Proportion de tickets de prix 0"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "5fd7484c-f111-4955-8a84-86d0d184f9f9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>amount</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" <th>start_date_time</th>\n",
" <th>end_date_time</th>\n",
" <th>open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>27559143</td>\n",
" <td>1</td>\n",
" <td>5512832.0</td>\n",
" <td>60870</td>\n",
" <td>point de vente france billet</td>\n",
" <td>2016-01-17 14:30:00+00:00</td>\n",
" <td>20.0</td>\n",
" <td>False</td>\n",
" <td>spectacle saison</td>\n",
" <td>chateau de chantilly</td>\n",
" <td>individuel</td>\n",
" <td>concert nouvel an ind pt fb</td>\n",
" <td>2016</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>27559144</td>\n",
" <td>1</td>\n",
" <td>5512832.0</td>\n",
" <td>60870</td>\n",
" <td>point de vente france billet</td>\n",
" <td>2016-01-17 14:30:00+00:00</td>\n",
" <td>20.0</td>\n",
" <td>False</td>\n",
" <td>spectacle saison</td>\n",
" <td>chateau de chantilly</td>\n",
" <td>individuel</td>\n",
" <td>concert nouvel an ind pt fb</td>\n",
" <td>2016</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>27559145</td>\n",
" <td>1</td>\n",
" <td>5512832.0</td>\n",
" <td>60870</td>\n",
" <td>point de vente france billet</td>\n",
" <td>2016-01-17 14:30:00+00:00</td>\n",
" <td>20.0</td>\n",
" <td>False</td>\n",
" <td>spectacle saison</td>\n",
" <td>chateau de chantilly</td>\n",
" <td>individuel</td>\n",
" <td>concert nouvel an ind pt fb</td>\n",
" <td>2016</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>27559146</td>\n",
" <td>1</td>\n",
" <td>5512833.0</td>\n",
" <td>60870</td>\n",
" <td>point de vente france billet</td>\n",
" <td>2016-01-22 11:32:59+00:00</td>\n",
" <td>20.0</td>\n",
" <td>False</td>\n",
" <td>spectacle saison</td>\n",
" <td>chateau de chantilly</td>\n",
" <td>individuel</td>\n",
" <td>concert nouvel an ind pt fb</td>\n",
" <td>2016</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>27559147</td>\n",
" <td>1</td>\n",
" <td>5512833.0</td>\n",
" <td>60870</td>\n",
" <td>point de vente france billet</td>\n",
" <td>2016-01-22 11:32:59+00:00</td>\n",
" <td>20.0</td>\n",
" <td>False</td>\n",
" <td>spectacle saison</td>\n",
" <td>chateau de chantilly</td>\n",
" <td>individuel</td>\n",
" <td>concert nouvel an ind pt fb</td>\n",
" <td>2016</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>1901-01-01 00:09:21+00:09</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id \\\n",
"0 27559143 1 5512832.0 60870 \n",
"1 27559144 1 5512832.0 60870 \n",
"2 27559145 1 5512832.0 60870 \n",
"3 27559146 1 5512833.0 60870 \n",
"4 27559147 1 5512833.0 60870 \n",
"\n",
" supplier_name purchase_date amount \\\n",
"0 point de vente france billet 2016-01-17 14:30:00+00:00 20.0 \n",
"1 point de vente france billet 2016-01-17 14:30:00+00:00 20.0 \n",
"2 point de vente france billet 2016-01-17 14:30:00+00:00 20.0 \n",
"3 point de vente france billet 2016-01-22 11:32:59+00:00 20.0 \n",
"4 point de vente france billet 2016-01-22 11:32:59+00:00 20.0 \n",
"\n",
" is_full_price name_event_types name_facilities name_categories \\\n",
"0 False spectacle saison chateau de chantilly individuel \n",
"1 False spectacle saison chateau de chantilly individuel \n",
"2 False spectacle saison chateau de chantilly individuel \n",
"3 False spectacle saison chateau de chantilly individuel \n",
"4 False spectacle saison chateau de chantilly individuel \n",
"\n",
" name_events name_seasons start_date_time \\\n",
"0 concert nouvel an ind pt fb 2016 1901-01-01 00:09:21+00:09 \n",
"1 concert nouvel an ind pt fb 2016 1901-01-01 00:09:21+00:09 \n",
"2 concert nouvel an ind pt fb 2016 1901-01-01 00:09:21+00:09 \n",
"3 concert nouvel an ind pt fb 2016 1901-01-01 00:09:21+00:09 \n",
"4 concert nouvel an ind pt fb 2016 1901-01-01 00:09:21+00:09 \n",
"\n",
" end_date_time open \n",
"0 1901-01-01 00:09:21+00:09 True \n",
"1 1901-01-01 00:09:21+00:09 True \n",
"2 1901-01-01 00:09:21+00:09 True \n",
"3 1901-01-01 00:09:21+00:09 True \n",
"4 1901-01-01 00:09:21+00:09 True "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3_tickets.head()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "10828dd8-8ec9-49eb-b450-acca741964c7",
"metadata": {},
"outputs": [],
"source": [
"# Share of zero-priced tickets for each company, one record per company.\n",
"# Records are collected in a list and turned into a DataFrame once, instead of\n",
"# growing a DataFrame with pd.concat at every iteration.\n",
"free_price_records = []\n",
"for company_number in ['1', '2', '3', '4', '101']:\n",
"    nom_dataframe = 'df' + company_number + '_tickets'\n",
"    # read-only access: the tickets frame is only counted, so no copy is taken\n",
"    df_tickets = globals()[nom_dataframe]\n",
"    nb_tickets = len(df_tickets)\n",
"    nb_free_tickets = int((df_tickets['amount'] == 0).sum())\n",
"\n",
"    # company 101 has a second tickets frame (df101_tickets_1); count it as well\n",
"    if company_number == '101':\n",
"        nb_tickets += len(df101_tickets_1)\n",
"        nb_free_tickets += int((df101_tickets_1['amount'] == 0).sum())\n",
"\n",
"    free_price_records.append({\n",
"        'company_number': company_number,\n",
"        # NaN instead of a ZeroDivisionError when a company has no ticket at all\n",
"        'prop_free_tickets': nb_free_tickets / nb_tickets if nb_tickets else float('nan'),\n",
"        'nb_tickets': nb_tickets,\n",
"    })\n",
"\n",
"barplot_prop_free_price = pd.DataFrame(free_price_records)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "065576ef-2515-43eb-a65d-21f07f228c9e",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAIiCAYAAAAkWjI2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABdz0lEQVR4nO3deVxU9f7H8fcIwiAC7qCJQJoLuSGaghdNDcisq5lXynK5auZNc6G6SWqKdUO7NyXL9VdptiD1cylTUyzXK3VLxTK17aqYQuYGrihwfn/4YH6NgzqjB0f09Xw85vFgvvM93/M5w5nk3fec71gMwzAEAAAAALgmFdxdAAAAAADcDAhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAABl7MCBA6pVq5aefvrpqx7j22+/VaVKlfT666+bWBkAMxGuANwQ5s+fL4vFYnt4enqqbt26+utf/6oDBw64u7yrtnPnTk2cOFF79+51eG3AgAEKDQ297jVdyt69e2WxWDR//nxTx/vXv/51xb4lv/8/vk+lvT+hoaEaMGDAVdXz8ssva+nSpVe17Y3ggw8+UGpq6jWNMXHiRFksFru2mTNnmvY7v1Vd6bNTWFiohx9+WJ07d3b4PBw8eFATJ05UVlbWZfdx4sQJ9erVS0899ZSeeuopkyoHYDbCFYAbyrx585SZmamMjAw9/vjjSktLU0xMjE6dOuXu0q7Kzp07lZycXGq4Gj9+vJYsWXL9i7oBdevWTZmZmapdu3aZ7YNwJQ0ePFiZmZl2bYSra1e7dm1lZmaqW7dupb6elJQkT09PLViwwCHcHjx4UMnJyVcMV4MGDdJdd92lyZMnm1U2gDLg6e4CAOCPmjZtqtatW0uSOnXqpKKiIr344otaunSpHn300VK3OX36tCpVqnQ9y7yi8+fPO/wRdbH69etfp2pufDVr1lTNmjXdXcZ1debMGfn4+FzXfdatW1d169a9rvssK2fOnJHVar3i5+x68Pb2Vrt27S75+j//+c9r3seHH354zWMAKHvMXAG4oZX8wbJv3z5JFy4Vq1y5sr777jvFxcXJz89PXbp0kSQdPXpUTz75pG677TZ5eXnp9ttv19ixY1VQUGA3psVi0fDhwzVnzhw1bNhQ3t7eCg8P18KFCx32v2PHDnXv3l1Vq1aV1WpVy5Yt9c4779j1WbdunSwWi9599109/fTTuu222+Tt7a0333xTf/nLXyRdCIollzyWzBKUdtnb2bNnlZSUpLCwMHl5eem2227TsGHDdPz4cbt+oaGhuv/++/XZZ5+pVatW8vHxUePGjfX222879b4ePHhQvXv3lp+fnwICApSQkKDc3NxS+37zzTf685//rGrVqslqtSoiIsKlP/SKi4v1j3/8Q/Xq1ZPValXr1q31+eef2/Up7bJAZ+Xn5+uZZ56xe89GjRplN9tpsVh06tQpvfPOO7bfw9133y3pQjgv2d5qtapatWpq3bq10tLSrrjvTZs2KSoqSlarVbfddpvGjx+vN9980+FYSn5fixcvVkREhKxWq5KTkyVJM2bMUIcOHVSrVi35+vqqWbNmeuWVV3T+/Hnb9nfffbeWL1+uffv22V0+K/3/+bdu3Tq72kq7VO3iywJDQ0P1/fffa/369bYxS87J4uJivfTSS2rUqJF8fHxUpUoVNW/eXK+99tpl35OSet577z0lJiYqKChIPj4+6tixo7Zt22bX95tvvtHDDz+s0NBQ+fj4KDQ0VI888ojt816i5PxYvXq1Bg4cqJo1a6pSpUoOn+0Szta+adMmdenSRX5+fqpUqZKio6O1fPlyh/EOHDigIUOGKDg4WF5eXqpTp4569eql33777ZLvtST99NNP6tOnj2rVqiVvb281adJEM2bMsHuv2rRpI0n661//avsdTJw40e49upbPH4Dri5krAD
e0n3/+WZLsZjXOnTunP//5z3riiSc0ZswYFRYW6uzZs+rUqZN++eUXJScnq3nz5tq4caNSUlKUlZXl8AfTJ598orVr12rSpEny9fXVzJkz9cgjj8jT01O9evWSJP3www+Kjo5WrVq1NH36dFWvXl3vvfeeBgwYoN9++01///vf7cZMSkpSVFSUZs+erQoVKqh169Y6duyYnn/+ec2YMUOtWrWSdOkZK8Mw1KNHD33++edKSkpSTEyMvv32W02YMEGZmZnKzMyUt7e3rf/27dv19NNPa8yYMQoMDNSbb76pQYMGqUGDBurQocMl39MzZ87onnvu0cGDB5WSkqKGDRtq+fLlSkhIcOi7du1a3XvvvWrbtq1mz56tgIAALVy4UAkJCTp9+rRT9z+98cYbCgkJUWpqqoqLi/XKK6+oa9euWr9+vaKioq64/eWcPn1aHTt21K+//qrnn39ezZs31/fff68XXnhB3333ndasWSOLxaLMzEx17txZnTp10vjx4yVJ/v7+kqTExES9++67eumllxQREaFTp05px44dOnLkyGX3/e233yo2NlYNGzbUO++8o0qVKmn27Nl67733Su2/detW7dq1S+PGjVNYWJh8fX0lSb/88ov69OljC4fbt2/XP/7xD+3evdsWlmfOnKkhQ4bol19+MfVS0iVLlqhXr14KCAjQzJkzJcl2jr3yyiuaOHGixo0bpw4dOuj8+fPavXu3Q9C/lOeff16tWrXSm2++qby8PE2cOFF33323tm3bpttvv13ShVDSqFEjPfzww6pWrZpycnI0a9YstWnTRjt37lSNGjXsxhw4cKC6deumd999V6dOnVLFihVL3bczta9fv16xsbFq3ry53nrrLXl7e2vmzJl64IEHlJaWZvs8HDhwQG3atNH58+dt59iRI0e0atUqHTt2TIGBgaXWsHPnTkVHR6tevXp69dVXFRQUpFWrVmnEiBE6fPiwJkyYoFatWmnevHn661//qnHjxtkuKyyZXTTj8wfgOjMA4AYwb948Q5Lx5ZdfGufPnzdOnDhhfPrpp0bNmjUNPz8/Izc31zAMw+jfv78hyXj77bfttp89e7Yhyfjwww/t2qdMmWJIMlavXm1rk2T4+PjYxjQMwygsLDQaN25sNGjQwNb28MMPG97e3kZ2drbdmF27djUqVapkHD9+3DAMw1i7dq0hyejQoYPDcX300UeGJGPt2rUOr/Xv398ICQmxPf/ss88MScYrr7xi1y89Pd2QZMydO9fWFhISYlitVmPfvn22tjNnzhjVqlUznnjiCYd9/dGsWbMMScbHH39s1/74448bkox58+bZ2ho3bmxEREQY58+ft+t7//33G7Vr1zaKioouuZ89e/YYkow6deoYZ86csbXn5+cb1apVM+655x5bW8nvf8+ePba2i9+fkuPu37+/7XlKSopRoUIF4+uvv7br97//+7+GJGPFihW2Nl9fX7ttSzRt2tTo0aPHJY/jUv7yl78Yvr6+xu+//25rKyoqMsLDwx2OJSQkxPDw8DB++OGHy45ZVFRknD9/3liwYIHh4eFhHD161PZat27dHN4Pw/j/8+/ic6zk/f/j73PChAnGxf/033nnnUbHjh0dxr3//vuNli1bXrbe0pTU06pVK6O4uNjWvnfvXqNixYrG4MGDL7ltYWGhcfLkScPX19d47bXXbO0l50e/fv2cqsGZ2tu1a2fUqlXLOHHihN3+mzZtatStW9dW+8CBA42KFSsaO3fuvORYpb3X8fHxRt26dY28vDy7vsOHDzesVqvtd/v11187bFviWj5/ANyDywIB3FDatWunihUrys/PT/fff7+CgoK0cuVKh/87/NBDD9k9/+KLL+Tr62ubdSpR8n92L74MrUuXLnZjenh4KCEhQT///LN+/fVX25hdunRRcHCww5inT592WBjg4ppc9cUXX9jVXOIvf/mLfH19HY6hZcuWqlevnu251WpVw4YNHS6putjatWvl5+
enP//5z3btffr0sXv+888/a/fu3bZ73QoLC22P++67Tzk5Ofrhhx+ueFw9e/aU1Wq1Pffz89MDDzygDRs2qKio6Ir
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Companies ordered from the lowest to the highest share of free tickets.\n",
"# A dedicated name is used so the generic `df` variable is not overwritten.\n",
"free_price_sorted = barplot_prop_free_price.sort_values(by = 'prop_free_tickets')\n",
"\n",
"# Bar chart of the share of free tickets, one bar per company\n",
"plt.figure(figsize=(10, 6))\n",
"plt.bar(free_price_sorted['company_number'], free_price_sorted['prop_free_tickets'])\n",
"plt.xlabel('Numéro de la société')\n",
"plt.ylabel('Proportion de billets gratuits')\n",
"plt.title('Proportion de billets gratuits par musée')\n",
"plt.xticks(free_price_sorted['company_number'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "b41b5434-0e5b-495b-bede-23f5cb45272c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>purchase_id</th>\n",
" <th>ticket_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>73518.000000</td>\n",
" <td>7.351800e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>10.096167</td>\n",
" <td>2.484660e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>2367.702603</td>\n",
" <td>4.636993e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.000000</td>\n",
" <td>2.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.000000</td>\n",
" <td>3.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>641981.000000</td>\n",
" <td>1.256574e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" purchase_id ticket_id\n",
"count 73518.000000 7.351800e+04\n",
"mean 10.096167 2.484660e+01\n",
"std 2367.702603 4.636993e+03\n",
"min 1.000000 1.000000e+00\n",
"25% 1.000000 1.000000e+00\n",
"50% 1.000000 2.000000e+00\n",
"75% 1.000000 3.000000e+00\n",
"max 641981.000000 1.256574e+06"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases.groupby('customer_id')[['purchase_id', 'ticket_id']].nunique().describe()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d1212b10-3933-450a-b001-9e2cbf308f79",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ticket_id</th>\n",
" <th>customer_id</th>\n",
" <th>purchase_id</th>\n",
" <th>event_type_id</th>\n",
" <th>supplier_name</th>\n",
" <th>purchase_date</th>\n",
" <th>type_of_ticket_name</th>\n",
" <th>amount</th>\n",
" <th>children</th>\n",
" <th>is_full_price</th>\n",
" <th>name_event_types</th>\n",
" <th>name_facilities</th>\n",
" <th>name_categories</th>\n",
" <th>name_events</th>\n",
" <th>name_seasons</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13070859</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>8.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>13070860</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13070861</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13070862</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13070863</td>\n",
" <td>48187</td>\n",
" <td>5107462</td>\n",
" <td>4</td>\n",
" <td>vente en ligne</td>\n",
" <td>2018-12-28 14:47:50+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>4.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>spectacle vivant</td>\n",
" <td>mucem</td>\n",
" <td>indiv prog enfant</td>\n",
" <td>l'école des magiciens</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826667</th>\n",
" <td>20662815</td>\n",
" <td>1256135</td>\n",
" <td>8007697</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 17:23:54+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826668</th>\n",
" <td>20662816</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826669</th>\n",
" <td>20662817</td>\n",
" <td>1256136</td>\n",
" <td>8007698</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 18:32:18+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826670</th>\n",
" <td>20662818</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1826671</th>\n",
" <td>20662819</td>\n",
" <td>1256137</td>\n",
" <td>8007699</td>\n",
" <td>5</td>\n",
" <td>vente en ligne</td>\n",
" <td>2023-11-08 19:30:28+00:00</td>\n",
" <td>Atelier</td>\n",
" <td>11.0</td>\n",
" <td>pricing_formula</td>\n",
" <td>False</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>mucem</td>\n",
" <td>indiv entrées tp</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1826672 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
"0 13070859 48187 5107462 4 vente en ligne \n",
"1 13070860 48187 5107462 4 vente en ligne \n",
"2 13070861 48187 5107462 4 vente en ligne \n",
"3 13070862 48187 5107462 4 vente en ligne \n",
"4 13070863 48187 5107462 4 vente en ligne \n",
"... ... ... ... ... ... \n",
"1826667 20662815 1256135 8007697 5 vente en ligne \n",
"1826668 20662816 1256136 8007698 5 vente en ligne \n",
"1826669 20662817 1256136 8007698 5 vente en ligne \n",
"1826670 20662818 1256137 8007699 5 vente en ligne \n",
"1826671 20662819 1256137 8007699 5 vente en ligne \n",
"\n",
" purchase_date type_of_ticket_name amount \\\n",
"0 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
"1 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"2 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"3 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"4 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
"... ... ... ... \n",
"1826667 2023-11-08 17:23:54+00:00 Atelier 11.0 \n",
"1826668 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826669 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
"1826670 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"1826671 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
"\n",
" children is_full_price name_event_types name_facilities \\\n",
"0 pricing_formula False spectacle vivant mucem \n",
"1 pricing_formula False spectacle vivant mucem \n",
"2 pricing_formula False spectacle vivant mucem \n",
"3 pricing_formula False spectacle vivant mucem \n",
"4 pricing_formula False spectacle vivant mucem \n",
"... ... ... ... ... \n",
"1826667 pricing_formula False offre muséale groupe mucem \n",
"1826668 pricing_formula False offre muséale groupe mucem \n",
"1826669 pricing_formula False offre muséale groupe mucem \n",
"1826670 pricing_formula False offre muséale groupe mucem \n",
"1826671 pricing_formula False offre muséale groupe mucem \n",
"\n",
" name_categories name_events name_seasons \n",
"0 indiv prog enfant l'école des magiciens 2018 \n",
"1 indiv prog enfant l'école des magiciens 2018 \n",
"2 indiv prog enfant l'école des magiciens 2018 \n",
"3 indiv prog enfant l'école des magiciens 2018 \n",
"4 indiv prog enfant l'école des magiciens 2018 \n",
"... ... ... ... \n",
"1826667 indiv entrées tp NaN 2023 \n",
"1826668 indiv entrées tp NaN 2023 \n",
"1826669 indiv entrées tp NaN 2023 \n",
"1826670 indiv entrées tp NaN 2023 \n",
"1826671 indiv entrées tp NaN 2023 \n",
"\n",
"[1826672 rows x 15 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"purchases"
]
},
{
"cell_type": "markdown",
"id": "b8a90eaa-c383-4f73-9fd6-6fbbe8eeefb8",
"metadata": {},
"source": [
"# 2 - Comportement d'achat bis (Alexis)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "dc45c1cd-2a78-48a6-aa2b-6a501254b6f2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(156289, 40)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>birthdate</th>\n",
" <th>street_id</th>\n",
" <th>is_partner</th>\n",
" <th>gender</th>\n",
" <th>is_email_true</th>\n",
" <th>opt_in</th>\n",
" <th>structure_id</th>\n",
" <th>profession</th>\n",
" <th>language</th>\n",
" <th>...</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>name_event_types</th>\n",
" <th>avg_amount</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>3262.190868</td>\n",
" <td>4.179306</td>\n",
" <td>3258.011562</td>\n",
" <td>51.0</td>\n",
" <td>offre muséale individuel</td>\n",
" <td>6.150659</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>2502.715509</td>\n",
" <td>1408.715532</td>\n",
" <td>1093.999977</td>\n",
" <td>5.0</td>\n",
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>3698.198229</td>\n",
" <td>5.221840</td>\n",
" <td>3692.976389</td>\n",
" <td>2988.0</td>\n",
" <td>spectacle vivant</td>\n",
" <td>7.762474</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>3803.369792</td>\n",
" <td>0.146331</td>\n",
" <td>3803.223461</td>\n",
" <td>9.0</td>\n",
" <td>offre muséale groupe</td>\n",
" <td>4.452618</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1705.261192</td>\n",
" <td>1456.333715</td>\n",
" <td>248.927477</td>\n",
" <td>0.0</td>\n",
" <td>formule adhésion</td>\n",
" <td>6.439463</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id birthdate street_id is_partner gender is_email_true \\\n",
"0 1 NaN 2 False 2 True \n",
"1 1 NaN 2 False 2 True \n",
"2 1 NaN 2 False 2 True \n",
"3 1 NaN 2 False 2 True \n",
"4 2 NaN 2 False 1 True \n",
"\n",
" opt_in structure_id profession language ... vente_internet_max \\\n",
"0 False NaN NaN NaN ... 1.0 \n",
"1 False NaN NaN NaN ... 1.0 \n",
"2 False NaN NaN NaN ... 1.0 \n",
"3 False NaN NaN NaN ... 1.0 \n",
"4 True NaN NaN NaN ... 0.0 \n",
"\n",
" purchase_date_min purchase_date_max time_between_purchase \\\n",
"0 3262.190868 4.179306 3258.011562 \n",
"1 2502.715509 1408.715532 1093.999977 \n",
"2 3698.198229 5.221840 3692.976389 \n",
"3 3803.369792 0.146331 3803.223461 \n",
"4 1705.261192 1456.333715 248.927477 \n",
"\n",
" nb_tickets_internet name_event_types avg_amount nb_campaigns \\\n",
"0 51.0 offre muséale individuel 6.150659 NaN \n",
"1 5.0 formule adhésion 6.439463 NaN \n",
"2 2988.0 spectacle vivant 7.762474 NaN \n",
"3 9.0 offre muséale groupe 4.452618 NaN \n",
"4 0.0 formule adhésion 6.439463 4.0 \n",
"\n",
" nb_campaigns_opened time_to_open \n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
"[5 rows x 40 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Chargement des données temporaires\n",
"BUCKET = \"projet-bdc2324-team1\"\n",
"FILE_KEY_S3 = \"0_Temp/Company 1 - customer_event.csv\"\n",
"FILE_PATH_S3 = BUCKET + \"/\" + FILE_KEY_S3\n",
"\n",
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
" customer = pd.read_csv(file_in, sep=\",\")\n",
"\n",
"print(customer.shape)\n",
"customer.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "89fcb455-efb4-4ad4-ab88-efd6c8a76287",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'birthdate', 'street_id', 'is_partner', 'gender',\n",
" 'is_email_true', 'opt_in', 'structure_id', 'profession', 'language',\n",
" 'mcp_contact_id', 'last_buying_date', 'max_price', 'ticket_sum',\n",
" 'average_price', 'fidelity', 'average_purchase_delay',\n",
" 'average_price_basket', 'average_ticket_basket', 'total_price',\n",
" 'purchase_count', 'first_buying_date', 'country', 'age', 'tenant_id',\n",
" 'event_type_id', 'nb_tickets', 'nb_purchases', 'total_amount',\n",
" 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n",
" 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
" 'name_event_types', 'avg_amount', 'nb_campaigns', 'nb_campaigns_opened',\n",
" 'time_to_open'],\n",
" dtype='object')"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer.columns"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "d7b2356a-d5fc-4547-b3ff-fded0e304fb6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>average_price</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>purchase_count</th>\n",
" <th>total_price</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>7.030122</td>\n",
" <td>-67.790969</td>\n",
" <td>13.751530</td>\n",
" <td>1.956087</td>\n",
" <td>641472</td>\n",
" <td>8821221.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>307</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>3</td>\n",
" <td>18.333333</td>\n",
" <td>30.666667</td>\n",
" <td>36.666667</td>\n",
" <td>2.000000</td>\n",
" <td>3</td>\n",
" <td>110.0</td>\n",
" <td>222.0</td>\n",
" <td>124.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>4</td>\n",
" <td>10.250000</td>\n",
" <td>5.000000</td>\n",
" <td>20.500000</td>\n",
" <td>2.000000</td>\n",
" <td>2</td>\n",
" <td>41.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>5</td>\n",
" <td>9.500000</td>\n",
" <td>0.000000</td>\n",
" <td>19.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1</td>\n",
" <td>19.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" customer_id average_price average_purchase_delay average_price_basket \\\n",
"0 1 7.030122 -67.790969 13.751530 \n",
"4 2 0.000000 0.000000 0.000000 \n",
"6 3 18.333333 30.666667 36.666667 \n",
"7 4 10.250000 5.000000 20.500000 \n",
"9 5 9.500000 0.000000 19.000000 \n",
"\n",
" average_ticket_basket purchase_count total_price nb_campaigns \\\n",
"0 1.956087 641472 8821221.5 0.0 \n",
"4 1.000000 307 0.0 4.0 \n",
"6 2.000000 3 110.0 222.0 \n",
"7 2.000000 2 41.0 7.0 \n",
"9 2.000000 1 19.0 4.0 \n",
"\n",
" nb_campaigns_opened \n",
"0 0.0 \n",
"4 0.0 \n",
"6 124.0 \n",
"7 7.0 \n",
"9 0.0 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"achat = ['customer_id', 'average_price', 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'purchase_count', 'total_price', 'nb_campaigns',\n",
" 'nb_campaigns_opened']\n",
"\n",
"customer_achat = customer[achat].drop_duplicates(subset = ['customer_id'])\n",
"customer_achat['nb_campaigns'] = customer_achat['nb_campaigns'].fillna(0)\n",
"customer_achat['nb_campaigns_opened'] = customer_achat['nb_campaigns_opened'].fillna(0)\n",
"customer_achat = customer_achat.fillna(0)\n",
"customer_achat.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "5559748f-1745-4651-a9f6-94702c7ee66f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>average_price</th>\n",
" <th>average_purchase_delay</th>\n",
" <th>average_price_basket</th>\n",
" <th>average_ticket_basket</th>\n",
" <th>purchase_count</th>\n",
" <th>total_price</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" <td>151865.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>5.252070</td>\n",
" <td>-206.581486</td>\n",
" <td>11.451596</td>\n",
" <td>1.723372</td>\n",
" <td>0.655148</td>\n",
" <td>16.994064</td>\n",
" <td>40.923241</td>\n",
" <td>7.870681</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>7.915955</td>\n",
" <td>2996.743657</td>\n",
" <td>48.271194</td>\n",
" <td>7.045950</td>\n",
" <td>5.694038</td>\n",
" <td>313.099102</td>\n",
" <td>70.445724</td>\n",
" <td>23.119061</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>-44863.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>5.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>11.000000</td>\n",
" <td>0.000000</td>\n",
" <td>19.000000</td>\n",
" <td>2.000000</td>\n",
" <td>1.000000</td>\n",
" <td>20.000000</td>\n",
" <td>32.000000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>320.000000</td>\n",
" <td>1914.000000</td>\n",
" <td>9900.000000</td>\n",
" <td>900.000000</td>\n",
" <td>1508.000000</td>\n",
" <td>64350.000000</td>\n",
" <td>439.000000</td>\n",
" <td>434.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" average_price average_purchase_delay average_price_basket \\\n",
"count 151865.000000 151865.000000 151865.000000 \n",
"mean 5.252070 -206.581486 11.451596 \n",
"std 7.915955 2996.743657 48.271194 \n",
"min 0.000000 -44863.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 \n",
"75% 11.000000 0.000000 19.000000 \n",
"max 320.000000 1914.000000 9900.000000 \n",
"\n",
" average_ticket_basket purchase_count total_price nb_campaigns \\\n",
"count 151865.000000 151865.000000 151865.000000 151865.000000 \n",
"mean 1.723372 0.655148 16.994064 40.923241 \n",
"std 7.045950 5.694038 313.099102 70.445724 \n",
"min 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 2.000000 \n",
"50% 0.000000 0.000000 0.000000 5.000000 \n",
"75% 2.000000 1.000000 20.000000 32.000000 \n",
"max 900.000000 1508.000000 64350.000000 439.000000 \n",
"\n",
" nb_campaigns_opened \n",
"count 151865.000000 \n",
"mean 7.870681 \n",
"std 23.119061 \n",
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 1.000000 \n",
"75% 3.000000 \n",
"max 434.000000 "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer_wto_outlier = customer_achat[customer_achat['customer_id']!=1]\n",
"\n",
"customer_wto_outlier[['average_price', 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'purchase_count', 'total_price', 'nb_campaigns', 'nb_campaigns_opened']].describe()"
]
},
{
"cell_type": "markdown",
"id": "b49c9e93-f324-42ee-a262-34ffb44a2261",
"metadata": {},
"source": [
"# Event"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "4971e35d-a762-4e18-9443-fd9571bd3f1e",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAJgCAYAAACXyFewAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACRpklEQVR4nOzdeXxM1/8/8NdkXyQjkZ2IRAiRxFqRqJ2EirVKG9LYtUpqSa211lZ76aeWqtrXompLY6/YhSRUKBUEiSAbQURyfn/45X6NCZ1h4o54PR+PeTzMuWfuvGZJvHPuuecqhBACRERERPRKBnIHICIiInoXsGgiIiIi0gCLJiIiIiINsGgiIiIi0gCLJiIiIiINsGgiIiIi0gCLJiIiIiINsGgiIiIi0gCLJiIiIiINsGgiIiLSUwkJCbCwsMD8+fPfaB9KpfKN9kHPsGgqJsuWLYNCoYCZmRmuXbumtr1x48bw8fGRIRnQvXt3lCpVSpbn/i8KhQLjx4+XOwa9hp07d/Kze8GBAwegUChw4MABuaO8886fP4/x48fj6tWrckd5bY0bN0bjxo2l+1evXn3l9+P+/fvo1KkTBg4ciIEDB7728/r5+WHLli0YNWoUjh8//tr7KQmOHDmC8ePHIzMz87Uez6KpmOXm5uLbb7+VOwZRsdu5cycmTJggdwy9UqtWLRw9ehS1atWSO8o77/z585gwYcI7XTRpq1evXqhbty6mTZv2xvtq2rQpFi1ahC5duuDevXs6SPduOnLkCCZMmMCiSV+1bNkSa9asQXx8vNxRdEIIgUePHskdg0hNXl4enj59KncMFdbW1qhXrx6sra3ljvJOePjwodwR9MqGDRuwatUqKBQKnewvNDQUV69eRZkyZXSyv/cRi6ZiNmzYMJQpUwbDhw//z76PHz/GyJEj4e7uDhMTE5QtWxZfffWVWkVcoUIFhISEYPv27ahZsybMzc1RtWpVbN++HcCzQ4NVq1aFpaUl6tati1OnThX5fH///TeaNWsGS0tL2NvbY8CAAWq/tBQKBQYMGICFCxeiatWqMDU1xfLlywEAly5dQmhoKBwcHGBqaoqqVavif//7n0bvS3Z2Nvr06YMyZcqgVKlSaNmyJf75558i+77J8xQUFGD+/PmoUaMGzM3NUbp0adSrVw9//PGHSp/p06ejSpUqMDU1hYODAz7//HPcuHFDZV+Fh1RPnjyJBg0awMLCAh4eHpg2bRoKCgpU9jdp0iR4eXlJz+nn54cffvhBZX8xMTFo1qwZrKysYGFhgcDAQOzYsUOlT+Fh3n379knvl7W1NT7//HPk5OQgNTUVnTt3RunSpeHs7IzIyEjk5eVJjy8c/p8xYwa+//57VKhQAebm5mjcuDH++ecf5OXlYcSIEXBxcYFSqUSHDh2QlpamkmH9+vUICgqCs7Oz9F0bMWIEcnJypD7du3eXPhOFQiHdCkcFhBD46aefpM/BxsYGnTp1wpUrV1Seq0KFCujevbva5/jiYY3Cw14rV67E0KFDUbZsWZiamuLy5ct4+PAhIiMj4e7uDjMzM9ja2qJOnTpYu3at2n6fN378+CL/cyr8DJ4f4Sj8GYyKikKtWrVgbm6OKlWqYOnSpSqPfdnhuWXLlsHLy0v6Pq9YsQLdu3dHhQoV/vOxhZ/psmXLVNpPnTqFtm3bwtbWFmZmZqhZsyY2bNig0ud135vC92D37t3o0aMHbG1tYWlpiTZt2qh9hrt370a7du1Qrlw5mJmZwdPTE/369cPdu3dV+hW+36dPn0anTp1gY2ODihUrvvT5P/nkEwBAkyZNpO/XsmXL8N1338HIyAjJyclqj+vZsyfKlCmDx48fA/i/z23Lli3w8/ODmZkZPDw8MG/ePLXHZmdnS+9V4e/jQYMGqXzvX0YIgenTp8PNzQ1mZmaoVasWdu3a9Z+PK/Rfn2V8fDwUCgV++eUXtcfu2rULCoVC5Xfc879DTUxMUKVKFb
XXXPh9W7t2LUaPHg0XFxdYW1ujefPmuHjxotrz7NmzB82aNYO1tTUsLCxQv3597N27V6VP4WeckJCATz75BEqlEra2thgyZAiePn2KixcvomXLlrCyskKFChUwffp0tefR9HMo/L9q5cqVqFq1KiwsLFC9enXp/8XCPN988w0AwN3dXfoeaXX4XFCx+PXXXwUAcfLkSfHDDz8IAGLv3r3S9kaNGolq1apJ9wsKCkRwcLAwMjISY8aMEdHR0WLmzJnC0tJS1KxZUzx+/Fjq6+bmJsqVKyd8fHzE2rVrxc6dO4W/v78wNjYWY8eOFfXr1xebN28WW7ZsEZUrVxaOjo7i4cOH0uPDw8OFiYmJKF++vJg8ebKIjo4W48ePF0ZGRiIkJETldQAQZcuWFX5+fmLNmjVi37594ty5c+Lvv/8WSqVS+Pr6ihUrVojo6GgxdOhQYWBgIMaPH//K96agoEA0adJEmJqaSs8/btw44eHhIQCIcePGSX3f5HmEECIsLEwoFArRu3dvsXXrVrFr1y4xefJk8cMPP0h9+vbtKwCIAQMGiKioKLFw4UJhb28vXF1dxZ07d1Q+szJlyohKlSqJhQsXit27d4v+/fsLAGL58uVSv6lTpwpDQ0Mxbtw4sXfvXhEVFSXmzp2rkvfAgQPC2NhY1K5dW6xfv178/vvvIigoSCgUCrFu3TqpX+H3yN3dXQwdOlRER0eL77//XhgaGorPPvtM1KpVS0yaNEns3r1bDB8+XAAQs2bNkh6flJQkAAg3NzfRpk0bsX37drFq1Srh6OgoKleuLMLCwkTPnj3Frl27xMKFC0WpUqVEmzZtVN7D7777TsyZM0fs2LFDHDhwQCxcuFC4u7uLJk2aSH0uX74sOnXqJACIo0ePSrfC722fPn2EsbGxGDp0qIiKihJr1qwRVapUEY6OjiI1NVXaj5ubmwgPD1f7HBs1aiQaNWok3d+/f7/03ezUqZP4448/xPbt28W9e/dEv379hIWFhZg9e7bYv3+/2L59u5g2bZqYP3/+K78r48aNE0X9Siz8DJKSklRylitXTnh7e4sVK1aIP//8U3zyyScCgDh48KBazv3796vtr127dmLbtm1i1apVwtPTU7i6ugo3N7dXPlaI//tMf/31V6lt3759wsTERDRo0ECsX79eREVFie7du6v1e933pjCzq6ur9H1ZvHixcHBwEK6uriIjI0Pqu2DBAjF16lTxxx9/iIMHD4rly5eL6tWrCy8vL/HkyRO199vNzU0MHz5c7N69W/z+++9FPn9aWpqYMmWKACD+97//Sd+vtLQ0cfv2bWFqaipGjx6t8ph79+4Jc3Nz8c0330htbm5uomzZsqJ8+fJi6dKlYufOnaJr164CgJgxY4bULycnR9SoUUPY2dmJ2bNniz179ogffvhBKJVK0bRpU1FQUPDK96vwtfXq1Ut6r8qWLSucnJxUvsdF0fSzrFmzpqhfv77a4zt37iwcHBxEXl6eEKLo36GDBw8WCoVCjBkzRnpc4fetQoUKomvXrmLHjh1i7dq1onz58qJSpUri6dOnUt+VK1cKhUIh2rdvLzZv3iy2bdsmQkJChKGhodizZ4/a++Dl5SW+++47sXv3bjFs2DDp922VKlXEvHnzxO7du0WPHj0EALFp06bX+hwKs9etW1ds2LBB7Ny5UzRu3FgYGRmJf//9VwghRHJyshg4cKAAIDZv3ix9j7Kysl75mTyPRVMxeb5oys3NFR4eHqJOnTrSh/xi0RQVFSUAiOnTp6vsZ/369QKAWLx4sdTm5uYmzM3NxY0bN6S2uLg4AUA4OzuLnJwcqf33338XAMQff/whtYWHhwsAKoWDEEJMnjxZABAxMTFSGwChVCpFenq6St/g4GBRrlw5tS/bgAEDhJmZmVr/5+3ateuVz/980fQmz/PXX38JAGq/TJ+XmJgoAIj+/furtB8/flwAEKNGjZLaGjVqJACI48ePq/T19vYWwcHB0v2QkB
BRo0aNlz6nEELUq1dPODg4iPv370ttT58+FT4+PqJcuXLS96TwezRw4ECVx7dv314AELNnz1Zpr1GjhqhVq5Z0v/A
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Nombre de consommateurs uniques par type d'évènement\n",
"\n",
"event_counts = customer.groupby('name_event_types')['customer_id'].nunique()\n",
"\n",
"event_counts.plot(kind='bar')\n",
"plt.xlabel(\"Type d'évènement\")\n",
"plt.ylabel('Nombre de consommateurs uniques')\n",
"plt.title(\"Nombre de consommateurs uniques par type d'évènement\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "bc65a711-d172-4839-b487-3047280fc3a6",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtIAAAJICAYAAABMlwOPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACqtUlEQVR4nOzdd1xT1/8/8FdYYQgRRUAcuHGAWxFtRavgwj1asQhq0bqQonXUuhVn1dbd1r1w103BhaKiOFCxzn5cKIgiWzbn90d/3K8RB4lgGK/n48FDc+47yTs3J5c3J+eeKxNCCBARERERkUq0NJ0AEREREVFRxEKaiIiIiEgNLKSJiIiIiNTAQpqIiIiISA0spImIiIiI1MBCmoiIiIhIDSykiYiIiIjUwEKaiIiIiEgNLKSJiIiIiNTAQpqIiAqVpKQk1K5dG3379kV2dram06FPlB/vZ0JCAho2bIg+ffrkc3ZEn4aFNBV6GzZsgEwmg76+Ph49epRre5s2bWBra6uBzAAPDw+UKlVKI889ffp0yGSyj/60adPmo4917tw5TJ8+HXFxcZ+cj7o8PDxQpUoVte//tiNHjmD69Onv3FalShV4eHio9HgPHz6ETCbDokWLPj05ANu2bcPSpUvz5bEKQkF8rnx9ffHXX399NM7T0xMWFhbYsmULtLSK/q8pDw+PPH1WVe2TmvCuz3mVKlXe+1kD8uf9NDExwZEjRxAaGoolS5ao9RjFxbNnzzB9+nSEhYVpOhUCoKPpBIjyKi0tDT///DM2b96s6VQKhe+++w4dO3aUbkdGRqJXr14YPXo0XF1dpXYTE5OPPta5c+cwY8YMeHh4oHTp0gWR7md35MgRrFix4p2/4Pft25en/VKQtm3bhvDwcHh7e2s0j8/J19cXffr0QY8ePd4bs2LFCly/fh1nz56FXC7/fMkVoClTpuD777+Xbl+5cgUjR46Er68v2rZtK7WXK1dOE+kVqPx8P62srHD06FG0bdsW9vb2aNmyZT5lWbQ8e/YMM2bMQJUqVdCwYUNNp1PisZCmIqNjx47Ytm0bxo0bhwYNGmg6nU8mhEBqaioMDAzUun/FihVRsWJF6fbDhw8BAJUrV0aLFi3yI8Viq1GjRppOgd5j5MiRGDlypKbTUEtGRgZkMhl0dJR/tVavXh3Vq1eXbqempgIAatasWew/q/n9ftatWxfPnz/Pt8cj+lRF/zszKjHGjx+PsmXLYsKECR+NTU1NxaRJk1C1alXo6emhQoUKGDlyZK6pC1WqVIGLiwsOHTqERo0awcDAAHXq1MGhQ4cA/DetpE6dOjAyMkLz5s1x6dKldz7fzZs30a5dOxgZGaFcuXIYNWoUXr9+rRQjk8kwatQorF69GnXq1IFcLsfGjRsBAPfu3YOrqyvMzc0hl8tRp04drFixQo29lNuBAwfg4OAAQ0NDGBsbw8nJCefPn5e2T58+HT/++CMAoGrVqtLXzKdOnQIA7NixA87Ozihfvry0fyZOnIjk5GS1c9qwYQNsbGyk17pp06Z3xqWnp2P27NmoXbs25HI5ypUrh0GDBuHFixcffHwPDw9p/7351XnOHxvvmtoRFxeHsWPHolq1apDL5TA3N0fnzp1x+/bt9z5PRkYG3N3dUapUKanPCCGwcuVKNGzYEAYGBjA1NUWfPn3wv//9T7pfmzZtcPjwYTx69EgpvxyrVq1CgwYNUKpUKRgbG6N27dr46aefPviaAWDGjBmwt7dHmTJlYGJigsaNG2Pt2rUQQuSK3bZtGxwcHFCqVCmUKlUKDRs2xNq1a3PFhYaG4ssvv4ShoSGqVauGefPm5ZrnmpCQgHHjxil93ry9vZX6iEwmQ3JyMjZu3PjOaUdRUVEYNmwYKlasCD09PVStWhUzZsxAZmam0nOps29ypuUsWLAAc+bMQeXKlaGvr4+mTZvi+PHjSrH379/HoEGDULNmTRgaGqJChQro2rUrbty4oRR36t
QpyGQybN68GWPHjkWFChUgl8tx//79D+byLmfOnIFMJsP27dtzbdu0aRNkMhlCQ0MB/N90srwcc/LSFz/k8OHDaNiwIeRyOapWrarStKaPvZ8ZGRkwNzeHm5tbrvvGxcXBwMAAPj4+UtvbfczKygpeXl5ISkpSum/OcXbz5s2oU6cODA0N0aBBA+nz+aa8HHdz3udt27ZhwoQJKF++PEqVKoWuXbvi+fPnSExMxNChQ2FmZgYzMzMMGjQoV055fR9yplN96DN36tQpNGvWDAAwaNAg6bOU883b//73P3zzzTewsrKCXC6HhYUF2rVrx2kgBUkQFXLr168XAERoaKj49ddfBQBx/Phxabujo6OoV6+edDs7O1t06NBB6OjoiClTpoiAgACxaNEiYWRkJBo1aiRSU1OlWGtra1GxYkVha2srtm/fLo4cOSLs7e2Frq6umDp1qmjVqpXYu3ev2Ldvn6hVq5awsLAQr1+/lu7v7u4u9PT0ROXKlcWcOXNEQECAmD59utDR0REuLi5KrwOAqFChgqhfv77Ytm2bOHHihAgPDxc3b94UCoVC2NnZiU2bNomAgAAxduxYoaWlJaZPn57n/fTgwQMBQCxcuFBq27p1qwAgnJ2dxV9//SV27NghmjRpIvT09MSZM2eEEEI8efJEjB49WgAQe/fuFefPnxfnz58X8fHxQgghZs2aJZYsWSIOHz4sTp06JVavXi2qVq0q2rZtq/T806ZNE3k5pOS8n927dxcHDx4UW7ZsETVq1BCVKlUS1tbWUlxWVpbo2LGjMDIyEjNmzBCBgYHizz//FBUqVBB169ZVeh/edv/+fdGnTx8BQHo958+fl957a2tr4e7uLsUnJCSIevXqCSMjIzFz5kzx999/iz179ogxY8aIEydOvHP/xsbGirZt2wpLS0tx6dIl6bE8PT2Frq6uGDt2rPD39xfbtm0TtWvXFhYWFiIqKkoIIcTNmzdFq1athKWlpVJ+Qgixfft2AUCMHj1aBAQEiGPHjonVq1cLLy+vj+5bDw8PsXbtWhEYGCgCAwPFrFmzhIGBgZgxY4ZS3JQpUwQA0atXL7Fr1y4REBAgFi9eLKZMmSLFODo6irJly4qaNWuK1atXi8DAQDFixAgBQGzcuFGKS05OFg0bNhRmZmZi8eLF4tixY+LXX38VCoVCfPXVVyI7O1sIIcT58+eFgYGB6Ny5s/R6b968KYQQIjIyUnr/16xZI44dOyZmzZol5HK58PDwkJ5L3X2T895VqlRJfPHFF2LPnj1i165dolmzZkJXV1ecO3dOig0KChJjx44Vu3fvFkFBQWLfvn2iR48ewsDAQNy+fVuKO3nypPSZ7tOnjzhw4IA4dOiQiImJ+ej7lHPfXbt2SW2NGjUSrVq1yhXbrFkz0axZM+m2KsecvPTF9zl27JjQ1tYWX3zxhdi7d6+0vypXrvzRz3le388ffvhBGBgYSMeaHCtXrhQAxPXr14UQ7+5jS5YsESYmJsLR0VFkZWVJ9wUgqlSpIpo3by527twpjhw5Itq0aSN0dHTEv//+K8Xl9bib815ZW1sLDw8P4e/vL1avXi1KlSol2rZtK5ycnMS4ceNEQECAmD9/vtDW1hajR49W633Iy2cuPj5eOob+/PPP0mfpyZMnQgghbGxsRI0aNcTmzZtFUFCQ2LNnjxg7dqw4efLkB98zUh8LaSr03iyk09LSRLVq1UTTpk2lX9BvF9L+/v4CgFiwYIHS4+zYsUMAEL///rvUZm1tLQwMDERERITUFhYWJgCI8uXLi+TkZKn9r7/+EgDEgQMHpDZ3d3cBQPz6669KzzVnzhwBQAQHB0ttAIRCoRCvXr1Siu3QoYOoWLFirl8mo0aNEvr6+rni3+ftQi8rK0tYWVkJOzs7pV80iYmJwtzcXLRs2VJqW7hwoQAgHjx48MHnyM7OFhkZGSIoKEgAENeuXZO25aWQzsmpcePG0vsnhBAPHz4Uurq6SoV0TtG0Z8
8epccIDQ0VAMTKlSs/+FwjR458bz5vF9IzZ84UAERgYOB7H+/N/fvgwQNRt25dUbduXfHw4UMp5vz58wKA+OWXX5T
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Total number of tickets purchased, per event type\n",
"\n",
"total_tickets_by_event = customer.groupby('name_event_types')['nb_tickets'].sum()\n",
"\n",
"# Series.plot.bar is the accessor form of plot(kind='bar'); it returns the Axes it drew on,\n",
"# so the labels and title are set on that Axes instead of through the pyplot state machine.\n",
"ax = total_tickets_by_event.plot.bar(figsize=(8, 5))\n",
"ax.set_xlabel(\"Type d'évènements\")\n",
"ax.set_ylabel('Nombre Total de tickets achetés')\n",
"ax.set_title(\"Nombre Total de tickets achetés par Type d'évènements\")\n",
"plt.xticks(rotation=45)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "c95cc35c-abfc-47c7-9b8a-ac69bfd60dd8",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAwsAAAJgCAYAAADF4v+XAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACNvklEQVR4nOzdd1hT1/8H8HfYIENQQERkSB0IuFAL1j1x1lE3DrBWrcXdOqq4tdY6W7TWgZPi3nuL4ldBBBfWgWIV3IiioJDz+8MfqTEJgkIu4Pv1PDwPOfcmeecmuckn59xzZUIIASIiIiIiovfoSB2AiIiIiIgKJhYLRERERESkFosFIiIiIiJSi8UCERERERGpxWKBiIiIiIjUYrFARERERERqsVggIiIiIiK1WCwQEREREZFaLBaIiIiIiEgtFgtERIXY3bt3YWNjgxEjRkgdhYgoz/To0QMVKlTAo0ePPvo2/P39Ua5cOSQnJ+ddsM9QroqFkJAQyGQyGBkZ4fbt2yrLGzRoAHd39zwLlxt9+vSBqampJPf9ITKZDBMnTszT20xPT8fvv/+Or776CpaWljAwMIC9vT06d+6MY8eO5el9FVYNGjRAgwYNPvl23rx5A1tbW3z55Zca15HL5Shbtiw8PT0/+f7UCQ4ORkhISL7c9oc4OTlBJpNp3JarVq2CTCaDTCbD0aNHtZpNSi9fvsTEiRPVPuasfeWtW7cUbX369IGTk5PSek5OTujTp89HZ8jIyEDXrl3RqFEjzJ49+6NvR9s+9XF/LO43tSu79wjlnJT7/7xw9OhRlc+HPn36ZPv5vHjxYhw6dAh79+5FyZIlP/q+lyxZgvLly0uyvylopk+fjq1bt37UdT+qZyE9PR0///zzR90hfbpHjx6hTp06GD58ONzd3RESEoJDhw7ht99+g66uLho3boyYmBipYxYZ+vr68PPzw//+9z9cvnxZ7ToHDx7EnTt3EBAQkC8ZpP6wMDMzw/Hjx3Hjxg2VZcuXL4e5ubkEqaT18uVLTJo0Se0XoVatWiEiIgJ2dnbZ3saWLVswfvz4j84wZswY6OnpKQo20oz7Te3L7j1COSf1/l/bzp07h/Hjx2P37t1wdnb+pNvS09PDhg0bcOfOHfz66695lLBw+pRiQe9jrtSiRQusW7cOI0eORJUqVT7qjgsSIQTS0tJgbGwsdZQc6dWrF2JiYrBv3z40atRIaVnXrl0xfPhwWFpaSpSuaAoICMBvv/2G5cuXq/0Fd/ny5TAwMEDPnj0lSJf/vvrqK1y4cAHLly/HtGnTFO03btzA8ePH0a9fP/z1118SJixYrK2tYW1t/cH1qlWr9kn3U1A//DIzM5GRkQFDQ0Opoyhwv/l5evnyJUxMTKSOka3C9h0kv1WvXh0PHz7Ms9szNTVFVFRUnt3e5+ijehZ+/PFHlChRAj/99NMH101LS8OYMWPg7Oys6PL9/vvvVcaPOTk5oXXr1ti5cyeqVasGY2NjVKpUCTt37gTwtlu/UqVKKFasGGrVqoXIyEi193fp0iU0btwYxYoVg7W1NQYPHoyXL18qrSOTyTB48GAsXrwYlSpVgqGhIVauXAkAuHbtGrp37w4bGxsYGhqiUqVK+OOPP3K0XVJSUvDtt9+iRIkSMDU1RYsWLfDPP/+oXfdj7ycqKgp79uxBQECAygdelpo1a6Js2bIAgIcPH2LQoEFwc3ODqakpbGxs0KhRI5w4cULpOrdu3YJMJsPs2bMxZ84cODs7w9TUFN7e3jh9+rTSupGRkejatSucnJxgbGwMJycndOvWTWVo2sSJE9X+2vn+EI3w8HDo6+tj5MiRatdbtmxZtttECIFZs2bB0dERRkZGqF69Ovbs2aN23ZSUFIwcOVLp9Th06FCkpqZmex+VKlWCt7c3Vq9ejYyMDKVlycnJ2LZtG9q1a4cSJUootlHbtm1hZWUFIyMjVKtWDevXr1f7+I4cOYKBAweiZMmSKF
GiBDp06IB79+4p1nNycsKlS5dw7NgxxXCfd4ez5PQxbdiwAbVr14aFhQVMTEzg4uICf3//bB93Fh0dHfTq1QsrV66EXC5XtC9fvhwODg5o0qSJ2utt374d3t7eMDExgZmZGZo2bYqIiAjF8hMnTkAmkyE0NFTlulm/lp89e1bRlpfbVRNNw9feHUZ069YtRTEwadIkxfOS1dWtbhiSOu8Px5HL5Zg6dSoqVKgAY2NjFC9eHJ6enpg/f77S9T5lP5W1//vzzz9Rvnx5GBoaws3NDX///bfSerndd8yaNQtTp06Fs7MzDA0NceTIkRzlyZKfr+Pc7jcB4OLFi2jXrh0sLS1hZGSEqlWrKj4nsmQNrwgNDcW4ceNQunRpmJubo0mTJrh69arSutHR0WjdurXiOStdujRatWqFf//9V7GOtj4vs4btxsXFoXnz5ihWrBjs7Owwc+ZMAMDp06fx1VdfoVixYihfvrzK487Ja+ND7xEgZ69jTe8ldUNbsoZCHz9+HD4+PjAxMVG8Ng4fPowGDRqgRIkSMDY2RtmyZdGxY0eV7wfvy9rWW7ZsgaenJ4yMjODi4oIFCxYorZeWloYRI0agatWqsLCwgJWVFby9vbFt2zaV28zuO4i6+1e3/3/x4gWKFy+O7777TuU6t27dgq6uruLHhKxteODAAfTt2xdWVlYoVqwY2rRpg5s3b6pc/+DBg2jcuDHMzc1hYmKCOnXq4NChQ9lupyxxcXFo0aIFTExMULJkSQwYMADPnz/P0XVfv36NqVOnomLFijA0NIS1tTX69u2rVDh8/fXXcHR0VPocylK7dm1Ur15dcVkIgeDgYFStWlWxP+3QoQOuX7+udL2s183Zs2dRt25dxX5l5syZKveT0/1U1nO8YsUKxf7cy8sLp0+fhhACv/76q+J7VqNGjVQyATl7HrK+Z126dAndunWDhYUFbG1t4e/vj2fPninlSU1NxcqVKxWvo1wN0xa5sGLFCgFAnD17VsyfP18AEIcOHVIsr1+/vqhcubLislwuF82bNxd6enpi/PjxYv/+/WL27NmiWLFiolq1aiItLU2xrqOjoyhTpoxwd3cXoaGhYvfu3aJ27dpCX19fTJgwQdSpU0ds3rxZbNmyRZQvX17Y2tqKly9fKq7fu3dvYWBgIMqWLSumTZsm9u/fLyZOnCj09PRE69atlR4HAGFvby88PT3FunXrxOHDh8XFixfFpUuXhIWFhfDw8BCrVq0S+/fvFyNGjBA6Ojpi4sSJ2W4buVwuGjZsKAwNDRX3HxQUJFxcXAQAERQUpFj3U+5n+vTpAoDYs2dPtutliYuLEwMHDhR///23OHr0qNi5c6cICAgQOjo64siRI4r14uPjBQDh5OQkWrRoIbZu3Sq2bt0qPDw8hKWlpUhOTlasu2HDBjFhwgSxZcsWcezYMfH333+L+vXrC2tra/Hw4UPFekFBQULdSyzrdRQfH69omzlzpgAgtm3bJoQQ4uLFi8LExET07Nnzg48x634CAgLEnj17xJIlS4S9vb0oVaqUqF+/vmK91NRUUbVqVVGyZEkxZ84ccfDgQTF//nxhYWEhGjVqJORyebb3s3TpUgFAbN26Van9jz/+EADE3r17hRBCHD58WBgYGIi6deuKsLAwsXfvXtGnTx8BQKxYsUJlO7i4uIgffvhB7Nu3TyxdulRYWlqKhg0bKtY7d+6ccHFxEdWqVRMREREiIiJCnDt3LleP6dSpU0Imk4muXbuK3bt3i8OHD4sVK1YIPz+/D25fR0dH0apVK3H9+nUhk8nE7t27hRBCZGRkCHt7ezFhwgSxYcMGAUDpNbV27VoBQDRr1kxs3bpVhIWFiRo1aggDAwNx4sQJxXrVqlUTderUUbnfmjVripo1ayou5/V21aR+/fpKr5ssvXv3Fo6OjkIIIdLS0sTevXsVr7us5+X69etKGd59jb97/Xe3be/evRWXZ8yYIXR1dUVQUJA4dOiQ2Lt3r5g3b5
7SfuFT9h9CvN3/OTg4CDc3NxEaGiq2b98uWrRoIQCIDRs2KNbL7b7D3t5eNGzYUGzcuFHs379f6bG/7/3Hnd+v44/
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Average number of sales channels used by customers, per event type\n",
"\n",
"avg_supp_event = customer.groupby('name_event_types')['nb_suppliers'].mean()\n",
"\n",
"# Use the same 8x5 in. figure and 45-degree tick labels as the other per-event-type bar\n",
"# charts in this section, so the charts are directly comparable and the long title has\n",
"# more horizontal room than in the default 640x480 figure.\n",
"avg_supp_event.plot(kind='bar', figsize=(8, 5))\n",
"plt.xlabel(\"Type d'évènement\")\n",
"plt.ylabel('Nombre de Canaux de Ventes Moyen')\n",
"plt.title(\"Nombre de Canaux de Ventes Moyen utilisé par les Consommateurs par type d'évènement\")\n",
"plt.xticks(rotation=45)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "49d5fd2d-9bc1-43ac-9270-1efd73759854",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtIAAAJICAYAAABMlwOPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyfElEQVR4nOzdd1hT1/8H8HcYYYnIEBAH4kJR3IpiKzhwUupuxSIutGpFqtbRqjhR0aqtVsVtXVj3LAWtoqi4927rFkSRIcoM5/eHP+7XCCqJQQK+X8/j0+bck+Sd5HLz4XDuuTIhhAAREREREalEp7ADEBEREREVRSykiYiIiIjUwEKaiIiIiEgNLKSJiIiIiNTAQpqIiIiISA0spImIiIiI1MBCmoiIiIhIDSykiYiIiIjUwEKaiIiIiEgNLKSJiIqBlJQUVK9eHd27d0d2dnZhxyEqth4+fAhra2uMHDlS7cd48OABypcv/0GPQdqBhXQxtXr1ashkMhgaGuLu3bu5tru7u6NWrVqFkAzo06cPSpQoUSjPPWnSJMhksvf+c3d3f+9jHTt2DJMmTUJiYuIH51FXnz59ULFiRbXv/6Z9+/Zh0qRJeW6rWLEi+vTpo9Lj3blzBzKZDHPmzPnwcAA2bNiA+fPna+SxCkJB/FwFBQVhx44d7+3n5+cHGxsbrFu3Djo6Rf/Q/iHHiatXr2LSpEm4c+eOZkMVkJcvX2LSpEk4dOhQYUfJF3d393wdR992LNEmeR1DZTIZVq9enWf/rKwsfP3112jZsuUHHdfKlSuHP//8EytXrsS2bdvUfpzioKj9vL5Jr7ADUMFKT0/H+PHjsXbt2sKOohUGDBiAdu3aSbdjYmLQpUsXDBs2DN7e3lJ7yZIl3/tYx44dw+TJk9GnTx+UKlWqIOJ+dPv27cNvv/2W5xfg9u3b8/W+FKQNGzbg8uXLCAgIKNQcH1NQUBC6deuGTp06vbXPb7/9hosXL+Lo0aMwMDD4eOG01NWrVzF58mS4u7tr9BfNgvLy5UtMnjwZAPL1S3xhW7RoEZKTk6Xbe/fuxbRp07Bq1SpUr15dai9XrlxhxCtQ48aNg56eHn7//fcPGgQBgFq1amH79u3o0aMHateujSpVqmgoZdFS1H5e38RCuphr164dNmzYgFGjRqFOnTqFHeeDCSGQlpYGIyMjte5frlw5pYN7zm/AFSpUQJMmTTQRsdiqV69eYUegtxg6dCiGDh1a2DFUlpqaqvbPcmF4+fIljI2NCzvGR5OamgpDQ8NcBaOTk5PS7evXrwN4VRg2bNjwo+UrDLNnz9bo47m7uyMuLk6jj0kfV9H/+x+90+jRo2FpaYkxY8a8t29aWhrGjRsHBwcHyOVylC1bFkOHDs01daFixYrw9PTEnj17UK9ePRgZGaFGjRrYs2cPgFfTSmrUqAETExM0btwYp0+fzvP5rly5glatWsHExASlS5fGd999h5cvXyr1kclk+O6777BkyRLUqFEDBgYGWLNmDQDg1q1b8Pb2hrW1NQwMDFCjRg389ttvarxLue3atQtNmzaFsbExTE1N4eHhgePHj0vbJ02ahB9++AEA4ODgIP0pM+dPs5s2bUKbNm1QpkwZ6f0ZO3YsXrx4oXam1atXw9HRUXqtv//+e579MjIyMG3aNFSvXh0GBgYoXbo0+vbtiydPnrzz8fv06SO9f6//eTbnl428pnYkJiZi5MiRqFSpEgwMDGBtbY0OHTpIX6x5yczMhK+vL0qUKCHtM0IILFq0CHXr1oWRkRHMzc3RrVs3/Pfff9L93N3dsXfvXty9e1cpX47FixejTp06KFGiBExNTVG9enX8+OOP73zNADB58mS4uLjAwsICJUuWRP369bFixQoIIXL13bBhA5o2bYoSJUqgRIkSqFu3LlasWJGr36lTp/D555/D2NgYlSpVwsyZM3PNW05OTsaoUaOUft4CAgKU9hGZTIYXL15gzZo1eU47io
2NxaBBg1CuXDnI5XI4ODhg8uTJyMrKUnoudd+b993vbVOTcqaWvf6n2pzjxrZt21CvXj0YGhpKo7D5lfMYYWFhqF+/PoyMjFC9enWsXLlS6bm7d+8OAGjRooX0vr3+p/r9+/ejVatWKFmyJIyNjdGsWTMcOHBA6blyXtvZs2fRrVs3mJubo3LlyvnOkeN9n9GdO3dQunRpAK/2xZy875pGdejQIchkMqxbtw4jRoyAra0tjIyM4ObmhnPnzin1PX36NL7++mtUrFgRRkZGqFixInr27Jlryl/OZxYeHo5+/fqhdOnSMDY2Rnp6+ns+ldzWrl0LmUymdMzMMWXKFOjr6+PRo0cA/jcd6siRI2jSpAmMjIxQtmxZTJgwAQqFQum+6h7bXn+N+TmG5uV93zdPnjyBXC7HhAkTct33+vXrkMlk+PXXX6W2N/eLihUrIjAwUOln9/WpcXPnzoWDgwNKlCiBpk2bIjo6OtfznD59Gl5eXrCwsIChoSHq1auHP/74I9d7IJPJ8Pfff8PPzw+WlpYoWbIkevfujRcvXiA2NhY9evRAqVKlUKZMGYwaNQqZmZlKj5Hfz0ETP6/nzp2Dp6en9L7b2dmhY8eOePDgwfs+so9HULG0atUqAUCcOnVK/PLLLwKAOHDggLTdzc1N1KxZU7qdnZ0t2rZtK/T09MSECRNEeHi4mDNnjjAxMRH16tUTaWlpUl97e3tRrlw5UatWLbFx40axb98+4eLiIvT19cXEiRNFs2bNxLZt28T27dtFtWrVhI2NjXj58qV0f19fXyGXy0WFChXE9OnTRXh4uJg0aZLQ09MTnp6eSq8DgChbtqyoXbu22LBhg/j777/F5cuXxZUrV4SZmZlwdnYWv//+uwgPDxcjR44UOjo6YtKkSfl+n27fvi0AiNmzZ0tt69evFwBEmzZtxI4dO8SmTZtEgwYNhFwuF0eOHBFCCHH//n0xbNgwAUBs27ZNHD9+XBw/flwkJSUJIYSYOnWqmDdvnti7d684dOiQWLJkiXBwcBAtWrRQev7AwECRnx/DnM/zyy+/FLt37xbr1q0TVapUEeXLlxf29vZSP4VCIdq1aydMTEzE5MmTRUREhFi+fLkoW7ascHJyUvoc3vTPP/+Ibt26CQDS6zl+/Lj02dvb2wtfX1+pf3JysqhZs6YwMTERU6ZMEX/99ZfYunWrGD58uPj777/zfH8TEhJEixYthK2trTh9+rT0WH5+fkJfX1+MHDlShIWFiQ0bNojq1asLGxsbERsbK4QQ4sqVK6JZs2bC1tZWKZ8QQmzcuFEAEMOGDRPh4eFi//79YsmSJcLf3/+9722fPn3EihUrREREhIiIiBBTp04VRkZGYvLkyUr9JkyYIACILl26iM2bN4vw8HAxd+5cMWHCBKmPm5ubsLS0FFWrVhVLliwRERERYsiQIQKAWLNmjdTvxYsXom7dusLKykrMnTtX7N+/X/zyyy/CzMxMtGzZUmRnZwshhDh+/LgwMjISHTp0kF7vlStXhBBCxMTESJ9/SEiI2L9/v5g6daowMDAQffr0kZ5L3fcmP/d72/6bs7/evn1barO3txdlypQRlSpVEitXrhQHDx4UJ0+efOvz+/r6ChMTE6W2nGOPk5OT+P3338Vff/0lunfvLgCIyMhIIYQQcXFxIigoSAAQv/32m/S+xcXFCSGEWLt2rZDJZKJTp05i27ZtYvfu3cLT01Po6uqK/fv353pt9vb2YsyYMSIiIkLs2LEj3zny+xmlpaWJsLAwAUD0799fyvvPP/+89b05ePCgACDKly+f65hQsmRJ8e+//0p9N2/eLCZOnCi2b98uIiMjRWhoqHBzcxOlS5cWT548yfWZlS1bVgwcOFD8+eefYsuWLSIrK+utOd6876lTp4QQQqSnpwtbW1vRq1cvpX6ZmZnCzs5OdO/eXWrL+Zmxs7MTv/76q/jrr7+Ev7+/ACCGDh0q9fuQY9vrGd93DM1Lfr9vOnfuLM
qXLy8UCoXS/UePHi3kcrl4+vSpECLv/WLy5MlCLpcLHx8f6X45x8+KFSuKdu3aiR07dogdO3YIZ2dnYW5uLhITE6W
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Total number of tickets purchased on the Internet, per event type\n",
"\n",
"nb_tickets_internet = customer.groupby('name_event_types')['nb_tickets_internet'].sum()\n",
"\n",
"# Create the 8x5 in. figure explicitly and hand its Axes to pandas to draw the bars on\n",
"fig, ax = plt.subplots(figsize=(8, 5))\n",
"nb_tickets_internet.plot(kind='bar', ax=ax)\n",
"ax.set(\n",
"    xlabel=\"Type d'évènements\",\n",
"    ylabel='Nombre Total de tickets achetés sur Internet',\n",
"    title=\"Nombre Total de tickets achetés sur Internet par Type d'évènements\",\n",
")\n",
"plt.xticks(rotation=45)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc071992-cf4d-4b9f-9c3b-3f0e98e20eff",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}