4805 lines
632 KiB
Plaintext
4805 lines
632 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "3f41343f-7205-41d9-89dd-88039e301413",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Statistiques descriptives"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"id": "abfaf341-7b35-4407-9133-d21336c04027",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import os\n",
|
|||
|
"import s3fs\n",
|
|||
|
"import re\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import matplotlib.dates as mdates\n",
|
|||
|
"from datetime import datetime, date, timedelta\n",
|
|||
|
"from dateutil.relativedelta import relativedelta\n",
|
|||
|
"import warnings"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"id": "7fb72fa3-7940-496f-ac78-c2837f65eefa",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Create filesystem object\n",
|
|||
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
|||
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"id": "c34e13f4-e043-43d6-ba8c-2e13d008647c",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Load the helper functions defined in 0_KPI_functions.py into the notebook namespace\n",
|
|||
|
"exec(open('0_KPI_functions.py').read())\n",
|
|||
|
"\n",
|
|||
|
"# Useful functions :\n",
|
|||
|
" # display_databases(directory_path, file_name = ['customerplus_cleaned', 'target_information', 'campaigns_information', 'products_purchased_reduced'], datetime_col = None)\n",
|
|||
|
" # campaigns_kpi_function(campaigns_information = None)\n",
|
|||
|
" # tickets_kpi_function(tickets_information = None)\n",
|
|||
|
" # customerplus_kpi_function(customerplus_clean = None)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"id": "c60505f4-b95b-4c61-b842-26b27af7e280",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Display every column of a DataFrame (no column truncation)\n",
|
|||
|
"pd.set_option('display.max_columns', None)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"id": "aaffd291-2c88-44c8-a951-0ef1f8369ba3",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Additional function to load an initial CSV dataset from the bdc2324-data bucket\n",
|
|||
|
"def load_dataset_2(directory_path, file_name):\n",
|
|||
|
" \"\"\"\n",
|
|||
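|
" Load bdc2324-data/<directory_path>/<directory_path><file_name>.csv and return it as a DataFrame, without its 'identifier' column if there is one.\n",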
|
|||
|
" \"\"\"\n",
|
|||
|
" file_path = \"bdc2324-data\" + \"/\" + directory_path + \"/\" + directory_path + file_name + \".csv\"\n",
|
|||
|
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
|||
|
" df = pd.read_csv(file_in, sep=\",\")\n",
|
|||
|
"\n",
|
|||
|
" # drop na :\n",
|
|||
|
" #df = df.dropna(axis=1, thresh=len(df))\n",
|
|||
|
" # drop the 'identifier' column when the table has one\n",
|
|||
|
" if 'identifier' in df.columns:\n",
|
|||
|
" df = df.drop(columns = 'identifier')\n",
|
|||
|
" return df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"id": "09daec01-9927-45c7-a6d4-9b9d0340ee02",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"companies = {'musee' : ['1', '2', '3', '4'], # , '101'\n",
|
|||
|
" 'musique' : ['10', '11', '12', '13', '14']}"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"id": "d9ccb033-3c7a-4647-ae1a-3a439dec2ea1",
|
|||
|
"metadata": {
|
|||
|
"scrolled": true
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_1/customerplus_cleaned.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_1/campaigns_information.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_1/products_purchased_reduced.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_1/target_information.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
|||
|
"<string>:27: SettingWithCopyWarning: \n",
|
|||
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|||
|
"\n",
|
|||
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_2/customerplus_cleaned.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_2/campaigns_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_2/products_purchased_reduced.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_2/target_information.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
|||
|
"<string>:13: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|||
|
"<string>:27: SettingWithCopyWarning: \n",
|
|||
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|||
|
"\n",
|
|||
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_3/customerplus_cleaned.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_3/campaigns_information.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_3/products_purchased_reduced.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_3/target_information.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
|||
|
"<string>:13: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|||
|
"<string>:27: SettingWithCopyWarning: \n",
|
|||
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|||
|
"\n",
|
|||
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_4/customerplus_cleaned.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_4/campaigns_information.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_4/products_purchased_reduced.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
|||
|
"<string>:13: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_4/target_information.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<string>:13: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
|||
|
"<string>:27: SettingWithCopyWarning: \n",
|
|||
|
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
|||
|
"\n",
|
|||
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Build the KPI tables for the 4 museum companies listed in companies['musee']\n",
|
|||
|
"\n",
|
|||
|
"# list of the museum companies\n",
|
|||
|
"nb_compagnie= companies['musee']\n",
|
|||
|
"\n",
|
|||
|
"customer_musee = pd.DataFrame()\n",
|
|||
|
"campaigns_musee_brut = pd.DataFrame()\n",
|
|||
|
"campaigns_musee_kpi = pd.DataFrame()\n",
|
|||
|
"products_musee = pd.DataFrame()\n",
|
|||
|
"tickets_musee = pd.DataFrame()\n",
|
|||
|
"\n",
|
|||
|
"# Loop over the museum companies to build the aggregated datasets\n",
|
|||
|
"for directory_path in nb_compagnie:\n",
|
|||
|
" df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
|
|||
|
" df_campaigns_brut = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
|
|||
|
" df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
|
|||
|
" df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
|
|||
|
" \n",
|
|||
|
" df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_brut) \n",
|
|||
|
" df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
|
|||
|
" df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
|
|||
|
"\n",
|
|||
|
" \n",
|
|||
|
"# Add the number_company column, which will be used to aggregate the results\n",
|
|||
|
" df_tickets_kpi[\"number_company\"]=int(directory_path)\n",
|
|||
|
" df_campaigns_brut[\"number_company\"]=int(directory_path)\n",
|
|||
|
" df_campaigns_kpi[\"number_company\"]=int(directory_path)\n",
|
|||
|
" df_customerplus_clean[\"number_company\"]=int(directory_path)\n",
|
|||
|
" df_target_information[\"number_company\"]=int(directory_path)\n",
|
|||
|
"\n",
|
|||
|
"# Prefix customer_id with the company number and an underscore (e.g. '1_123')\n",
|
|||
|
" df_tickets_kpi[\"customer_id\"]= directory_path + '_' + df_tickets_kpi['customer_id'].astype('str')\n",
|
|||
|
" df_campaigns_brut[\"customer_id\"]= directory_path + '_' + df_campaigns_brut['customer_id'].astype('str')\n",
|
|||
|
" df_campaigns_kpi[\"customer_id\"]= directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str') \n",
|
|||
|
" df_customerplus_clean[\"customer_id\"]= directory_path + '_' + df_customerplus_clean['customer_id'].astype('str') \n",
|
|||
|
" df_products_purchased_reduced[\"customer_id\"]= directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str') \n",
|
|||
|
"\n",
|
|||
|
"# Append this company's tables to the aggregated DataFrames\n",
|
|||
|
" customer_musee = pd.concat([customer_musee, df_customerplus_clean], ignore_index=True)\n",
|
|||
|
" campaigns_musee_kpi = pd.concat([campaigns_musee_kpi, df_campaigns_kpi], ignore_index=True)\n",
|
|||
|
" campaigns_musee_brut = pd.concat([campaigns_musee_brut, df_campaigns_brut], ignore_index=True) \n",
|
|||
|
" tickets_musee = pd.concat([tickets_musee, df_tickets_kpi], ignore_index=True)\n",
|
|||
|
" products_musee = pd.concat([products_musee, df_products_purchased_reduced], ignore_index=True)\n",
|
|||
|
" "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"id": "e6324fb6-7432-4925-beb0-a831a13be7d5",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|||
|
"RangeIndex: 1005251 entries, 0 to 1005250\n",
|
|||
|
"Data columns (total 28 columns):\n",
|
|||
|
" # Column Non-Null Count Dtype \n",
|
|||
|
"--- ------ -------------- ----- \n",
|
|||
|
" 0 customer_id 1005251 non-null object \n",
|
|||
|
" 1 street_id 1005251 non-null int64 \n",
|
|||
|
" 2 structure_id 92386 non-null float64\n",
|
|||
|
" 3 mcp_contact_id 453246 non-null float64\n",
|
|||
|
" 4 fidelity 1005251 non-null int64 \n",
|
|||
|
" 5 tenant_id 1005251 non-null int64 \n",
|
|||
|
" 6 is_partner 1005251 non-null bool \n",
|
|||
|
" 7 deleted_at 0 non-null float64\n",
|
|||
|
" 8 gender 1005251 non-null int64 \n",
|
|||
|
" 9 is_email_true 1005251 non-null bool \n",
|
|||
|
" 10 opt_in 1005251 non-null bool \n",
|
|||
|
" 11 last_buying_date 625674 non-null object \n",
|
|||
|
" 12 max_price 625674 non-null float64\n",
|
|||
|
" 13 ticket_sum 1005251 non-null int64 \n",
|
|||
|
" 14 average_price 790528 non-null float64\n",
|
|||
|
" 15 average_purchase_delay 625674 non-null float64\n",
|
|||
|
" 16 average_price_basket 625674 non-null float64\n",
|
|||
|
" 17 average_ticket_basket 625674 non-null float64\n",
|
|||
|
" 18 total_price 840397 non-null float64\n",
|
|||
|
" 19 purchase_count 1005251 non-null int64 \n",
|
|||
|
" 20 first_buying_date 625674 non-null object \n",
|
|||
|
" 21 country 593083 non-null object \n",
|
|||
|
" 22 gender_label 1005251 non-null object \n",
|
|||
|
" 23 gender_female 1005251 non-null int64 \n",
|
|||
|
" 24 gender_male 1005251 non-null int64 \n",
|
|||
|
" 25 gender_other 1005251 non-null int64 \n",
|
|||
|
" 26 country_fr 593083 non-null float64\n",
|
|||
|
" 27 number_company 1005251 non-null int64 \n",
|
|||
|
"dtypes: bool(3), float64(10), int64(10), object(5)\n",
|
|||
|
"memory usage: 194.6+ MB\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"customer_musee.info()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"id": "e7fa2e45-cb48-4c79-994f-9f836d566c21",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"anonymous_customer = {'1' : '1_1', '2' : '2_12184', '3' : '3_1', '4' : '4_2', '101' : '101_1',\n",
|
|||
|
" '5' : '5_191835', '6' : '6_591412', '7' : '7_49632', '8' : '8_1942', '9' : '9_19683',\n",
|
|||
|
" '10' : '10_19521', '11' : '11_36', '12' : '12_1706757', '13' : '13_8422', '14' : '14_6354'}"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"id": "51d02f4f-980c-4ab9-b295-f9632af9c2f0",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"['1_1',\n",
|
|||
|
" '2_12184',\n",
|
|||
|
" '3_1',\n",
|
|||
|
" '4_2',\n",
|
|||
|
" '101_1',\n",
|
|||
|
" '5_191835',\n",
|
|||
|
" '6_591412',\n",
|
|||
|
" '7_49632',\n",
|
|||
|
" '8_1942',\n",
|
|||
|
" '9_19683',\n",
|
|||
|
" '10_19521',\n",
|
|||
|
" '11_36',\n",
|
|||
|
" '12_1706757',\n",
|
|||
|
" '13_8422',\n",
|
|||
|
" '14_6354']"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 36,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"list(anonymous_customer.values())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 60,
|
|||
|
"id": "70b6e961-c303-465e-93f4-609721d38454",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Filter out the outliers: the customer ids listed in anonymous_customer\n",
|
|||
|
"\n",
|
|||
|
"def remove_elements(lst, elements_to_remove):\n",
|
|||
|
" return ''.join([x for x in lst if x not in elements_to_remove])\n",
|
|||
|
" \n",
|
|||
|
"databases = [customer_musee, campaigns_musee_kpi, tickets_musee, products_musee]\n",
|
|||
|
"\n",
|
|||
|
"outlier_list = list(anonymous_customer.values())\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"customer_musee = customer_musee[~customer_musee['customer_id'].isin(outlier_list)]\n",
|
|||
|
"campaigns_musee_kpi = campaigns_musee_kpi[~campaigns_musee_kpi['customer_id'].isin(outlier_list)]\n",
|
|||
|
"tickets_musee = tickets_musee[~tickets_musee['customer_id'].isin(outlier_list)]\n",
|
|||
|
"products_musee = products_musee[~products_musee['customer_id'].isin(outlier_list)]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "ae3c0c33-55a7-4a28-9a62-3ce13496917a",
|
|||
|
"metadata": {
|
|||
|
"jp-MarkdownHeadingCollapsed": true
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# 0 - Spécificité de la company 101"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 33,
|
|||
|
"id": "f8a8dedc-2f67-407c-9bbf-f70d236fc783",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>name</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>fixed_capacity</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>26</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>atelier des lumieres</td>\n",
|
|||
|
" <td>2020-10-12 08:57:27.783770+02:00</td>\n",
|
|||
|
" <td>2020-10-12 08:57:27.783770+02:00</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>17</th>\n",
|
|||
|
" <td>14007</td>\n",
|
|||
|
" <td>fabrique des lumieres</td>\n",
|
|||
|
" <td>2022-05-17 09:11:19.416106+02:00</td>\n",
|
|||
|
" <td>2022-05-17 09:11:19.416106+02:00</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>32</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>non défini</td>\n",
|
|||
|
" <td>2020-10-12 08:57:27.785329+02:00</td>\n",
|
|||
|
" <td>2020-10-12 08:57:27.785329+02:00</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>10</th>\n",
|
|||
|
" <td>10755</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2022-01-28 12:07:16.602885+01:00</td>\n",
|
|||
|
" <td>2022-01-28 12:07:16.602885+01:00</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>16</th>\n",
|
|||
|
" <td>13583</td>\n",
|
|||
|
" <td>hôtel de caumont</td>\n",
|
|||
|
" <td>2022-05-13 10:59:06.829576+02:00</td>\n",
|
|||
|
" <td>2022-05-13 10:59:06.829576+02:00</td>\n",
|
|||
|
" <td>859</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>16422</td>\n",
|
|||
|
" <td>atelier des lumières - cézanne</td>\n",
|
|||
|
" <td>2022-08-04 04:03:31.045648+02:00</td>\n",
|
|||
|
" <td>2022-08-04 04:03:31.045648+02:00</td>\n",
|
|||
|
" <td>859</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20</th>\n",
|
|||
|
" <td>21098</td>\n",
|
|||
|
" <td>bassins des lumières - 2022 - venise</td>\n",
|
|||
|
" <td>2023-04-08 03:49:46.916777+02:00</td>\n",
|
|||
|
" <td>2023-04-08 03:49:46.916777+02:00</td>\n",
|
|||
|
" <td>859</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>18</th>\n",
|
|||
|
" <td>23460</td>\n",
|
|||
|
" <td>immersive box</td>\n",
|
|||
|
" <td>2023-08-29 17:39:55.188028+02:00</td>\n",
|
|||
|
" <td>2023-08-29 17:39:55.188028+02:00</td>\n",
|
|||
|
" <td>859</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>8</th>\n",
|
|||
|
" <td>13584</td>\n",
|
|||
|
" <td>bassins des lumières - venise</td>\n",
|
|||
|
" <td>2022-05-13 11:00:14.943669+02:00</td>\n",
|
|||
|
" <td>2022-05-13 11:00:14.943669+02:00</td>\n",
|
|||
|
" <td>859</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>15</th>\n",
|
|||
|
" <td>21096</td>\n",
|
|||
|
" <td>atelier des lumières - 2022 - cézanne</td>\n",
|
|||
|
" <td>2023-04-08 03:42:10.395124+02:00</td>\n",
|
|||
|
" <td>2023-04-08 03:42:10.395124+02:00</td>\n",
|
|||
|
" <td>859</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>27</th>\n",
|
|||
|
" <td>260</td>\n",
|
|||
|
" <td>musée jacquemart andré</td>\n",
|
|||
|
" <td>2020-10-18 01:20:12.738229+02:00</td>\n",
|
|||
|
" <td>2020-10-18 01:20:12.738229+02:00</td>\n",
|
|||
|
" <td>3525</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>33</th>\n",
|
|||
|
" <td>71</td>\n",
|
|||
|
" <td>cité de l'automobile</td>\n",
|
|||
|
" <td>2020-10-13 11:05:43.705639+02:00</td>\n",
|
|||
|
" <td>2020-12-03 08:33:15.576065+01:00</td>\n",
|
|||
|
" <td>449992</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>30</th>\n",
|
|||
|
" <td>89</td>\n",
|
|||
|
" <td>bassins de lumieres</td>\n",
|
|||
|
" <td>2020-10-13 14:56:27.206958+02:00</td>\n",
|
|||
|
" <td>2020-10-13 14:56:27.206958+02:00</td>\n",
|
|||
|
" <td>460754</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>7</th>\n",
|
|||
|
" <td>108</td>\n",
|
|||
|
" <td>les baux de provence</td>\n",
|
|||
|
" <td>2020-10-14 14:16:20.284658+02:00</td>\n",
|
|||
|
" <td>2020-10-14 14:16:20.284658+02:00</td>\n",
|
|||
|
" <td>481475</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>19</th>\n",
|
|||
|
" <td>161</td>\n",
|
|||
|
" <td>les carrières de lumières</td>\n",
|
|||
|
" <td>2020-10-14 18:06:57.059828+02:00</td>\n",
|
|||
|
" <td>2020-10-14 18:06:57.059828+02:00</td>\n",
|
|||
|
" <td>483815</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>24</th>\n",
|
|||
|
" <td>118</td>\n",
|
|||
|
" <td>villa ephrussi de rothschild</td>\n",
|
|||
|
" <td>2020-10-14 15:02:40.478501+02:00</td>\n",
|
|||
|
" <td>2020-10-14 15:02:40.478501+02:00</td>\n",
|
|||
|
" <td>485539</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>29</th>\n",
|
|||
|
" <td>128</td>\n",
|
|||
|
" <td>théâtre antique orange</td>\n",
|
|||
|
" <td>2020-10-14 15:46:44.072307+02:00</td>\n",
|
|||
|
" <td>2020-10-14 15:46:44.072307+02:00</td>\n",
|
|||
|
" <td>499380</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>28</th>\n",
|
|||
|
" <td>3875</td>\n",
|
|||
|
" <td>carrieres de lumieres</td>\n",
|
|||
|
" <td>2021-06-11 10:52:15.706030+02:00</td>\n",
|
|||
|
" <td>2021-06-11 10:52:15.706030+02:00</td>\n",
|
|||
|
" <td>535931</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25</th>\n",
|
|||
|
" <td>3866</td>\n",
|
|||
|
" <td>baux-de-provence</td>\n",
|
|||
|
" <td>2021-06-11 10:28:30.237144+02:00</td>\n",
|
|||
|
" <td>2021-06-11 10:28:30.237144+02:00</td>\n",
|
|||
|
" <td>569179</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>22</th>\n",
|
|||
|
" <td>392</td>\n",
|
|||
|
" <td>tour magne de nîmes</td>\n",
|
|||
|
" <td>2020-10-19 17:51:45.915572+02:00</td>\n",
|
|||
|
" <td>2020-10-19 17:51:45.915572+02:00</td>\n",
|
|||
|
" <td>717981</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>263</td>\n",
|
|||
|
" <td>musée maillol</td>\n",
|
|||
|
" <td>2020-10-18 01:30:23.853673+02:00</td>\n",
|
|||
|
" <td>2020-10-18 01:30:23.853673+02:00</td>\n",
|
|||
|
" <td>852301</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>6</th>\n",
|
|||
|
" <td>264</td>\n",
|
|||
|
" <td>cinéma d'aigues mortes</td>\n",
|
|||
|
" <td>2020-10-18 01:30:23.863631+02:00</td>\n",
|
|||
|
" <td>2020-10-18 01:30:23.863631+02:00</td>\n",
|
|||
|
" <td>852302</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>21</th>\n",
|
|||
|
" <td>388</td>\n",
|
|||
|
" <td>maison carrée de nîmes</td>\n",
|
|||
|
" <td>2020-10-19 17:37:09.345955+02:00</td>\n",
|
|||
|
" <td>2020-10-19 17:37:09.345955+02:00</td>\n",
|
|||
|
" <td>867431</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>23</th>\n",
|
|||
|
" <td>333</td>\n",
|
|||
|
" <td>les arènes de nîmes</td>\n",
|
|||
|
" <td>2020-10-19 10:17:55.757817+02:00</td>\n",
|
|||
|
" <td>2020-10-19 10:17:55.757817+02:00</td>\n",
|
|||
|
" <td>867431</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>31</th>\n",
|
|||
|
" <td>170</td>\n",
|
|||
|
" <td>caumont centre d'art</td>\n",
|
|||
|
" <td>2020-10-14 19:13:55.213186+02:00</td>\n",
|
|||
|
" <td>2022-10-14 06:21:53.310810+02:00</td>\n",
|
|||
|
" <td>887751</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>5</th>\n",
|
|||
|
" <td>1665</td>\n",
|
|||
|
" <td>cité de l'auto</td>\n",
|
|||
|
" <td>2020-12-08 18:46:15.957997+01:00</td>\n",
|
|||
|
" <td>2020-12-08 18:46:15.957997+01:00</td>\n",
|
|||
|
" <td>1418086</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>14</th>\n",
|
|||
|
" <td>11836</td>\n",
|
|||
|
" <td>phoenix des lumières</td>\n",
|
|||
|
" <td>2022-03-08 16:30:03.135537+01:00</td>\n",
|
|||
|
" <td>2022-03-08 16:30:03.135537+01:00</td>\n",
|
|||
|
" <td>3639035</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>13501</td>\n",
|
|||
|
" <td>château de boutemont</td>\n",
|
|||
|
" <td>2022-05-10 14:56:36.025562+02:00</td>\n",
|
|||
|
" <td>2022-05-10 14:56:36.025562+02:00</td>\n",
|
|||
|
" <td>4209418</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>13502</td>\n",
|
|||
|
" <td>fabrique des lumières</td>\n",
|
|||
|
" <td>2022-05-10 15:05:40.443121+02:00</td>\n",
|
|||
|
" <td>2022-05-10 15:05:40.443121+02:00</td>\n",
|
|||
|
" <td>4209419</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>12</th>\n",
|
|||
|
" <td>22219</td>\n",
|
|||
|
" <td>immersive box belgique</td>\n",
|
|||
|
" <td>2023-06-13 16:17:37.818103+02:00</td>\n",
|
|||
|
" <td>2023-06-13 16:17:37.818103+02:00</td>\n",
|
|||
|
" <td>7335205</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>13</th>\n",
|
|||
|
" <td>22512</td>\n",
|
|||
|
" <td>hall des lumières</td>\n",
|
|||
|
" <td>2023-06-29 09:31:23.575220+02:00</td>\n",
|
|||
|
" <td>2023-06-29 09:31:23.575220+02:00</td>\n",
|
|||
|
" <td>7364467</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>11</th>\n",
|
|||
|
" <td>22348</td>\n",
|
|||
|
" <td>hdl</td>\n",
|
|||
|
" <td>2023-06-20 17:58:19.153019+02:00</td>\n",
|
|||
|
" <td>2023-06-29 09:38:51.592547+02:00</td>\n",
|
|||
|
" <td>7364467</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>22516</td>\n",
|
|||
|
" <td>hall des lumieres</td>\n",
|
|||
|
" <td>2023-06-29 09:46:44.718839+02:00</td>\n",
|
|||
|
" <td>2023-06-29 09:46:44.718839+02:00</td>\n",
|
|||
|
" <td>7364467</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>9</th>\n",
|
|||
|
" <td>11835</td>\n",
|
|||
|
" <td>hdl - ny</td>\n",
|
|||
|
" <td>2022-03-08 16:00:20.821212+01:00</td>\n",
|
|||
|
" <td>2023-06-29 09:27:59.256591+02:00</td>\n",
|
|||
|
" <td>7446203</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id name \\\n",
|
|||
|
"26 1 atelier des lumieres \n",
|
|||
|
"17 14007 fabrique des lumieres \n",
|
|||
|
"32 2 non défini \n",
|
|||
|
"10 10755 NaN \n",
|
|||
|
"16 13583 hôtel de caumont \n",
|
|||
|
"2 16422 atelier des lumières - cézanne \n",
|
|||
|
"20 21098 bassins des lumières - 2022 - venise \n",
|
|||
|
"18 23460 immersive box \n",
|
|||
|
"8 13584 bassins des lumières - venise \n",
|
|||
|
"15 21096 atelier des lumières - 2022 - cézanne \n",
|
|||
|
"27 260 musée jacquemart andré \n",
|
|||
|
"33 71 cité de l'automobile \n",
|
|||
|
"30 89 bassins de lumieres \n",
|
|||
|
"7 108 les baux de provence \n",
|
|||
|
"19 161 les carrières de lumières \n",
|
|||
|
"24 118 villa ephrussi de rothschild \n",
|
|||
|
"29 128 théâtre antique orange \n",
|
|||
|
"28 3875 carrieres de lumieres \n",
|
|||
|
"25 3866 baux-de-provence \n",
|
|||
|
"22 392 tour magne de nîmes \n",
|
|||
|
"3 263 musée maillol \n",
|
|||
|
"6 264 cinéma d'aigues mortes \n",
|
|||
|
"21 388 maison carrée de nîmes \n",
|
|||
|
"23 333 les arènes de nîmes \n",
|
|||
|
"31 170 caumont centre d'art \n",
|
|||
|
"5 1665 cité de l'auto \n",
|
|||
|
"14 11836 phoenix des lumières \n",
|
|||
|
"1 13501 château de boutemont \n",
|
|||
|
"4 13502 fabrique des lumières \n",
|
|||
|
"12 22219 immersive box belgique \n",
|
|||
|
"13 22512 hall des lumières \n",
|
|||
|
"11 22348 hdl \n",
|
|||
|
"0 22516 hall des lumieres \n",
|
|||
|
"9 11835 hdl - ny \n",
|
|||
|
"\n",
|
|||
|
" created_at updated_at \\\n",
|
|||
|
"26 2020-10-12 08:57:27.783770+02:00 2020-10-12 08:57:27.783770+02:00 \n",
|
|||
|
"17 2022-05-17 09:11:19.416106+02:00 2022-05-17 09:11:19.416106+02:00 \n",
|
|||
|
"32 2020-10-12 08:57:27.785329+02:00 2020-10-12 08:57:27.785329+02:00 \n",
|
|||
|
"10 2022-01-28 12:07:16.602885+01:00 2022-01-28 12:07:16.602885+01:00 \n",
|
|||
|
"16 2022-05-13 10:59:06.829576+02:00 2022-05-13 10:59:06.829576+02:00 \n",
|
|||
|
"2 2022-08-04 04:03:31.045648+02:00 2022-08-04 04:03:31.045648+02:00 \n",
|
|||
|
"20 2023-04-08 03:49:46.916777+02:00 2023-04-08 03:49:46.916777+02:00 \n",
|
|||
|
"18 2023-08-29 17:39:55.188028+02:00 2023-08-29 17:39:55.188028+02:00 \n",
|
|||
|
"8 2022-05-13 11:00:14.943669+02:00 2022-05-13 11:00:14.943669+02:00 \n",
|
|||
|
"15 2023-04-08 03:42:10.395124+02:00 2023-04-08 03:42:10.395124+02:00 \n",
|
|||
|
"27 2020-10-18 01:20:12.738229+02:00 2020-10-18 01:20:12.738229+02:00 \n",
|
|||
|
"33 2020-10-13 11:05:43.705639+02:00 2020-12-03 08:33:15.576065+01:00 \n",
|
|||
|
"30 2020-10-13 14:56:27.206958+02:00 2020-10-13 14:56:27.206958+02:00 \n",
|
|||
|
"7 2020-10-14 14:16:20.284658+02:00 2020-10-14 14:16:20.284658+02:00 \n",
|
|||
|
"19 2020-10-14 18:06:57.059828+02:00 2020-10-14 18:06:57.059828+02:00 \n",
|
|||
|
"24 2020-10-14 15:02:40.478501+02:00 2020-10-14 15:02:40.478501+02:00 \n",
|
|||
|
"29 2020-10-14 15:46:44.072307+02:00 2020-10-14 15:46:44.072307+02:00 \n",
|
|||
|
"28 2021-06-11 10:52:15.706030+02:00 2021-06-11 10:52:15.706030+02:00 \n",
|
|||
|
"25 2021-06-11 10:28:30.237144+02:00 2021-06-11 10:28:30.237144+02:00 \n",
|
|||
|
"22 2020-10-19 17:51:45.915572+02:00 2020-10-19 17:51:45.915572+02:00 \n",
|
|||
|
"3 2020-10-18 01:30:23.853673+02:00 2020-10-18 01:30:23.853673+02:00 \n",
|
|||
|
"6 2020-10-18 01:30:23.863631+02:00 2020-10-18 01:30:23.863631+02:00 \n",
|
|||
|
"21 2020-10-19 17:37:09.345955+02:00 2020-10-19 17:37:09.345955+02:00 \n",
|
|||
|
"23 2020-10-19 10:17:55.757817+02:00 2020-10-19 10:17:55.757817+02:00 \n",
|
|||
|
"31 2020-10-14 19:13:55.213186+02:00 2022-10-14 06:21:53.310810+02:00 \n",
|
|||
|
"5 2020-12-08 18:46:15.957997+01:00 2020-12-08 18:46:15.957997+01:00 \n",
|
|||
|
"14 2022-03-08 16:30:03.135537+01:00 2022-03-08 16:30:03.135537+01:00 \n",
|
|||
|
"1 2022-05-10 14:56:36.025562+02:00 2022-05-10 14:56:36.025562+02:00 \n",
|
|||
|
"4 2022-05-10 15:05:40.443121+02:00 2022-05-10 15:05:40.443121+02:00 \n",
|
|||
|
"12 2023-06-13 16:17:37.818103+02:00 2023-06-13 16:17:37.818103+02:00 \n",
|
|||
|
"13 2023-06-29 09:31:23.575220+02:00 2023-06-29 09:31:23.575220+02:00 \n",
|
|||
|
"11 2023-06-20 17:58:19.153019+02:00 2023-06-29 09:38:51.592547+02:00 \n",
|
|||
|
"0 2023-06-29 09:46:44.718839+02:00 2023-06-29 09:46:44.718839+02:00 \n",
|
|||
|
"9 2022-03-08 16:00:20.821212+01:00 2023-06-29 09:27:59.256591+02:00 \n",
|
|||
|
"\n",
|
|||
|
" street_id fixed_capacity \n",
|
|||
|
"26 1 NaN \n",
|
|||
|
"17 2 NaN \n",
|
|||
|
"32 2 NaN \n",
|
|||
|
"10 2 NaN \n",
|
|||
|
"16 859 NaN \n",
|
|||
|
"2 859 NaN \n",
|
|||
|
"20 859 NaN \n",
|
|||
|
"18 859 NaN \n",
|
|||
|
"8 859 NaN \n",
|
|||
|
"15 859 NaN \n",
|
|||
|
"27 3525 NaN \n",
|
|||
|
"33 449992 NaN \n",
|
|||
|
"30 460754 NaN \n",
|
|||
|
"7 481475 NaN \n",
|
|||
|
"19 483815 NaN \n",
|
|||
|
"24 485539 NaN \n",
|
|||
|
"29 499380 NaN \n",
|
|||
|
"28 535931 NaN \n",
|
|||
|
"25 569179 NaN \n",
|
|||
|
"22 717981 NaN \n",
|
|||
|
"3 852301 NaN \n",
|
|||
|
"6 852302 NaN \n",
|
|||
|
"21 867431 NaN \n",
|
|||
|
"23 867431 NaN \n",
|
|||
|
"31 887751 NaN \n",
|
|||
|
"5 1418086 NaN \n",
|
|||
|
"14 3639035 NaN \n",
|
|||
|
"1 4209418 NaN \n",
|
|||
|
"4 4209419 NaN \n",
|
|||
|
"12 7335205 NaN \n",
|
|||
|
"13 7364467 NaN \n",
|
|||
|
"11 7364467 NaN \n",
|
|||
|
"0 7364467 NaN \n",
|
|||
|
"9 7446203 NaN "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Company whose tables are loaded in this cell\n",
|
|||
|
"company_number = '101'\n",
|
|||
|
"\n",
|
|||
|
"# load_dataset_2 is defined outside this cell; presumably it returns the named table as a DataFrame (TODO confirm)\n",
|
|||
|
"facilities = load_dataset_2(company_number, \"facilities\")\n",
|
|||
|
"\n",
|
|||
|
"# Display the facilities ordered by street_id so rows sharing a street_id sit next to each other\n",
|
|||
|
"facilities.sort_values(by = 'street_id')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"id": "c8c8eea4-21a2-487b-b20a-15d73616a253",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id_x</th>\n",
|
|||
|
" <th>sent_at</th>\n",
|
|||
|
" <th>software</th>\n",
|
|||
|
" <th>satisfaction</th>\n",
|
|||
|
" <th>extra_field</th>\n",
|
|||
|
" <th>customer_id</th>\n",
|
|||
|
" <th>contribution_site_id</th>\n",
|
|||
|
" <th>created_at_x</th>\n",
|
|||
|
" <th>updated_at_x</th>\n",
|
|||
|
" <th>id_y</th>\n",
|
|||
|
" <th>facility_id</th>\n",
|
|||
|
" <th>created_at_y</th>\n",
|
|||
|
" <th>updated_at_y</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>134910</td>\n",
|
|||
|
" <td>2017-07-30 15:50:15+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>91936</td>\n",
|
|||
|
" <td>70</td>\n",
|
|||
|
" <td>2020-09-25 20:41:07.752795+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:41:07.752795+02:00</td>\n",
|
|||
|
" <td>70</td>\n",
|
|||
|
" <td>438</td>\n",
|
|||
|
" <td>2020-09-25 20:41:07.735280+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:41:07.735280+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>5848427</td>\n",
|
|||
|
" <td>2020-03-04 16:18:13.597000+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>224450</td>\n",
|
|||
|
" <td>3420</td>\n",
|
|||
|
" <td>2022-01-21 02:44:34.857144+01:00</td>\n",
|
|||
|
" <td>2022-01-21 02:44:34.857144+01:00</td>\n",
|
|||
|
" <td>3420</td>\n",
|
|||
|
" <td>6650</td>\n",
|
|||
|
" <td>2022-01-21 02:44:34.690938+01:00</td>\n",
|
|||
|
" <td>2022-01-21 02:44:34.690938+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>918383</td>\n",
|
|||
|
" <td>2020-10-24 14:59:22.784000+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>397718</td>\n",
|
|||
|
" <td>208</td>\n",
|
|||
|
" <td>2020-10-25 02:06:54.048105+02:00</td>\n",
|
|||
|
" <td>2020-10-25 02:06:54.048105+02:00</td>\n",
|
|||
|
" <td>208</td>\n",
|
|||
|
" <td>576</td>\n",
|
|||
|
" <td>2020-09-27 18:05:14.671650+02:00</td>\n",
|
|||
|
" <td>2020-09-27 18:05:14.671650+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>918384</td>\n",
|
|||
|
" <td>2020-10-24 14:35:39.725000+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>397719</td>\n",
|
|||
|
" <td>208</td>\n",
|
|||
|
" <td>2020-10-25 02:06:54.050218+02:00</td>\n",
|
|||
|
" <td>2020-10-25 02:06:54.050218+02:00</td>\n",
|
|||
|
" <td>208</td>\n",
|
|||
|
" <td>576</td>\n",
|
|||
|
" <td>2020-09-27 18:05:14.671650+02:00</td>\n",
|
|||
|
" <td>2020-09-27 18:05:14.671650+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>918385</td>\n",
|
|||
|
" <td>2020-10-24 12:45:35.225000+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>10.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>208</td>\n",
|
|||
|
" <td>2020-10-25 02:06:54.052201+02:00</td>\n",
|
|||
|
" <td>2020-10-25 02:06:54.052201+02:00</td>\n",
|
|||
|
" <td>208</td>\n",
|
|||
|
" <td>576</td>\n",
|
|||
|
" <td>2020-09-27 18:05:14.671650+02:00</td>\n",
|
|||
|
" <td>2020-09-27 18:05:14.671650+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25454</th>\n",
|
|||
|
" <td>1951</td>\n",
|
|||
|
" <td>2018-03-20 09:34:09+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>69694</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.138272+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.138272+02:00</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>369</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25455</th>\n",
|
|||
|
" <td>1952</td>\n",
|
|||
|
" <td>2018-03-20 09:31:56+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>69694</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.138874+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.138874+02:00</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>369</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25456</th>\n",
|
|||
|
" <td>1954</td>\n",
|
|||
|
" <td>2018-03-20 09:30:44+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>69694</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.140372+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.140372+02:00</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>369</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25457</th>\n",
|
|||
|
" <td>1955</td>\n",
|
|||
|
" <td>2018-03-20 09:28:49+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>69695</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.140966+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.140966+02:00</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>369</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25458</th>\n",
|
|||
|
" <td>1953</td>\n",
|
|||
|
" <td>2018-03-20 09:31:23.361000+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.139437+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:37.139437+02:00</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>369</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" <td>2020-09-25 20:06:35.964342+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>25459 rows × 13 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id_x sent_at software satisfaction \\\n",
|
|||
|
"0 134910 2017-07-30 15:50:15+02:00 NaN 8.0 \n",
|
|||
|
"1 5848427 2020-03-04 16:18:13.597000+01:00 NaN NaN \n",
|
|||
|
"2 918383 2020-10-24 14:59:22.784000+02:00 NaN NaN \n",
|
|||
|
"3 918384 2020-10-24 14:35:39.725000+02:00 NaN NaN \n",
|
|||
|
"4 918385 2020-10-24 12:45:35.225000+02:00 NaN 10.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"25454 1951 2018-03-20 09:34:09+01:00 NaN 8.0 \n",
|
|||
|
"25455 1952 2018-03-20 09:31:56+01:00 NaN NaN \n",
|
|||
|
"25456 1954 2018-03-20 09:30:44+01:00 NaN NaN \n",
|
|||
|
"25457 1955 2018-03-20 09:28:49+01:00 NaN 8.0 \n",
|
|||
|
"25458 1953 2018-03-20 09:31:23.361000+01:00 NaN 8.0 \n",
|
|||
|
"\n",
|
|||
|
" extra_field customer_id contribution_site_id \\\n",
|
|||
|
"0 NaN 91936 70 \n",
|
|||
|
"1 NaN 224450 3420 \n",
|
|||
|
"2 NaN 397718 208 \n",
|
|||
|
"3 NaN 397719 208 \n",
|
|||
|
"4 NaN 2 208 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"25454 NaN 69694 1 \n",
|
|||
|
"25455 NaN 69694 1 \n",
|
|||
|
"25456 NaN 69694 1 \n",
|
|||
|
"25457 NaN 69695 1 \n",
|
|||
|
"25458 NaN 2 1 \n",
|
|||
|
"\n",
|
|||
|
" created_at_x updated_at_x \\\n",
|
|||
|
"0 2020-09-25 20:41:07.752795+02:00 2020-09-25 20:41:07.752795+02:00 \n",
|
|||
|
"1 2022-01-21 02:44:34.857144+01:00 2022-01-21 02:44:34.857144+01:00 \n",
|
|||
|
"2 2020-10-25 02:06:54.048105+02:00 2020-10-25 02:06:54.048105+02:00 \n",
|
|||
|
"3 2020-10-25 02:06:54.050218+02:00 2020-10-25 02:06:54.050218+02:00 \n",
|
|||
|
"4 2020-10-25 02:06:54.052201+02:00 2020-10-25 02:06:54.052201+02:00 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"25454 2020-09-25 20:06:37.138272+02:00 2020-09-25 20:06:37.138272+02:00 \n",
|
|||
|
"25455 2020-09-25 20:06:37.138874+02:00 2020-09-25 20:06:37.138874+02:00 \n",
|
|||
|
"25456 2020-09-25 20:06:37.140372+02:00 2020-09-25 20:06:37.140372+02:00 \n",
|
|||
|
"25457 2020-09-25 20:06:37.140966+02:00 2020-09-25 20:06:37.140966+02:00 \n",
|
|||
|
"25458 2020-09-25 20:06:37.139437+02:00 2020-09-25 20:06:37.139437+02:00 \n",
|
|||
|
"\n",
|
|||
|
" id_y facility_id created_at_y \\\n",
|
|||
|
"0 70 438 2020-09-25 20:41:07.735280+02:00 \n",
|
|||
|
"1 3420 6650 2022-01-21 02:44:34.690938+01:00 \n",
|
|||
|
"2 208 576 2020-09-27 18:05:14.671650+02:00 \n",
|
|||
|
"3 208 576 2020-09-27 18:05:14.671650+02:00 \n",
|
|||
|
"4 208 576 2020-09-27 18:05:14.671650+02:00 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"25454 1 369 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"25455 1 369 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"25456 1 369 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"25457 1 369 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"25458 1 369 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"\n",
|
|||
|
" updated_at_y \n",
|
|||
|
"0 2020-09-25 20:41:07.735280+02:00 \n",
|
|||
|
"1 2022-01-21 02:44:34.690938+01:00 \n",
|
|||
|
"2 2020-09-27 18:05:14.671650+02:00 \n",
|
|||
|
"3 2020-09-27 18:05:14.671650+02:00 \n",
|
|||
|
"4 2020-09-27 18:05:14.671650+02:00 \n",
|
|||
|
"... ... \n",
|
|||
|
"25454 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"25455 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"25456 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"25457 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"25458 2020-09-25 20:06:35.964342+02:00 \n",
|
|||
|
"\n",
|
|||
|
"[25459 rows x 13 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Load the contributions and contribution_sites tables for the current notebook-level company_number\n",
|
|||
|
"contributions = load_dataset_2(company_number, \"contributions\")\n",
|
|||
|
"contribution_sites = load_dataset_2(company_number, \"contribution_sites\")\n",
|
|||
|
"\n",
|
|||
|
"# Inner join: each contribution is paired with the site row whose id equals its contribution_site_id.\n",
|
|||
|
"# Column names present in both tables (id, created_at, updated_at) receive pandas' default _x/_y suffixes.\n",
|
|||
|
"pd.merge(contributions, contribution_sites, left_on = 'contribution_site_id', right_on = 'id', how = 'inner')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"id": "85b70219-f753-422e-9f57-a26eb28e7481",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"id 0.000000\n",
|
|||
|
"sent_at 0.000000\n",
|
|||
|
"software 1.000000\n",
|
|||
|
"satisfaction 0.430732\n",
|
|||
|
"extra_field 1.000000\n",
|
|||
|
"customer_id 0.000000\n",
|
|||
|
"contribution_site_id 0.000000\n",
|
|||
|
"created_at 0.000000\n",
|
|||
|
"updated_at 0.000000\n",
|
|||
|
"dtype: float64"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"contributions.isna().sum()/len(contributions)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"id": "647920c8-da07-4e87-964b-304fd7ff79f5",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>name</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>eur</td>\n",
|
|||
|
" <td>2023-07-17 15:35:19.957203+02:00</td>\n",
|
|||
|
" <td>2023-07-17 15:35:19.957203+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>usd</td>\n",
|
|||
|
" <td>2023-07-17 15:35:21.132408+02:00</td>\n",
|
|||
|
" <td>2023-07-17 15:35:21.132408+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>gbp</td>\n",
|
|||
|
" <td>2023-07-17 15:35:21.843594+02:00</td>\n",
|
|||
|
" <td>2023-07-17 15:35:21.843594+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>chf</td>\n",
|
|||
|
" <td>2023-07-17 15:35:23.229322+02:00</td>\n",
|
|||
|
" <td>2023-07-17 15:35:23.229322+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>cad</td>\n",
|
|||
|
" <td>2023-07-17 15:35:24.262466+02:00</td>\n",
|
|||
|
" <td>2023-07-17 15:35:24.262466+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id name created_at updated_at\n",
|
|||
|
"0 1 eur 2023-07-17 15:35:19.957203+02:00 2023-07-17 15:35:19.957203+02:00\n",
|
|||
|
"1 2 usd 2023-07-17 15:35:21.132408+02:00 2023-07-17 15:35:21.132408+02:00\n",
|
|||
|
"2 3 gbp 2023-07-17 15:35:21.843594+02:00 2023-07-17 15:35:21.843594+02:00\n",
|
|||
|
"3 4 chf 2023-07-17 15:35:23.229322+02:00 2023-07-17 15:35:23.229322+02:00\n",
|
|||
|
"4 5 cad 2023-07-17 15:35:24.262466+02:00 2023-07-17 15:35:24.262466+02:00"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Switch the notebook-level company_number to company 2\n",
|
|||
|
"company_number = \"2\"\n",
|
|||
|
"\n",
|
|||
|
"# Display the currencies table loaded for that company\n",
|
|||
|
"load_dataset_2(company_number, \"currencies\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 32,
|
|||
|
"id": "bc1f3d28-7f0c-4e87-baf7-dddcf03a7145",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>percent_price</th>\n",
|
|||
|
" <th>max_price</th>\n",
|
|||
|
" <th>min_price</th>\n",
|
|||
|
" <th>category_id</th>\n",
|
|||
|
" <th>pricing_formula_id</th>\n",
|
|||
|
" <th>representation_id</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.517137+02:00</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.517137+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.531505+02:00</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.531505+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.532172+02:00</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.532172+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.532665+02:00</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.532665+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.533142+02:00</td>\n",
|
|||
|
" <td>2023-10-13 13:02:32.533142+02:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>779980</th>\n",
|
|||
|
" <td>810312</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>567254</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.770130+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.770130+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>779981</th>\n",
|
|||
|
" <td>810313</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>567254</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.770538+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.770538+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>779982</th>\n",
|
|||
|
" <td>810314</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>567255</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.770916+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.770916+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>779983</th>\n",
|
|||
|
" <td>810315</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>567256</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.771359+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.771359+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>779984</th>\n",
|
|||
|
" <td>810316</td>\n",
|
|||
|
" <td>100.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>567257</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.771761+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:14:16.771761+01:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>779985 rows × 9 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id percent_price max_price min_price category_id \\\n",
|
|||
|
"0 1 100.0 0.0 0.0 1 \n",
|
|||
|
"1 2 100.0 0.0 0.0 1 \n",
|
|||
|
"2 3 100.0 0.0 0.0 1 \n",
|
|||
|
"3 4 100.0 0.0 0.0 1 \n",
|
|||
|
"4 5 100.0 0.0 0.0 1 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"779980 810312 100.0 0.0 0.0 1 \n",
|
|||
|
"779981 810313 100.0 0.0 0.0 1 \n",
|
|||
|
"779982 810314 100.0 0.0 0.0 1 \n",
|
|||
|
"779983 810315 100.0 0.0 0.0 1 \n",
|
|||
|
"779984 810316 100.0 0.0 0.0 1 \n",
|
|||
|
"\n",
|
|||
|
" pricing_formula_id representation_id \\\n",
|
|||
|
"0 1 1 \n",
|
|||
|
"1 1 2 \n",
|
|||
|
"2 1 3 \n",
|
|||
|
"3 1 4 \n",
|
|||
|
"4 1 5 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"779980 1 567254 \n",
|
|||
|
"779981 4 567254 \n",
|
|||
|
"779982 1 567255 \n",
|
|||
|
"779983 1 567256 \n",
|
|||
|
"779984 1 567257 \n",
|
|||
|
"\n",
|
|||
|
" created_at updated_at \n",
|
|||
|
"0 2023-10-13 13:02:32.517137+02:00 2023-10-13 13:02:32.517137+02:00 \n",
|
|||
|
"1 2023-10-13 13:02:32.531505+02:00 2023-10-13 13:02:32.531505+02:00 \n",
|
|||
|
"2 2023-10-13 13:02:32.532172+02:00 2023-10-13 13:02:32.532172+02:00 \n",
|
|||
|
"3 2023-10-13 13:02:32.532665+02:00 2023-10-13 13:02:32.532665+02:00 \n",
|
|||
|
"4 2023-10-13 13:02:32.533142+02:00 2023-10-13 13:02:32.533142+02:00 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"779980 2023-11-09 05:14:16.770130+01:00 2023-11-09 05:14:16.770130+01:00 \n",
|
|||
|
"779981 2023-11-09 05:14:16.770538+01:00 2023-11-09 05:14:16.770538+01:00 \n",
|
|||
|
"779982 2023-11-09 05:14:16.770916+01:00 2023-11-09 05:14:16.770916+01:00 \n",
|
|||
|
"779983 2023-11-09 05:14:16.771359+01:00 2023-11-09 05:14:16.771359+01:00 \n",
|
|||
|
"779984 2023-11-09 05:14:16.771761+01:00 2023-11-09 05:14:16.771761+01:00 \n",
|
|||
|
"\n",
|
|||
|
"[779985 rows x 9 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 32,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"load_dataset_2(company_number, \"products_groups\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "45d5261f-4d46-49cb-8582-dd2121122b05",
|
|||
|
"metadata": {
|
|||
|
"jp-MarkdownHeadingCollapsed": true
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# 1 - Comportement d'achat"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "3479960c-0d23-45f1-8fff-d87395205731",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Outlier"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"id": "9376af51-4320-44b6-8f30-1e1234371556",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def outlier_detection(directory_path = \"1\", coupure = 1):\n",
|
|||
|
" nom_dataframe = 'df'+ directory_path +'_tickets'\n",
|
|||
|
" df_tickets = globals()[nom_dataframe].copy()\n",
|
|||
|
" df_tickets_kpi = tickets_kpi_function(df_tickets)\n",
|
|||
|
"\n",
|
|||
|
" if directory_path == \"101\" :\n",
|
|||
|
" df_tickets_1 = df101_tickets_1.copy()\n",
|
|||
|
" df_tickets_kpi_1 = tickets_kpi_function(df_tickets_1)\n",
|
|||
|
"\n",
|
|||
|
" df_tickets_kpi = pd.concat([df_tickets_kpi, df_tickets_kpi_1])\n",
|
|||
|
" # Part du CA par customer\n",
|
|||
|
" total_amount_share = df_tickets_kpi.groupby('customer_id')['total_amount'].sum().reset_index()\n",
|
|||
|
" total_amount_share['total_amount_entreprise'] = total_amount_share['total_amount'].sum()\n",
|
|||
|
" total_amount_share['share_total_amount'] = total_amount_share['total_amount']/total_amount_share['total_amount_entreprise']\n",
|
|||
|
" \n",
|
|||
|
" total_amount_share_index = total_amount_share.set_index('customer_id')\n",
|
|||
|
" df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)\n",
|
|||
|
" \n",
|
|||
|
" top = df_circulaire[:coupure]\n",
|
|||
|
" rest = df_circulaire[coupure:]\n",
|
|||
|
" \n",
|
|||
|
" # Calculez la somme du reste\n",
|
|||
|
" rest_sum = rest.sum()\n",
|
|||
|
" \n",
|
|||
|
" # Créez une nouvelle série avec les cinq plus grandes parts et 'Autre'\n",
|
|||
|
" new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
|
|||
|
" \n",
|
|||
|
" # Créez le graphique circulaire\n",
|
|||
|
" plt.figure(figsize=(3, 3))\n",
|
|||
|
" plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
|
|||
|
" plt.axis('equal') # Assurez-vous que le graphique est un cercle\n",
|
|||
|
" plt.title('Répartition des montants totaux')\n",
|
|||
|
" plt.show()\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"id": "73211efc-b79f-4235-a250-c0699ea277bf",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEWCAYAAAAtl/EzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABA5UlEQVR4nO3dd3QUVd8H8O/2lrbpvUISIAm9SBGDQgRCUYgCghSRDgJK0Ucpj2LhRcVHFEEjiKIiKl0wSJcaeklCSEgvpPdssuW+f8SsLElI283MbO7nHA5nd2dnfjs7+82UO/fyCCEEFEVRDOMzXQBFURRAw4iiKJagYURRFCvQMKIoihVoGFEUxQo0jCiKYgUaRhRFsQINI4qiWIGGEUVRrNBmYXTz5k3I5XJ8/vnnbbVIiqI4pFlhtH37dvB4PP0/oVAIFxcXTJgwAffu3WvwfaWlpRg/fjwWLlyIhQsXtrro1vjjjz+wZs2ael/z9vbGtGnT9I8zMzOxZs0aXL9+vc60a9asAY/HM02RLcTj8Rr8bO1FTEwM1qxZg+Tk5DZZ3vvvv4+9e/cabX7GqP9x2zirkWbYtm0bAUC2bdtGzp8/T06cOEHee+89IpPJiKOjIykoKKj3fREREeSll14iOp2uOYszifnz55OGPvbVq1dJQkKC/nF0dLT+8z4qLS2NnD9/3lRltggAsnr1aqbLYNTu3bsJAHLixIk2WZ5CoSBTp0412vyMUf/jtnE2E7YkwIKCgtCrVy8AwFNPPQWtVovVq1dj7969mD59ep3pf/nllxZGpfFUVFRALpc/dpru3bs3eX7u7u5wd3dvbVkURdVqTnLV7hlFR0cbPH/o0CECgHzwwQcGz0dHR5NRo0YRpVJJJBIJ6datG9m1a1e984yKiiLTpk0jSqWSyOVyEh4eThITEw2mjYqKIqNHjyZubm5EIpEQPz8/MmvWLJKbm2sw3erVqwkAcuXKFTJu3DhiY2NDnJ2dydSpUwmAOv+SkpIIIYR4eXnp/8qdOHGi3mlr9zxql/EwrVZLPvroIxIQEEDEYjFxcHAgU6ZMIWlpaQbTDR48mHTp0oVcunSJDBw4kMhkMuLj40M++OADotVqG/0eiouLycyZM4mtrS1RKBQkLCyM3L17t949o/j4eDJx4kTi4OBAxGIxCQwMJJs2bapT97vvvkv8/f2JVCol1tbWJDg4mGzcuPGxddSuo507d5Lly5cTZ2dnolAoSHh4OMnOziYlJSXk1VdfJXZ2dsTOzo5MmzaNlJaWGsyjsrKSrFy5knh7exORSERcXV3JvHnzSGFhocF0Xl5eZOTIkeTw4cOke/fuRCqVkoCAABIZGamfpnZbevRf7Z5tc7ef27dvkwkTJhArKyvi6OhIpk+fToqKivTT1beswYMHE0IIKS8vJ6+//jrx9vYmEomEKJVK0rNnT/Ljjz82uD4bq58QQiIjI0lISIh+nmPHjiUxMTH61xvbxjdt2kQGDRpEHBwciFwuJ0FBQeSjjz4i1dXVddZ3fXt8gwcP1n9GQgiZPXs2kUgk5PLly/rntFotGTJkCHF0dCSZmZkNft5HGSWMNm3aRACQ3377Tf/c8ePHiVgsJoMGDSK7du0iR44cIdOmTauzcmvn6eHhQWbMmEEOHz5Mtm7dShwdHYmHh4fBRrl582bywQcfkP3795NTp06R7777jnTt2pUEBAQYrMzajcnLy4usWLGCHD16lOzdu5ckJCSQ8ePHEwDk/Pnz+n8qlYoQYvgFFBcX62t7++239dPWBkt9YTRr1iwCgCxYsIAcOXKEfPXVV8TBwYF4eHgYbPCDBw8mdnZ2pGPHjuSrr74iR48eJfPmzSMAyHfffffY70Cn05HQ0FAikUjIunXrSFRUFFm9ejXx9fWtE0Z37tzRB8uOHTtIVFQUef311wmfzydr1qzRT/fBBx8QgUBAVq9eTY4dO0aOHDlCNm7caDBNfWrDyMvLi0ybNk
3/mS0sLEhoaCgZOnQoeeONN0hUVBT56KOPiEAgIAsXLjT4LGFhYUQoFJJ33nmHREVFkQ0bNhCFQkG6d++u/15qvxt3d3fSuXNnsmPHDvLnn3+SiIgIAoCcOnWKEEJITk4Oef/99wkA8sUXX+i/s5ycnBZtPwEBAWTVqlXk6NGj5JNPPiESiYRMnz5dP9358+eJTCYjI0aM0C/rzp07hJCaH6lcLieffPIJOXHiBDl48CD58MMPyeeff97g+mys/trXJk6cSA4dOkR27NhBfH19ibW1NYmPjyeEkEa38SVLlpDNmzeTI0eOkOPHj5NPP/2U2NvbG3yu2vXdlDCqrKwk3bp1I76+vvrf6qpVqwifzydRUVENftb6tCiMLly4QNRqNSktLSVHjhwhzs7O5MknnyRqtVo/bWBgIOnevbvBc4QQEh4eTlxcXPR7ALXzfO655wymO3v2LAFA3nvvvXpr0el0RK1Wk5SUFAKA7Nu3T/9a7ca0atWqOu973PH0o1/A484ZPRpGsbGxBACZN2+ewXQXL14kAMhbb72lf27w4MEEALl48aLBtJ07dyZhYWH11lbr8OHDBAD57LPPDJ5ft25dnTAKCwsj7u7upLi42GDaBQsWEKlUqj/HFx4eTrp16/bY5danNoxGjRpl8PzixYsJALJo0SKD58eOHUtsbW31j48cOUIAkPXr1xtMt2vXLgKAbN26Vf+cl5cXkUqlJCUlRf9cZWUlsbW1JbNnz9Y/19RzLk3Zfh6ta968eUQqlRqc+2zonFFQUBAZO3bsY2uoT0P1FxYW6oPvYampqUQikZBJkybpn2vqOSOtVkvUajXZsWMHEQgEBud8mxpGhBBy7949YmVlRcaOHUv++usvwufzydtvv934h31Eiy7t9+vXDyKRCJaWlnj22WehVCqxb98+CIU1p6ASEhIQFxeHl156CQCg0Wj0/0aMGIGsrCzcvXvXYJ6109bq378/vLy8cOLECf1zOTk5mDNnDjw8PCAUCiESieDl5QUAiI2NrVPnuHHjWvLxWqS2zoevxgFAnz590KlTJxw7dszgeWdnZ/Tp08fguZCQEKSkpDRpOY+ur0mTJhk8VqlUOHbsGJ577jnI5fI634FKpcKFCxf0Nd64cQPz5s3Dn3/+iZKSkqZ96H+Eh4cbPO7UqRMAYOTIkXWeLygoQFlZGQDg+PHjAOqus4iICCgUijrrrFu3bvD09NQ/lkql8Pf3b3Sd1Wru9jN69GiDxyEhIVCpVMjJyWl0WX369MHhw4excuVKnDx5EpWVlU2qsSHnz59HZWVlnXXl4eGBIUOG1FlXDbl27RpGjx4NOzs7CAQCiEQivPzyy9BqtYiPj29RbR06dMDXX3+NvXv3Ijw8HIMGDWrR1bwWhdGOHTsQHR2N48ePY/bs2YiNjcXEiRP1rz948AAA8MYbb0AkEhn8mzdvHgAgLy/PYJ7Ozs51luPs7Iz8/HwAgE6nw7Bhw/D7779j+fLlOHbsGC5duqT/QdX3Zbu4uLTk47VIbZ31LdPV1VX/ei07O7s600kkkkY32vz8fAiFwjrvf3T95efnQ6PR4PPPP6/zHYwYMQLAv9/Bm2++iQ0bNuDChQsYPnw47Ozs8PTTT+Py5cuNfOoatra2Bo/FYvFjn1epVAafxcHBwWA6Ho9n8N3Xauk6A1q2/Ty6PIlE0uC0j/rf//6HFStWYO/evQgNDYWtrS3Gjh372CYwj9Pc7as+qampGDRoEDIyMvDZZ5/hzJkziI6OxhdffAGgaZ+rISNHjoSTkxNUKhWWLl0KgUDQ7Hm06Gpap06d9FfTQkNDodVq8c033+DXX3/F+PHjYW9vD6BmI3/++efrnUdAQIDB4+zs7DrTZGdno0OHDgCA27dv48aNG9i+fTumTp2qnyYhIaHBOtuyHVDthpuVlVXnKltmZqZ+nRhjORqNBvn5+QY/lkfXn1KphEAgwJQpUzB//vx65+Xj4wMAEAqFWLp0KZYuXY
qioiL89ddfeOuttxAWFoa0tLRGr0K29rPk5uYaBBIhBNnZ2ejdu7fRltWS7ac1FAoF1q5di7Vr1+LBgwf6vaRRo0Y
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 300x300 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"outlier_detection(directory_path = \"1\", coupure = 2)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 32,
|
|||
|
"id": "5c8e9bb7-a403-4898-b40b-47aa37237bc6",
|
|||
|
"metadata": {
|
|||
|
"scrolled": true
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>lastname</th>\n",
|
|||
|
" <th>firstname</th>\n",
|
|||
|
" <th>birthdate</th>\n",
|
|||
|
" <th>email</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" <th>civility</th>\n",
|
|||
|
" <th>is_partner</th>\n",
|
|||
|
" <th>extra</th>\n",
|
|||
|
" <th>deleted_at</th>\n",
|
|||
|
" <th>reference</th>\n",
|
|||
|
" <th>gender</th>\n",
|
|||
|
" <th>is_email_true</th>\n",
|
|||
|
" <th>extra_field</th>\n",
|
|||
|
" <th>opt_in</th>\n",
|
|||
|
" <th>structure_id</th>\n",
|
|||
|
" <th>note</th>\n",
|
|||
|
" <th>profession</th>\n",
|
|||
|
" <th>language</th>\n",
|
|||
|
" <th>mcp_contact_id</th>\n",
|
|||
|
" <th>need_reload</th>\n",
|
|||
|
" <th>last_buying_date</th>\n",
|
|||
|
" <th>max_price</th>\n",
|
|||
|
" <th>ticket_sum</th>\n",
|
|||
|
" <th>average_price</th>\n",
|
|||
|
" <th>fidelity</th>\n",
|
|||
|
" <th>average_purchase_delay</th>\n",
|
|||
|
" <th>average_price_basket</th>\n",
|
|||
|
" <th>average_ticket_basket</th>\n",
|
|||
|
" <th>total_price</th>\n",
|
|||
|
" <th>preferred_category</th>\n",
|
|||
|
" <th>preferred_supplier</th>\n",
|
|||
|
" <th>preferred_formula</th>\n",
|
|||
|
" <th>purchase_count</th>\n",
|
|||
|
" <th>first_buying_date</th>\n",
|
|||
|
" <th>last_visiting_date</th>\n",
|
|||
|
" <th>zipcode</th>\n",
|
|||
|
" <th>country</th>\n",
|
|||
|
" <th>age</th>\n",
|
|||
|
" <th>tenant_id</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>58201</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>2020-09-03 13:11:25.569167+02:00</td>\n",
|
|||
|
" <td>2023-03-04 13:27:42.761679+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>2023-11-08 03:20:07</td>\n",
|
|||
|
" <td>45.0</td>\n",
|
|||
|
" <td>1254775</td>\n",
|
|||
|
" <td>7.030122</td>\n",
|
|||
|
" <td>330831</td>\n",
|
|||
|
" <td>-67.790969</td>\n",
|
|||
|
" <td>13.75153</td>\n",
|
|||
|
" <td>1.956087</td>\n",
|
|||
|
" <td>8821221.5</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>641472</td>\n",
|
|||
|
" <td>2013-06-10 12:37:58+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1311</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id lastname firstname birthdate email street_id \\\n",
|
|||
|
"58201 1 NaN NaN NaN NaN 2 \n",
|
|||
|
"\n",
|
|||
|
" created_at updated_at \\\n",
|
|||
|
"58201 2020-09-03 13:11:25.569167+02:00 2023-03-04 13:27:42.761679+01:00 \n",
|
|||
|
"\n",
|
|||
|
" civility is_partner extra deleted_at reference gender \\\n",
|
|||
|
"58201 NaN False NaN NaN NaN 2 \n",
|
|||
|
"\n",
|
|||
|
" is_email_true extra_field opt_in structure_id note profession \\\n",
|
|||
|
"58201 True NaN False NaN NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" language mcp_contact_id need_reload last_buying_date max_price \\\n",
|
|||
|
"58201 NaN NaN False 2023-11-08 03:20:07 45.0 \n",
|
|||
|
"\n",
|
|||
|
" ticket_sum average_price fidelity average_purchase_delay \\\n",
|
|||
|
"58201 1254775 7.030122 330831 -67.790969 \n",
|
|||
|
"\n",
|
|||
|
" average_price_basket average_ticket_basket total_price \\\n",
|
|||
|
"58201 13.75153 1.956087 8821221.5 \n",
|
|||
|
"\n",
|
|||
|
" preferred_category preferred_supplier preferred_formula \\\n",
|
|||
|
"58201 NaN NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" purchase_count first_buying_date last_visiting_date zipcode \\\n",
|
|||
|
"58201 641472 2013-06-10 12:37:58+02:00 NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" country age tenant_id \n",
|
|||
|
"58201 fr NaN 1311 "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 32,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df = load_dataset_2('1', 'customersplus')\n",
|
|||
|
"df[df['id'] == 1]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"id": "4455b6b9-8395-47ea-b976-d98a2d3c782c",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUUAAAESCAYAAABq/8cSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABEYklEQVR4nO3dd3gU5doG8Ht7y6Zseg+hhECAhCAICAGkd6U3KYoFhYOoNJWmHJSjHPzwSFEEUfRYDyhNIiAdCaRAaAkQkpAC6aRtsuX9/ohZsilkEzaZze7zu65csDPvzNw7u3ky5Z0ZHmOMgRBCCACAz3UAQgixJFQUCSGkCiqKhBBSBRVFQgipgooiIYRUQUWREEKqoKJICCFVUFEkhJAqqCgSQkgVLb4oXrp0CXK5HJs2beI6CiHEClhEUdy5cyd4PJ7hRygUwtPTE5MnT0ZiYmKd0xUWFmL8+PGYP38+5s+f34yJazpw4ABWrVpV67iAgADMmjXL8Do9PR2rVq1CbGxsjbarVq0Cj8drmpCNxOPx6nxvtuLq1atYtWoV7ty50yzL++c//4k9e/aYbX7myP+o77hVYRZgx44dDADbsWMHO3v2LDt27Bh7//33mUwmY25ubiw3N7fW6SZMmMCmTZvG9Hp9Myeu6dVXX2V1rc7o6Gh28+ZNw+uoqCjD+60uNTWVnT17tqliNgoAtnLlSq5jcOrHH39kANixY8eaZXkKhYLNnDnTbPMzR/5HfcetiZDDelxDSEgIunXrBgDo168fdDodVq5ciT179mD27Nk12v/www/NHbGGkpISyOXyR7YJCwszeX4+Pj7w8fF53FiEkMbiuioz9nBLMSoqymj4/v37GQC2bt06o+FRUVFs1KhRzMnJiUkkEhYaGsq+//77Wud5+PBhNmvWLObk5MTkcjkbOXIku3XrllHbw4cPs9GjRzNvb28mkUhY69at2YsvvsiysrKM2q1cuZIBYBcvXmTjxo1jjo6OzMPDg82cOZMBqPGTlJTEGGPM39/f8Ff/2LFjtbat3BKrXEZVOp2OffjhhywoKIiJxWLm6urKZsyYwVJTU43aRUREsI4dO7Lz58+zp556islkMtaqVSu2bt06ptPp6v0cCgoK2AsvvMBUKhVTKBRsyJAh7MaNG7VuKSYkJLApU6YwV1dXJhaLWfv27dmnn35aI/d7773H2rVrx6RSKXNwcGCdOnViGzdufGSOynW0e/dutnjxYubh4cEUCgUbOXIky8zMZA8ePGBz585lzs7OzNnZmc2aNYsVFhYazaO0tJQtXbqUBQQEMJFIxLy8vNi8efNYXl6eUTt/f382YsQIdvDgQRYWFsakUikLCgpi27dvN7Sp/C5V/6nc0m/o9yc+Pp5NnjyZ2dvbMzc3NzZ79myWn59vaFfbsiIiIhhjjBUXF7M33niDBQQEMIlEwpycnFh4eDj79ttv61yf9eVnjLHt27ezzp07G+Y5duxYdvXqVcP4+r7jn376KevTpw9zdXVlcrmchYSEsA8//JCVl5fXWN+1bQFHREQY3iNjjL300ktMIpGwCxcuGIbpdDo2YMAA5ubmxtLT0+t8v4/Loovip59+ygCwn3/+2TDs6NGjTCwWsz59+rDvv/+eHTp0iM2aNavGh1w5T19fXzZnzhx28OBBtm3bNubm5sZ8fX2Nfjk2b97M1q1bx3799Vd2/Phx9tVXX7EuXbqwoKAgow+18kvt7+/PlixZwiIjI9mePXvYzZs32fjx4xkAdvbsWcOPWq1mjBl/EQoKCgzZ3nnnHUPbygJXW1F88cUXGQD22muvsUOHDrEtW7YwV1dX5uvra/SLFxERwZydnVnbtm3Zli1bWGRkJJs3bx4DwL766qtHfgZ6vZ7179+fSSQStnbtWnb48GG2cuVKFhgYWKMoXrlyxVDgdu3axQ4fPszeeOMNxufz2apVqwzt1q1bxwQCAVu5ciU7cuQIO3ToENu4caNRm9
pUFkV/f382a9Ysw3u2s7Nj/fv3Z4MGDWJvvvkmO3z4MPvwww+ZQCBg8+fPN3ovQ4YMYUKhkL377rvs8OHD7KOPPmIKhYKFhYUZPpfKz8bHx4d16NCB7dq1i/3+++9swoQJDAA7fvw4Y4yx+/fvs3/+858MAPvPf/5j+Mzu37/fqO9PUFAQW7FiBYuMjGQbNmxgEomEzZ4929Du7NmzTCaTseHDhxuWdeXKFcZYRbGQy+Vsw4YN7NixY2zfvn3sgw8+YJs2bapzfdaXv3LclClT2P79+9muXbtYYGAgc3BwYAkJCYwxVu93/PXXX2ebN29mhw4dYkePHmX//ve/mYuLi9H7qlzfphTF0tJSFhoaygIDAw2/qytWrGB8Pp8dPny4zvdqDhZVFM+dO8c0Gg0rLCxkhw4dYh4eHqxv375Mo9EY2rZv356FhYUZDWOMsZEjRzJPT0/DFlHlPJ955hmjdqdPn2YA2Pvvv19rFr1ezzQaDUtOTmYA2N69ew3jKr/UK1asqDHdo463VP8iPOqYYvWieO3aNQaAzZs3z6jdX3/9xQCw5cuXG4ZFREQwAOyvv/4yatuhQwc2ZMiQWrNVOnjwIAPAPvnkE6Pha9eurVEUhwwZwnx8fFhBQYFR29dee41JpVLDMeCRI0ey0NDQRy63NpVFcdSoUUbDFy5cyACwBQsWGA0fO3YsU6lUhteHDh1iANj69euN2n3//fcMANu2bZthmL+/P5NKpSw5OdkwrLS0lKlUKvbSSy8Zhpl6TM6U70/1XPPmzWNSqdTo2HhdxxRDQkLY2LFjH5mhNnXlz8vLMxTgqlJSUphEImFTp041DDP1mKJOp2MajYbt2rWLCQQCo3MCphZFxhhLTExk9vb2bOzYseyPP/5gfD6fvfPOO/W/2cdkEWefKz355JMQiURQKpUYOnQonJycsHfvXgiFFYc+b968ievXr2PatGkAAK1Wa/gZPnw4MjIycOPGDaN5Vrat1KtXL/j7++PYsWOGYffv38fLL78MX19fCIVCiEQi+Pv7AwCuXbtWI+e4cePM+r4fpTJn1bPXANC9e3cEBwfjyJEjRsM9PDzQvXt3o2GdO3dGcnKyScupvr6mTp1q9FqtVuPIkSN45plnIJfLa3wGarUa586dM2SMi4vDvHnz8Pvvv+PBgwemvem/jRw50uh1cHAwAGDEiBE1hufm5qKoqAgAcPToUQA119mECROgUChqrLPQ0FD4+fkZXkulUrRr167edVapod+f0aNHG73u3Lkz1Go17t+/X++yunfvjoMHD2Lp0qX4888/UVpaalLGupw9exalpaU11pWvry8GDBhQY13VJSYmBqNHj4azszMEAgFEIhGee+456HQ6JCQkNCpbmzZt8Pnnn2PPnj0YOXIk+vTp0yxnvy2qKO7atQtRUVE4evQoXnrpJVy7dg1TpkwxjL937x4A4M0334RIJDL6mTdvHgAgOzvbaJ4eHh41luPh4YGcnBwAgF6vx+DBg/HLL79g8eLFOHLkCM6fP2/4xa7tS+fp6WmeN2yCypy1LdPLy8swvpKzs3ONdhKJpN5fnpycHAiFwhrTV19/OTk50Gq12LRpU43PYPjw4QAefgbLli3DRx99hHPnzmHYsGFwdnbG008/jQsXLtTzriuoVCqj12Kx+JHD1Wq10XtxdXU1asfj8Yw++0qNXWdA474/1ZcnkUjqbFvd//3f/2HJkiXYs2cP+vfvD5VKhbFjxz6y69qjNPT7VZuUlBT06dMHaWlp+OSTT3Dy5ElERUXhP//5DwDT3lddRowYAXd3d6jVaixatAgCgaDR8zKVRZ19Dg4ONpx97t+/P3Q6Hb744gv89NNPGD9+PFxcXABU/LI9++yztc4jKCjI6HVmZmaNNpmZmWjTpg0AID4+HnFxcdi5cydmzpxpaHPz5s06czZnP8LKX6CMjIwaZ6XT09MN68Qcy9FqtcjJyTH6pa2+/pycnCAQCDBjxgy8+uqrtc
6rVatWAAChUIhFixZh0aJFyM/Pxx9//IHly5djyJAhSE1Nrfes/eO+l6ysLKPCyBhDZmYmnnjiCbMtqzHfn8ehUCi
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 300x300 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"outlier_detection(directory_path = \"2\", coupure = 2)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 33,
|
|||
|
"id": "ee16cf31-18e1-4803-b003-ba1d1a3fc333",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>lastname</th>\n",
|
|||
|
" <th>firstname</th>\n",
|
|||
|
" <th>birthdate</th>\n",
|
|||
|
" <th>email</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" <th>civility</th>\n",
|
|||
|
" <th>is_partner</th>\n",
|
|||
|
" <th>extra</th>\n",
|
|||
|
" <th>deleted_at</th>\n",
|
|||
|
" <th>reference</th>\n",
|
|||
|
" <th>gender</th>\n",
|
|||
|
" <th>is_email_true</th>\n",
|
|||
|
" <th>extra_field</th>\n",
|
|||
|
" <th>opt_in</th>\n",
|
|||
|
" <th>structure_id</th>\n",
|
|||
|
" <th>note</th>\n",
|
|||
|
" <th>profession</th>\n",
|
|||
|
" <th>language</th>\n",
|
|||
|
" <th>mcp_contact_id</th>\n",
|
|||
|
" <th>need_reload</th>\n",
|
|||
|
" <th>last_buying_date</th>\n",
|
|||
|
" <th>max_price</th>\n",
|
|||
|
" <th>ticket_sum</th>\n",
|
|||
|
" <th>average_price</th>\n",
|
|||
|
" <th>fidelity</th>\n",
|
|||
|
" <th>average_purchase_delay</th>\n",
|
|||
|
" <th>average_price_basket</th>\n",
|
|||
|
" <th>average_ticket_basket</th>\n",
|
|||
|
" <th>total_price</th>\n",
|
|||
|
" <th>preferred_category</th>\n",
|
|||
|
" <th>preferred_supplier</th>\n",
|
|||
|
" <th>preferred_formula</th>\n",
|
|||
|
" <th>purchase_count</th>\n",
|
|||
|
" <th>first_buying_date</th>\n",
|
|||
|
" <th>last_visiting_date</th>\n",
|
|||
|
" <th>zipcode</th>\n",
|
|||
|
" <th>country</th>\n",
|
|||
|
" <th>age</th>\n",
|
|||
|
" <th>tenant_id</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>170246</th>\n",
|
|||
|
" <td>12184</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>3564</td>\n",
|
|||
|
" <td>2023-10-12 12:25:15.438714+02:00</td>\n",
|
|||
|
" <td>2023-11-09 05:14:01.944407+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>1275.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>2023-11-08 19:17:50.565000</td>\n",
|
|||
|
" <td>75.0</td>\n",
|
|||
|
" <td>512831</td>\n",
|
|||
|
" <td>12.645438</td>\n",
|
|||
|
" <td>197358</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>31.719577</td>\n",
|
|||
|
" <td>2.508381</td>\n",
|
|||
|
" <td>6484972.4</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>204447</td>\n",
|
|||
|
" <td>2020-08-28 08:55:55.710000+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id lastname firstname birthdate email street_id \\\n",
|
|||
|
"170246 12184 NaN NaN NaN NaN 3564 \n",
|
|||
|
"\n",
|
|||
|
" created_at updated_at \\\n",
|
|||
|
"170246 2023-10-12 12:25:15.438714+02:00 2023-11-09 05:14:01.944407+01:00 \n",
|
|||
|
"\n",
|
|||
|
" civility is_partner extra deleted_at reference gender \\\n",
|
|||
|
"170246 NaN False NaN NaN NaN 2 \n",
|
|||
|
"\n",
|
|||
|
" is_email_true extra_field opt_in structure_id note profession \\\n",
|
|||
|
"170246 True NaN False 1275.0 NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" language mcp_contact_id need_reload last_buying_date \\\n",
|
|||
|
"170246 NaN NaN False 2023-11-08 19:17:50.565000 \n",
|
|||
|
"\n",
|
|||
|
" max_price ticket_sum average_price fidelity \\\n",
|
|||
|
"170246 75.0 512831 12.645438 197358 \n",
|
|||
|
"\n",
|
|||
|
" average_purchase_delay average_price_basket average_ticket_basket \\\n",
|
|||
|
"170246 0.0 31.719577 2.508381 \n",
|
|||
|
"\n",
|
|||
|
" total_price preferred_category preferred_supplier \\\n",
|
|||
|
"170246 6484972.4 NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" preferred_formula purchase_count first_buying_date \\\n",
|
|||
|
"170246 NaN 204447 2020-08-28 08:55:55.710000+02:00 \n",
|
|||
|
"\n",
|
|||
|
" last_visiting_date zipcode country age tenant_id \n",
|
|||
|
"170246 NaN NaN NaN NaN 1879 "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df = load_dataset_2('2', 'customersplus')\n",
|
|||
|
"df[df['id'] == 12184]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"id": "4073c986-3e2c-4945-8601-220fea747c9c",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>lastname</th>\n",
|
|||
|
" <th>firstname</th>\n",
|
|||
|
" <th>birthdate</th>\n",
|
|||
|
" <th>email</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" <th>civility</th>\n",
|
|||
|
" <th>is_partner</th>\n",
|
|||
|
" <th>extra</th>\n",
|
|||
|
" <th>deleted_at</th>\n",
|
|||
|
" <th>reference</th>\n",
|
|||
|
" <th>gender</th>\n",
|
|||
|
" <th>is_email_true</th>\n",
|
|||
|
" <th>extra_field</th>\n",
|
|||
|
" <th>opt_in</th>\n",
|
|||
|
" <th>structure_id</th>\n",
|
|||
|
" <th>note</th>\n",
|
|||
|
" <th>profession</th>\n",
|
|||
|
" <th>language</th>\n",
|
|||
|
" <th>mcp_contact_id</th>\n",
|
|||
|
" <th>need_reload</th>\n",
|
|||
|
" <th>last_buying_date</th>\n",
|
|||
|
" <th>max_price</th>\n",
|
|||
|
" <th>ticket_sum</th>\n",
|
|||
|
" <th>average_price</th>\n",
|
|||
|
" <th>fidelity</th>\n",
|
|||
|
" <th>average_purchase_delay</th>\n",
|
|||
|
" <th>average_price_basket</th>\n",
|
|||
|
" <th>average_ticket_basket</th>\n",
|
|||
|
" <th>total_price</th>\n",
|
|||
|
" <th>preferred_category</th>\n",
|
|||
|
" <th>preferred_supplier</th>\n",
|
|||
|
" <th>preferred_formula</th>\n",
|
|||
|
" <th>purchase_count</th>\n",
|
|||
|
" <th>first_buying_date</th>\n",
|
|||
|
" <th>last_visiting_date</th>\n",
|
|||
|
" <th>zipcode</th>\n",
|
|||
|
" <th>country</th>\n",
|
|||
|
" <th>age</th>\n",
|
|||
|
" <th>tenant_id</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>102639</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2023-07-20 17:16:27.062822+02:00</td>\n",
|
|||
|
" <td>2023-07-20 17:16:27.074952+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>224453</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>firstname2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>2023-07-21 10:18:44.502496+02:00</td>\n",
|
|||
|
" <td>2023-07-21 10:18:44.502496+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>josef</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>ch</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>103013</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>firstname3</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>2023-07-21 10:18:44.503913+02:00</td>\n",
|
|||
|
" <td>2023-07-21 10:18:44.503913+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>dominic</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>ch</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>138386</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>firstname4</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>2023-07-21 10:18:44.504404+02:00</td>\n",
|
|||
|
" <td>2023-07-21 10:18:44.504404+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>abigail</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>ch</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>190087</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>firstname5</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>2023-07-21 10:18:44.504841+02:00</td>\n",
|
|||
|
" <td>2023-07-21 10:18:44.504841+02:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>sophia</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>ch</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>101868</th>\n",
|
|||
|
" <td>601387</td>\n",
|
|||
|
" <td>lastname601387</td>\n",
|
|||
|
" <td>firstname601387</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email601387</td>\n",
|
|||
|
" <td>3550</td>\n",
|
|||
|
" <td>2023-11-09 05:13:57.358715+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:13:57.358715+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>de</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>205168</th>\n",
|
|||
|
" <td>601388</td>\n",
|
|||
|
" <td>lastname601388</td>\n",
|
|||
|
" <td>firstname601388</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email601388</td>\n",
|
|||
|
" <td>3550</td>\n",
|
|||
|
" <td>2023-11-09 05:13:57.359234+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:13:57.359234+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>de</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>2023-11-09 00:25:24.716000</td>\n",
|
|||
|
" <td>15.0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>14.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>28.0</td>\n",
|
|||
|
" <td>2.0</td>\n",
|
|||
|
" <td>28.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2023-11-09 00:25:24.716000+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>67641</th>\n",
|
|||
|
" <td>601389</td>\n",
|
|||
|
" <td>lastname601389</td>\n",
|
|||
|
" <td>firstname601389</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email601389</td>\n",
|
|||
|
" <td>3550</td>\n",
|
|||
|
" <td>2023-11-09 05:13:57.360373+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:13:57.360373+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>de</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>2023-11-09 00:28:07.511000</td>\n",
|
|||
|
" <td>15.0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>15.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>30.0</td>\n",
|
|||
|
" <td>2.0</td>\n",
|
|||
|
" <td>30.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2023-11-09 00:28:07.511000+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>67639</th>\n",
|
|||
|
" <td>601390</td>\n",
|
|||
|
" <td>lastname601390</td>\n",
|
|||
|
" <td>firstname601390</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email601390</td>\n",
|
|||
|
" <td>3550</td>\n",
|
|||
|
" <td>2023-11-09 05:13:57.360903+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:13:57.360903+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>256450</th>\n",
|
|||
|
" <td>601391</td>\n",
|
|||
|
" <td>lastname601391</td>\n",
|
|||
|
" <td>firstname601391</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>email601391</td>\n",
|
|||
|
" <td>3550</td>\n",
|
|||
|
" <td>2023-11-09 05:13:57.361432+01:00</td>\n",
|
|||
|
" <td>2023-11-09 05:14:18.906054+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>2023-11-09 00:36:41.172000</td>\n",
|
|||
|
" <td>15.0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>15.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>30.0</td>\n",
|
|||
|
" <td>2.0</td>\n",
|
|||
|
" <td>30.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2023-11-09 00:36:41.172000+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1879</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>275622 rows × 42 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id lastname firstname birthdate email \\\n",
|
|||
|
"102639 1 NaN NaN NaN email1 \n",
|
|||
|
"224453 2 NaN firstname2 NaN NaN \n",
|
|||
|
"103013 3 NaN firstname3 NaN NaN \n",
|
|||
|
"138386 4 NaN firstname4 NaN NaN \n",
|
|||
|
"190087 5 NaN firstname5 NaN NaN \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"101868 601387 lastname601387 firstname601387 NaN email601387 \n",
|
|||
|
"205168 601388 lastname601388 firstname601388 NaN email601388 \n",
|
|||
|
"67641 601389 lastname601389 firstname601389 NaN email601389 \n",
|
|||
|
"67639 601390 lastname601390 firstname601390 NaN email601390 \n",
|
|||
|
"256450 601391 lastname601391 firstname601391 NaN email601391 \n",
|
|||
|
"\n",
|
|||
|
" street_id created_at \\\n",
|
|||
|
"102639 1 2023-07-20 17:16:27.062822+02:00 \n",
|
|||
|
"224453 2 2023-07-21 10:18:44.502496+02:00 \n",
|
|||
|
"103013 3 2023-07-21 10:18:44.503913+02:00 \n",
|
|||
|
"138386 3 2023-07-21 10:18:44.504404+02:00 \n",
|
|||
|
"190087 3 2023-07-21 10:18:44.504841+02:00 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"101868 3550 2023-11-09 05:13:57.358715+01:00 \n",
|
|||
|
"205168 3550 2023-11-09 05:13:57.359234+01:00 \n",
|
|||
|
"67641 3550 2023-11-09 05:13:57.360373+01:00 \n",
|
|||
|
"67639 3550 2023-11-09 05:13:57.360903+01:00 \n",
|
|||
|
"256450 3550 2023-11-09 05:13:57.361432+01:00 \n",
|
|||
|
"\n",
|
|||
|
" updated_at civility is_partner extra \\\n",
|
|||
|
"102639 2023-07-20 17:16:27.074952+02:00 NaN False NaN \n",
|
|||
|
"224453 2023-07-21 10:18:44.502496+02:00 NaN False NaN \n",
|
|||
|
"103013 2023-07-21 10:18:44.503913+02:00 NaN False NaN \n",
|
|||
|
"138386 2023-07-21 10:18:44.504404+02:00 NaN False NaN \n",
|
|||
|
"190087 2023-07-21 10:18:44.504841+02:00 NaN False NaN \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"101868 2023-11-09 05:13:57.358715+01:00 NaN False NaN \n",
|
|||
|
"205168 2023-11-09 05:13:57.359234+01:00 NaN False NaN \n",
|
|||
|
"67641 2023-11-09 05:13:57.360373+01:00 NaN False NaN \n",
|
|||
|
"67639 2023-11-09 05:13:57.360903+01:00 NaN False NaN \n",
|
|||
|
"256450 2023-11-09 05:14:18.906054+01:00 NaN False NaN \n",
|
|||
|
"\n",
|
|||
|
" deleted_at reference gender is_email_true extra_field opt_in \\\n",
|
|||
|
"102639 NaN NaN 2 True NaN False \n",
|
|||
|
"224453 NaN NaN 1 True NaN False \n",
|
|||
|
"103013 NaN NaN 2 True NaN False \n",
|
|||
|
"138386 NaN NaN 2 True NaN False \n",
|
|||
|
"190087 NaN NaN 1 True NaN False \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"101868 NaN NaN 2 True NaN False \n",
|
|||
|
"205168 NaN NaN 2 True NaN False \n",
|
|||
|
"67641 NaN NaN 2 True NaN False \n",
|
|||
|
"67639 NaN NaN 0 True NaN False \n",
|
|||
|
"256450 NaN NaN 2 True NaN False \n",
|
|||
|
"\n",
|
|||
|
" structure_id note profession language mcp_contact_id need_reload \\\n",
|
|||
|
"102639 NaN NaN NaN NaN 1.0 False \n",
|
|||
|
"224453 NaN NaN NaN josef NaN False \n",
|
|||
|
"103013 NaN NaN NaN dominic NaN False \n",
|
|||
|
"138386 NaN NaN NaN abigail NaN False \n",
|
|||
|
"190087 NaN NaN NaN sophia NaN False \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"101868 NaN NaN NaN de NaN False \n",
|
|||
|
"205168 NaN NaN NaN de NaN False \n",
|
|||
|
"67641 NaN NaN NaN de NaN False \n",
|
|||
|
"67639 NaN NaN NaN NaN NaN False \n",
|
|||
|
"256450 NaN NaN NaN NaN NaN False \n",
|
|||
|
"\n",
|
|||
|
" last_buying_date max_price ticket_sum average_price \\\n",
|
|||
|
"102639 NaN NaN 0 NaN \n",
|
|||
|
"224453 NaN NaN 0 NaN \n",
|
|||
|
"103013 NaN NaN 0 NaN \n",
|
|||
|
"138386 NaN NaN 0 NaN \n",
|
|||
|
"190087 NaN NaN 0 NaN \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"101868 NaN NaN 0 NaN \n",
|
|||
|
"205168 2023-11-09 00:25:24.716000 15.0 2 14.0 \n",
|
|||
|
"67641 2023-11-09 00:28:07.511000 15.0 2 15.0 \n",
|
|||
|
"67639 NaN NaN 0 NaN \n",
|
|||
|
"256450 2023-11-09 00:36:41.172000 15.0 2 15.0 \n",
|
|||
|
"\n",
|
|||
|
" fidelity average_purchase_delay average_price_basket \\\n",
|
|||
|
"102639 0 NaN NaN \n",
|
|||
|
"224453 0 NaN NaN \n",
|
|||
|
"103013 0 NaN NaN \n",
|
|||
|
"138386 0 NaN NaN \n",
|
|||
|
"190087 0 NaN NaN \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"101868 0 NaN NaN \n",
|
|||
|
"205168 1 0.0 28.0 \n",
|
|||
|
"67641 1 0.0 30.0 \n",
|
|||
|
"67639 0 NaN NaN \n",
|
|||
|
"256450 1 0.0 30.0 \n",
|
|||
|
"\n",
|
|||
|
" average_ticket_basket total_price preferred_category \\\n",
|
|||
|
"102639 NaN 0.0 NaN \n",
|
|||
|
"224453 NaN 0.0 NaN \n",
|
|||
|
"103013 NaN 0.0 NaN \n",
|
|||
|
"138386 NaN 0.0 NaN \n",
|
|||
|
"190087 NaN 0.0 NaN \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"101868 NaN 0.0 NaN \n",
|
|||
|
"205168 2.0 28.0 NaN \n",
|
|||
|
"67641 2.0 30.0 NaN \n",
|
|||
|
"67639 NaN 0.0 NaN \n",
|
|||
|
"256450 2.0 30.0 NaN \n",
|
|||
|
"\n",
|
|||
|
" preferred_supplier preferred_formula purchase_count \\\n",
|
|||
|
"102639 NaN NaN 0 \n",
|
|||
|
"224453 NaN NaN 0 \n",
|
|||
|
"103013 NaN NaN 0 \n",
|
|||
|
"138386 NaN NaN 0 \n",
|
|||
|
"190087 NaN NaN 0 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"101868 NaN NaN 0 \n",
|
|||
|
"205168 NaN NaN 1 \n",
|
|||
|
"67641 NaN NaN 1 \n",
|
|||
|
"67639 NaN NaN 0 \n",
|
|||
|
"256450 NaN NaN 1 \n",
|
|||
|
"\n",
|
|||
|
" first_buying_date last_visiting_date zipcode country \\\n",
|
|||
|
"102639 NaN NaN NaN fr \n",
|
|||
|
"224453 NaN NaN NaN ch \n",
|
|||
|
"103013 NaN NaN NaN ch \n",
|
|||
|
"138386 NaN NaN NaN ch \n",
|
|||
|
"190087 NaN NaN NaN ch \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"101868 NaN NaN NaN NaN \n",
|
|||
|
"205168 2023-11-09 00:25:24.716000+01:00 NaN NaN NaN \n",
|
|||
|
"67641 2023-11-09 00:28:07.511000+01:00 NaN NaN NaN \n",
|
|||
|
"67639 NaN NaN NaN NaN \n",
|
|||
|
"256450 2023-11-09 00:36:41.172000+01:00 NaN NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" age tenant_id \n",
|
|||
|
"102639 NaN 1879 \n",
|
|||
|
"224453 NaN 1879 \n",
|
|||
|
"103013 NaN 1879 \n",
|
|||
|
"138386 NaN 1879 \n",
|
|||
|
"190087 NaN 1879 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"101868 NaN 1879 \n",
|
|||
|
"205168 NaN 1879 \n",
|
|||
|
"67641 NaN 1879 \n",
|
|||
|
"67639 NaN 1879 \n",
|
|||
|
"256450 NaN 1879 \n",
|
|||
|
"\n",
|
|||
|
"[275622 rows x 42 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.sort_values(by = 'id')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"id": "475030ad-6a69-4c91-9cd6-943a0edeaf01",
|
|||
|
"metadata": {
|
|||
|
"scrolled": true
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_3/products_purchased_reduced.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATwAAAEQCAYAAAAta8hLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+3klEQVR4nO3dd3gU5doG8Ht7Sd8lyaY3QgIJIZGAihRBaRIEBTyiYABBkCKICtgoR4pwFFH0iCgaEDgfogI2MEjvndBCAiGEQHqvm23v90fMypKEFJLMlud3XblgZ2dnnpmd3HmnvcNjjDEQQogN4HNdACGEtBUKPEKIzaDAI4TYDAo8QojNoMAjhNgMCjxCiM2gwCOE2AwKPEKIzaDAI4TYjDYLvAsXLkAul2P16tVtNUtCCDHRpMCLi4sDj8cz/giFQnh4eOD555/HtWvX6v1caWkpRo4ciRkzZmDGjBkPXPSD+OOPP7Bw4cI63/P398e4ceOMrzMyMrBw4UKcP3++1rgLFy4Ej8drnSKbicfj1btstuLKlStYuHAhbt682SbzW7p0KbZv395i02uJ+u+3jds81gTfffcdA8C+++47duzYMbZv3z62ePFiJpPJmJubGysoKKjzc6NGjWIvvvgiMxgMTZldq5g2bRqrb7HPnj3Lrl+/bnx96tQp4/LeKz09nR07dqy1ymwWAGzBggVcl8GprVu3MgBs3759bTI/Ozs7Fhsb22LTa4n677eN2zphc0IyPDwc0dHRAIDHH38cer0eCxYswPbt2zF+/Pha4//www/NjOOWU1FRAblcft9xoqKiGj09b29veHt7P2hZhJC21JR0rGnhnTp1ymT477//zgCwZcuWmQw/deoUGzp0KHNxcWESiYRFRkayLVu21DnN+Ph4Nm7cOObi4sLkcjmLiYlhKSkpJuPGx8ezp59+mnl5eTGJRMKCgoLYK6+8wnJzc03GW7BgAQPAzpw5w0aMGMGcnZ2ZSqVisbGxDECtn9TUVMYYY35+fsa/1vv27atz3JoWVM087qbX69ny5ctZSEgIE4vFzNXVlY0dO5alp6ebjNenTx8WFhbGTp48yXr27MlkMhkLCAhgy5YtY3q9vsHvobi4mE2cOJEpFApmZ2fHBg4cyJKSkups4SUnJ7PRo0czV1dXJhaLWWhoKPv8889r1f3BBx+wDh06MKlUypycnFjnzp3ZqlWr7ltHzTratGkTmzNnDlOpVMzOzo7FxMSwrKwsVlJSwiZNmsSUSiVTKpVs3LhxrLS01GQalZWVbN68eczf35+JRCLm6enJpk6dygoLC03G8/PzY0OGDGE7d+5kUVFRTCqVspCQELZu3TrjODXb0r0/NS30pm4/ly5dYs8//zxzdHRkbm5ubPz48ayoqMg4Xl3z6tOnD2OMsfLycvbGG28wf39/JpFImIuLC+vatSvbvHlzveuzofoZY2zdunUsIiLCOM3hw4ezK1euGN9vaBv//PPPWa9evZirqyuTy+UsPDycLV++nGk0mlrru66Wa58+fYzLyBhjkydPZhKJhJ0+fdo4TK/Xs379+jE3NzeWkZFR7/JyoUUC7/PPP2cA2E8//WQctnfvXiYWi1mvXr3Yli1b2K5du9i4ceNqfYE10/Tx8WETJkxgO3fuZGvXrmVubm7Mx8fHZMP/8ssv2bJly9gvv/zCDhw4wNavX8+6dOnCQkJCTL6wmg3Wz8+PzZ07l+3evZtt376dXb9+nY0cOZIBYMeOHTP+qNVqxpjpl1xcXGys7b333jOOWxNedQXeK6+8wgCw6dOns127drE1a9YwV1dX5uPjY/JL1adPH6ZUKllwcDBbs2YN2717N5s6dSoDwNavX3/f78BgMLC+ffsyiUTClixZwuLj49mCBQtYYGBgrcC7fPmyMbw2bNjA4uPj2RtvvMH4fD5buHChcbxly5YxgUDAFixYwPbs2cN27drFVq1aZTJOXWoCz8/Pj40bN8
64zPb29qxv376sf//+7M0332Tx8fFs+fLlTCAQsBkzZpgsy8CBA5lQKGTvv/8+i4+PZx999BGzs7NjUVFRxu+l5rvx9vZmnTp1Yhs2bGB//vknGzVqFAPADhw4wBhjLCcnhy1dupQBYF988YXxO8vJyWnW9hMSEsLmz5/Pdu/ezVauXMkkEgkbP368cbxjx44xmUzGnnrqKeO8Ll++zBirDgK5XM5WrlzJ9u3bx3777Tf24YcfstWrV9e7Phuqv+a90aNHs99//51t2LCBBQYGMicnJ5acnMwYYw1u46+//jr78ssv2a5du9jevXvZJ598wtq1a2eyXDXruzGBV1lZySIjI1lgYKDxd3X+/PmMz+ez+Pj4epeVK80KvOPHjzOtVstKS0vZrl27mEqlYr1792ZardY4bmhoKIuKijIZxhhjMTExzMPDw9iSqZnmM888YzLekSNHGAC2ePHiOmsxGAxMq9WytLQ0BoDt2LHD+F7NBjt//vxan7vf8Y17v+T7HcO7N/ASExMZADZ16lST8U6cOMEAsHfeecc4rE+fPgwAO3HihMm4nTp1YgMHDqyztho7d+5kANinn35qMnzJkiW1Am/gwIHM29ubFRcXm4w7ffp0JpVKjcdcY2JiWGRk5H3nW5eawBs6dKjJ8FmzZjEA7LXXXjMZPnz4cKZQKIyvd+3axQCwFStWmIy3ZcsWBoCtXbvWOMzPz49JpVKWlpZmHFZZWckUCgWbPHmycVhjj4E1Zvu5t66pU6cyqVRqciy6vmN44eHhbPjw4fetoS711V9YWGgM17vdunWLSSQS9sILLxiHNfYYnl6vZ1qtlm3YsIEJBAKTY/CNDTzGGLt27RpzdHRkw4cPZ3/99Rfj8/nsvffea3hhOdCsy1IeeeQRiEQiODg4YNCgQXBxccGOHTsgFFYfErx+/TquXr2KF198EQCg0+mMP0899RQyMzORlJRkMs2acWv06NEDfn5+2Ldvn3FYTk4OpkyZAh8fHwiFQohEIvj5+QEAEhMTa9U5YsSI5ixes9TUefdZXgDo3r07OnbsiD179pgMV6lU6N69u8mwiIgIpKWlNWo+966vF154weS1Wq3Gnj178Mwzz0Aul9f6DtRqNY4fP26sMSEhAVOnTsWff/6JkpKSxi3032JiYkxed+zYEQAwZMiQWsMLCgpQVlYGANi7dy+A2uts1KhRsLOzq7XOIiMj4evra3wtlUrRoUOHBtdZjaZuP08//bTJ64iICKjVauTk5DQ4r+7du2Pnzp2YN28e9u/fj8rKykbVWJ9jx46hsrKy1rry8fFBv379aq2r+pw7dw5PP/00lEolBAIBRCIRXnrpJej1eiQnJzertvbt2+Prr7/G9u3bERMTg169epntWeJmBd6GDRtw6tQp7N27F5MnT0ZiYiJGjx5tfD87OxsA8Oabb0IkEpn8TJ06FQCQl5dnMk2VSlVrPiqVCvn5+QAAg8GAAQMG4Oeff8acOXOwZ88enDx50vhLW9cG5eHh0ZzFa5aaOuuap6enp/H9GkqlstZ4EomkwV+M/Px8CIXCWp+/d/3l5+dDp9Nh9erVtb6Dp556CsA/38Hbb7+Njz76CMePH8fgwYOhVCrxxBNP4PTp0w0sdTWFQmHyWiwW33e4Wq02WRZXV1eT8Xg8nsl3X6O56wxo3vZz7/wkEkm9497rs88+w9y5c7F9+3b07dsXCoUCw4cPv+/lW/fT1O2rLrdu3UKvXr1w584dfPrppzh06BBOnTqFL774AkDjlqs+Q4YMgbu7O9RqNWbPng2BQNDsabWmZp2l7dixo/Esbd++faHX6/HNN9/gxx9/xMiRI9GuXTsA1b9Izz77bJ3TCAkJMXmdlZVVa5ysrCy0b98eAHDp0iUkJCQgLi4OsbGxxnGuX79eb51teZ1czS9HZmZmrbO3GRkZxnXSEvPR6XTIz883+YW8d/25uLhAIBBg7NixmDZtWp3TCggIAAAIhULMnj0bs2
fPRlFREf766y+88847GDhwINLT0xs8u/2gy5Kbm2sSeowxZGVloVu3bi02r+ZsPw/Czs4OixYtwqJFi5CdnW1s7Q0
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 300x300 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"outlier_detection(directory_path = \"3\", coupure = 2)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"id": "b64d04db-1c3f-4538-9d05-8f7d62c7c046",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>lastname</th>\n",
|
|||
|
" <th>firstname</th>\n",
|
|||
|
" <th>birthdate</th>\n",
|
|||
|
" <th>email</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" <th>civility</th>\n",
|
|||
|
" <th>is_partner</th>\n",
|
|||
|
" <th>extra</th>\n",
|
|||
|
" <th>deleted_at</th>\n",
|
|||
|
" <th>reference</th>\n",
|
|||
|
" <th>gender</th>\n",
|
|||
|
" <th>is_email_true</th>\n",
|
|||
|
" <th>extra_field</th>\n",
|
|||
|
" <th>opt_in</th>\n",
|
|||
|
" <th>structure_id</th>\n",
|
|||
|
" <th>note</th>\n",
|
|||
|
" <th>profession</th>\n",
|
|||
|
" <th>language</th>\n",
|
|||
|
" <th>mcp_contact_id</th>\n",
|
|||
|
" <th>need_reload</th>\n",
|
|||
|
" <th>last_buying_date</th>\n",
|
|||
|
" <th>max_price</th>\n",
|
|||
|
" <th>ticket_sum</th>\n",
|
|||
|
" <th>average_price</th>\n",
|
|||
|
" <th>fidelity</th>\n",
|
|||
|
" <th>average_purchase_delay</th>\n",
|
|||
|
" <th>average_price_basket</th>\n",
|
|||
|
" <th>average_ticket_basket</th>\n",
|
|||
|
" <th>total_price</th>\n",
|
|||
|
" <th>preferred_category</th>\n",
|
|||
|
" <th>preferred_supplier</th>\n",
|
|||
|
" <th>preferred_formula</th>\n",
|
|||
|
" <th>purchase_count</th>\n",
|
|||
|
" <th>first_buying_date</th>\n",
|
|||
|
" <th>last_visiting_date</th>\n",
|
|||
|
" <th>zipcode</th>\n",
|
|||
|
" <th>country</th>\n",
|
|||
|
" <th>age</th>\n",
|
|||
|
" <th>tenant_id</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>105720</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1961-12-04</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>91159</td>\n",
|
|||
|
" <td>2021-03-02 15:35:40.452065+01:00</td>\n",
|
|||
|
" <td>2023-11-09 01:31:07.539604+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>19715.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>2023-11-06 16:57:19</td>\n",
|
|||
|
" <td>7500.0</td>\n",
|
|||
|
" <td>2297716</td>\n",
|
|||
|
" <td>10.152196</td>\n",
|
|||
|
" <td>14917</td>\n",
|
|||
|
" <td>-39771.165147</td>\n",
|
|||
|
" <td>27.514811</td>\n",
|
|||
|
" <td>2.710232</td>\n",
|
|||
|
" <td>2.332686e+07</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>847793</td>\n",
|
|||
|
" <td>2016-01-01 10:23:36+01:00</td>\n",
|
|||
|
" <td>2023-11-06 17:12:00</td>\n",
|
|||
|
" <td>13090</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>1512</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id lastname firstname birthdate email street_id \\\n",
|
|||
|
"105720 1 NaN NaN 1961-12-04 NaN 91159 \n",
|
|||
|
"\n",
|
|||
|
" created_at updated_at \\\n",
|
|||
|
"105720 2021-03-02 15:35:40.452065+01:00 2023-11-09 01:31:07.539604+01:00 \n",
|
|||
|
"\n",
|
|||
|
" civility is_partner extra deleted_at reference gender \\\n",
|
|||
|
"105720 NaN False NaN NaN NaN 2 \n",
|
|||
|
"\n",
|
|||
|
" is_email_true extra_field opt_in structure_id note profession \\\n",
|
|||
|
"105720 False NaN False 19715.0 NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" language mcp_contact_id need_reload last_buying_date max_price \\\n",
|
|||
|
"105720 NaN NaN False 2023-11-06 16:57:19 7500.0 \n",
|
|||
|
"\n",
|
|||
|
" ticket_sum average_price fidelity average_purchase_delay \\\n",
|
|||
|
"105720 2297716 10.152196 14917 -39771.165147 \n",
|
|||
|
"\n",
|
|||
|
" average_price_basket average_ticket_basket total_price \\\n",
|
|||
|
"105720 27.514811 2.710232 2.332686e+07 \n",
|
|||
|
"\n",
|
|||
|
" preferred_category preferred_supplier preferred_formula \\\n",
|
|||
|
"105720 NaN NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" purchase_count first_buying_date last_visiting_date \\\n",
|
|||
|
"105720 847793 2016-01-01 10:23:36+01:00 2023-11-06 17:12:00 \n",
|
|||
|
"\n",
|
|||
|
" zipcode country age tenant_id \n",
|
|||
|
"105720 13090 fr 61.0 1512 "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 36,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df = load_dataset_2('3', 'customersplus')\n",
|
|||
|
"df[df['id'] == 1]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"id": "1d817bee-3ded-4066-9f91-6cf095591b0e",
|
|||
|
"metadata": {
|
|||
|
"scrolled": true
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_4/products_purchased_reduced.csv\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEQCAYAAAD7zhIuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAUklEQVR4nO3dd3QUVRsG8Gd7S++9h4SSQGjSIdKRpoCKgIAgSFNsiChNQQTxAwWlC9IUC0VEQgldQAIEAqR30nvbZLPtfn9gVpb0ZDezm9zfOTmcnZ3MvDs7eZhy514WIYSAoiiKYWymC6AoigJoGFEUZSBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBoGFEUZRBaLIwiIiIgFouxZcuWllolRVFGpFFhtG/fPrBYLM0Pl8uFo6MjXn31VcTFxdX6e6WlpZg4cSIWLVqERYsWNbvo5vjrr7+watWqGt/z8PDAjBkzNK8zMjKwatUq3Lt3r9q8q1atAovF0k+RTcRisWr9bG1FZGQkVq1aheTk5BZZ3xdffIHjx4/rbHm6qL+ufdygkUbYu3cvAUD27t1Lbty4QS5evEjWrFlDRCIRsbOzIwUFBTX+3qRJk8iUKVOIWq1uzOr0YsGCBaS2j3337l0SHx+veR0WFqb5vM96/PgxuXHjhr7KbBIAZOXKlUyXwahff/2VACAXL15skfVJJBIyffp0nS1PF/XXtY8bMm5TAqxTp07o3r07AGDQoEFQqVRYuXIljh8/jpkzZ1ab/5dffmliVOpOeXk5xGJxnfMEBQU1eHkuLi5wcXFpblkURVVpTHJVHRmFhYVpTT916hQBQNatW6c1PSwsjIwZM4ZYWloSgUBAunTpQo4cOVLjMs+ePUtmzJhBLC0tiVgsJqNHjyYJCQla8549e5aMHTuWODs7E4FAQLy9vcmcOXNIbm6u1nwrV64kAMidO3fIhAkTiIWFBXFwcCDTp08nAKr9JCUlEUIIcXd31/wvd/HixRrnrTryqFrH01QqFVm/fj3x8/MjfD6f2NrakmnTppHHjx9rzTdw4EDSsWNHcuvWLdKvXz8iEomIp6cnWbduHVGpVPV+D8XFxWT27NnEysqKSCQSMnz4cBITE1PjkVFsbCyZPHkysbW1JXw+n/j7+5OtW7dWq/vzzz8n7dq1I0KhkJibm5OAgACyefPmOuuo2kaHDh0iS5YsIQ4ODkQikZDRo0eTrKwsUlJSQt58801ibW1NrK2tyYwZM0hpaanWMioqKsjSpUuJh4cH4fF4xMnJicyfP58UFhZqzefu7k5eeOEFcvr0aRIUFESEQiHx8/Mje/bs0cxTtS89+1N1ZNvY/efhw4fk1VdfJWZmZsTOzo7MnDmTFBUVaearaV0DBw4khBAilUrJ+++/Tzw8PIhAICCWlpakW7du5PDhw7Vuz/rqJ4SQPXv2kMDAQM0yx48fTyIjIzXv17ePb926lfTv35/Y2toSsVhMOnXqRNavX0/kcnm17V3TEd/AgQM1n5EQQubOnUsEAgG5ffu2ZppKpSLPP/88sbOzIxkZGbV+3mfpJIy2bt1KAJDff/9dM+3ChQuEz+eT/v37kyNHjpCQkBAyY8aMahu3apmurq7kjTfeIKdPnyY7d+4kdnZ2xNXVVWun3LZtG1m3bh35448/yOXLl8mPP/5IOnfuTPz8/LQ2ZtXO5O7uTj766CNy7tw5cvz4cRIfH08mTpxIAJAbN25ofmQyGSFE+wsoLi7W1Pbpp59q5q0KlprCaM6cOQQAWbhwIQkJCSHbt28ntra2xNXVVWuHHzhwILG2tia+vr5k+/bt5Ny5c2T+/PkEAPnxxx/r/A7UajUJDg4mAoGArF27lpw9e5asXLmSeHl5VQujR48eaYJl//795OzZs+T9998nbDabrFq1SjPfunXrCIfDIStXriShoaEkJCSEbN68WWuemlSFkbu7O5kxY4bmM5uYmJ
Dg4GAydOhQ8sEHH5CzZ8+S9evXEw6HQxYtWqT1WYYPH064XC5Zvnw5OXv2LNm4cSORSCQkKChI871UfTcuLi6kQ4cOZP/+/eTMmTNk0qRJBAC5fPkyIYSQnJwc8sUXXxAA5LvvvtN8Zzk5OU3af/z8/MiKFSvIuXPnyP/+9z8iEAjIzJkzNfPduHGDiEQiMmrUKM26Hj16RAh58kcqFovJ//73P3Lx4kXy559/ki+//JJs2bKl1u1ZX/1V702ePJmcOnWK7N+/n3h5eRFzc3MSGxtLCCH17uPvvvsu2bZtGwkJCSEXLlwgmzZtIjY2Nlqfq2p7NySMKioqSJcuXYiXl5fmb3XFihWEzWaTs2fP1vpZa9KkMLp58yZRKBSktLSUhISEEAcHBzJgwACiUCg08/r7+5OgoCCtaYQQMnr0aOLo6Kg5Aqha5osvvqg1399//00AkDVr1tRYi1qtJgqFgqSkpBAA5MSJE5r3qnamFStWVPu9us6nn/0C6rpm9GwYRUVFEQBk/vz5WvP9888/BABZtmyZZtrAgQMJAPLPP/9ozduhQwcyfPjwGmurcvr0aQKAfPPNN1rT165dWy2Mhg8fTlxcXEhxcbHWvAsXLiRCoVBzjW/06NGkS5cuda63JlVhNGbMGK3pixcvJgDI22+/rTV9/PjxxMrKSvM6JCSEACAbNmzQmu/IkSMEANm5c6dmmru7OxEKhSQlJUUzraKiglhZWZG5c+dqpjX0mktD9p9n65o/fz4RCoVa1z5ru2bUqVMnMn78+DprqElt9RcWFmqC72mpqalEIBCQ1157TTOtodeMVCoVUSgUZP/+/YTD4Whd821oGBFCSFxcHDEzMyPjx48n58+fJ2w2m3z66af1f9hnNOnWfq9evcDj8WBqaooRI0bA0tISJ06cAJf75BJUfHw8oqOjMWXKFACAUqnU/IwaNQqZmZmIiYnRWmbVvFX69OkDd3d3XLx4UTMtJycHb731FlxdXcHlcsHj8eDu7g4AiIqKqlbnhAkTmvLxmqSqzqfvxgFAz5490b59e4SGhmpNd3BwQM+ePbWmBQYGIiUlpUHreXZ7vfbaa1qvZTIZQkND8eKLL0IsFlf7DmQyGW7evKmp8f79+5g/fz7OnDmDkpKShn3of40ePVrrdfv27QEAL7zwQrXpBQUFKCsrAwBcuHABQPVtNmnSJEgkkmrbrEuXLnBzc9O8FgqFaNeuXb3brEpj95+xY8dqvQ4MDIRMJkNOTk696+rZsydOnz6NpUuX4tKlS6ioqGhQjbW5ceMGKioqqm0rV1dXPP/889W2VW3Cw8MxduxYWFtbg8PhgMfj4fXXX4dKpUJsbGyTavPx8cGuXbtw/PhxjB49Gv3792/S3bwmhdH+/fsRFhaGCxcuYO7cuYiKisLkyZM172dnZwMAPvjgA/B4PK2f+fPnAwDy8vK0lung4FBtPQ4ODsjPzwcAqNVqDBs2DEePHsWSJUsQGhqKW7duaf6gavqyHR0dm/LxmqSqzprW6eTkpHm/irW1dbX5BAJBvTttfn4+uFxutd9/dvvl5+dDqVRiy5Yt1b6DUaNGAfjvO/j444+xceNG3Lx5EyNHjoS1tTUGDx6M27dv1/Opn7CystJ6zefz65wuk8m0Poutra3WfCwWS+u7r9LUbQY0bf95dn0CgaDWeZ/17bff4qOPPsLx48cRHBwMKysrjB8/vs4mMHVp7P5Vk9TUVPTv3x/p6en45ptvcPXqVYSFheG7774D0LDPVZsXXngB9vb2kMlkeO+998DhcBq9jCbdTWvfvr3mblpwcDBUKhV2796N3377DRMnToSNjQ2AJzv5Sy+9VOMy/Pz8tF5nZWVVmycrKws+Pj4AgIcPH+L+/fvYt28fpk+frpknPj6+1jpbsh1Q1Y6bmZlZ7S5bRkaGZpvoYj1KpRL5+flafyzPbj9LS0twOBxMmzYNCxYsqHFZnp6eAAAul4v33nsP7733HoqKinD+/H
ksW7YMw4cPx+PHj+u9C9ncz5Kbm6sVSIQQZGVloUePHjpbV1P2n+aQSCRYvXo1Vq9ejezsbM1R0pgxYxAdHd3o5T2
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 300x300 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"outlier_detection(directory_path = \"4\", coupure = 2)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 37,
|
|||
|
"id": "4cc07982-1070-439b-a579-fd3f351778b3",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>lastname</th>\n",
|
|||
|
" <th>firstname</th>\n",
|
|||
|
" <th>birthdate</th>\n",
|
|||
|
" <th>email</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>created_at</th>\n",
|
|||
|
" <th>updated_at</th>\n",
|
|||
|
" <th>civility</th>\n",
|
|||
|
" <th>is_partner</th>\n",
|
|||
|
" <th>extra</th>\n",
|
|||
|
" <th>deleted_at</th>\n",
|
|||
|
" <th>reference</th>\n",
|
|||
|
" <th>gender</th>\n",
|
|||
|
" <th>is_email_true</th>\n",
|
|||
|
" <th>extra_field</th>\n",
|
|||
|
" <th>opt_in</th>\n",
|
|||
|
" <th>structure_id</th>\n",
|
|||
|
" <th>note</th>\n",
|
|||
|
" <th>profession</th>\n",
|
|||
|
" <th>language</th>\n",
|
|||
|
" <th>mcp_contact_id</th>\n",
|
|||
|
" <th>need_reload</th>\n",
|
|||
|
" <th>last_buying_date</th>\n",
|
|||
|
" <th>max_price</th>\n",
|
|||
|
" <th>ticket_sum</th>\n",
|
|||
|
" <th>average_price</th>\n",
|
|||
|
" <th>fidelity</th>\n",
|
|||
|
" <th>average_purchase_delay</th>\n",
|
|||
|
" <th>average_price_basket</th>\n",
|
|||
|
" <th>average_ticket_basket</th>\n",
|
|||
|
" <th>total_price</th>\n",
|
|||
|
" <th>preferred_category</th>\n",
|
|||
|
" <th>preferred_supplier</th>\n",
|
|||
|
" <th>preferred_formula</th>\n",
|
|||
|
" <th>purchase_count</th>\n",
|
|||
|
" <th>first_buying_date</th>\n",
|
|||
|
" <th>last_visiting_date</th>\n",
|
|||
|
" <th>zipcode</th>\n",
|
|||
|
" <th>country</th>\n",
|
|||
|
" <th>age</th>\n",
|
|||
|
" <th>tenant_id</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>300754</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>2020-09-25 19:09:07.669208+02:00</td>\n",
|
|||
|
" <td>2021-11-30 02:07:28.120188+01:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>2023-11-07 16:33:09</td>\n",
|
|||
|
" <td>360.0</td>\n",
|
|||
|
" <td>1237224</td>\n",
|
|||
|
" <td>6.056248</td>\n",
|
|||
|
" <td>236850</td>\n",
|
|||
|
" <td>0.015528</td>\n",
|
|||
|
" <td>13.493612</td>\n",
|
|||
|
" <td>2.228048</td>\n",
|
|||
|
" <td>7492935.0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>555295</td>\n",
|
|||
|
" <td>1901-01-01 00:09:21+00:09</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1342</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id lastname firstname birthdate email street_id \\\n",
|
|||
|
"300754 2 NaN NaN NaN NaN 2 \n",
|
|||
|
"\n",
|
|||
|
" created_at updated_at \\\n",
|
|||
|
"300754 2020-09-25 19:09:07.669208+02:00 2021-11-30 02:07:28.120188+01:00 \n",
|
|||
|
"\n",
|
|||
|
" civility is_partner extra deleted_at reference gender \\\n",
|
|||
|
"300754 NaN False NaN NaN NaN 2 \n",
|
|||
|
"\n",
|
|||
|
" is_email_true extra_field opt_in structure_id note profession \\\n",
|
|||
|
"300754 False NaN False NaN NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" language mcp_contact_id need_reload last_buying_date max_price \\\n",
|
|||
|
"300754 NaN NaN False 2023-11-07 16:33:09 360.0 \n",
|
|||
|
"\n",
|
|||
|
" ticket_sum average_price fidelity average_purchase_delay \\\n",
|
|||
|
"300754 1237224 6.056248 236850 0.015528 \n",
|
|||
|
"\n",
|
|||
|
" average_price_basket average_ticket_basket total_price \\\n",
|
|||
|
"300754 13.493612 2.228048 7492935.0 \n",
|
|||
|
"\n",
|
|||
|
" preferred_category preferred_supplier preferred_formula \\\n",
|
|||
|
"300754 NaN NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" purchase_count first_buying_date last_visiting_date zipcode \\\n",
|
|||
|
"300754 555295 1901-01-01 00:09:21+00:09 NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" country age tenant_id \n",
|
|||
|
"300754 NaN NaN 1342 "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df = load_dataset_2('4', 'customersplus')\n",
|
|||
|
"df[df['id'] == 2]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"id": "f74a9e62-a0f7-41cf-9834-78a99204547c",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATEAAAEQCAYAAADYlUP7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBNklEQVR4nO3dd3gUVdsG8Ht7S9s00isQWkIiSJUSBAIYmoAUQToKCKIoIK+0V3xRPgsISpEAgqioIAIKhI5IgFASSigJpJFOeja72Xa+P2IWlvSQZHZ2z++6csHOnp15ZndyZ+bM2RkOIYSAoiiKpbhMF0BRFPU8aIhRFMVqNMQoimI1GmIURbEaDTGKoliNhhhFUaxGQ4yiKFajIUZRFKvREKMoitWaLcRu3LgBqVSKDRs2NNciKYqyAPUKsZ07d4LD4Rh++Hw+XF1dMW7cOMTHx1f7uuLiYowePRrz5s3DvHnznrvo5/HXX39h5cqVVT7n4+ODKVOmGB6np6dj5cqViImJqdR25cqV4HA4TVNkA3E4nGrXzVLExcVh5cqVSEpKapbl/e9//8OBAwcabX6NUX9N27hZIvWwY8cOAoDs2LGDREVFkdOnT5PVq1cTiURCnJ2dSV5eXpWvGzNmDHn99deJXq+vz+KaxNy5c0l1q33t2jWSkJBgeBwdHW1Y32elpqaSqKiopiqzQQCQFStWMF0Go3799VcCgJw+fbpZlieTycjkyZMbbX6NUX9N27g54jck+Dp06IDOnTsDAPr27QudTocVK1bgwIEDmDp1aqX2v/zySwMjtvGUlpZCKpXW2CYkJKTO8/Pw8ICHh8fzlkVR1POqT+JV7IlFR0cbTf/zzz8JALJmzRqj6dHR0WTo0KFELpcTkUhEgoODyd69e6ucZ2RkJJkyZQqRy+VEKpWS8PBw8uDBA6O2kZGRZNiwYcTd3Z2IRCLi7+9PZs2aRXJycozarVixggAgV69eJaNGjSJ2dnbExcWFTJ48mQCo9JOYmEgIIcTb29vwV/X06dNVtq3Y06lYxtN0Oh357LPPSEBAABEKhcTJyYlMmjSJpKamGrXr06cPad++Pbl8+TJ56aWXiEQiIb6+vmTNmjVEp9PV+jkUFhaSGTNmEHt7eyKTyUhYWBi5d+9elXti9+/fJ+PHjydOTk5EKBSSNm3akI0bN1aq++OPPyatW7cmYrGY2NraksDAQLJu3boa66h4j/bs2UMWLVpEXFxciEwmI+Hh4SQzM5MUFRWRmTNnEgcHB+Lg4ECmTJlCiouLjeahVCrJkiVLiI+PDxEIBMTNzY3MmTOH5OfnG7Xz9vYmr7zyCjly5AgJCQkhYrGYBAQEkIiICEObim3p2Z+KPen6bj+3bt0i48aNIzY2NsTZ2ZlMnTqVFBQUGNpVtaw+ffoQQghRKBRk4cKFxMfHh4hEIiKXy0mnTp3Ijz/+WO37WVv9hBASERFBgoKCDPMcMWIEiYuLMzxf2za+ceNG0qtXL+Lk5ESkUinp0KED+eyzz4hara70fle1h9mnTx/DOhJCyJtvvklEIhG5cuWKYZpOpyP9+vUjzs7OJD09vdr1bSyNEmIbN24kAMi+ffsM006dOkWEQiHp1asX2bt3Lzl69CiZMmVKpQ+lYp6enp5k2rRp5MiRI2Tr1q3E2dmZeHp6Gm3MmzZtImvWrCEHDx4kZ8+eJd9//z3p2LEjCQgIMPoQKjZCb29vsnjxYnL8+HFy4MABkpCQQEaPHk0AkKioKMOPSqUihBh/cIWFhYbaPvroI0PbikCqKsRmzZpFAJC3336bHD16lGzevJk4OTkRT09Po1+UPn36EAcHB9KqVSuyefNmcvz4cTJnzhwCgHz//fc1fgZ6vZ6EhoYSkUhEPvnkExIZGUlWrFhB/Pz8KoXY7du3DYG0a9cuEhkZSRYuXEi4XC5ZuXKlod2aNWsIj8cjK1asICdPniRHjx4l69atM2pTlYoQ8/b2JlOmTD
Gss5WVFQkNDSUDBgwg77//PomMjCSfffYZ4fF4ZN68eUbrEhYWRvh8Plm2bBmJjIwkn3/+OZHJZCQkJMTwuVR8Nh4eHqRdu3Zk165d5NixY2TMmDEEADl79iwhhJDs7Gzyv//9jwAg33zzjeEzy87ObtD2ExAQQJYvX06OHz9OvvzySyISicjUqVMN7aKioohEIiFDhgwxLOv27duEkPJfbqlUSr788kty+vRpcvjwYfLpp5+SDRs2VPt+1lZ/xXPjx48nf/75J9m1axfx8/Mjtra25P79+4QQUus2/u6775JNmzaRo0ePklOnTpGvvvqKODo6Gq1XxftdlxBTKpUkODiY+Pn5GX5Xly9fTrhcLomMjKx2XRtTg0Ls4sWLRKPRkOLiYnL06FHi4uJCevfuTTQajaFtmzZtSEhIiNE0QggJDw8nrq6uhj2OinmOHDnSqN0///xDAJDVq1dXWYterycajYYkJycTAOSPP/4wPFexES5fvrzS62rqL3j2g6upT+zZELtz5w4BQObMmWPU7tKlSwQAWbp0qWFanz59CABy6dIlo7bt2rUjYWFhVdZW4ciRIwQAWb9+vdH0Tz75pFKIhYWFEQ8PD1JYWGjU9u233yZisdjQhxkeHk6Cg4NrXG5VKkJs6NChRtMXLFhAAJD58+cbTR8xYgSxt7c3PD569CgBQNauXWvUbu/evQQA2bp1q2Gat7c3EYvFJDk52TBNqVQSe3t78uabbxqm1bVPqS7bz7N1zZkzh4jFYqO+3er6xDp06EBGjBhRYw1Vqa7+/Px8Q2A+LSUlhYhEIjJhwgTDtLr2iel0OqLRaMiuXbsIj8cz6tOua4gRQkh8fDyxsbEhI0aMICdOnCBcLpd89NFHta9sI2nQEItu3bpBIBDA2toagwYNglwuxx9//AE+v7yLLSEhAXfv3sXrr78OANBqtYafIUOGICMjA/fu3TOaZ0XbCj169IC3tzdOnz5tmJadnY233noLnp6e4PP5EAgE8Pb2BgDcuXOnUp2jRo1qyOo1SEWdT5/dBIAuXbqgbdu2OHnypNF0FxcXdOnSxWhaUFAQkpOT67ScZ9+vCRMmGD1WqVQ4efIkRo4cCalUWukzUKlUuHjxoqHG2NhYzJkzB8eOHUNRUVHdVvpf4eHhRo/btm0LAHjllVcqTc/Ly0NJSQkA4NSpUwAqv2djxoyBTCar9J4FBwfDy8vL8FgsFqN169a1vmcV6rv9DBs2zOhxUFAQVCoVsrOza11Wly5dcOTIESxZsgRnzpyBUqmsU43ViYqKglKprPReeXp6ol+/fpXeq+pcv34dw4YNg4ODA3g8HgQCAd544w3odDrcv3+/QbW1bNkS3333HQ4cOIDw8HD06tWrWc+ONijEdu3ahejoaJw6dQpvvvkm7ty5g/Hjxxuez8rKAgC8//77EAgERj9z5swBADx+/Nhoni4uLpWW4+LigtzcXACAXq/HwIEDsX//fixatAgnT57E5cuXDb+IVW0krq6uDVm9Bqmos6plurm5GZ6v4ODgUKmdSCSqdWPPzc0Fn8+v9Ppn37/c3FxotVps2LCh0mcwZMgQAE8+gw8//BCff/45Ll68iMGDB8PBwQEvv/wyrly5Ustal7O3tzd6LBQKa5yuUqmM1sXJycmoHYfDMfrsKzT0PQMatv08uzyRSFRt22d9/fXXWLx4MQ4cOIDQ0FDY29tjxIgRNQ5Fqkl9t6+qpKSkoFevXkhLS8P69evx999/Izo6Gt988w2Auq1XdV555RW0aNECKpUK7733Hng8XoPnVV8NOjvZtm1bw9nJ0NBQ6HQ6bNu2Db/99htGjx4NR0dHAOW/HK+++mqV8wgICDB6nJmZWalNZmYmWrZsCQC4desWYmNjsXPnTkyePNnQJiEhodo6m3McV8UGn5GRUemsZXp6uuE9aYzlaLVa5ObmGv2SPfv+yeVy8Hg8TJo0CXPnzq1yXr6+vgAAPp+P9957D++99x4KCg
pw4sQJLF26FGFhYUhNTa31rO7zrktOTo5RkBFCkJmZiRdffLHRltWQ7ed5yGQyrFq1CqtWrUJWVpZhr2zo0KG4e/d
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 300x300 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"outlier_detection(directory_path = \"101\", coupure = 2)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "dbebfa92-310a-417b-a7fa-36ac3593db06",
|
|||
|
"metadata": {
|
|||
|
"jp-MarkdownHeadingCollapsed": true
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Evolution des commandes"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"id": "06137694-7f50-47ba-8749-68471ececc1e",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/tmp/ipykernel_448/3643128924.py:11: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
|||
|
" purchases = pd.read_csv(file_in, sep=\",\", parse_dates = ['purchase_date'], date_parser=custom_date_parser)\n",
|
|||
|
"/tmp/ipykernel_448/3643128924.py:19: FutureWarning: The argument 'date_parser' is deprecated and will be removed in a future version. Please use 'date_format' instead, or read your data in as 'object' dtype and then call 'to_datetime'.\n",
|
|||
|
" campaigns = pd.read_csv(file_in, sep=\",\", parse_dates = ['sent_at'], date_parser=custom_date_parser)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Importation - Chargement des données temporaires\n",
|
|||
|
"company_number = \"1\"\n",
|
|||
|
"nom_dataframe = 'df'+ company_number +'_tickets'\n",
|
|||
|
"purchases = globals()[nom_dataframe].copy()\n",
|
|||
|
"\n",
|
|||
|
"campaigns = display_databases(company_number,'campaigns_information', ['sent_at'])\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"id": "e6b962d4-1a30-4133-ac0f-359f7afef42c",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Mois du premier achat\n",
|
|||
|
"purchase_min = purchases.groupby(['customer_id'])['purchase_date'].min().reset_index()\n",
|
|||
|
"purchase_min.rename(columns = {'purchase_date' : 'first_purchase_event'}, inplace = True)\n",
|
|||
|
"purchase_min['first_purchase_event'] = pd.to_datetime(purchase_min['first_purchase_event'])\n",
|
|||
|
"purchase_min['first_purchase_month'] = pd.to_datetime(purchase_min['first_purchase_event'].dt.strftime('%Y-%m'))\n",
|
|||
|
"\n",
|
|||
|
"# Mois du premier mails\n",
|
|||
|
"first_mail_received = campaigns.groupby('customer_id')['sent_at'].min().reset_index()\n",
|
|||
|
"first_mail_received.rename(columns = {'sent_at' : 'first_email_reception'}, inplace = True)\n",
|
|||
|
"first_mail_received['first_email_reception'] = pd.to_datetime(first_mail_received['first_email_reception'])\n",
|
|||
|
"first_mail_received['first_email_month'] = pd.to_datetime(first_mail_received['first_email_reception'].dt.strftime('%Y-%m'))\n",
|
|||
|
"\n",
|
|||
|
"# Fusion \n",
|
|||
|
"known_customer = pd.merge(purchase_min[['customer_id', 'first_purchase_month']], \n",
|
|||
|
" first_mail_received[['customer_id', 'first_email_month']], on = 'customer_id', how = 'outer')\n",
|
|||
|
"\n",
|
|||
|
"# Mois à partir duquel le client est considere comme connu\n",
|
|||
|
"known_customer['known_date'] = pd.to_datetime(known_customer[['first_email_month', 'first_purchase_month']].min(axis = 1), utc = True, format = 'ISO8601')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"id": "9c56e5ac-cbf4-4343-80ba-be2ab8b60eab",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Nombre de commande par mois\n",
|
|||
|
"purchases_count = pd.merge(purchases[['customer_id', 'purchase_id', 'purchase_date']].drop_duplicates(), known_customer[['customer_id', 'known_date']], on = ['customer_id'], how = 'inner')\n",
|
|||
|
"purchases_count['is_customer_known'] = purchases_count['purchase_date'] > purchases_count['known_date'] + pd.DateOffset(months=1)\n",
|
|||
|
"purchases_count['purchase_date_month'] = pd.to_datetime(purchases_count['purchase_date'].dt.strftime('%Y-%m'))\n",
|
|||
|
"purchases_count = purchases_count[purchases_count['customer_id'] != 1]\n",
|
|||
|
"\n",
|
|||
|
"# Nombre de commande par mois par type de client\n",
|
|||
|
"nb_purchases_graph = purchases_count.groupby(['purchase_date_month', 'is_customer_known'])['purchase_id'].count().reset_index()\n",
|
|||
|
"nb_purchases_graph.rename(columns = {'purchase_id' : 'nb_purchases'}, inplace = True)\n",
|
|||
|
"\n",
|
|||
|
"nb_purchases_graph_2 = purchases_count.groupby(['purchase_date_month', 'is_customer_known'])['customer_id'].nunique().reset_index()\n",
|
|||
|
"nb_purchases_graph_2.rename(columns = {'customer_id' : 'nb_new_customer'}, inplace = True)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"id": "8c1aed44-03d3-49f9-b96c-b06a0df03dde",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHFCAYAAAAT5Oa6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABezElEQVR4nO3deVxV1f4//teRGYQjg0zKYE6pgKaUoJkoyBA4lwNGKqblGKkfk8ordk3TcrqaXjUHUhErxesUaip6EQdEcSQzL04J4gAHQWbW7w+/7J9HDsrBw3hez8djP+Ks/d57vxds483aa+8tE0IIEBEREWmxRrWdABEREVFtY0FEREREWo8FEREREWk9FkRERESk9VgQERERkdZjQURERERajwURERERaT0WRERERKT1WBARERGR1mNBRKRhGzduhEwmg6GhIW7evFluvZeXF1xcXGohM2DUqFFo3LhxrRz7ZWQyGSIiIqTPZd/H6hIREQGZTIYHDx5obJ9RUVFYunSpxvanrlGjRkEmk8HU1BQ5OTnl1t+8eRONGjWq8Ht95swZlfsNCgqCs7Nzufbs7Gx88803cHd3h5mZGQwMDODs7IzQ0FCcPXu23P4rWuLi4qRYZ2dnyGQyeHl5qczlp59+Urkd0avSre0EiBqqgoICfPXVV9i0aVNtp0I1JCoqCpcuXUJYWFit5aCnp4fi4mJs27YNY8aMUVq3YcMGmJqaIjs7+5WPc/36dfj6+iIjIwOffPIJ5syZg8aNG+PGjRv4+eef0aVLF2RlZUEulysd//XXXy+3r/bt2yt9NjU1xbFjx3D9+nW0bNlSad369ethZmamkT4QPYsjRETVxN/fH1FRUTh//nxtp6IRQgjk5eXVdhr0Evr6+hgwYADWr1+v1C6EwMaNGzF06NBXPkZJSQkGDhyIBw8e4MSJE/juu+8QGBiInj17YuTIkdi7dy/27dsHPT09pe1cXFzg4eFRbjEzM1OKe/vtt9GsWbNyfbh+/TqOHTumkT4QPY8FEVE1mTFjBiwtLfH555+/NDY/Px/h4eFo0aIF9PX10axZM0ycOBFZWVlKcc7OzggKCsKePXvwxhtvwMjICO3atcOePXsAPL000a5dO5iYmOCtt96q8BLI5cuX4e3tDRMTEzRt2hSTJk3CkydPlGJkMhkmTZqEf//732jXrh0MDAwQGRkJALh27RqCg4NhbW0NAwMDtGvXDj/88EOlvi/Z2dkYO3YsLC0t0bhxY/j7++PPP/+s1Lbbtm2Dr68v7OzspL7PnDkTubm55WJPnTqFvn37wtLSEoaGhmjZsqXKkZt79+5h+PDhkMvlsLGxQWhoKBQKhVLMDz/8gHfeeQfW1tYwMTGBq6srFi5ciKKiIinGy8sLe/fuxc2bN5UuB5VZtWoVOnbsiMaNG8PU1BSvv/46vvjii0r1W12hoaFISEjA1atXpbbff/8dN2/exOjRo195/zt37sTFixcRHh5e4eXfgIAAGBsbV2n/jRo1wocffojIyEiUlpZK7evXr4eDgwN8fHyqtF+iF2FBRFRNTE1N8dVXX2H//v04fPhwhXFCCAwYMADff/89QkJCsHfvXkydOhWRkZHo3bs3CgoKlOLPnz+P8PBwfP7559ixYwfkcjkGDRqE2bNn48cff8S8efOwZcsWKBQKBAUFlRvVKSoqwrvvvgtvb2/s3LkTkyZNwurVq1X+1b1z506sWrUK//jHP7B//3706NEDV65cwZtvvolLly5h0aJF2LNnDwIDAzFlyhTMmTPnhd+Tsr5u2rQJ06ZNQ0xMDDw8PBAQEFAudtSoURBCKLVdu3YN7777LtatW4fY2FiEhYXh559/Rt++fZXiynK9desWFi9ejN9++w1fffUV7t27V+44gwcPRps2bbB9+3bMnDkTUVFR+Oyzz5Rirl+/juDgYGzatAl79uzBmDFj8N133+Hjjz+WYlauXInu3bvD1tYWJ06ckB
YAiI6OxoQJE9CzZ0/ExMRg586d+Oyzz1QWcprg4+MDJycnpRGWdevW4Z133kHr1q1fef8HDhwAAAwYMECt7UpKSlBcXKy0lJSUqIwNDQ3F3bt3sX//fmnbyMhIjBo1Co0a8VcXVQNBRBq1YcMGAUAkJiaKgoIC8dprrwl3d3dRWloqhBCiZ8+eokOHDlJ8bGysACAWLlyotJ9t27YJAGLNmjVSm5OTkzAyMhJ37tyR2pKTkwUAYWdnJ3Jzc6X2nTt3CgBi165dUtvIkSMFALFs2TKlY33zzTcCgIiPj5faAAi5XC4ePXqkFOvn5yeaN28uFAqFUvukSZOEoaFhufhn/fbbby88/uzZsyvc9nmlpaWiqKhIHD16VAAQ58+fl9a1bNlStGzZUuTl5VW4/ezZs1V+3ydMmCAMDQ2ln9fzSkpKRFFRkfjpp5+Ejo6OUn8DAwOFk5NTuW0mTZokmjRpUum+VdXIkSOFiYmJEOJp/2xtbUVRUZF4+PChMDAwEBs3bhT3798v971+9pxV5fl++fv7CwAiPz+/UnmV7V/VoqOjoxTr5OQkAgMDhRBP/6289957Qggh9u7dK2QymUhNTRW//PKLACCOHDlSye8M0cuxzCaqRvr6+pg7dy7OnDmDn3/+WWVM2ejRqFGjlNrff/99mJiY4NChQ0rtnTp1QrNmzaTP7dq1A/D0ks2zlyjK2lXd6TZixAilz8HBwQCAI0eOKLX37t0b5ubm0uf8/HwcOnQIAwcOhLGxsdJf+u+++y7y8/Nx8uRJlf18dv8VHf9l/ve//yE4OBi2trbQ0dGBnp4eevbsCQBISUkBAPz555+4fv06xowZA0NDw5fus1+/fkqf3dzckJ+fj4yMDKnt3Llz6NevHywtLaXjfvjhhygpKanU5b633noLWVlZGD58OP7zn/9U+s6250dUnr189DKjR4/GvXv38Ntvv2HLli3Q19fH+++/X+ntq8NPP/2ExMREpeXUqVMVxoeGhmLXrl14+PAh1q1bh169eqm8241IE1gQEVWzYcOGoXPnzvjyyy+V5pyUefjwIXR1ddG0aVOldplMBltbWzx8+FCp3cLCQumzvr7+C9vz8/OV2nV1dWFpaanUZmtrK+XyLDs7u3K5FhcXY/ny5dDT01Na3n33XQB44S/7sr5WdPwXycnJQY8ePXDq1CnMnTsXcXFxSExMxI4dOwBAujR4//59AEDz5s1fuk8A5XIxMDBQ2t+tW7fQo0cP/P3331i2bBn++9//IjExUZozVZmJ5iEhIVi/fj1u3ryJwYMHw9raGl27dsXBgwdfuJ23t7fS9zg0NLRSfQIAJycneHt7Y/369Vi/fj2GDRtW4ZweXd2nNxxXdPmquLhYaYK0o6MjACA1NbXS+QBPi3R3d3elpUuXLhXGv/feezA0NMSSJUuwe/fucnfNEWkSb7snqmYymQwLFixAnz59sGbNmnLrLS0tUVxcjPv37ysVRUIIpKen480339RoPsXFxXj48KFSIZCeni7l8nzuzzI3N4eOjg5CQkIwceJElftv0aJFhccu62tFx3+Rw4cP4+7du4iLi5NGhQCUm3he9j28c+fOS/dZGTt37kRubi527NgBJycnqT05OVmt/YwePRqjR49Gbm4ujh07htmzZyMoKAh//vmn0n6ftXr1ajx+/Fj6bGVlpdYxQ0ND8cEHH6C0tBSrVq2qMM7GxgYA8Pfff6tc//fff0sxAODn54c1a9Zg586dmDlzplo5qcPY2BjDhg3D/PnzYWZmhkGDBlXbsYg4QkRUA3x8fNCnTx98/fXX5R6Y5+3tDQDYvHmzUvv27duRm5srrdekLVu2KH2OiooCgAofhlfG2NgYvXr1wrlz5+Dm5lbur313d/dyRdWzevXq9cLjv0hZcVY2glNm9erVSp/btGmDli1bYv369eUmpFeFquMKIbB27dpysQYGBi8dMTIxMUFAQAC+/PJLFBYW4vLlyxXGtm3bVul7q+
7looEDB2LgwIEIDQ2Fh4dHhXEeHh5o3Lgxtm3bVm7dlStXcPnyZaU7u/r37w9XV1fMnz8fly5dUrnP/fv3l7tzsSr
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Graphique en nombre de commande\n",
|
|||
|
"purchases_graph = nb_purchases_graph\n",
|
|||
|
"\n",
|
|||
|
"purchases_graph_used = purchases_graph[purchases_graph[\"purchase_date_month\"] >= datetime(2021,3,1)]\n",
|
|||
|
"purchases_graph_used_0 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==False]\n",
|
|||
|
"purchases_graph_used_1 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==True]\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Création du barplot\n",
|
|||
|
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_0[\"nb_purchases\"], width=12, label = \"Nouveau client\")\n",
|
|||
|
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_1[\"nb_purchases\"], \n",
|
|||
|
" bottom = purchases_graph_used_0[\"nb_purchases\"], width=12, label = \"Ancien client\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Format x-axis tick labels as abbreviated month + two-digit year (e.g. Mar21)\n",
|
|||
|
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Ajout de titres et d'étiquettes\n",
|
|||
|
"plt.xlabel('Mois')\n",
|
|||
|
"plt.ylabel(\"Nombre d'achats\")\n",
|
|||
|
"plt.title(\"Nombre d'achats - MUCEM\")\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"# Affichage du barplot\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"id": "d312276c-4c46-4d29-b6d6-ed110f59890d",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoIAAAHGCAYAAADg0eryAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB5XElEQVR4nO3dd1gU1/s28Hul1xWkK82GDexRNAp2VLAmdgR7YotRYzTGiIk91mBi1Ngb6jdqrCgWMCooqNhjjMEOYqQp0jnvH77Mz6XJwi6g3J/r2kv2zNmZ58ye3X08M2dGJoQQICIiIqIKp1JZB0BEREREZYOJIBEREVEFxUSQiIiIqIJiIkhERERUQTERJCIiIqqgmAgSERERVVBMBImIiIgqKCaCRERERBUUE0EiIiKiCoqJIBG9t7755htYWlri3r17ZR0KEf1/x44dg5aWFvbv31/WoVARlCgR3LRpE2QyGXR1dfHgwYM8y93d3dGgQYOSbKLYfH19YWhoWCbbfheZTAY/P79S3aa7uzvc3d1LPY4jR46UelvV7ZdffsGmTZvKOowPnp+fH2QyWYHLjx49ilWrVuHQoUOoUaNGKUZWuKdPn8LPzw+RkZF5lr2rTQVR5XfprVu34Ofnh/v376tkfeVN7u+14OBgyGSyYrfX398fNWvWhLa2NmQyGRISEgAA3377Lezs7KCpqYnKlSuXOO7CuLu7w9fXV63bUJXHjx9jyJAhWLlyJXr16lWsddy/fx8ymYzfs8WQk5cpQyUjgmlpafj2229VsSoqRaGhoRg5cqRat3HkyBHMmTNHrdsobUwEy96jR48wbNgw7Nq1C82bNy/rcBQ8ffoUc+bMyTcRHDlyJEJDQ0s/qLfcunULc+bM+WATQVWKjIzExIkT0a5dO5w6dQqhoaEwMjLCH3/8gXnz5mHo0KEICQnBiRMnyjrUciEzMxP9+/fH6NGjMXbs2LIOh4pIUxUr8fDwwI4dOzB16lQ0bNhQFassU0IIpKamQk9Pr6xDUauWLVuWdQhExWJra4uYmJiyDkNp1apVQ7Vq1co6jPdaRkYGZDIZNDVV8vNVqJs3bwIARo0ahY8++kgqv3HjBgBg4sSJsLCwKHQdKSkpH/xvSQ5NTU2cO3eurMNQudevX0NfX7+sw1AblYwITps2DVWqVMHXX3/9zrqpqamYMWMGHB0doa2tjapVq2LcuHHScHsOBwcHeHp64tChQ2jcuDH09PRQt25dHDp0CMCb4c+6devCwMAAH330ESIiIvLd3s2bN9GhQwcYGBjA3Nwc48ePx+vXrxXqyGQyjB8/Hr/++ivq1q0LHR0dbN68GQBw9+5dDBo0CBYWFtDR0UHdunXx888/F2m/JCUlYdSoUahSpQoMDQ3h4eGBv//+O9+6JdlOdnY2/P390ahRI+jp6aFy5cpo2bIlDhw4UOjr8js0HBMTgzFjxqBatWrQ1taGo6Mj5syZg8zMTKlOzrD9kiVLsGzZMjg6OsLQ0BCurq4ICwuT6vn6+kptkMlk0qOwkYigoCD07NkT1apVg66uLmrWrIkxY8bgv//+k+r8+eefkMlk2LlzZ57Xb9myBTKZDOHh4QCAiIgIDBgwAA4ODtDT04ODgwMGDhyY51SGnOH006dP4/PPP4eZmRmqVKmCPn364OnTp1I9BwcH3Lx5EyEhIVJ7HBwcCt3PRXl/srOzsXjxYtSpUwc6OjqwsLDA0KFD8fjxY4V15RwiDA0NRatWraQ2bdy4EQBw+PBhNGnSBPr6+nB2dkZgYKDC63MOTV67dg2ffvop5HI5TE1NMXnyZGRmZuLOnTvw8PCAkZERHBwcsHjxYoXXp6amYsqUKWjUqJH0WldXV/zxxx952p3zudq6dSvq1q0LfX19NGzYUPoMv+3w4cNo1KgRdHR04OjoiCVLluS7L4UQ+OWXX6R9aWJigk8++QT//vtvoe8B8KY/5vde5Xe4VpnY3xYcHCyNUA
4bNkzqIzmfs4IODe/YsQOurq4wNDSEoaEhGjVqhPXr1xe6rX379kFfXx8jR46UPp8RERHo0aMHTE1Noauri8aNG2P37t3SazZt2oRPP/0UANCuXTspvsJGuHNivnLlCvr06QNjY2PI5XIMGTIEz58/V6hb1H7s4OCQ76HO3Kew5BzW3bp1K6ZMmYKqVatCR0cH//zzT6H7pig2bNiAhg0bQldXF6ampujduzdu376tEMuQIUMAAC1atIBMJpP6UM4RMEtLS4X3N+d3a+/evWjcuDF0dXWlIyJF+W4tqpz9smPHDnz99dewtraGoaEhvLy88OzZM7x8+RKjR4+GmZkZzMzMMGzYMLx69Up6fWGHXnP/Ljx//hyjR4+Gra0tdHR0YG5ujtatW+cZBT1x4gQ6dOgAY2Nj6Ovro3Xr1jh58qRCnX/++QfDhg1DrVq1oK+vj6pVq8LLywvXr18vUrtL8juZkJCAESNGwNTUFIaGhujevTv+/fffPO3N6e+XL1/GJ598AhMTE+nUk6LmLwWddpW73+f87gQFBWHYsGEwNTWFgYEBvLy88v1OK8o+LhZRAhs3bhQARHh4uFi5cqUAIE6ePCktd3NzE/Xr15eeZ2dniy5dughNTU0xa9Yscfz4cbFkyRJhYGAgGjduLFJTU6W69vb2olq1aqJBgwZi586d4siRI6JFixZCS0tLfPfdd6J169Zi7969Yt++faJ27drC0tJSvH79Wnq9j4+P0NbWFnZ2dmLevHni+PHjws/PT2hqagpPT0+FdgAQVatWFS4uLmLHjh3i1KlT4saNG+LmzZtCLpcLZ2dnsWXLFnH8+HExZcoUUalSJeHn51fovsnOzhbt2rUTOjo60vZnz54tqlevLgCI2bNnS3VLsh0hhPD29hYymUyMHDlS/PHHH+Lo0aNi3rx5YuXKlQrvhZubW552vx1HdHS0sLW1Ffb29mLNmjXixIkT4ocffhA6OjrC19dXqhcVFSUACAcHB+Hh4SH2798v9u/fL5ydnYWJiYlISEgQQgjxzz//iE8++UQAEKGhodLj7fc5t9WrV4sFCxaIAwcOiJCQELF582bRsGFD4eTkJNLT06V6jRs3Fq1bt87z+ubNm4vmzZtLz/fs2SO+++47sW/fPhESEiICAgKEm5ubMDc3F8+fP5fq5fTl6tWriwkTJohjx46J3377TZiYmIh27dpJ9S5fviyqV68uGjduLLXn8uXLhbw7RXt/Ro8eLQCI8ePHi8DAQPHrr78Kc3NzYWtrqxCnm5ubqFKlinBychLr168Xx44dE56engKAmDNnjnB2dpY+Ly1bthQ6OjriyZMn0utnz54tAAgnJyfxww8/iKCgIDFt2jRp23Xq1BE//fSTCAoKEsOGDRMAxO+//y69PiEhQfj6+oqtW7eKU6dOicDAQDF16lRRqVIlsXnzZoV25/SRjz76SOzevVscOXJEuLu7C01NTXHv3j2p3okTJ4SGhob4+OOPxd69e8WePXtE8+bNhZ2dncj9FTVq1CihpaUlpkyZIgIDA8WOHTtEnTp1hKWlpYiJiSn0ffDx8RH29vZ5ynP2SXFizy0xMVHqS99++63URx49elTgtmbNmiUAiD59+og9e/aI48ePi2XLlolZs2ZJdXJ/ly5btkxoaGiIH374QSo7deqU0NbWFm3atBG7du0SgYGBwtfXVwAQGzduFEIIERsbK+bPny8AiJ9//lmKLzY2tsA25cRsb28vvvrqK3Hs2DGxbNky6Xv77c9lUfuxvb298PHxybOt3N9Tp0+flr6fP/nkE3HgwAFx6NAh8eLFiwLjzf29lp+cfTBw4EBx+PBhsWXLFlG9enUhl8vF33//LYR487387bffSvsvNDRU/PPPP+Ly5ctixIgRAoAIDAxUeH/t7e2FtbW1qF69utiwYYM4ffq0uHjxYpG/W4sqZ7/Y29sLX19faV8bGhqKdu3aiU6dOompU6eK48ePi0WLFgkNDQ0xYc
IE6fU53+E5/aKw/delSxdhbm4u1q5dK4KDg8X+/fvFd999JwICAqQ6W7duFTKZTPTq1Uvs3btXHDx4UHh6egoNDQ1
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# graphique en nombre de client ayant commandé\n",
|
|||
|
"purchases_graph = nb_purchases_graph_2\n",
|
|||
|
"\n",
|
|||
|
"purchases_graph_used = purchases_graph[purchases_graph[\"purchase_date_month\"] >= datetime(2021,4,1)]\n",
|
|||
|
"purchases_graph_used_0 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==False]\n",
|
|||
|
"purchases_graph_used_1 = purchases_graph_used[purchases_graph_used[\"is_customer_known\"]==True]\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Création du barplot\n",
|
|||
|
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_0[\"nb_new_customer\"], width=12, label = \"Nouveau client\")\n",
|
|||
|
"plt.bar(purchases_graph_used_0[\"purchase_date_month\"], purchases_graph_used_1[\"nb_new_customer\"], \n",
|
|||
|
" bottom = purchases_graph_used_0[\"nb_new_customer\"], width=12, label = \"Ancien client\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Format x-axis tick labels as abbreviated month + two-digit year (e.g. Apr21)\n",
|
|||
|
"plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Ajout de titres et d'étiquettes\n",
|
|||
|
"plt.xlabel('Mois')\n",
|
|||
|
"plt.ylabel(\"Nombre de client ayant commandé\")\n",
|
|||
|
"plt.title(\"Nombre de client ayant commandé un ticket pour l'offre 'muséale groupe'\")\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"# Affichage du barplot\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "82895dfc-e5ca-4be0-af24-93c1be8f6248",
|
|||
|
"metadata": {
|
|||
|
"jp-MarkdownHeadingCollapsed": true
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Proportion de tickets de prix 0"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 35,
|
|||
|
"id": "10828dd8-8ec9-49eb-b450-acca741964c7",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"barplot_prop_free_price = pd.DataFrame()\n",
|
|||
|
"for company_number in ['1', '2', '3', '4', '101'] : # \n",
|
|||
|
" nom_dataframe = 'df'+ company_number +'_tickets'\n",
|
|||
|
" df_tickets = globals()[nom_dataframe].copy()\n",
|
|||
|
" df_free_tickets = df_tickets[(df_tickets['amount'] == 0) | df_tickets['amount'].isna()] # parentheses required: `|` binds tighter than `==`, otherwise NaN amounts are dropped\n",
|
|||
|
"\n",
|
|||
|
" if company_number == '101' :\n",
|
|||
|
" df_free_tickets_1 = df101_tickets_1[(df101_tickets_1['amount'] == 0) | df101_tickets_1['amount'].isna()] # treat missing amounts as free tickets too\n",
|
|||
|
" nb_tickets = len(df_tickets) + len(df101_tickets_1)\n",
|
|||
|
" nb_free_tickets = len(df_free_tickets) + len(df_free_tickets_1)\n",
|
|||
|
" \n",
|
|||
|
" graph_dataframe = pd.DataFrame({'company_number' : [company_number], \n",
|
|||
|
" 'prop_free_tickets' : [nb_free_tickets / nb_tickets],\n",
|
|||
|
" 'nb_tickets' : [nb_tickets]})\n",
|
|||
|
" \n",
|
|||
|
" else : \n",
|
|||
|
" graph_dataframe = pd.DataFrame({'company_number' : [company_number], \n",
|
|||
|
" 'prop_free_tickets' : [len(df_free_tickets) / len(df_tickets)],\n",
|
|||
|
" 'nb_tickets' : [len(df_tickets)]})\n",
|
|||
|
"\n",
|
|||
|
" barplot_prop_free_price = pd.concat([barplot_prop_free_price, graph_dataframe])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"id": "065576ef-2515-43eb-a65d-21f07f228c9e",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAIiCAYAAAAkWjI2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABeMUlEQVR4nO3deVxV1f7/8fcRhKMIKA6giUCaAzkhmqIXzQzQzKtZV8pCvWrmTXOg+iaZKTag3pui5vitJBuU+jk0aClWTkmTgg1q01Uxg5xBHFBg//7w4fl2PKjn5MYj+no+HufxcK+z9tqffdgm79be61gMwzAEAAAAALgildxdAAAAAABcDwhXAAAAAGACwhUAAAAAmIBwBQAAAAAmIFwBAAAAgAkIVwAAAABgAsIVAAAAAJiAcAUAAAAAJiBcAQAAAIAJCFcAAABusGbNGlWuXFkrV650dykATEK4AnBNSUtLk8Visb08PT1Vv359/fOf/9T+/fvdXd5ftmPHDk2aNEl79uxxeG/QoEEKDQ296jVdzJ49e2SxWJSWlmbqeP/5z38u2/f8z//Pn1NZn09oaKgGDRr0l+p58cUXK/Qvs2+//bZSU1OvaIxJkybJYrHYtc2dO9e0nzku77ffftNDDz2kmTNnqk+fPu4uB4BJCFcArkmLFi1SZmamMjIy9PDDD2vJkiWKjo7WiRMn3F3aX7Jjxw4lJyeXGa4mTJigFStWXP2irkE9e/ZUZmam6tatW27HIFxJQ4cOVWZmpl0b4erqKS4uVnx8vIYNG6ZHH33U3eUAMJGnuwsAgLI0b95cbdu2lSR17dpVJSUleu6557Ry5Uo9+OCDZe5z8uRJVa1a9WqWeVlnz551mCG4UMOGDa9SNde+2rVrq3bt2u4u46o6deqUqlSpclWPWb9+fdWvX/+qHrO8nDp1Slar9bJ/z64lnp6e+vzzz91dBoBywMwVgAqhQ4cOkqS9e/dKOnerWLVq1fTdd98pNjZWvr6+6tatmyTpyJEjevTRR3XTTTfJy8tLN998s8aPH6+ioiK7MS0Wi0aOHKkFCxaocePG8vb2Vnh4uJYuXepw/O+//169e/dWjRo1ZLVa1bp1a73++ut2fdavXy+LxaI33nhDjz/+uG666SZ5e3vrlVde0T/+8Q9J54Li+Vsez88SlHXb2+nTp5WUlKSwsDB5eXnppptu0ogRI3Ts2DG7fqGhobr77rv18ccfq02bNqpSpYqaNm2q1157zanP9ffff1e/fv3k6+srf39/xcfHKy8vr8y+33zzjf7+978rICBAVqtVEREReuedd5w6jiSVlpbqhRdeUIMGDWS1WtW2bVt98skndn3Kui3QWQUFBXriiSfsPrMxY8bYzXZaLBadOHFCr7/+uu3ncPvtt0s6F87P72+1WhUQEKC2bdtqyZIllz325s2bFRUVJavVqptuukkTJkzQK6+84nAu539ey5cvV0REhKxWq5KTkyVJc+bMUefOnVWnTh35+PioRYsWmjZtms6ePWvb//bbb9eqVau0d+9eu9tnpf+7/tavX29XW1m3eV54W2BoaKh++OEHbdiwwTbm+WuytLRUzz//vJo0aaIqVaqoevXqatmypWbOnHnJz+R8PW+++aYSExMVFBSkKlWqqEuXLsrKyrLr+8033+j+++9XaGioqlSpotDQUD3wwAO2v+/nnb8+1q5dq8GDB6t27dqqWrWqw9/tC2t4++239dRTT6lu3bqqVq2aevXqpT/++EPHjx/XsGHDVKtWLdWqVUv//Oc/VVhYeMnP7jyLxaJJkybZtg8ePKhhw4YpODhY3t7eql27tjp16qR169bZ7bdu3Tp169ZNfn5+qlq1qjp16uTw90CSfv75Z/Xv31916tSRt7e3mjVrpjlz5lzyMwfgfsxcAagQfvnlF0mym9U4c+aM/v73v+uRRx7RuHHjVFxcrNOnT6tr16769ddflZycrJYtW2rTpk1KSU
lRdna2Vq1aZTfu+++/r88++0yTJ0+Wj4+P5s6dqwceeECenp667777JEk//vijOnbsqDp16mjWrFmqWbOm3nzzTQ0aNEh//PGH/ud//sduzKSkJEVFRWn+/PmqVKmS2rZtq6NHj+rpp5/WnDlz1KZNG0kXn7EyDEN9+vTRJ598oqSkJEVHR+vbb7/VxIkTlZmZqczMTHl7e9v6b9++XY8//rjGjRunwMBAvfLKKxoyZIgaNWqkzp07X/QzPXXqlO688079/vvvSklJUePGjbVq1SrFx8c79P3ss8/UvXt3tW/fXvPnz5e/v7+WLl2q+Ph4nTx50qnnn15++WWFhIQoNTVVpaWlmjZtmnr06KENGzYoKirqsvtfysmTJ9WlSxf99ttvevrpp9WyZUv98MMPevbZZ/Xdd99p3bp1slgsyszM1B133KGuXbtqwoQJkiQ/Pz9JUmJiot544w09//zzioiI0IkTJ/T999/r8OHDlzz2t99+q5iYGDVu3Fivv/66qlatqvnz5+vNN98ss/+2bdu0c+dOPfPMMwoLC5OPj48k6ddff1X//v1t4XD79u164YUXtGvXLltYnjt3roYNG6Zff/3V1FtJV6xYofvuu0/+/v6aO3euJNmusWnTpmnSpEl65pln1LlzZ509e1a7du1yCPoX8/TTT6tNmzZ65ZVXlJ+fr0mTJun2229XVlaWbr75ZknnQkyTJk10//33KyAgQLm5uZo3b57atWunHTt2qFatWnZjDh48WD179tQbb7yhEydOqHLlypetoWvXrkpLS9OePXv0xBNP2P6et2rVSkuWLFFWVpaefvpp+fr6atasWS5+glJCQoK2bdumF154QY0bN9axY8e0bds2u+vnzTff1IABA9S7d2+9/vrrqly5shYsWKC4uDitWbPG9j+IduzYoY4dO6pBgwZ66aWXFBQUpDVr1mjUqFE6dOiQJk6c6HJ9AK4SAwCuIYsWLTIkGV988YVx9uxZ4/jx48aHH35o1K5d2/D19TXy8vIMwzCMgQMHGpKM1157zW7/+fPnG5KMd955x6596tSphiRj7dq1tjZJRpUqVWxjGoZhFBcXG02bNjUaNWpka7v//vsNb29vIycnx27MHj16GFWrVjWOHTtmGIZhfPbZZ4Yko3Pnzg7n9e677xqSjM8++8zhvYEDBxohISG27Y8//tiQZEybNs2uX3p6uiHJWLhwoa0tJCTEsFqtxt69e21tp06dMgICAoxHHnnE4Vh/Nm/ePEOS8d5779m1P/zww4YkY9GiRba2pk2bGhEREcbZs2ft+t59991G3bp1jZKSkoseZ/fu3YYko169esapU6ds7QUFBUZAQIBx55132trO//x3795ta7vw8zl/3gMHDrRtp6SkGJUqVTK+/vpru37/7//9P0OSsXr1alubj4+P3b7nNW/e3OjTp89Fz+Ni/vGPfxg+Pj7GwYMHbW0lJSVGeHi4w7mEhIQYHh4exo8//njJMUtKSoyzZ88aixcvNjw8PIwjR47Y3uvZs6fD52EY/3f9XXiNnf/8//zznDhxonHhrwC33nqr0aVLF4dx7777bqN169aXrLcs5+tp06aNUVpaamvfs2ePUblyZWPo0KEX3be4uNgoLCw0fHx8jJkzZ9raz18fAwYMcKmGXr162bWPGTPGkGSMGjXKrr1Pnz5GQECAbbusz+48ScbEiRNt29WqVTPGjBlz0VpOnDhhBAQEONRSUlJitGrVyrjttttsbXFxcUb9+vWN/Px8u74jR440rFar3fUA4NrCbYEArkkdOnRQ5cqV5evrq7vvvltBQUH66KOPFBgYaNfv3nvvtdv+9NNP5ePjY5t1Ou/8zMqFt99069bNbkwPDw/Fx8frl19+0W+//WYbs1u3bgoODnYY8+TJkw4LA1xYk6s+/fRTu5rP+8c//iEfHx+Hc2jdurUaNGhg27ZarWrcuLHDLVUX+uyzz+Tr66u///3vdu39+/e32/7ll1
+0a9cu27NuxcXFttddd92l3Nxc/fjjj5c9r759+8pqtdq2fX191atXL23cuFElJSWX3f9SPvzwQzVv3lytW7e2qy8
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"barplot_prop_free_price\n",
|
|||
|
"\n",
|
|||
|
"df = barplot_prop_free_price.sort_values( by = 'prop_free_tickets')\n",
|
|||
|
"\n",
|
|||
|
"# Création du barplot\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.bar(df['company_number'], df['prop_free_tickets'])\n",
|
|||
|
"plt.xlabel('Numéro de la société')\n",
|
|||
|
"plt.ylabel('Proportion de billets gratuits')\n",
|
|||
|
"plt.title('Proportion de billets gratuits par musée')\n",
|
|||
|
"plt.xticks(df['company_number'])\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "d6de664a-a303-48f5-bca6-1e9e9d17c461",
|
|||
|
"metadata": {
|
|||
|
"jp-MarkdownHeadingCollapsed": true
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Répartition des prix de vente"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 40,
|
|||
|
"id": "150825c6-08b5-44ad-a02e-98ee44192d94",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"boxplot_amount = {} \n",
|
|||
|
"\n",
|
|||
|
"for company_number in ['1', '2', '3', '4', '101'] :\n",
|
|||
|
" nom_dataframe = 'df'+ company_number +'_tickets'\n",
|
|||
|
" df_tickets = globals()[nom_dataframe].copy()\n",
|
|||
|
" df_notfree_tickets = df_tickets[df_tickets['amount'] > 0]\n",
|
|||
|
" \n",
|
|||
|
" boxplot_amount[company_number] = df_notfree_tickets['amount']\n",
|
|||
|
"\n",
|
|||
|
"amount_df = pd.DataFrame(boxplot_amount)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 44,
|
|||
|
"id": "c6ce46c8-5ad1-42c0-9b9a-a84df52a3411",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <th>101</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>count</th>\n",
|
|||
|
" <td>1.062722e+06</td>\n",
|
|||
|
" <td>1.475197e+06</td>\n",
|
|||
|
" <td>3.051426e+06</td>\n",
|
|||
|
" <td>1.280045e+06</td>\n",
|
|||
|
" <td>1.133556e+07</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" <td>1.076436e+01</td>\n",
|
|||
|
" <td>1.519766e+01</td>\n",
|
|||
|
" <td>1.285360e+01</td>\n",
|
|||
|
" <td>1.139475e+01</td>\n",
|
|||
|
" <td>1.350509e+01</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>std</th>\n",
|
|||
|
" <td>9.243106e+00</td>\n",
|
|||
|
" <td>5.714467e+00</td>\n",
|
|||
|
" <td>1.445236e+01</td>\n",
|
|||
|
" <td>1.657010e+01</td>\n",
|
|||
|
" <td>1.492325e+01</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>min</th>\n",
|
|||
|
" <td>2.500000e+00</td>\n",
|
|||
|
" <td>5.000000e+00</td>\n",
|
|||
|
" <td>3.000000e-01</td>\n",
|
|||
|
" <td>1.000000e+00</td>\n",
|
|||
|
" <td>2.000000e-02</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25%</th>\n",
|
|||
|
" <td>9.500000e+00</td>\n",
|
|||
|
" <td>1.300000e+01</td>\n",
|
|||
|
" <td>6.000000e+00</td>\n",
|
|||
|
" <td>6.000000e+00</td>\n",
|
|||
|
" <td>1.000000e+01</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>50%</th>\n",
|
|||
|
" <td>1.100000e+01</td>\n",
|
|||
|
" <td>1.500000e+01</td>\n",
|
|||
|
" <td>1.350000e+01</td>\n",
|
|||
|
" <td>1.000000e+01</td>\n",
|
|||
|
" <td>1.300000e+01</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>75%</th>\n",
|
|||
|
" <td>1.100000e+01</td>\n",
|
|||
|
" <td>1.500000e+01</td>\n",
|
|||
|
" <td>1.700000e+01</td>\n",
|
|||
|
" <td>1.200000e+01</td>\n",
|
|||
|
" <td>1.450000e+01</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>max</th>\n",
|
|||
|
" <td>3.200000e+02</td>\n",
|
|||
|
" <td>3.000000e+02</td>\n",
|
|||
|
" <td>7.500000e+03</td>\n",
|
|||
|
" <td>1.500000e+03</td>\n",
|
|||
|
" <td>1.633000e+03</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" 1 2 3 4 101\n",
|
|||
|
"count 1.062722e+06 1.475197e+06 3.051426e+06 1.280045e+06 1.133556e+07\n",
|
|||
|
"mean 1.076436e+01 1.519766e+01 1.285360e+01 1.139475e+01 1.350509e+01\n",
|
|||
|
"std 9.243106e+00 5.714467e+00 1.445236e+01 1.657010e+01 1.492325e+01\n",
|
|||
|
"min 2.500000e+00 5.000000e+00 3.000000e-01 1.000000e+00 2.000000e-02\n",
|
|||
|
"25% 9.500000e+00 1.300000e+01 6.000000e+00 6.000000e+00 1.000000e+01\n",
|
|||
|
"50% 1.100000e+01 1.500000e+01 1.350000e+01 1.000000e+01 1.300000e+01\n",
|
|||
|
"75% 1.100000e+01 1.500000e+01 1.700000e+01 1.200000e+01 1.450000e+01\n",
|
|||
|
"max 3.200000e+02 3.000000e+02 7.500000e+03 1.500000e+03 1.633000e+03"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"amount_df.describe()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 47,
|
|||
|
"id": "a54269c1-9aec-4e49-91ba-d39fa5ece850",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIiCAYAAAD2CjhuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABv5UlEQVR4nO3deXgUVd728btJ0k0SQoAAWWQLAQmrbIKgCKiABFQGGXEdQPFBQR02UXAB5kEQRMQRAR1HRV+FccE4OlGJCpEZQAEHRbYBhlUICEgSkpDOUu8fPGnTS0KWTlen+X6uKxfpqpPqXzqni7q7qs6xGIZhCAAAAADgUMvsAgAAAADA3xCUAAAAAMAFQQkAAAAAXBCUAAAAAMAFQQkAAAAAXBCUAAAAAMAFQQkAAAAAXBCUAAAAAMAFQQkAAAAAXBCUAFSbH3/8UWFhYXrppZfMLgUAAKBCCEoAyvTmm2/KYrE4voKDgxUbG6vbb79de/fuLfXnsrKyNGLECD388MN6+OGHfVixu5SUFM2aNcvjuhYtWmj06NGOx8eOHdOsWbO0bds2t7azZs2SxWKpniIryWKxlPq7+bPRo0erRYsW1f48rn/f6nbw4EFZLBYtXLjwom2L31sHDx50LPP0ulTld5g7d66Sk5Mr9bNwt3PnTs2aNcvpb1ZRxX3kzTffdCzbsGGDZs2apbNnz1a5RgDeQ1ACUC5vvPGGNm7cqC+//FIPPfSQ/v73v+uaa67Rr7/+6rH9fffdpx49eujZZ5/1caXuUlJSNHv2bI/rPvroIz311FOOx8eOHdPs2bM9BqWxY8dq48aN1VXmJeWpp57SRx99ZHYZphoyZIg2btyo2NjYansOgpJ37dy5U7Nnz65SUIqNjdXGjRs1ZMgQx7INGzZo9uzZBCXAzwSbXQCAmqFDhw7q3r27JKlfv34qLCzUzJkzlZycrDFjxri1f++993xdopucnByFhYWV2aZLly7l3l6TJk3UpEmTqpZ1SSv+myQkJJhdiukaNWqkRo0amV3GJa08+whvs9lsuuqqq3z6nAAqhzNKACqlODSdOHHCafmWLVt08803q0GDBqpdu7a6dOniFpqKLzlKTU3VmDFj1KBBA4WHh+umm27Sf//7X6e2qampuuWWW9SkSRPVrl1brVq10rhx43Tq1CmndsWXxX3//fcaMWKE6tevr4SEBI0ePVovv/yyJDldQlj8iXDJy5rWrVunK6+8UpI0ZswYR9viS9s8XXpXVFSkBQsWKDExUTabTY0bN9Yf/vAHHT161Kldv3791KFDB23evFl9+vRRWFiYWrZsqWeffVZFRUUXfb0zMzN1//33KyoqSnXq1NGNN96o//znPx7b7t27V3feeacaN24sm82mtm3bOl6DknXPmTNHbdq0UWhoqOrVq6dOnTrpxRdfLLOOdevWyWKx6P/9v/+nyZMnKyYmRqGhoerbt6/+/e9/O7UdPXq06tSpo+3bt2vgwIGKiIjQ9ddf71hX8hKzVatWyWKxaMmSJU7bmDlzpoKCgpSamlpmXfn5+Zo2bZpiYmIUFhama665Rt99953Htunp6Ro3bpyaNGkiq9Wq+Ph4zZ49WwUFBU7tli1bpiuuuEJ16tRRRESEEhMTNWPGjDLrKFZUVKRnnnlGzZo1U+3atdW9e3d99dVXTm08XXpXXpmZmZo6dari4+NltVp12WWXaeLEicrOzna0sVgsys7O1ooVKxx9uV+/fpIuBITin69du7YaNGig7t27a+XKlWU+b3HNa9eu1YMPPqiGDRsqKipKw4cP17Fjx9xeA1+8N86ePav77rtPDRo0UJ06dTRkyBD997//dbsstbR9hHRhv3X77berRYsWCg0NVYsWLXTHHXfo0KFDTr/773//e0lS//79Ha9p8SV0pV0i2a9fP8frLrlfejdr1iw9+uijkqT4+HjHdtetWydJ+vrrr9WvXz9FRUUpNDRUzZo106233q
qcnJyLvjYAqoYzSgAq5cCBA5Kkyy+/3LFs7dq1uvHGG9WzZ08tX75ckZGRWrVqlUaOHKmcnBy3g4j77rtPAwYM0LvvvqsjR47oySefVL9+/fTjjz+qXr16kqT9+/erV69eGjt2rCIjI3Xw4EEtWrRI11xzjbZv366QkBCnbQ4fPly33367HnjgAWVnZ6tDhw7Kzs7WBx984HTZnKfLnbp27ao33nhDY8aM0ZNPPum4NKass0gPPvigXn31VT300EMaOnSoDh48qKeeekrr1q3T999/r4YNGzrapqen66677tKUKVM0c+ZMffTRR5o+fbri4uL0hz/8odTnMAxDw4YN04YNG/T000/ryiuv1L/+9S8NHjzYre3OnTvVu3dvNWvWTM8//7xiYmL0xRdf6JFHHtGpU6c0c+ZMSdKCBQs0a9YsPfnkk7r22muVn5+v3bt3l/vSnxkzZqhr16567bXXlJGRoVmzZqlfv37697//rZYtWzra2e123XzzzRo3bpwef/xxtzBS7Pbbb1daWpqmTJmiq666St27d9fXX3+tOXPmaMaMGRowYECZ9dx///166623NHXqVA0YMEA//fSThg8frqysLKd26enp6tGjh2rVqqWnn35aCQkJ2rhxo+bMmaODBw/qjTfekHQhuI0fP14PP/ywFi5cqFq1amnfvn3auXNnuV6fJUuWqHnz5lq8eLEjMAwePFhpaWnq1atXubZRmpycHPXt21dHjx7VjBkz1KlTJ+3YsUNPP/20tm/fri+//FIWi0UbN27Uddddp/79+zsuL61bt64kafLkyXr77bc1Z84cdenSRdnZ2frpp590+vTpctUwduxYDRkyxPHeffTRR3X33Xfr66+/drTxxXujqKhIN910k7Zs2aJZs2apa9eu2rhxo2688cZSf8Z1HyFdCC9t2rTR7bffrgYNGuj48eNatmyZrrzySu3cuVMNGzbUkCFDNHfuXM2YMUMvv/yyunbtKklVPjs6duxYnTlzRi+99JJWr17t2De1a9dOBw8e1JAhQ9SnTx+9/vrrqlevnn7++Wd9/vnnstvtPj8bBlxyDAAowxtvvGFIMjZt2mTk5+cbWVlZxueff27ExMQY1157rZGfn+9om5iYaHTp0sVpmWEYxtChQ43Y2FijsLDQaZu/+93vnNr961//MiQZc+bM8VhLUVGRkZ+fbxw6dMiQZHz88ceOdTNnzjQkGU8//bTbz02YMMEobXfXvHlzY9SoUY7HmzdvNiQZb7zxhlvb4ucotmvXLkOSMX78eKd23377rSHJmDFjhmNZ3759DUnGt99+69S2Xbt2xqBBgzzWVuyzzz4zJBkvvvii0/JnnnnGkGTMnDnTsWzQoEFGkyZNjIyMDKe2Dz30kFG7dm3jzJkzhmFc+Jt07ty5zOf1ZO3atYYko2vXrkZRUZFj+cGDB42QkBBj7NixjmWjRo0yJBmvv/6623ZGjRplNG/e3GnZ+fPnjS5duhjx8fHGzp07jejoaKNv375GQUFBmTUV/x0mTZrktPydd94xJDn9fceNG2fUqVPHOHTokFPbhQsXGpKMHTt2GIZx4fWqV69emc/ryYEDBwxJRlxcnJGbm+tYnpmZaTRo0MC44YYbHMuK3wcHDhxwLPP0urj20Xnz5hm1atUyNm/e7NTugw8+MCQZKSkpjmXh4eFOP1usQ4cOxrBhwyr8+xXX7NrnFyxYYEgyjh8/bhiG794b//jHPwxJxrJly5yWz5s3z+29UdY+wlVBQYFx7tw5Izw83Ol99/777xuSjLVr17r9jOvfqeTv17dvX8fj4j5Sch/z3HPPufUFw/jtb7pt27aL1gzA+7j0DkC5XHXVVQoJCVFERIRuvPFG1a9fXx9//LGCgy+cmN63b592796tu+66S5JUUFDg+EpKStLx48e1Z88ep20Wty3Wu3dvNW/eXGvXrnUsO3nypB544AE1bdpUwcHBCgkJUfPmzSVJu3btcqvz1ltv9e
rvXZbiOl3PlPXo0UNt27Z1u9QqJiZGPXr0cFrWqVMnp8t7ynoe19frzjvvdHp8/vx5ffXVV/rd736nsLAwt7/B+fP
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"means = amount_df.mean()\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"amount_df.boxplot()\n",
|
|||
|
"plt.scatter(x=range(1, len(means) + 1), y=means, marker='D', color='red', s=100)\n",
|
|||
|
"plt.title('Répartition des prix des billets non gratuits')\n",
|
|||
|
"plt.ylabel('Montant')\n",
|
|||
|
"plt.xlabel('Compagnie')\n",
|
|||
|
"plt.ylim(0, 50) \n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"id": "b41b5434-0e5b-495b-bede-23f5cb45272c",
|
|||
|
"metadata": {
|
|||
|
"scrolled": true
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>purchase_id</th>\n",
|
|||
|
" <th>ticket_id</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>count</th>\n",
|
|||
|
" <td>73518.000000</td>\n",
|
|||
|
" <td>7.351800e+04</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" <td>10.096167</td>\n",
|
|||
|
" <td>2.484660e+01</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>std</th>\n",
|
|||
|
" <td>2367.702603</td>\n",
|
|||
|
" <td>4.636993e+03</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>min</th>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>1.000000e+00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25%</th>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>1.000000e+00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>50%</th>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>2.000000e+00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>75%</th>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>3.000000e+00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>max</th>\n",
|
|||
|
" <td>641981.000000</td>\n",
|
|||
|
" <td>1.256574e+06</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" purchase_id ticket_id\n",
|
|||
|
"count 73518.000000 7.351800e+04\n",
|
|||
|
"mean 10.096167 2.484660e+01\n",
|
|||
|
"std 2367.702603 4.636993e+03\n",
|
|||
|
"min 1.000000 1.000000e+00\n",
|
|||
|
"25% 1.000000 1.000000e+00\n",
|
|||
|
"50% 1.000000 2.000000e+00\n",
|
|||
|
"75% 1.000000 3.000000e+00\n",
|
|||
|
"max 641981.000000 1.256574e+06"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"purchases.groupby('customer_id')[['purchase_id', 'ticket_id']].nunique().describe()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"id": "d1212b10-3933-450a-b001-9e2cbf308f79",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>ticket_id</th>\n",
|
|||
|
" <th>customer_id</th>\n",
|
|||
|
" <th>purchase_id</th>\n",
|
|||
|
" <th>event_type_id</th>\n",
|
|||
|
" <th>supplier_name</th>\n",
|
|||
|
" <th>purchase_date</th>\n",
|
|||
|
" <th>type_of_ticket_name</th>\n",
|
|||
|
" <th>amount</th>\n",
|
|||
|
" <th>children</th>\n",
|
|||
|
" <th>is_full_price</th>\n",
|
|||
|
" <th>name_event_types</th>\n",
|
|||
|
" <th>name_facilities</th>\n",
|
|||
|
" <th>name_categories</th>\n",
|
|||
|
" <th>name_events</th>\n",
|
|||
|
" <th>name_seasons</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>13070859</td>\n",
|
|||
|
" <td>48187</td>\n",
|
|||
|
" <td>5107462</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>spectacle vivant</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv prog enfant</td>\n",
|
|||
|
" <td>l'école des magiciens</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>13070860</td>\n",
|
|||
|
" <td>48187</td>\n",
|
|||
|
" <td>5107462</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>spectacle vivant</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv prog enfant</td>\n",
|
|||
|
" <td>l'école des magiciens</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>13070861</td>\n",
|
|||
|
" <td>48187</td>\n",
|
|||
|
" <td>5107462</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>spectacle vivant</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv prog enfant</td>\n",
|
|||
|
" <td>l'école des magiciens</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>13070862</td>\n",
|
|||
|
" <td>48187</td>\n",
|
|||
|
" <td>5107462</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>spectacle vivant</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv prog enfant</td>\n",
|
|||
|
" <td>l'école des magiciens</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>13070863</td>\n",
|
|||
|
" <td>48187</td>\n",
|
|||
|
" <td>5107462</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2018-12-28 14:47:50+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>spectacle vivant</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv prog enfant</td>\n",
|
|||
|
" <td>l'école des magiciens</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1826667</th>\n",
|
|||
|
" <td>20662815</td>\n",
|
|||
|
" <td>1256135</td>\n",
|
|||
|
" <td>8007697</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2023-11-08 17:23:54+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>11.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>offre muséale groupe</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv entrées tp</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2023</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1826668</th>\n",
|
|||
|
" <td>20662816</td>\n",
|
|||
|
" <td>1256136</td>\n",
|
|||
|
" <td>8007698</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2023-11-08 18:32:18+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>11.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>offre muséale groupe</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv entrées tp</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2023</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1826669</th>\n",
|
|||
|
" <td>20662817</td>\n",
|
|||
|
" <td>1256136</td>\n",
|
|||
|
" <td>8007698</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2023-11-08 18:32:18+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>11.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>offre muséale groupe</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv entrées tp</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2023</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1826670</th>\n",
|
|||
|
" <td>20662818</td>\n",
|
|||
|
" <td>1256137</td>\n",
|
|||
|
" <td>8007699</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2023-11-08 19:30:28+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>11.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>offre muséale groupe</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv entrées tp</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2023</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1826671</th>\n",
|
|||
|
" <td>20662819</td>\n",
|
|||
|
" <td>1256137</td>\n",
|
|||
|
" <td>8007699</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>vente en ligne</td>\n",
|
|||
|
" <td>2023-11-08 19:30:28+00:00</td>\n",
|
|||
|
" <td>Atelier</td>\n",
|
|||
|
" <td>11.0</td>\n",
|
|||
|
" <td>pricing_formula</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>offre muséale groupe</td>\n",
|
|||
|
" <td>mucem</td>\n",
|
|||
|
" <td>indiv entrées tp</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2023</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>1826672 rows × 15 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" ticket_id customer_id purchase_id event_type_id supplier_name \\\n",
|
|||
|
"0 13070859 48187 5107462 4 vente en ligne \n",
|
|||
|
"1 13070860 48187 5107462 4 vente en ligne \n",
|
|||
|
"2 13070861 48187 5107462 4 vente en ligne \n",
|
|||
|
"3 13070862 48187 5107462 4 vente en ligne \n",
|
|||
|
"4 13070863 48187 5107462 4 vente en ligne \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"1826667 20662815 1256135 8007697 5 vente en ligne \n",
|
|||
|
"1826668 20662816 1256136 8007698 5 vente en ligne \n",
|
|||
|
"1826669 20662817 1256136 8007698 5 vente en ligne \n",
|
|||
|
"1826670 20662818 1256137 8007699 5 vente en ligne \n",
|
|||
|
"1826671 20662819 1256137 8007699 5 vente en ligne \n",
|
|||
|
"\n",
|
|||
|
" purchase_date type_of_ticket_name amount \\\n",
|
|||
|
"0 2018-12-28 14:47:50+00:00 Atelier 8.0 \n",
|
|||
|
"1 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
|
|||
|
"2 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
|
|||
|
"3 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
|
|||
|
"4 2018-12-28 14:47:50+00:00 Atelier 4.0 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1826667 2023-11-08 17:23:54+00:00 Atelier 11.0 \n",
|
|||
|
"1826668 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
|
|||
|
"1826669 2023-11-08 18:32:18+00:00 Atelier 11.0 \n",
|
|||
|
"1826670 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
|
|||
|
"1826671 2023-11-08 19:30:28+00:00 Atelier 11.0 \n",
|
|||
|
"\n",
|
|||
|
" children is_full_price name_event_types name_facilities \\\n",
|
|||
|
"0 pricing_formula False spectacle vivant mucem \n",
|
|||
|
"1 pricing_formula False spectacle vivant mucem \n",
|
|||
|
"2 pricing_formula False spectacle vivant mucem \n",
|
|||
|
"3 pricing_formula False spectacle vivant mucem \n",
|
|||
|
"4 pricing_formula False spectacle vivant mucem \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"1826667 pricing_formula False offre muséale groupe mucem \n",
|
|||
|
"1826668 pricing_formula False offre muséale groupe mucem \n",
|
|||
|
"1826669 pricing_formula False offre muséale groupe mucem \n",
|
|||
|
"1826670 pricing_formula False offre muséale groupe mucem \n",
|
|||
|
"1826671 pricing_formula False offre muséale groupe mucem \n",
|
|||
|
"\n",
|
|||
|
" name_categories name_events name_seasons \n",
|
|||
|
"0 indiv prog enfant l'école des magiciens 2018 \n",
|
|||
|
"1 indiv prog enfant l'école des magiciens 2018 \n",
|
|||
|
"2 indiv prog enfant l'école des magiciens 2018 \n",
|
|||
|
"3 indiv prog enfant l'école des magiciens 2018 \n",
|
|||
|
"4 indiv prog enfant l'école des magiciens 2018 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1826667 indiv entrées tp NaN 2023 \n",
|
|||
|
"1826668 indiv entrées tp NaN 2023 \n",
|
|||
|
"1826669 indiv entrées tp NaN 2023 \n",
|
|||
|
"1826670 indiv entrées tp NaN 2023 \n",
|
|||
|
"1826671 indiv entrées tp NaN 2023 \n",
|
|||
|
"\n",
|
|||
|
"[1826672 rows x 15 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"purchases"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"id": "49d5fd2d-9bc1-43ac-9270-1efd73759854",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtIAAAJICAYAAABMlwOPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyfElEQVR4nOzdd1hT1/8H8HcYYYnIEBAH4kJR3IpiKzhwUupuxSIutGpFqtbRqjhR0aqtVsVtXVj3LAWtoqi4927rFkSRIcoM5/eHP+7XCCqJQQK+X8/j0+bck+Sd5HLz4XDuuTIhhAAREREREalEp7ADEBEREREVRSykiYiIiIjUwEKaiIiIiEgNLKSJiIiIiNTAQpqIiIiISA0spImIiIiI1MBCmoiIiIhIDSykiYiIiIjUwEKaiIiIiEgNLKSJiIqBlJQUVK9eHd27d0d2dnZhxyEqth4+fAhra2uMHDlS7cd48OABypcv/0GPQdqBhXQxtXr1ashkMhgaGuLu3bu5tru7u6NWrVqFkAzo06cPSpQoUSjPPWnSJMhksvf+c3d3f+9jHTt2DJMmTUJiYuIH51FXnz59ULFiRbXv/6Z9+/Zh0qRJeW6rWLEi+vTpo9Lj3blzBzKZDHPmzPnwcAA2bNiA+fPna+SxCkJB/FwFBQVhx44d7+3n5+cHGxsbrFu3Djo6Rf/Q/iHHiatXr2LSpEm4c+eOZkMVkJcvX2LSpEk4dOhQYUfJF3d393wdR992LNEmeR1DZTIZVq9enWf/rKwsfP3112jZsuUHHdfKlSuHP//8EytXrsS2bdvUfpzioKj9vL5Jr7ADUMFKT0/H+PHjsXbt2sKOohUGDBiAdu3aSbdjYmLQpUsXDBs2DN7e3lJ7yZIl3/tYx44dw+TJk9GnTx+UKlWqIOJ+dPv27cNvv/2W5xfg9u3b8/W+FKQNGzbg8uXLCAgIKNQcH1NQUBC6deuGTp06vbXPb7/9hosXL+Lo0aMwMDD4eOG01NWrVzF58mS4u7tr9BfNgvLy5UtMnjwZAPL1S3xhW7RoEZKTk6Xbe/fuxbRp07Bq1SpUr15dai9XrlxhxCtQ48aNg56eHn7//fcPGgQBgFq1amH79u3o0aMHateujSpVqmgoZdFS1H5e38RCuphr164dNmzYgFGjRqFOnTqFHeeDCSGQlpYGIyMjte5frlw5pYN7zm/AFSpUQJMmTTQRsdiqV69eYUegtxg6dCiGDh1a2DFUlpqaqvbPcmF4+fIljI2NCzvGR5OamgpDQ8NcBaOTk5PS7evXrwN4VRg2bNjwo+UrDLNnz9bo47m7uyMuLk6jj0kfV9H/+x+90+jRo2FpaYkxY8a8t29aWhrGjRsHBwcHyOVylC1bFkOHDs01daFixYrw9PTEnj17UK9ePRgZGaFGjRrYs2cPgFfTSmrUqAETExM0btwYp0+fzvP5rly5glatWsHExASlS5fGd999h5cvXyr1kclk+O6777BkyRLUqFEDBgYGWLNmDQDg1q1b8Pb2hrW1NQwMDFCjRg389ttvarxLue3atQtNmzaFsbExTE1N4eHhgePHj0vbJ02ahB9++AEA4ODgIP0pM+dPs5s2bUKbNm1QpkwZ6f0ZO3YsXrx4oXam1atXw9HRUXqtv//+e579MjIyMG3aNFSvXh0GBgYoXbo0+vbtiydPnrzz8fv06SO9f6//eTbnl428pnYkJiZi5MiRqFSpEgwMDGBtbY0OHTpIX6x5yczMhK+vL0qUKCHtM0IILFq0CHXr1oWRkRHMzc3RrVs3/Pfff9L93N3dsXfvXty9e1cpX47FixejTp06KFGiBExNTVG9enX8+OOP73zNADB58mS4uLjAwsICJUuWRP369bFixQoIIXL13bBhA5o2bYoSJUqgRIkSqFu3LlasWJGr36lTp/D555/D2NgYlSpVwsyZM3PNW05OTsaoUaOUft4CAgKU9hGZTIYXL15gzZo1eU47io
2NxaBBg1CuXDnI5XI4ODhg8uTJyMrKUnoudd+b993vbVOTcqaWvf6n2pzjxrZt21CvXj0YGhpKo7D5lfMYYWFhqF+/PoyMjFC9enWsXLlS6bm7d+8OAGjRooX0vr3+p/r9+/ejVatWKFmyJIyNjdGsWTMcOHBA6blyXtvZs2fRrVs3mJubo3LlyvnOkeN9n9GdO3dQunRpAK/2xZy875pGdejQIchkMqxbtw4jRoyAra0tjIyM4ObmhnPnzin1PX36NL7++mtUrFgRRkZGqFixInr27Jlryl/OZxYeHo5+/fqhdOnSMDY2Rnp6+ns+ldzWrl0LmUymdMzMMWXKFOjr6+PRo0cA/jcd6siRI2jSpAmMjIxQtmxZTJgwAQqFQum+6h7bXn+N+TmG5uV93zdPnjyBXC7HhAkTct33+vXrkMlk+PXXX6W2N/eLihUrIjAwUOln9/WpcXPnzoWDgwNKlCiBpk2bIjo6OtfznD59Gl5eXrCwsIChoSHq1auHP/74I9d7IJPJ8Pfff8PPzw+WlpYoWbIkevfujRcvXiA2NhY9evRAqVKlUKZMGYwaNQqZmZlKj5Hfz0ETP6/nzp2Dp6en9L7b2dmhY8eOePDgwfs+so9HULG0atUqAUCcOnVK/PLLLwKAOHDggLTdzc1N1KxZU7qdnZ0t2rZtK/T09MSECRNEeHi4mDNnjjAxMRH16tUTaWlpUl97e3tRrlw5UatWLbFx40axb98+4eLiIvT19cXEiRNFs2bNxLZt28T27dtFtWrVhI2NjXj58qV0f19fXyGXy0WFChXE9OnTRXh4uJg0aZLQ09MTnp6eSq8DgChbtqyoXbu22LBhg/j777/F5cuXxZUrV4SZmZlwdnYWv//+uwgPDxcjR44UOjo6YtKkSfl+n27fvi0AiNmzZ0tt69evFwBEmzZtxI4dO8SmTZtEgwYNhFwuF0eOHBFCCHH//n0xbNgwAUBs27ZNHD9+XBw/flwkJSUJIYSYOnWqmDdvnti7d684dOiQWLJkiXBwcBAtWrRQev7AwECRnx/DnM/zyy+/FLt37xbr1q0TVapUEeXLlxf29vZSP4VCIdq1aydMTEzE5MmTRUREhFi+fLkoW7ascHJyUvoc3vTPP/+Ibt26CQDS6zl+/Lj02dvb2wtfX1+pf3JysqhZs6YwMTERU6ZMEX/99ZfYunWrGD58uPj777/zfH8TEhJEixYthK2trTh9+rT0WH5+fkJfX1+MHDlShIWFiQ0bNojq1asLGxsbERsbK4QQ4sqVK6JZs2bC1tZWKZ8QQmzcuFEAEMOGDRPh4eFi//79YsmSJcLf3/+9722fPn3EihUrREREhIiIiBBTp04VRkZGYvLkyUr9JkyYIACILl26iM2bN4vw8HAxd+5cMWHCBKmPm5ubsLS0FFWrVhVLliwRERERYsiQIQKAWLNmjdTvxYsXom7dusLKykrMnTtX7N+/X/zyyy/CzMxMtGzZUmRnZwshhDh+/LgwMjISHTp0kF7vlStXhBBCxMTESJ9/SEiI2L9/v5g6daowMDAQffr0kZ5L3fcmP/d72/6bs7/evn1barO3txdlypQRlSpVEitXrhQHDx4UJ0+efOvz+/r6ChMTE6W2nGOPk5OT+P3338Vff/0lunfvLgCIyMhIIYQQcXFxIigoSAAQv/32m/S+xcXFCSGEWLt2rZDJZKJTp05i27ZtYvfu3cLT01Po6uqK/fv353pt9vb2YsyYMSIiIkLs2LEj3zny+xmlpaWJsLAwAUD0799fyvvPP/+89b05ePCgACDKly+f65hQsmRJ8e+//0p9N2/eLCZOnCi2b98uIiMjRWhoqHBzcxOlS5cWT548yfWZlS1bVgwcOFD8+eefYsuWLSIrK+utOd6876lTp4QQQqSnpwtbW1vRq1cvpX6ZmZnCzs5OdO/eXWrL+Zmxs7MTv/76q/jrr7+Ev7+/ACCGDh0q9fuQY9vrGd93DM1Lfr9vOnfuLM
qXLy8UCoXS/UePHi3kcrl4+vSpECLv/WLy5MlCLpcLHx8f6X45x8+KFSuKdu3aiR07dogdO3YIZ2dnYW5uLhITE6W
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x500 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Nombre Total de tickets achetés sur Internet par Type d'évènements\n",
|
|||
|
"\n",
|
|||
|
"nb_tickets_internet = customer.groupby('name_event_types')['nb_tickets_internet'].sum()\n",
|
|||
|
"nb_tickets_internet.plot(kind='bar', figsize=(8, 5))\n",
|
|||
|
"plt.xlabel(\"Type d'évènements\")\n",
|
|||
|
"plt.ylabel('Nombre Total de tickets achetés sur Internet')\n",
|
|||
|
"plt.title(\"Nombre Total de tickets achetés sur Internet par Type d'évènements\")\n",
|
|||
|
"plt.xticks(rotation=45)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "d679204b-f3e8-4502-8de9-3bf4180da3bd",
|
|||
|
"metadata": {
|
|||
|
"jp-MarkdownHeadingCollapsed": true
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# 2 - Autres informations sur client "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 57,
|
|||
|
"id": "1df2a145-f47f-4511-aa76-0df7531dd2ec",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def tags_information(tenant_id, first_tags = 20):\n",
|
|||
|
"\n",
|
|||
|
" customersplus = load_dataset_2(tenant_id, \"customersplus\")[['id', 'structure_id']]\n",
|
|||
|
" customersplus.rename(columns = {'id' : 'customer_id'}, inplace = True)\n",
|
|||
|
" tags = load_dataset_2(tenant_id, \"tags\")[['id', 'name']]\n",
|
|||
|
" tags.rename(columns = {'id' : 'tag_id', 'name' : 'tag_name'}, inplace = True)\n",
|
|||
|
" structure_tag_mappings = load_dataset_2(tenant_id, \"structure_tag_mappings\")[['structure_id', 'tag_id']]\n",
|
|||
|
" \n",
|
|||
|
" customer_tags = pd.merge(customersplus, structure_tag_mappings, on = 'structure_id', how = 'left')\n",
|
|||
|
" customer_tags = pd.merge(customer_tags, tags, on = 'tag_id', how = 'inner')\n",
|
|||
|
" \n",
|
|||
|
" nb_customers_with_tag = customer_tags['customer_id'].nunique()\n",
|
|||
|
" \n",
|
|||
|
" # print('Nombre de client avec tag : ', nb_customers_with_tag)\n",
|
|||
|
" # print('Proportion de clients avec tags : ', nb_customers_with_tag/len(customersplus))\n",
|
|||
|
" # print('Moyenne de tags par client : ', len(customer_tags)/nb_customers_with_tag)\n",
|
|||
|
" \n",
|
|||
|
" # info = customer_tags.groupby(['tag_id', 'tag_name'])['customer_id'].count().reset_index().sort_values('customer_id', ascending = False).head(first_tags)\n",
|
|||
|
"\n",
|
|||
|
" tags_informations = pd.DataFrame({'company_number' : tenant_id,\n",
|
|||
|
" 'nb_customers_with_tags' : [nb_customers_with_tag],\n",
|
|||
|
" 'prop_customers_with_tags' : [nb_customers_with_tag/len(customersplus)],\n",
|
|||
|
" 'mean_tags_per_customers' : [len(customer_tags)/nb_customers_with_tag]})\n",
|
|||
|
" \n",
|
|||
|
" return tags_informations"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 58,
|
|||
|
"id": "c4ecbb15-0f55-46dc-a3df-6e8c4ae44ebd",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Nombre de client avec tag : 13320\n",
|
|||
|
"Proportion de clients avec tags : 0.0877089012682233\n",
|
|||
|
"Moyenne de tags par client : 2.1725975975975977\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/tmp/ipykernel_467/1769900082.py:8: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|||
|
" df = pd.read_csv(file_in, sep=\",\")\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Nombre de client avec tag : 5953\n",
|
|||
|
"Proportion de clients avec tags : 0.021598421025897787\n",
|
|||
|
"Moyenne de tags par client : 1.0\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/tmp/ipykernel_467/1769900082.py:8: DtypeWarning: Columns (19,20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|||
|
" df = pd.read_csv(file_in, sep=\",\")\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Nombre de client avec tag : 23659\n",
|
|||
|
"Proportion de clients avec tags : 0.09207484608139978\n",
|
|||
|
"Moyenne de tags par client : 3.0620482691576143\n",
|
|||
|
"Nombre de client avec tag : 10495\n",
|
|||
|
"Proportion de clients avec tags : 0.03271416949025744\n",
|
|||
|
"Moyenne de tags par client : 5.298427822772749\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/tmp/ipykernel_467/1769900082.py:8: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|||
|
" df = pd.read_csv(file_in, sep=\",\")\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Nombre de client avec tag : 532342\n",
|
|||
|
"Proportion de clients avec tags : 0.18660686931118298\n",
|
|||
|
"Moyenne de tags par client : 24.114082676174338\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"tags_comparaison = pd.DataFrame()\n",
|
|||
|
"\n",
|
|||
|
"for tenant_id in companies['musee'] : \n",
|
|||
|
" \n",
|
|||
|
" tags_comparaison = pd.concat([tags_comparaison, tags_information(tenant_id)])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 59,
|
|||
|
"id": "bd2dd513-3375-4073-a12a-fa0e9f20571e",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>company_number</th>\n",
|
|||
|
" <th>nb_customers_with_tags</th>\n",
|
|||
|
" <th>prop_customers_with_tags</th>\n",
|
|||
|
" <th>mean_tags_per_customers</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>13320</td>\n",
|
|||
|
" <td>0.087709</td>\n",
|
|||
|
" <td>2.172598</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>5953</td>\n",
|
|||
|
" <td>0.021598</td>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>23659</td>\n",
|
|||
|
" <td>0.092075</td>\n",
|
|||
|
" <td>3.062048</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>10495</td>\n",
|
|||
|
" <td>0.032714</td>\n",
|
|||
|
" <td>5.298428</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>101</td>\n",
|
|||
|
" <td>532342</td>\n",
|
|||
|
" <td>0.186607</td>\n",
|
|||
|
" <td>24.114083</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" company_number nb_customers_with_tags prop_customers_with_tags \\\n",
|
|||
|
"0 1 13320 0.087709 \n",
|
|||
|
"0 2 5953 0.021598 \n",
|
|||
|
"0 3 23659 0.092075 \n",
|
|||
|
"0 4 10495 0.032714 \n",
|
|||
|
"0 101 532342 0.186607 \n",
|
|||
|
"\n",
|
|||
|
" mean_tags_per_customers \n",
|
|||
|
"0 2.172598 \n",
|
|||
|
"0 1.000000 \n",
|
|||
|
"0 3.062048 \n",
|
|||
|
"0 5.298428 \n",
|
|||
|
"0 24.114083 "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 59,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"tags_comparaison"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "507c1db3-1d6c-4106-a9a2-0187055e0480",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# 3 - Graphiques communs"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "aac367ed-9d5e-482a-a376-bfc7d1a5767e",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Nombre de clients"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 62,
|
|||
|
"id": "94254c32-dc81-42f2-9d27-372ff38c70ed",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def compute_nb_clients(customer_musee):\n",
|
|||
|
" company_nb_clients = customer_musee[customer_musee[\"purchase_count\"]>0].groupby(\"number_company\")[\"customer_id\"].count().reset_index()\n",
|
|||
|
" plt.bar(company_nb_clients[\"number_company\"], company_nb_clients[\"customer_id\"]/1000)\n",
|
|||
|
"\n",
|
|||
|
" # Ajout de titres et d'étiquettes\n",
|
|||
|
" plt.xlabel('Company')\n",
|
|||
|
" plt.ylabel(\"Nombre de clients (milliers)\")\n",
|
|||
|
" plt.title(\"Nombre de clients de chaque compagnie de musee\")\n",
|
|||
|
" \n",
|
|||
|
" # Affichage du barplot\n",
|
|||
|
" plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"id": "aed68c8c-1912-4d8d-a974-11ad371f6ee8",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAHFCAYAAAD2eiPWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJYElEQVR4nO3dd3RU1f7+8WcgPYRAAqRACAFCMwSkFzWUAEZBEBQsl6LIRSlKuyqiF1CKFKkqXq9KsQAqRSkCkRJFBKlSBAUvVYn0BCIEkuzfH34zP4YEyJAJSY7v11qzFrNnzzmffXYyeThtbMYYIwAAAAsrkt8FAAAA5DUCDwAAsDwCDwAAsDwCDwAAsDwCDwAAsDwCDwAAsDwCDwAAsDwCDwAAsDwCDwAAsDwCTwEza9Ys2Ww2eXl56fDhw1leb9asmaKiovKhMqlHjx4qVqxYvqz7Zmw2m0aMGHFb19msWTM1a9bsttexfPny2zbW/NiuFSpUUNu2bW/rOlHwHTp0SDabTbNmzcrzdeXHzz3yHoGngEpNTdXLL7+c32XASd9//72eeuqpPF3H8uXLNXLkyDxdB1DQhISE6Pvvv9f999+f36WgkCLwFFD33nuvPvnkE/3444/5XYpLGGN08eLF/C4jzzVq1EjlypXL7zIAy/H09FSjRo1UunTp/C4FhRSBp4B6/vnnFRgYqBdeeOGmfS9duqShQ4cqIiJCHh4eKlu2rPr27atz58459Ms8VLB06VLdeeed8vb2VvXq1bV06VJJfx1Oq169unx9fdWgQQNt2bIl2/Xt2bNHLVu2lK+vr0qXLq1+/frpzz//dOhjs9nUr18/vfPOO6pevbo8PT01e/ZsSdL+/fv12GOPqUyZMvL09FT16tX11ltv5Wi7JCcnq1evXgoMDFSxYsV077336pdffsm2b27Wk5GRoenTp6t27dry9vZWiRIl1KhRI3355Zc3fF92u8ITExPVu3dvlStXTh4eHoqIiNDIkSOVlpZm75O5u37ixImaNGmSIiIiVKxYMTVu3FgbN2609+vRo4d9DDabzf44dOiQJOmzzz5Tw4YN5e/vLx8fH1WsWFFPPvnkTcdbELfrihUrVKdOHXl7e6tatWr64IMPHF4/efKk+vTpoxo1aqhYsWIqU6aMWrRooW+//TbLsn7//Xd17txZfn5+8vf3V5cuXbRx48Ysh0iyO0wp/bXdK1So4NB2+fJljRo1StWqVZOnp6dKly6tJ554QidPnszRtti0aZPatWunwMBAeXl5qVKlShowYIBDn/Xr16tly5by8/OTj4+PmjRpomXLljn0yTwMvmbNGvscFi9eXN26dVNKSooSExPVuXNnlShRQiEhIRoyZIiuXLlif3/mz9748eM1evRolS9fXl5eXqpXr55Wr17tsK4DBw7oiSeeUGRkpHx8fFS2bFm1a9dOu3btyjK+PXv2qHXr1vLx8VHp0qXVt29fLVu2TDabTevWrXPY5lFRUdq8ebPuvvtu+8/t66+/royMjCx1XntIqzB8nmR+Hs6cOVNVq1aVt7e36tWrp40bN8oYowkTJth/51u0aKEDBw44vL9ChQrq0aNHluVe+/OakZGhUaNG2ddRokQJRUdHa+rUqbc0luTkZA0ZMsThb8uAAQOUkpKSo3EXOAYFysyZM40ks3nzZjN16lQjyaxevdr+ekxMjLnjjjvszzMyMkybNm2Mm5ubeeWVV8yqVavMxIkTja+vr7nzzjvNpUuX7H3Dw8NNuXLlTFRUlJk7d65Zvny5adiwoXF3dzf//ve/TdOmTc3ChQvNokWLTJUqVUxQUJD5888/7e/v3r278fDwMOXLlzejR482q1atMiNGjDBubm6mbdu2DuOQZMqWLWuio6PNJ598YtasWWN2795t9uzZY/z9/U3NmjXNnDlzzKpVq8zgwYNNkSJFzIgRI264bTIyMkzz5s2Np6enff3Dhw83FStWNJLM8OHD7X
1zsx5jjOnataux2WzmqaeeMl988YX56quvzOjRo83UqVMd5iImJibLuK+u4/jx4yYsLMyEh4eb//znP+brr782r732mvH09DQ9evSw9zt48KCRZCpUqGDuvfdes3jxYrN48WJTs2ZNU7JkSXPu3DljjDEHDhwwDz30kJFkvv/+e/vj0qVLZsOGDcZms5lHHnnELF++3KxZs8bMnDnTdO3atVBt18yf0xo1apg5c+aYlStXmocffthIMgkJCfZ++/btM88884yZN2+eWbdunVm6dKnp2bOnKVKkiFm7dq29359//mmqV69u/P39zfTp083KlSvNs88+a8qXL28kmZkzZ95wTo3562c/PDzc/jw9Pd3ce++9xtfX14wcOdLEx8eb9957z5QtW9bUqFHD4fcmOytWrDDu7u4mOjrazJo1y6xZs8Z88MEH5pFHHrH3WbdunXF3dzd169Y18+fPN4sXLzatW7c2NpvNzJs3z94v8zMjIiLCDB482KxatcqMGzfOFC1a1Dz66KOmTp06ZtSoUSY+Pt688MILRpJ544037O/P/NkLCwszd911l1mwYIH57LPPTP369Y27u7vZsGGDvW9CQoIZPHiw+fzzz01CQoJZtGiR6dChg/H29jb79u2z9/v9999NYGCgKV++vJk1a5ZZvny56dq1q6lQoYKR5DA/MTExJjAw0ERGRpp33nnHxMfHmz59+hhJZvbs2VnqvHq+CsvniSQTHh5umjRp4vAZGxAQYAYOHGjat29vli5daj7++GMTFBRkoqOjTUZGhv394eHhpnv37lmWe+3P69ixY03RokXN8OHDzerVq82KFSvMlClTHGrM6VhSUlJM7dq1TalSpcykSZPM119/baZOnWr8/f1NixYtHOorLAg8BczVgSc1NdVUrFjR1KtXz/7DdW3gWbFihZFkxo8f77Cc+fPnG0nm3XfftbeFh4cbb29vc+zYMXvbjh07jCQTEhJiUlJS7O2LFy82ksyXX35pb+vevbuR5PDHyRhjRo8ebSSZ9evX29skGX9/f3PmzBmHvm3atDHlypUzSUlJDu39+vUzXl5eWfpf7auvvrrh+q/+gMrNer755hsjyQwbNuy6fYzJWeDp3bu3KVasmDl8+LBDv4kTJxpJZs+ePcaY//9hXrNmTZOWlmbv98MPPxhJZu7cufa2vn37muz+r5K5zMxwlFMFbbuGh4cbLy8vh2128eJFExAQYHr37n3d96WlpZkrV66Yli1bmgcffNDePmPGDCPJfPHFFw79e/XqdcuBZ+7cuUaSWbBggUO/zZs3G0nm7bffvuEYK1WqZCpVqmQuXrx43T6NGjUyZcqUMefPn3cYY1RUlClXrpz9MyHzM6N///4O7+/QoYORZCZNmuTQXrt2bVOnTh3788yfvdDQUId6kpOTTUBAgImNjb1ujWlpaeby5csmMjLSDBw40N7+r3/9y9hsNvvPd6Y2bdpkG3gkmU2bNjn0rVGjhmnTpk2WOq+er8LweWLMX58LwcHB5sKFC/a2zM/Y2rVrO4SHKVOmGElm586d9racBp62bdua2rVr37CWnI5l7NixpkiRImbz5s0O/T7//HMjySxfvvyG6ymIOKRVgHl4eGjUqFHasmWLPv3002z7rFmzRpKy7O58+OGH5evrm2WXdO3atVW2bFn78+rVq0v6a9eoj49PlvbsrhR7/PHHHZ4/9thjkqS1a9c6tLdo0UIlS5a0P7906ZJWr16tBx98UD4+PkpLS7M/7rvvPl26dMnh8M21Mpd/vfW7aj1fffWVJKlv377X7ZNTS5cuVfPmzRUaGupQR1xcnCQpISHBof/999+vokWL2p9HR0dLyn4erlW/fn1JUufOnfXpp5/qt99+y1GNBXG71q5dW+XLl7c/9/LyUpUqVbJsh3feeUd16tSRl5eX3Nzc5O7urtWrV2vv3r0O4/Pz89MDDzxww/E5Y+nSpSpRooTatWvnsB1q166t4OBgh0M21/
rll1/066+/qmfPnvLy8sq2T0pKijZt2qSHHnrI4crIokWLqmvXrjp27Jh+/vlnh/dce2Vb5u/wtSf5Vq9ePdufp44
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"compute_nb_clients(customer_musee)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "5eb3efcc-c7a9-4a0e-aab1-29a36032cb31",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Part de consentement à la réception de mails"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 68,
|
|||
|
"id": "848963c9-6129-4106-80b5-76bf814b70d1",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/tmp/ipykernel_449/1336968363.py:1: SettingWithCopyWarning: \n",
|
|||
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|||
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|||
|
"\n",
|
|||
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|||
|
" customer_musee[\"already_purchased\"] = customer_musee[\"purchase_count\"] > 0\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Flag customers with at least one purchase. assign() returns a new frame, so nothing\n",
|
|||
|
"# is written into a slice of another DataFrame (avoids SettingWithCopyWarning).\n",
|
|||
|
"customer_musee = customer_musee.assign(already_purchased=customer_musee[\"purchase_count\"] > 0)\n",
|
|||
|
"\n",
|
|||
|
"def mailing_consent(customer_musee):\n",
|
|||
|
"    \"\"\"Plot the mailing opt-in share per company, split by purchase status.\n",
|
|||
|
"\n",
|
|||
|
"    Bars are grouped by ``number_company`` with one bar per ``already_purchased``\n",
|
|||
|
"    value; heights are the mean of ``opt_in`` times 100.\n",
|
|||
|
"\n",
|
|||
|
"    Parameters\n",
|
|||
|
"    ----------\n",
|
|||
|
"    customer_musee : pandas.DataFrame\n",
|
|||
|
"        Needs ``number_company`` and ``opt_in``, plus ``already_purchased`` or,\n",
|
|||
|
"        failing that, ``purchase_count`` (the flag is then ``purchase_count > 0``).\n",
|
|||
|
"        The frame is not modified.\n",
|
|||
|
"    \"\"\"\n",
|
|||
|
"    if \"already_purchased\" not in customer_musee.columns:\n",
|
|||
|
"        customer_musee = customer_musee.assign(already_purchased=customer_musee[\"purchase_count\"] > 0)\n",
|
|||
|
"\n",
|
|||
|
"    df_graph = customer_musee.groupby([\"number_company\", \"already_purchased\"])[\"opt_in\"].mean().reset_index()\n",
|
|||
|
"\n",
|
|||
|
"    fig, ax = plt.subplots(figsize=(10, 6))\n",
|
|||
|
"\n",
|
|||
|
"    categories = df_graph[\"number_company\"].unique()\n",
|
|||
|
"    labels = df_graph[\"already_purchased\"].unique()\n",
|
|||
|
"    bar_width = 0.35\n",
|
|||
|
"    bar_positions = np.arange(len(categories))\n",
|
|||
|
"\n",
|
|||
|
"    # One series of bars per purchase status, each shifted by one bar width.\n",
|
|||
|
"    for rank, label in enumerate(labels):\n",
|
|||
|
"        label_data = df_graph[df_graph['already_purchased'] == label].set_index('number_company')['opt_in']\n",
|
|||
|
"        # reindex() yields NaN (no bar) for a company missing this status;\n",
|
|||
|
"        # the former values[0] lookup raised IndexError in that case.\n",
|
|||
|
"        values = label_data.reindex(categories).astype(float).to_numpy() * 100\n",
|
|||
|
"\n",
|
|||
|
"        label_printed = \"purchased\" if label else \"no purchase\"\n",
|
|||
|
"        ax.bar(bar_positions + rank * bar_width, values, bar_width, label=label_printed)\n",
|
|||
|
"\n",
|
|||
|
"    ax.set_xlabel('Numero de compagnie')\n",
|
|||
|
"    ax.set_ylabel('Part de consentement (%)')\n",
|
|||
|
"    ax.set_title('Part de consentement au mailing selon les compagnies')\n",
|
|||
|
"    # Centre each tick under its group whatever the number of series.\n",
|
|||
|
"    ax.set_xticks(bar_positions + bar_width * (len(labels) - 1) / 2)\n",
|
|||
|
"    ax.set_xticklabels(categories)\n",
|
|||
|
"    ax.legend()\n",
|
|||
|
"\n",
|
|||
|
"    plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 69,
|
|||
|
"id": "c7239078-5c1a-4522-8d72-848efe7df28f",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIhCAYAAABwnkrAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABhr0lEQVR4nO3dd3yN9///8ecRkS0kyNCIILYY1ar4VGLvVrXVojWqRe2iRvVTsWK0xUe1RluhWqufoqqoHZugwQdVWpuIWomVSHL9/ugv59tzYuSQ5ASP++12bjfX+1qv6+Sd8fS+rvcxGYZhCAAAAABglsfeBQAAAABAbkNQAgAAAAArBCUAAAAAsEJQAgAAAAArBCUAAAAAsEJQAgAAAAArBCUAAAAAsEJQAgAAAAArBCUAAAAAsEJQArLBrFmzZDKZzK+8efPqqaeeUqdOnXTmzJksPVdkZKSWLFnyUMc4fvy4TCaTZs2alSU1PWm++OKLx/K9y4q+9SiLiIiQyWSyaAsPD1d4eLhFm8lkUkRERM4Vlk3sdR3pPy+PHz+e4+eGbR6Xvg5kFkEJyEZRUVHatm2bVq9erXfeeUfz5s3T888/r+vXr2fZOZ70P2ZzA4LS4+ntt9/Wtm3b7rvdtm3b9Pbbb+dARYB90dfxpMlr7wKAx1nFihVVvXp1SVKdOnWUmpqqkSNHasmSJWrXrt1DHfvmzZtycXHJijIB3MFTTz2lp5566r7bPffcczlQDWB/9HU8aRhRAnJQ+i+ZEydOSJKGDx+uGjVqyMvLS/nz51e1atX09ddfyzAMi/2KFy+u5s2ba9GiRapataqcnZ01fPhwmUwmXb9+XbNnzzbf5md9W5C1s2fPqnXr1vLw8JCnp6dee+01xcXF3XHbXbt26YUXXpCXl5ecnZ1VtWpVLVy4MFPXmpSUpBEjRqhcuXJydnaWt7e36tSpo61bt5q3uXXrloYMGaKgoCDly5dPRYsWVY8ePXTlypU7Xv/KlStVrVo1ubi4qGzZspo5c6bFdjdu3NCAAQMUFBQkZ2dneXl5qXr16po3b57N15V+O9D69ev17rvvqlChQvL29larVq109uxZi9oOHDig6Oho89egePHi5vUJCQnmmtKvsW/fvhlGFU0mk3r27KmoqCiVKVNGLi4uql69urZv3y7DMPTxxx8rKChI7u7uqlu3ro4ePZrhPV+zZo3q1aun/Pnzy9XVVbVq1dLatWsttkm/nezAgQNq06aNPD095ePjo7feektXr161qMfWvpXZ/ny323eKFy+ujh073vMc6beJfvzxxxo3bpyKFy8uFxcXhYeH6/fff9ft27c1ePBg+fv7y9PTUy+99JLi4+MtjrFgwQI1bNhQfn5+cnFxUbly5TR48OAMX5M73Xp3J9bXk9m+I/39fdK/f3/5+vrK1dVVtWvX1u7duzP1XkjS1KlTVblyZbm7u8vDw0Nly5bVBx98YLFNXFycunbtqqeeekr58uVTUFCQhg8frpSUlPse/3//+59efPFFFSxYUM7OzqpSpYpmz55tsc2GDRtkMpk0b948DR06VP7+/sqfP7/q16+vw4cP3/ccd5OZ/nzhwgV16dJFAQEBcnJyUuHChVWrVi2tWbPmvsf/7bff1KZNG/n4+MjJyUnFihVT+/btlZSU9EDXP3fuXA0aNEh+fn5yd3dXixYtdP78eSUmJqpLly4qVKiQChUqpE6dOunatWsWx0j//p8+fbpKly4tJycnlS9fXvPnz89wvd27d1f58uXl7u6uIkWKqG7dutq0aVOG6zt9+rReeeUVeXh4qECBAmrXrp1iYmIy3GbdsWNHubu76+jRo2ratKnc3d0VEBCg/v37W7wX6XVaf+9mtn9lpq8CuQ0jSkAOSv/jtnDhwpL+/qOva9euKlasmCRp+/bt6tWrl86cOaOPPvrIYt89e/bo0KFD+vDDDxUUFCQ3Nze1bNlSdevWVZ06dfTvf/9bkp
Q/f/67nv/mzZuqX7++zp49qzFjxqh06dL6+eef9dprr2XYdv369WrcuLFq1KihadOmydPTU/Pnz9drr72mGzdu3POPuJSUFDVp0kSbNm1S3759VbduXaWkpGj79u06efKkQkNDZRiGWrZsqbVr12rIkCF6/vnntW/fPg0bNkzbtm3Ttm3b5OTkZD7m3r171b9/fw0ePFg+Pj766quv1LlzZ5UqVUq1a9eWJPXr109z5szRqFGjVLVqVV2/fl3/+9//dPHixQe+rrffflvNmjXT3LlzderUKb3//vt64403tG7dOknS4sWL9corr8jT01NffPGFJJnrvnHjhsLCwnT69Gl98MEHCgkJ0YEDB/TRRx9p//79WrNmjcUf4suWLdOvv/6qsWPHymQyadCgQWrWrJk6dOigP//8U1OmTNHVq1fVr18/vfzyy4qNjTXv/+2336p9+/Z68cUXNXv2bDk6Omr69Olq1KiRfvnlF9WrV8/iul5++WW99tpr6ty5s/bv368hQ4ZIkjl8btu2zaa+JdnWnx/W559/rpCQEH3++ee6cuWK+vfvrxYtWqhGjRpydHTUzJkzdeLECQ0YMEBvv/22li5dat73yJEjatq0qfr27Ss3Nzf99ttvGjdunHbu3Gn+umaF+/UdSerUqZMWLFiggQMHqm7dujp48KBeeuklJSQk3Pf48+fPV/fu3dWrVy998sknypMnj44ePaqDBw+at4mLi9Ozzz6rPHny6KOPPlLJkiW1bds2jRo1SsePH1dUVNRdj3/48GGFhoaqSJEimjx5sry9vfXtt9+qY8eOOn/+vAYOHGix/QcffKBatWrpq6++UkJCggYNGqQWLVro0KFDcnBwsOm9y2x/fvPNN7Vnzx6NHj1apUuX1pUrV7Rnzx6L7/k72bt3r/71r3+pUKFCGjFihIKDg3Xu3DktXbpUycnJcnJyeqDrr1OnjmbNmqXjx49rwIABatOmjfLmzavKlStr3rx5+vXXX/XBBx/Iw8NDkydPtth/6dKlWr9+vUaMGCE3Nzd98cUX5v1feeUVSdKlS5ckScOGDZOvr6+uXbumxYsXKzw8XGvXrjX/Z8b169dVp04dXbp0SePGjVOpUqW0cuXKO/6sl6Tbt2/rhRdeUOfOndW/f39t3LhRI0eOlKen5z2/dzPbvzLTV4FcyQCQ5aKiogxJxvbt243bt28biYmJxrJly4zChQsbHh4eRlxcXIZ9UlNTjdu3bxsjRowwvL29jbS0NPO6wMBAw8HBwTh8+HCG/dzc3IwOHTpkqq6pU6cakowff/zRov2dd94xJBlRUVHmtrJlyxpVq1Y1bt++bbFt8+bNDT8/PyM1NfWu5/nmm28MScaXX355121WrlxpSDLGjx9v0b5gwQJDkjFjxgxzW2BgoOHs7GycOHHC3Hbz5k3Dy8vL6Nq1q7mtYsWKRsuWLe96TluuK/1r2L17d4vtxo8fb0gyzp07Z26rUKGCERYWluFcY8aMMfLkyWPExMRYtP/3v/81JBnLly83t0kyfH19jWvXrpnblixZYkgyqlSpYtEfJk2aZEgy9u3bZxiGYVy/ft3w8vIyWrRoYXGe1NRUo3Llysazzz5rbhs2bNgd3/fu3bsbzs7OFuexpW9Zu1d/lmQMGzYswz6BgYH3Pd+xY8cMSUblypUt+mD6e/LCCy9YbN+3b19DknH16tU7Hi8tLc24ffu2ER0dbUgy9u7da16X/l79U1hYWIavtfX1ZLbvHDhwwJBkDBo0yGK7efPmGZLu+1707NnTKFCgwD236dq1q+Hu7m7xvWMYhvHJJ58YkowDBw7c9Tpef/11w8nJyTh58qTFvk2aNDFcXV2NK1euGIZhGOvXrzckGU2bNrXYbuHChYYkY9u2bfesMf39OnbsmGEYtvVnd3d3o2/fvvc8/p3UrVvXKFCggBEfH3/XbWy9fut60/te7969LdpbtmxpeHl5WbRJMlxcXCx+N6SkpBhly5Y1Sp
UqddcaU1JSjNu3bxv16tUzXnrpJXP7559/bkgyVqxYYbF9165dM/ys79ChgyHJWLhwocW2TZs2NcqUKZOhzn/2kcz
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"mailing_consent(customer_musee)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "a6c94f0f-4ef4-432f-b2c9-11f51627b7e9",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Part d'hommes et de femmes parmi les clients"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 77,
|
|||
|
"id": "321feb43-242e-4147-a5c8-ad1b41fc1246",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>customer_id</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>structure_id</th>\n",
|
|||
|
" <th>mcp_contact_id</th>\n",
|
|||
|
" <th>fidelity</th>\n",
|
|||
|
" <th>tenant_id</th>\n",
|
|||
|
" <th>is_partner</th>\n",
|
|||
|
" <th>deleted_at</th>\n",
|
|||
|
" <th>gender</th>\n",
|
|||
|
" <th>is_email_true</th>\n",
|
|||
|
" <th>opt_in</th>\n",
|
|||
|
" <th>last_buying_date</th>\n",
|
|||
|
" <th>max_price</th>\n",
|
|||
|
" <th>ticket_sum</th>\n",
|
|||
|
" <th>average_price</th>\n",
|
|||
|
" <th>average_purchase_delay</th>\n",
|
|||
|
" <th>average_price_basket</th>\n",
|
|||
|
" <th>average_ticket_basket</th>\n",
|
|||
|
" <th>total_price</th>\n",
|
|||
|
" <th>purchase_count</th>\n",
|
|||
|
" <th>first_buying_date</th>\n",
|
|||
|
" <th>country</th>\n",
|
|||
|
" <th>gender_label</th>\n",
|
|||
|
" <th>gender_female</th>\n",
|
|||
|
" <th>gender_male</th>\n",
|
|||
|
" <th>gender_other</th>\n",
|
|||
|
" <th>country_fr</th>\n",
|
|||
|
" <th>number_company</th>\n",
|
|||
|
" <th>already_purchased</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
"Empty DataFrame\n",
|
|||
|
"Columns: [customer_id, street_id, structure_id, mcp_contact_id, fidelity, tenant_id, is_partner, deleted_at, gender, is_email_true, opt_in, last_buying_date, max_price, ticket_sum, average_price, average_purchase_delay, average_price_basket, average_ticket_basket, total_price, purchase_count, first_buying_date, country, gender_label, gender_female, gender_male, gender_other, country_fr, number_company, already_purchased]\n",
|
|||
|
"Index: []"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 77,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Sanity check: list the rows where none of the three gender flags is set.\n",
|
|||
|
"customer_musee.loc[\n",
|
|||
|
"    lambda df: df['gender_male'] + df['gender_female'] + df['gender_other'] == 0\n",
|
|||
|
"]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 88,
|
|||
|
"id": "3bb5dc1e-05dc-41da-83bb-cb6b8d43fa73",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def gender_bar(customer_musee):\n",
|
|||
|
"    \"\"\"Draw a stacked bar chart of the gender split for each company.\n",
|
|||
|
"\n",
|
|||
|
"    The per-company means of ``gender_male``, ``gender_female`` and\n",
|
|||
|
"    ``gender_other`` are stacked in that order, one bar per ``number_company``.\n",
|
|||
|
"    \"\"\"\n",
|
|||
|
"    # (column, legend entry) pairs, listed from the bottom layer to the top one.\n",
|
|||
|
"    layers = [\n",
|
|||
|
"        (\"gender_male\", \"Homme\"),\n",
|
|||
|
"        (\"gender_female\", \"Femme\"),\n",
|
|||
|
"        (\"gender_other\", \"Inconnu\"),\n",
|
|||
|
"    ]\n",
|
|||
|
"    shares = customer_musee.groupby(\"number_company\")[[column for column, _ in layers]].mean().reset_index()\n",
|
|||
|
"    companies = shares[\"number_company\"]\n",
|
|||
|
"\n",
|
|||
|
"    # Each layer starts where the layers below it end.\n",
|
|||
|
"    stack_base = 0\n",
|
|||
|
"    for column, legend_entry in layers:\n",
|
|||
|
"        plt.bar(companies, shares[column], bottom=stack_base, label=legend_entry)\n",
|
|||
|
"        stack_base = stack_base + shares[column]\n",
|
|||
|
"\n",
|
|||
|
"    plt.xlabel('Company')\n",
|
|||
|
"    plt.ylabel(\"Part de clients de chaque sexe\")\n",
|
|||
|
"    plt.title(\"Sexe des clients de chaque compagnie de musee\")\n",
|
|||
|
"    plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"    # One tick per company, labelled with the company number.\n",
|
|||
|
"    plt.xticks(companies, list(map(\"{}\".format, companies)))\n",
|
|||
|
"\n",
|
|||
|
"    plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 89,
|
|||
|
"id": "c61c4b41-ba31-44f4-9515-25922798b86a",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHFCAYAAAAOmtghAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUDElEQVR4nO3deXhM5/8+8Huy74nIYstqX4IQS6KkUQSx71sjln5LKJqqpWqnUS1CS9CK4KOkaitNSWqJfU2oWosQSyIiJEETyczz+8Mv045JmJNMDON+XVeuy3nOc855n3Mmk9tZZUIIASIiIiI9YaDrAoiIiIi0ieGGiIiI9ArDDREREekVhhsiIiLSKww3REREpFcYboiIiEivMNwQERGRXmG4ISIiIr3CcENERER6heHmLXD8+HF0794drq6uMDU1hbOzM3x9ffHZZ5/pujTJZDIZZsyYoesy1LxY1/79+yGTybB///4yXe6yZcsQHR1dpssAXt/6/NeNGzcgk8nw7bffvrZl0tshOjoaMpkMN27cKNPl6OJzT28Ghps33G+//QY/Pz9kZ2dj/vz5iIuLw+LFi9GiRQvExMToujy91ahRIxw9ehSNGjUq0+W8rnBD9CYJCgrC0aNHUbFiRV2XQnrKSNcF0MvNnz8fHh4e2L17N4yM/t1d/fr1w/z583VYmX6zsbFB8+bNdV0GkV5ydHSEo6OjrssgPcYjN2+4Bw8ewMHBQSXYFDIwUN99MTEx8PX1haWlJaysrBAYGIikpCTl+EOHDsHY2Bjjx49Xma7wMPGqVauUbX///TcGDBgAJycnmJqaonbt2li6dKlGdWdnZ+Ojjz5C+fLlYWVlhfbt2+PKlStF9tVkOQqFAnPmzEHNmjVhbm4OOzs71K9fH4sXL35lLY8ePcJnn30GT09PmJqawsnJCR07dsSlS5eKnaa4w9mnTp1Cly5dYG9vDzMzM3h7e+Pnn39W6VO4Lfft24eRI0fCwcEB5cuXR48ePXD37l1lP3d3d5w/fx4JCQmQyWSQyWRwd3cv9fpeunQJ7du3h4WFBRwcHDBixAjk5OQU2fePP/7ABx98ABsbG1hYWKBFixbYs2fPK5cBSNuuCxcuhIeHB6ysrODr64tjx46pjD916hT69esHd3d3mJubw93dHf3798fNmzfV5nXs2DG0aNECZmZmqFSpEiZPnowffvhB7TRHcadA3d3dERISotKWlpaGjz/+GFWqVIGJiQk8PDwwc+ZMFBQUaLQtfvrpJ/j6+sLKygpWVlZo2LChyu8SAERFRaFBgwYwMzODvb09unfvjosXL6r0CQkJgZWVFS5duoTAwEBYWlqiYsWKmDdvnnLd33vvPVhaWqJGjRpYs2aNyvSFn734+HgMGTIE9vb2sLS0ROfOnXH9+nWVvvHx8ejatSuqVKkCMzMzVKtWDR9//DEyMjLU1m/79u2oX78+TE1N4enpicWLF2PGjBmQyWQq/WQyGUaPHo1169ahdu3asLCwQIMGDbBz584i63zxtFRpPo+v43Nf+L3w008/YeLEiahYsSKsrKzQuXNn3Lt3Dzk5Ofi///s/ODg4wMHBAUOGDMHjx4+V0xeeqi3qaO2Ln9f79+/j//7v/+Di4gJTU1M4OjqiRYsW+OOPP0q0LqX5Pn8rCXqjDR8+XAAQn3zyiTh27Jh49uxZsX3nzp0rZDKZGDp0qNi5c6fYsmWL8PX1FZaWluL8+fPKfvPmzRMAxPbt24UQQvz111/CwsJCDBo0SNnn/PnzwtbWVnh5eYm1a9eKuLg48dlnnwkDAwMxY8aMl9asUChEQECAMDU1FXPnzhVxcXFi+vTpwtPTUwAQ06dPl7yc8PBwYWhoKKZPny727Nkjdu3aJSIiIl5ZS3Z2tqhbt66wtLQUs2bNErt37xabN28WY8eOFXv37lX2e7Guffv2CQBi3759yra9e/cKExMT0bJlSxETEyN27dolQkJCBACxev
VqZb/Vq1cLAMLT01N88sknYvfu3eLHH38U5cqVEwEBAcp+iYmJwtPTU3h7e4ujR4+Ko0ePisTExFKtb1pamnBychKVK1cWq1evFrGxsWLgwIHC1dVVbX3WrVsnZDKZ6Natm9iyZYvYsWOH6NSpkzA0NBR//PFHqbdrcnKyACDc3d1F+/btxbZt28S2bduEl5eXKFeunHj06JFyfps2bRLTpk0TW7duFQkJCWLjxo3C399fODo6ivv37yv7nT9/XlhYWIg6deqIDRs2iO3bt4vAwEDl+iUnJxe7Twu5ubmJwYMHK4dTU1OFi4uLcHNzEytWrBB//PGHmD17tjA1NRUhISEv3Q5CCDF16lQBQPTo0UNs2rRJxMXFiYULF4qpU6cq+3z11VcCgOjfv7/47bffxNq1a4Wnp6ewtbUVV65cUfYbPHiwMDExEbVr1xaLFy8W8fHxYsiQIQKAmDx5sqhRo4ZYtWqV2L17t+jUqZMAIE6dOqWcvvCz5+LiIoYOHSp+//13sXLlSuHk5CRcXFzEw4cPlX0jIyNFeHi4+PXXX0VCQoJYs2aNaNCggahZs6bK98zvv/8uDAwMxPvvvy+2bt0qNm3aJJo1aybc3d3Fi39CCvd306ZNxc8//yxiY2PF+++/L4yMjMS1a9fU6vzv/irN5/F1fe4Lvxfc3NxESEiI2LVrl1i+fLmwsrISAQEBom3btmL8+PEiLi5OfP3118LQ0FB88sknyukLfyf++33x3233389rYGCgcHR0FCtXrhT79+8X27ZtE9OmTRMbN26UvC6l+T5/WzHcvOEyMjLEe++9JwAIAMLY2Fj4+fmJ8PBwkZOTo+yXkpIijIyMVH6RhBAiJydHVKhQQfTp00fZplAoRMeOHYWdnZ3466+/RJ06dUStWrXE48ePlX0CAwNFlSpVRFZWlsr8Ro8eLczMzERmZmaxNf/+++8CgFi8eLFK+9y5c4v8BdZkOZ06dRINGzZ8xdZSN2vWLAFAxMfHv7SfJuGmVq1awtvbW+Tn56tM26lTJ1GxYkUhl8uFEP9+cYeGhqr0mz9/vgAgUlNTlW1169YV/v7+avWUdH0nTpwoZDKZOHPmjEp727ZtVdbnyZMnwt7eXnTu3Fmln1wuFw0aNBBNmzZ96XI02a6FX+ReXl6ioKBA2X7ixAkBQGzYsKHYaQsKCsTjx4+FpaWlyueob9++wtzcXKSlpan0rVWrVonDzccffyysrKzEzZs3Vfp9++23AoDKfwxedP36dWFoaCgGDhxYbJ+HDx8Kc3Nz0bFjR5X2lJQUYWpqKgYMGKBsGzx4sAAgNm/erGzLz88Xjo6OAoAy/AohxIMHD4ShoaEICwtTthV+9rp3766yrMOHDwsAYs6cOUXWqFAoRH5+vrh586bKf3yEEKJJkybCxcVF5OXlKdtycnJE+fLliww3zs7OIjs7W9mWlpYmDAwMRHh4uFqdhfurtJ/H1/W5L/xeeHH6cePGCQBizJgxKu3dunUT9vb2ymEp4cbKykqMGzeu2FqkrEtpvs/fVjwt9YYrX748Dh48iJMnT2LevHno2rUrrly5gsmTJ8PLy0t5CHn37t0oKChAcHAwCgoKlD9mZmbw9/dXOb0ik8mwdu1aWFtbw8fHB8nJyfj5559haWkJAMjNzcWePXvQvXt3WFhYqMyvY8eOyM3NVTut8F/79u0DAAwcOFClfcCAASrDUpbTtGlTnD17FqGhodi9ezeys7M12n6///47atSogTZt2mjUvzhXr17FpUuXlOv0Yq2pqam4fPmyyjRdunRRGa5fvz4AFHmq5UUlXd99+/ahbt26aNCggUr7i9v+yJEjyMzMxODBg1XWRaFQoH379jh58iSePHlS7HKkbNegoCAYGhoqh4vaDo8fP8bEiRNRrVo1GBkZwcjICFZWVnjy5InKqZt9+/bhgw8+gLOzs7LN0NAQffv2fWUdxdm5cycCAgJQqVIllW3RoUMHAEBCQkKx08bHx0
Mul2PUqFHF9jl69Cj++ecftVNhLi4uaN26tdopBJlMho4dOyqHjYyMUK1aNVSsWBHe3t7Kdnt7ezg5ORX5eXrxd8/
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"gender_bar(customer_musee)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "b02a2668-5cea-44db-b2b5-168f7738a949",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Part de clients français"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 97,
|
|||
|
"id": "4b3bb641-814b-4679-9a67-4eca87a920a6",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def country_bar(customer_sport):\n",
|
|||
|
"    \"\"\"Draw one bar per company showing the mean of its ``country_fr`` column.\n",
|
|||
|
"\n",
|
|||
|
"    The y axis is labelled as the share of French customers.\n",
|
|||
|
"    \"\"\"\n",
|
|||
|
"    french_share = (\n",
|
|||
|
"        customer_sport\n",
|
|||
|
"        .groupby(\"number_company\")[\"country_fr\"]\n",
|
|||
|
"        .mean()\n",
|
|||
|
"        .reset_index()\n",
|
|||
|
"    )\n",
|
|||
|
"    companies = french_share[\"number_company\"]\n",
|
|||
|
"\n",
|
|||
|
"    plt.bar(companies, french_share[\"country_fr\"])\n",
|
|||
|
"\n",
|
|||
|
"    plt.xlabel('Company')\n",
|
|||
|
"    plt.ylabel(\"Part de clients français\")\n",
|
|||
|
"    plt.title(\"Nationalité des clients de chaque compagnie de musée\")\n",
|
|||
|
"\n",
|
|||
|
"    # One tick per company, labelled with the company number.\n",
|
|||
|
"    tick_labels = [\"{}\".format(company) for company in companies]\n",
|
|||
|
"    plt.xticks(companies, tick_labels)\n",
|
|||
|
"\n",
|
|||
|
"    plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 98,
|
|||
|
"id": "01258674-6b98-49e4-93f4-f4185964999f",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHGCAYAAACIDqqPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABFnElEQVR4nO3deXxM9/7H8fckkYzIQqKJpRFRe+1L7U0toapoq5ZSW+mllBJqqaJcmtJW1a2li7XVVpVqqapcWyna2qngahFL0NhiDUnO7w+PzK9jEjLJxHD6ej4e83iY7/mecz5nmcnb2cZiGIYhAAAAk/BwdwEAAACuRLgBAACmQrgBAACmQrgBAACmQrgBAACmQrgBAACmQrgBAACmQrgBAACmQrgBAACmQrjBPeXSpUsqW7as2rZtq7S0NHeXAwC4xa5du+Tr66v//Oc/7i4lU4SbHJgzZ44sFousVquOHDniMPyxxx5ThQoVsjXtzz//XJMnT85wmMVi0RtvvJGt6bpa+jo4fPiwra1bt24qXry4Xb8333xTS5YsueP0XnzxRYWGhuqzzz6Th0f2d8/ixYurW7du2R4/t9xa1+HDh2WxWDRnzpxcne/t9idXulvLcyuLxaKXX375rs4T9761a9fKYrFo7dq1uTofd+337nDx4kU9++yz6tevn/r16+fucjJFuHGB5ORkvf766y6d5u3+GG3atEk9e/Z06fxcaeTIkfrmm2/s2rISbqZOnapdu3bp22+/lY+PTy5WeO8oXLiwNm3apBYtWuTqfO5WuAHuJdWqVdOmTZtUrVo1d5diGj169NAjjzyit956y92l3JaXuwswg8cff1yff/65Bg8erMqVK+f6/GrXrp3r88iJhx56KFvj9e3bV3379nVxNfc2Hx+fe357AvergIAAPl8u9tVXX7m7hCzhyI0LDBkyRMHBwRo6dOgd+06dOlWPPvqoQkJClC9fPlWsWFETJ07UjRs3bH0ee+wxff/99zpy5IgsFovtlS6j01J79uxR69atVaBAAVmtVlWpUkVz586165N+iPaLL77QiBEjVKRIEQUEBKhJkybav3+/Xd/Y2Fi1bt1aDz74oKxWq0qWLKlevXopMTHxjst462kpi8Wiy5cva+7cubZleeyxx2zDT548qV69eunBBx+Ut7e3IiIiNGbMGKWkpNxxXjdu3NCQIUNUqFAh+fr6qn79+vr1118z7JvV+UyfPl2VK1eWn5+f/P39VbZsWb322mt3rCU5OVljx45VuXLlZLVaFRwcrIYNG2rjxo2ZjpPZ4ez//e9/6tixo0JCQuTj46Ny5cpp6tSpdn2yuj3vtD9ld3lPnDihdu3ayd/fX4GBgWrfvr1OnjyZYd8tW7aoVatWCgoKktVqVdWqVbP8JenMev30009Vrlw5+fr6qnLlylq2bJnd8IMHD6p79+4qVaqUfH19VbRoUbVs2VK7d+92mNa+ffv0+OOPy9fXVwULFlTv3r21dOlSh9McmZ0Cfeyxx+z2c0lKSkrS4MGDFRERIW9vbxUtWlQDBgzQ5cuXs7QuVqxYocaNGyswMFC+vr4qV66cYmJi7Pp89913qlOnjnx9feXv76+oqCht2rTJrs8bb7whi8WiXbt2qW3btgoMDFRQUJCio6OVkpKi/fv36/HHH5e/v7+KFy+uiRMn2o2fvu999tlnio6OVqFChZQ3b15FRkZq+/btdn23bNmiDh06qHjx4sqbN6+KFy+u5557LsNT+Rs2bFCdOnVktVpVtGhRjRw5Up988onDqe/ixYvrySef1IoVK1StWjXlzZtXZcuW1axZszKs89bTUjnZH+/Gfp/+vfD2229rwoQJtnX32GOP6cCBA7px44aGDRumIkWKKDAwUE8//bROnz5tN43MLl+4dX+9cuWKbZ+0Wq0KCgpSjRo19MUXX2RrWXLyfe5KHLlxAX9/f73++ut65ZVXtHr1ajVq1CjTvn/88Yc6du
xo+3LbuXOnxo8fr3379tk+mNOmTdO//vUv/fHHHw6ndzKyf/9+1a1bVyEhIZoyZYqCg4P12WefqVu3bjp16pSGDBli1/+1115TvXr19MknnygpKUlDhw5Vy5YtFRcXJ09PT1udderUUc+ePRUYGKjDhw9r0qRJql+/vnbv3q08efJkef1s2rRJjRo1UsOGDTVy5EhJN/9HJd38IDzyyCPy8PDQqFGj9NBDD2nTpk0aN26cDh8+rNmzZ9922i+++KLmzZunwYMHKyoqSnv27NEzzzyjixcv2vXL6ny+/PJL9enTR/369dM777wjDw8PHTx4UHv37r1tHSkpKWrevLnWr1+vAQMGqFGjRkpJSdHmzZsVHx+vunXrZnl97d27V3Xr1lWxYsX07rvvqlChQvrxxx/Vv39/JSYmavTo0Xb977Q9b7c/ZXd5r169qiZNmujEiROKiYlR6dKl9f3336t9+/YOfdesWaPHH39ctWrV0owZMxQYGKgvv/xS7du315UrV257bZQz6/X777/Xb7/9prFjx8rPz08TJ07U008/rf3796tEiRKSbv5hCg4O1ltvvaUHHnhAZ8+e1dy5c1WrVi1t375dZcqUkSSdOnVKkZGRypMnj6ZNm6bQ0FDNnz8/R9f1XLlyRZGRkTp27Jhee+01VapUSb///rtGjRql3bt367///a9d6LzVzJkz9eKLLyoyMlIzZsxQSEiIDhw4oD179tj6fP755+rUqZOaNm2qL774QsnJyZo4caIee+wxrVq1SvXr17ebZrt27fT888+rV69eio2Ntf1H67///a/69OmjwYMH6/PPP9fQoUNVsmRJPfPMM3bjv/baa6pWrZo++eQTXbhwQW+88YYee+wxbd++3bbODx8+rDJlyqhDhw4KCgpSQkKCpk+frpo1a2rv3r0qWLCgpJsXqUZFRal06dKaO3eufH19NWPGDH322WcZro+dO3dq0KBBGjZsmEJDQ/XJJ5+oR48eKlmypB599NFM12NO9se7td+nmzp1qipVqqSpU6fq/PnzGjRokFq2bKlatWopT548mjVrlo4cOaLBgwerZ8+e+u677+44zVtFR0fr008/1bhx41S1alVdvnxZe/bs0ZkzZ5xelpx+n7uUgWybPXu2Icn47bffjOTkZKNEiRJGjRo1jLS0NMMwDCMyMtJ4+OGHMx0/NTXVuHHjhjFv3jzD09PTOHv2rG1YixYtjPDw8AzHk2SMHj3a9r5Dhw6Gj4+PER8fb9evefPmhq+vr3H+/HnDMAxjzZo1hiTjiSeesOv31VdfGZKMTZs2ZTi/tLQ048aNG8aRI0cMSca3337rsA4OHTpka+vatatD7fny5TO6du3qMO1evXoZfn5+xpEjR+za33nnHUOS8fvvv2dYk2EYRlxcnCHJGDhwoF37/PnzDUl288vqfF5++WUjf/78mc4zM/PmzTMkGR9//PFt+4WHh9vVdejQIUOSMXv2bFtbs2bNjAcffNC4cOGC3bgvv/yyYbVabfuJM9szs/0pu8s7ffp0h33BMAzjxRdfdFiesmXLGlWrVjVu3Lhh1/fJJ580ChcubKSmpmY6n6yuV0lGaGiokZSUZGs7efKk4eHhYcTExGQ6XkpKinH9+nWjVKlSdvvR0KFDDYvFYuzYscOuf1RUlCHJWLNmja3t1m2aLjIy0oiMjLS9j4mJMTw8PIzffvvNrt/XX39tSDKWL1+eaZ0XL140AgICjPr169u+X26VmppqFClSxKhYsaLdOr148aIREhJi1K1b19Y2evRoQ5Lx7rvv2k2jSpUqhiRj8eLFtrYbN24YDzzwgPHMM8/Y2tL3vWrVqtnVc/jwYSNPnjxGz549M12WlJQU49KlS0a+fPmM999/39betm1bI1++fMZff/1lt0zly5d3+I4JDw83rFar3ef56tWrRlBQkNGrVy+HOv++vXKyP96t/T79e6Fy5cp2/SZPnmxIMlq1amXXf8CAAYYku++MW/
9OpLt1f61QoYLx1FNPZVqLM8uSk+9zV+O0lIt4e3tr3Lhx2rJly20PO27fvl2tWrVScHCwPD09lSdPHnXp0kWpqak
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"country_bar(customer_musee)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "6c9e8051-06d1-44ed-aa17-eb8ddd9fb5ed",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Ouverture mails"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 95,
|
|||
|
"id": "e346fdc0-617f-463b-af29-caa0d9f88485",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def campaigns_effectiveness(customer_sport, Train=False):\n",
|
|||
|
"    \"\"\"Plot, per company, how many customers have and have not purchased.\n",
|
|||
|
"\n",
|
|||
|
"    Draws a stacked bar chart (counts in thousands), one bar per\n",
|
|||
|
"    ``number_company``: customers whose ``already_purchased`` is true at the\n",
|
|||
|
"    bottom, the others stacked on top.\n",
|
|||
|
"\n",
|
|||
|
"    Parameters\n",
|
|||
|
"    ----------\n",
|
|||
|
"    customer_sport : pandas.DataFrame\n",
|
|||
|
"        Needs ``number_company`` and ``customer_id``, plus ``purchase_count``\n",
|
|||
|
"        when ``Train`` is false or a boolean ``already_purchased`` column when\n",
|
|||
|
"        ``Train`` is true. The frame is not modified.\n",
|
|||
|
"    Train : bool, default False\n",
|
|||
|
"        When false, ``already_purchased`` is derived as ``purchase_count > 0``;\n",
|
|||
|
"        when true, the existing ``already_purchased`` column is used as is.\n",
|
|||
|
"    \"\"\"\n",
|
|||
|
"    if not Train:\n",
|
|||
|
"        # assign() works on a new frame: the caller's data is left untouched and\n",
|
|||
|
"        # no SettingWithCopyWarning is emitted when it is a slice.\n",
|
|||
|
"        customer_sport = customer_sport.assign(already_purchased=customer_sport[\"purchase_count\"] > 0)\n",
|
|||
|
"\n",
|
|||
|
"    has_purchased = customer_sport[\"already_purchased\"]\n",
|
|||
|
"    nb_customers_purchasing = customer_sport[has_purchased].groupby(\"number_company\")[\"customer_id\"].count()\n",
|
|||
|
"    nb_customers_no_purchase = customer_sport[~has_purchased].groupby(\"number_company\")[\"customer_id\"].count()\n",
|
|||
|
"\n",
|
|||
|
"    # Align both counts on the same companies (0 where a group is absent) so the\n",
|
|||
|
"    # stacked bars line up even when a company has only one kind of customer.\n",
|
|||
|
"    companies = nb_customers_purchasing.index.union(nb_customers_no_purchase.index)\n",
|
|||
|
"    nb_customers_purchasing = nb_customers_purchasing.reindex(companies, fill_value=0) / 1000\n",
|
|||
|
"    nb_customers_no_purchase = nb_customers_no_purchase.reindex(companies, fill_value=0) / 1000\n",
|
|||
|
"\n",
|
|||
|
"    plt.bar(companies, nb_customers_purchasing, label = \"has purchased\")\n",
|
|||
|
"    plt.bar(companies, nb_customers_no_purchase,\n",
|
|||
|
"            bottom = nb_customers_purchasing, label = \"has not purchased\")\n",
|
|||
|
"\n",
|
|||
|
"    plt.xlabel('Company')\n",
|
|||
|
"    plt.ylabel(\"Nombre de clients (en milliers)\")\n",
|
|||
|
"    plt.title(\"Nombre de clients ayant acheté ou été ciblés par des mails pour les compagnies de musée\")\n",
|
|||
|
"    plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"    # One tick per company, as in the other per-company bar charts.\n",
|
|||
|
"    plt.xticks(companies, [\"{}\".format(i) for i in companies])\n",
|
|||
|
"\n",
|
|||
|
"    plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 96,
|
|||
|
"id": "928dab1d-96ff-4fc2-91ea-2ff119620f31",
|
|||
|
"metadata": {
|
|||
|
"editable": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": ""
|
|||
|
},
|
|||
|
"tags": []
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/tmp/ipykernel_449/3480143790.py:3: SettingWithCopyWarning: \n",
|
|||
|
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
|||
|
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
|||
|
"\n",
|
|||
|
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
|||
|
" customer_sport[\"already_purchased\"] = customer_sport[\"purchase_count\"]>0\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAw4AAAHGCAYAAADdZty9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABvkklEQVR4nO3dd1gU1/s28Hul9yZdBIyCKCAqVhJBxYIltsQaxd41KsYSo2IlGjtG/cbeWyxRY8OGBbvYjdEItkCwgqIg5bx/+DI/l7ZL2wW8P9e1l+6ZMzPPmbM7Ow8zZ0YmhBAgIiIiIiLKRRl1B0BERERERMUfEwciIiIiIlKIiQMRERERESnExIGIiIiIiBRi4kBERERERAoxcSAiIiIiIoWYOBARERERkUJMHIiIiIiISCEmDkREREREpBATByLKt6dPn8LKygpBQUHqDiVX3bp1g6urK54/f56v+dPS0uDr64svv/wSSUlJhRxd0SgpfUNERIVDFb9VeUoc1qxZA5lMBl1dXTx8+DDLdD8/P7i7uxdacHnRs2dPGBoaqmXdishkMgQHB6t0nX5+fvDz81N5HPv371d5W9Xt3bt3CA4OxokTJ9SyficnJ7Rq1arQlqdse1JTU9G5c2c0atQIc+bMKbT1F7Zly5bh6NGjOHjwIMqWLSs3bcmSJVizZo3CZUyYMAFxcXHYs2cPdHV1CyWu4OBgyGQyuTJl+zI6OhoymSzH2EtK3xSWEydOQCaTqe07WNScnJzQs2dP6b2i/i/N1PF7SnnXs2dPODk5qTsMANkfD5VWRfFblVm+zjgkJyfjp59+KuxYqIidPXsWffv2LdJ17N+/H1OmTCnSdRQ37969w5QpU0rNQYuy7Rk/fjw0NTWxbt26LAfAxcWVK1cwceJE7N+/H87OzlmmK5M4/Pnnn1i/fj0OHjwIc3PzQoutb9++OHv2bKEt71MloW9Iebt27cLEiRPVHQaR0iZOnIhdu3apO4zPSlH9VmWmmZ+Zmjdvjk2bNmH06NGoVq1aYcekckIIJCUlQU9PT92hFKm6deuqOwQqRX755Rd1h6BQjRo18OzZswIto2XLlnj69GkhRfR/ypUrh3LlyhX6coGS0Tc5ef/+PXR1dZnwfKJ69erqDkFlUlJSIJPJoKmZr8MTKia++OILdYfw2Smq36rM8nXGYcyYMbCwsMDYsWMV1k1KSsL48ePh7OwMbW1t2NvbY8iQIXj9+rVcvYxT9Pv27UP16tWhp6cHNzc37Nu3D8DHy6Tc3NxgYGCA2rVr49KlS9mu79atW2jcuDEMDAxgaWmJoUOH4t27d3J1ZDIZhg4dimXLlsHNzQ06OjpYu3YtAODevXvo2rUrrKysoKOjAzc3N/z6669KbZeEhAT069cPFhYWMDQ0RPPmzfH3339nW7cg60lPT0doaCi8vLygp6cHU1NT1K1bF3v27Ml1vuxO8cbGxmLAgAEoV64ctLW14ezsjClTpiA1NVWqk3FafM6cOZg3bx6cnZ1haGiIevXq4dy5c1K9nj17Sm2QyWTSKzo6GgCwfft21KlTByYmJtDX10eFChXQu3dvhe399ddf0aBBA1hZWcHAwAAeHh6YPXs2UlJSpDrTpk2DpqYmHj9+nGX+3r17w8LCQrreb+vWrWjatClsbW2lz9m4ceOQmJgoN1/G5W/3799HixYtYGhoCAcHBwQFBSE5OVnaNpaWlgCAKVOmSG3+9LKCzJKSkhAUFAQvLy+YmJjA3Nwc9erVwx9//JGlbl76+uDBg6hRowb09PRQuXJlrFq1KksdRf2tTHsK8tlVdn+Q0+UImS/ZyMmHDx8wffp0VK5cGTo6OrC0tESvXr3kkggnJyfcunUL4eHhUjs/PbWekJCA0aNHy8U6YsSILJ+TnBw8eBCNGzeWPu9ubm4ICQmRpmd3qVKGXbt2wdPTE7q6uqhQoQIWLVqk1DqV6Zv09HRMnz4drq6u0mfK09MTCx
cuzHXZGZcDbdiwAaNGjYKNjQ309PTg6+uLyMhIubqXLl1C586d4eTkBD09PTg5OaFLly5ZLnHNuPz18OHD6N27NywtLaGvry99v7Lz119/oXnz5tDX10fZsmUxcOBAvHnzJtu6R44cQePGjWFsbAx9fX34+Pjg6NGjcnWePXuG/v37w8HBQfqs+Pj44MiRI7luj4z+u379Or799lvpuzxq1Cikpqbi7t27aN68OYyMjODk5ITZs2fLzZ+X/YAyn/uCtiMyMhLt27eHsbExTExM8N1332VJutPT0zF79mzpe2VlZYUePXrgyZMnSsWb+ZKRjM/U+vXrERQUBHt7e+jo6OD+/fu5xpyZMr9jALB06VJUq1YNhoaGMDIyQuXKlfHjjz8qXH5ycjKmTp0KNzc36OrqwsLCAg0bNkRERIRUR1XHOhm/S8oc6yjz2wl8/OPpzJkz4ejoCF1dXXh7eyMsLCzH/tq8eTMmTJgAOzs7GBsbw9/fH3fv3s0SZ+ZLlYQQWLJkifR7ZmZmhm+++QYPHjyQqxcZGYlWrVpJ+zE7Ozu0bNkyy+csMyEEZs+eLbWjRo0aOHDgQLZ1C7J/z7gs/+zZs6hfv760j1u9ejWAj3/5r1GjBvT19eHh4YGDBw8q3DZA9r8Jyhw3KdsWZbe/QiIPVq9eLQCIixcvioULFwoA4ujRo9J0X19fUbVqVel9enq6aNasmdDU1BQTJ04Uhw8fFnPmzBEGBgaievXqIikpSarr6OgoypUrJ9zd3cXmzZvF/v37RZ06dYSWlpaYNGmS8PHxETt37hS7du0SLi4uwtraWrx7906aPzAwUGhra4vy5cuLGTNmiMOHD4vg4GChqakpWrVqJdcOAMLe3l54enqKTZs2iWPHjombN2+KW7duCRMTE+Hh4SHWrVsnDh8+LIKCgkSZMmVEcHBwrtsmPT1dNGzYUOjo6Ejrnzx5sqhQoYIAICZPnizVLch6hBCie/fuQiaTib59+4o//vhDHDhwQMyYMUMsXLhQri98fX2ztPvTOGJiYoSDg4NwdHQU//vf/8SRI0fEtGnThI6OjujZs6dULyoqSgAQTk5Oonnz5mL37t1i9+7dwsPDQ5iZmYnXr18LIYS4f/+++OabbwQAcfbsWemVlJQkIiIihEwmE507dxb79+8Xx44dE6tXrxbdu3dX2N6RI0eKpUuXioMHD4pjx46J+fPni7Jly4pevXpJdf777z+ho6MjJkyYIDfvixcvhJ6envjhhx+ksmnTpon58+eLP//8U5w4cUIsW7ZMODs7i4YNG8rNm/GZcnNzE3PmzBFHjhwRkyZNEjKZTEyZMkUIIURSUpI4ePCgACD69Okjtfn+/fs5tuf169eiZ8+eYv369eLYsWPi4MGDYvTo0aJMmTJi7dq1cnWV6euM706VKlXEunXrxKFDh8S3334rAIjw8PA89bei9hT0O6Ls/iDzZ/XTtgYGBua6nrS0NNG8eXNhYGAgpkyZIsLCwsSKFSuEvb29qFKlirTfuHLliqhQoYKoXr261M4rV64IIYRITEwUXl5eomzZsmLevHniyJEjYuHChcLExEQ0atRIpKen5xrDihUrhEwmE35+fmLTpk3iyJEjYsmSJWLw4MFSncmTJ4vMu2BHR0dhb28vypcvL1atWiX2798vunXrJgCIX375RaqX8Z1cvXq1VKZs34SEhAgNDQ0xefJkcfToUXHw4EGxYMEChf13/PhxAUA4ODiINm3aiL1794oNGzaIihUrCmNjY/HPP/9Idbdv3y4mTZokdu3aJcLDw8WWLVuEr6+vsLS0FM+ePZPqZfym2Nvbi/79+4sDBw6I33//XaSmpmYbQ2xsrLCyshL29vZi9erV0vYpX768ACCOHz8u1V2/fr2QyWSibdu2YufOnWLv3r2iVatWQkNDQxw5ckSq16xZM2FpaSl+++03ceLECbF7924xadIksWXLlly3R0b/ubq6imnTpomwsDAxZswYAU
AMHTpUVK5cWSxatEiEhYWJXr16CQBix44d0vx52Q9k/txn1/8FbYejo6P44YcfxKFDh8S8efOk7+WHDx+kuv3795f
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"campaigns_effectiveness(customer_musee)"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.11.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 5
|
|||
|
}
|