1240 lines
100 KiB
Plaintext
1240 lines
100 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"id": "dd143b00-1989-44cf-8558-a30087d17f70",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import os\n",
|
|||
|
"import s3fs\n",
|
|||
|
"import warnings\n",
|
|||
|
"from datetime import date, timedelta, datetime\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"id": "08c63120-1b56-4145-9014-18a637b22876",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
"# Load the shared KPI helpers into the notebook namespace; the context manager\n",
"# closes the file handle instead of leaving it to the garbage collector.\n",
"with open('../../0_KPI_functions.py') as kpi_source:\n",
"    exec(kpi_source.read())"
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"id": "f8bd679d-fa76-49d4-9ec1-9f15516f16d3",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Ignore warning\n",
|
|||
|
"warnings.filterwarnings('ignore')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "ec9e996d-3eae-4836-8cf5-268e5dc0d672",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Statistiques descriptives : compagnies sport"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "43f81515-fbd0-49c0-b3f8-0e0fb663e2c1",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Importations et chargement des données"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"id": "945c59bb-05b4-4f21-82f0-0db40d7957b3",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Create filesystem object\n",
|
|||
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
|||
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"id": "41a67995-0a08-45c0-bbad-6e6cee5474c8",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_5/campaigns_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_5/target_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_6/customerplus_cleaned.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_6/campaigns_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_6/products_purchased_reduced.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_6/target_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_7/customerplus_cleaned.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_7/campaigns_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_7/products_purchased_reduced.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_7/target_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_8/customerplus_cleaned.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_8/campaigns_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_8/products_purchased_reduced.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_8/target_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_9/customerplus_cleaned.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_9/campaigns_information.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_9/products_purchased_reduced.csv\n",
|
|||
|
"File path : projet-bdc2324-team1/0_Input/Company_9/target_information.csv\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
"# Build the aggregated KPI tables for companies 5 to 9 (the sport companies of this notebook).\n",
"\n",
"# Company identifiers are strings: they are reused below as customer_id prefixes.\n",
"nb_compagnie = ['5', '6', '7', '8', '9']\n",
"\n",
"# Collect the per-company frames and concatenate once after the loop, rather than\n",
"# re-copying four growing DataFrames with pd.concat on every iteration.\n",
"customer_frames = []\n",
"campaigns_frames = []\n",
"tickets_frames = []\n",
"products_frames = []\n",
"\n",
"for directory_path in nb_compagnie:\n",
"    # Raw tables of the company\n",
"    df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
"    df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
"    df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
"    df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
"\n",
"    # KPI tables derived from the raw tables\n",
"    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)\n",
"    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
"    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
"\n",
"    # Tag every row with its company number so results can be grouped per company\n",
"    company_number = int(directory_path)\n",
"    df_tickets_kpi[\"number_company\"] = company_number\n",
"    df_campaigns_kpi[\"number_company\"] = company_number\n",
"    df_customerplus_clean[\"number_company\"] = company_number\n",
"    # NOTE(review): df_target_information is tagged here but never added to an\n",
"    # aggregated table - confirm this is intended.\n",
"    df_target_information[\"number_company\"] = company_number\n",
"\n",
"    # Prefix customer_id with the company identifier (presumably so that ids\n",
"    # from different companies cannot collide once the tables are stacked)\n",
"    df_tickets_kpi[\"customer_id\"] = directory_path + '_' + df_tickets_kpi['customer_id'].astype('str')\n",
"    df_campaigns_kpi[\"customer_id\"] = directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str')\n",
"    df_customerplus_clean[\"customer_id\"] = directory_path + '_' + df_customerplus_clean['customer_id'].astype('str')\n",
"    df_products_purchased_reduced[\"customer_id\"] = directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str')\n",
"\n",
"    customer_frames.append(df_customerplus_clean)\n",
"    campaigns_frames.append(df_campaigns_kpi)\n",
"    tickets_frames.append(df_tickets_kpi)\n",
"    products_frames.append(df_products_purchased_reduced)\n",
"\n",
"# Single concatenation per table, with a fresh 0..n-1 index\n",
"customer_sport = pd.concat(customer_frames, ignore_index=True)\n",
"campaigns_sport = pd.concat(campaigns_frames, ignore_index=True)\n",
"tickets_sport = pd.concat(tickets_frames, ignore_index=True)\n",
"products_sport = pd.concat(products_frames, ignore_index=True)"
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "62922029-8071-402e-8115-c145a2874a2f",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Statistiques descriptives"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "d347bca9-3041-4414-b18e-19b626998a3e",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### 0. Détection du client anonyme (outlier) - utile pour la section 3"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"id": "c4d4b2ad-8a3c-477b-bc52-dd4860527bfe",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array([5, 6, 7, 8, 9])"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 6,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"sport_comp = tickets_sport['number_company'].unique()\n",
|
|||
|
"sport_comp"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"id": "97a9e235-1c04-46bf-9f3c-5496e141cc40",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
"def outlier_detection(company_list, show_diagram=False):\n",
"    \"\"\"Return, for each company, the customer_id with the largest summed total_amount.\n",
"\n",
"    Rows are read from the notebook-level ``tickets_sport`` table.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    company_list : iterable\n",
"        Values of ``number_company`` to process.\n",
"    show_diagram : bool, default False\n",
"        If True, draw one pie chart per company comparing the top customer\n",
"        with the sum of all the other customers (slice labelled 'Autre').\n",
"\n",
"    Returns\n",
"    -------\n",
"    list\n",
"        One customer_id per company, in the order of ``company_list``.\n",
"    \"\"\"\n",
"    outlier_list = []\n",
"\n",
"    for company in company_list:\n",
"        company_tickets = tickets_sport[tickets_sport['number_company'] == company]\n",
"\n",
"        # Total amount per customer, largest first\n",
"        amount_by_customer = (\n",
"            company_tickets.groupby('customer_id')['total_amount']\n",
"            .sum()\n",
"            .sort_values(ascending=False)\n",
"        )\n",
"\n",
"        # Only the single top customer is kept as the outlier\n",
"        top = amount_by_customer[:1]\n",
"        outlier_list.append(top.index[0])\n",
"\n",
"        if show_diagram:\n",
"            # Two slices: the top customer versus everybody else\n",
"            rest_sum = amount_by_customer[1:].sum()\n",
"            new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
"\n",
"            plt.figure(figsize=(3, 3))\n",
"            plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
"            plt.axis('equal')  # keep the pie circular\n",
"            plt.title(f'Répartition des montants totaux pour la compagnie {company}')\n",
"            plt.show()\n",
"\n",
"    return outlier_list"
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"id": "770cd3fc-bfe2-4a69-89bc-0eb946311130",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"['5_191835', '6_591412', '7_49632', '8_1942', '9_19683']"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"outlier_list = outlier_detection(sport_comp)\n",
|
|||
|
"outlier_list"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"id": "70b6e961-c303-465e-93f4-609721d38454",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Suppression Réussie\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
"# Remove the outlier customers from every aggregated table\n",
"\n",
"def remove_elements(lst, elements_to_remove):\n",
"    \"\"\"Return the items of `lst` that are not in `elements_to_remove`, as a list.\"\"\"\n",
"    return [x for x in lst if x not in elements_to_remove]\n",
"\n",
"\n",
"def drop_outlier_customers(dataset, outliers):\n",
"    \"\"\"Return `dataset` without the rows whose customer_id is in `outliers`.\"\"\"\n",
"    # Row filter on the column values. Applying remove_elements to each id would\n",
"    # iterate over the characters of the id string and turn every id into a list\n",
"    # of characters without dropping any row.\n",
"    return dataset[~dataset['customer_id'].isin(outliers)].reset_index(drop=True)\n",
"\n",
"\n",
"customer_sport = drop_outlier_customers(customer_sport, outlier_list)\n",
"campaigns_sport = drop_outlier_customers(campaigns_sport, outlier_list)\n",
"tickets_sport = drop_outlier_customers(tickets_sport, outlier_list)\n",
"products_sport = drop_outlier_customers(products_sport, outlier_list)\n",
"\n",
"databases = [customer_sport, campaigns_sport, tickets_sport, products_sport]\n",
"\n",
"# Check the result on the column values: `x in series` tests the index labels,\n",
"# not the values, so it cannot be used to look for a customer_id.\n",
"for dataset in databases:\n",
"    assert not dataset['customer_id'].isin(outlier_list).any(), \"outlier customer_id still present\"\n",
"\n",
"print(\"Suppression Réussie\")"
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"id": "b54b920a-7b46-490f-ba7e-d1859055a4e3",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>customer_id</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>structure_id</th>\n",
|
|||
|
" <th>mcp_contact_id</th>\n",
|
|||
|
" <th>fidelity</th>\n",
|
|||
|
" <th>tenant_id</th>\n",
|
|||
|
" <th>is_partner</th>\n",
|
|||
|
" <th>deleted_at</th>\n",
|
|||
|
" <th>gender</th>\n",
|
|||
|
" <th>is_email_true</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>purchase_count</th>\n",
|
|||
|
" <th>first_buying_date</th>\n",
|
|||
|
" <th>country</th>\n",
|
|||
|
" <th>gender_label</th>\n",
|
|||
|
" <th>gender_female</th>\n",
|
|||
|
" <th>gender_male</th>\n",
|
|||
|
" <th>gender_other</th>\n",
|
|||
|
" <th>country_fr</th>\n",
|
|||
|
" <th>has_tags</th>\n",
|
|||
|
" <th>number_company</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>[5, _, 6, 0, 0, 9, 7, 4, 5]</td>\n",
|
|||
|
" <td>1372685</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>af</td>\n",
|
|||
|
" <td>other</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>[5, _, 6, 0, 1, 1, 2, 2, 8]</td>\n",
|
|||
|
" <td>1372685</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>af</td>\n",
|
|||
|
" <td>other</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>[5, _, 6, 0, 5, 8, 9, 5, 0]</td>\n",
|
|||
|
" <td>1372685</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>af</td>\n",
|
|||
|
" <td>other</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>[5, _, 6, 0, 6, 2, 4, 0, 4]</td>\n",
|
|||
|
" <td>1372685</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>af</td>\n",
|
|||
|
" <td>other</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>[5, _, 2, 5, 0, 2, 1, 7]</td>\n",
|
|||
|
" <td>78785</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>11035.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>5 rows × 29 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" customer_id street_id structure_id mcp_contact_id \\\n",
|
|||
|
"0 [5, _, 6, 0, 0, 9, 7, 4, 5] 1372685 NaN NaN \n",
|
|||
|
"1 [5, _, 6, 0, 1, 1, 2, 2, 8] 1372685 NaN NaN \n",
|
|||
|
"2 [5, _, 6, 0, 5, 8, 9, 5, 0] 1372685 NaN NaN \n",
|
|||
|
"3 [5, _, 6, 0, 6, 2, 4, 0, 4] 1372685 NaN NaN \n",
|
|||
|
"4 [5, _, 2, 5, 0, 2, 1, 7] 78785 NaN 11035.0 \n",
|
|||
|
"\n",
|
|||
|
" fidelity tenant_id is_partner deleted_at gender is_email_true ... \\\n",
|
|||
|
"0 0 1771 False NaN 2 True ... \n",
|
|||
|
"1 0 1771 False NaN 2 True ... \n",
|
|||
|
"2 0 1771 False NaN 2 True ... \n",
|
|||
|
"3 0 1771 False NaN 2 True ... \n",
|
|||
|
"4 0 1771 False NaN 0 True ... \n",
|
|||
|
"\n",
|
|||
|
" purchase_count first_buying_date country gender_label gender_female \\\n",
|
|||
|
"0 0 NaN af other 0 \n",
|
|||
|
"1 0 NaN af other 0 \n",
|
|||
|
"2 0 NaN af other 0 \n",
|
|||
|
"3 0 NaN af other 0 \n",
|
|||
|
"4 0 NaN fr female 1 \n",
|
|||
|
"\n",
|
|||
|
" gender_male gender_other country_fr has_tags number_company \n",
|
|||
|
"0 0 1 0.0 0 5 \n",
|
|||
|
"1 0 1 0.0 0 5 \n",
|
|||
|
"2 0 1 0.0 0 5 \n",
|
|||
|
"3 0 1 0.0 0 5 \n",
|
|||
|
"4 0 0 1.0 0 5 \n",
|
|||
|
"\n",
|
|||
|
"[5 rows x 29 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"customer_sport.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "d40fe668-e1d7-4544-9db8-02498afe65fe",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### 1. customerplus_clean"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 55,
|
|||
|
"id": "eec1ac0b-2502-452b-97e6-69ffb77156d6",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
"def compute_nb_clients(customer_sport):\n",
"    \"\"\"Draw a bar chart of the number of buying customers per company.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    customer_sport : pandas.DataFrame\n",
"        Customer table with the columns ``purchase_count``, ``number_company``\n",
"        and ``customer_id``. Only rows with ``purchase_count > 0`` are counted.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is shown; counts are plotted in thousands.\n",
"    \"\"\"\n",
"    buyers = customer_sport[customer_sport[\"purchase_count\"] > 0]\n",
"    company_nb_clients = buyers.groupby(\"number_company\")[\"customer_id\"].count().reset_index()\n",
"\n",
"    fig, ax = plt.subplots()\n",
"    ax.bar(company_nb_clients[\"number_company\"], company_nb_clients[\"customer_id\"] / 1000)\n",
"\n",
"    # Titles and labels (this notebook covers the sport companies)\n",
"    ax.set_xlabel('Company')\n",
"    ax.set_ylabel(\"Nombre de clients (milliers)\")\n",
"    ax.set_title(\"Nombre de clients de chaque compagnie de sport\")\n",
"\n",
"    plt.show()"
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 56,
|
|||
|
"id": "db4494e7-6f65-4f7e-bf8c-8ec321d0b02d",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHFCAYAAAAUpjivAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJuElEQVR4nO3dd3zNd///8edBtghSEiEIYoZarZa2oVZTlKKqepmlWlq1alzaCjVqVHVy6TCqRnsVV6lZNKpRe9So0dqkdmKGJO/fH345X0dCc+TEiY/H/XY7t5vz/rzP5/M67zPy9P6MYzPGGAEAAFhUDncXAAAAkJUIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIO1lsypQpstls8vb21sGDB9Msr127tiIiItxQmdShQwflzp3bLdv+JzabTdHR0Xd1m7Vr11bt2rXveh0LFy68a8/VHeNavHhxNW7c+K5uE9nfgQMHZLPZNGXKlCzfljve9+5w7NgxRUdHa8uWLVm6nbv52rkKYecuSUxM1FtvveXuMuCkNWvWqHPnzlm6jYULF2rIkCFZug0guylUqJDWrFmjRo0aubsUyzh27JiGDBmS5WHnXkTYuUueeuopzZgxQ1u3bnV3KS5hjNHly5fdXUaWe+SRR1SkSBF3lwFYjpeXlx555BEVKFDA3aXgPkDYuUv69eunwMBA9e/f/x/7XrlyRQMHDlRYWJg8PT1VuHBhde/eXefOnXPol7p7YMGCBapSpYp8fHxUrlw5LViwQNL1XWjlypWTn5+fHn74YW3YsCHd7e3YsUN169aVn5+fChQooNdee02XLl1y6GOz2fTaa69p4sSJKleunLy8vDR16lRJ0t69e9WmTRsVLFhQXl5eKleunD799NMMjUtCQoK6dOmiwMBA5c6dW0899ZT27NmTbt/MbCclJUUff/yxKleuLB8fH+XNm1ePPPKIfvjhh9s+Lr3p77i4OHXt2lVFihSRp6enwsLCNGTIECUlJdn7pE7zjh07VuPGjVNYWJhy586tRx99VL/99pu9X4cOHezPwWaz2W8HDhyQJH333XeqUaOGAgIC5OvrqxIlSqhTp07/+Hyz47guXrxYVatWlY+Pj8qWLauvvvrKYfnJkyfVrVs3lS9fXrlz51bBggX15JNP6pdffkmzrmPHjqlVq1by9/dXQECAnn/+ef32229pptbT2zUpXR/34sWLO7RdvXpVw4YNU9myZeXl5aUCBQqoY8eOOnnyZIbGYu3atWrSpIkCAwPl7e2tkiVLqmfPng59Vq9erbp168rf31++vr6qWbOmfvzxR4c+qbu+V6xYYX8N8+TJo3bt2unixYuKi4tTq1atlDdvXhUqVEh9+/bVtWvX7I9Pfe+NHj1aw4cPV9GiReXt7a3q1atr+fLlDtvat2+fOnbsqPDwcPn6+qpw4cJq0qSJfv/99zTPb8eOHWrQoIF8fX1VoEABde/eXT/++KNsNpt+/vlnhzGPiIjQ+vXr9fjjj9vft++9955SUlLS1HnzrpB74fvknz6XP//8s2w2m6ZPn67evXsrODhYPj4+ioyM1ObNm9Osb8OGDXrmmWeUP39+eXt7q0qVKvr222/T9Dt69KhefvllhYaGytPTUyEhIWrZsqX+/vtv/fzzz3rooYckSR07drR/l6R+f23YsEGtW7dW8eLF5ePjo+LFi+uFF15I9/CK223ndjIzplnOIEtNnjzZSDLr1683H374oZFkli9fbl8eGRlpKlSoYL+fkpJiGjZsaHLlymXefvtts3TpUjN27Fjj5+dnqlSpYq5cuWLvW6xYMVOkSBETERFhZs6caRYuXGhq1KhhPDw8zDvvvGNq1apl5syZY+bOnWtKly5tgoKCzKVLl+yPb9++vfH09DRFixY1w4cPN0uXLjXR0dEmV65cpnHjxg7PQ5IpXLiwqVSpkpkxY4ZZsW
KF2b59u9mxY4cJCAgwFStWNNOmTTNLly41ffr0MTly5DDR0dG3HZuUlBRTp04d4+XlZd/+4MGDTYkSJYwkM3jwYHvfzGzHGGPatm1rbDab6dy5s/nf//5nFi1aZIYPH24+/PBDh9ciMjIyzfO+sY7jx4+b0NBQU6xYMfOf//zH/PTTT+bdd981Xl5epkOHDvZ++/fvN5JM8eLFzVNPPWXmzZtn5s2bZypWrGjy5ctnzp07Z4wxZt++faZly5ZGklmzZo39duXKFRMbG2tsNptp3bq1WbhwoVmxYoWZPHmyadu27T01rqnv0/Lly5tp06aZJUuWmOeee85IMjExMfZ+f/zxh3n11VfNrFmzzM8//2wWLFhgXnrpJZMjRw6zcuVKe79Lly6ZcuXKmYCAAPPxxx+bJUuWmB49epiiRYsaSWby5Mm3fU2Nuf7eL1asmP1+cnKyeeqpp4yfn58ZMmSIWbZsmfniiy9M4cKFTfny5R0+N+lZvHix8fDwMJUqVTJTpkwxK1asMF999ZVp3bq1vc/PP/9sPDw8TLVq1czs2bPNvHnzTIMGDYzNZjOzZs2y90v9zggLCzN9+vQxS5cuNaNGjTI5c+Y0L7zwgqlataoZNmyYWbZsmenfv7+RZN5//33741Pfe6Ghoeaxxx4z33//vfnuu+/MQw89ZDw8PExsbKy9b0xMjOnTp4/573//a2JiYszcuXNNs2bNjI+Pj/njjz/s/Y4dO2YCAwNN0aJFzZQpU8zChQtN27ZtTfHixY0kh9cnMjLSBAYGmvDwcDNx4kSzbNky061bNyPJTJ06NU2dN75e98L3SUY+lytXrrS/Bk2bNjXz588306dPN6VKlTJ58uQxf/75p73vihUrjKenp3n88cfN7NmzzeLFi02HDh3SjM2RI0dMoUKFzAMPPGDGjRtnfvrpJzN79mzTqVMns2vXLhMfH29/77z11lv275LDhw8bY4z57rvvzDvvvGPmzp1rYmJizKxZs0xkZKQpUKCAOXnyZIa3kxWv3d1A2MliN4adxMREU6JECVO9enWTkpJijEkbdhYvXmwkmdGjRzusZ/bs2UaSmTRpkr2tWLFixsfHxxw5csTetmXLFiPJFCpUyFy8eNHePm/ePCPJ/PDDD/a29u3bG0kOf5iMMWb48OFGklm9erW9TZIJCAgwZ86ccejbsGFDU6RIERMfH+/Q/tprrxlvb+80/W+0aNGi227/xi+nzGxn1apVRpIZNGjQLfsYk7Gw07VrV5M7d25z8OBBh35jx441ksyOHTuMMf/3ZVCxYkWTlJRk77du3TojycycOdPe1r17d5Pe/ztS15kajDIqu41rsWLFjLe3t8OYXb582eTPn9907dr1lo9LSkoy165dM3Xr1jXPPvusvX3ChAlGkvnf//7n0L9Lly53HHZmzpxpJJnvv//eod/69euNJPPZZ5/d9jmWLFnSlCxZ0ly+fPmWfR555BFTsGBBc/78eYfnGBERYYoUKWL/Tkj9znj99dcdHt+sWTMjyYwbN86hvXLlyqZq1ar2+6nvvZCQEId6EhISTP78+U29evVuWWNSUpK5evWqCQ8PN7169bK3v/nmm8Zms9nf36kaNmyYbtiRZNauXevQt3z58qZhw4Zp6rzx9boXvk8y8rlMDTtVq1a1v67GGHPgwAHj4eFhOnfubG8rW7asqVKlirl27ZrDOho3bmwKFSpkkpOTjTHGdOrUyXh4eJidO3fecrup79cbx/RWkpKSzIULF4yfn5/DmGVkO65+7e4GdmPdRZ6enho2bJg2bNiQ7hSlJK1YsULS9Wn2Gz333HPy8/NLMw1duXJlFS5c2H6/XLlykq5PJfv6+qZpT2/K8sUXX3S436ZNG0nSypUrHdqffPJJ5cuXz37/ypUrWr58uZ599ln5+voqKSnJfnv66ad15coVh102N0td/62276rtLFq0SJLUvXv3W/bJqAULFqhOnToKCQlxqCMqKkqSFBMT49
C/UaNGypkzp/1+pUqVJKX/OtwsdUq6VatW+vbbb3X06NEM1Zgdx7Vy5coqWrSo/b63t7dKly6dZhwmTpyoqlWrytv
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"compute_nb_clients(customer_sport)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 59,
|
|||
|
"id": "a12a59a0-edfe-4e52-8037-9b875f823b33",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
"def maximum_price_paid(customer_sport):\n",
"    \"\"\"Draw a bar chart of the highest ``max_price`` observed in each company.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    customer_sport : pandas.DataFrame\n",
"        Customer table with the columns ``number_company`` and ``max_price``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The figure is shown.\n",
"    \"\"\"\n",
"    company_max_price = customer_sport.groupby(\"number_company\")[\"max_price\"].max().reset_index()\n",
"\n",
"    fig, ax = plt.subplots()\n",
"    ax.bar(company_max_price[\"number_company\"], company_max_price[\"max_price\"])\n",
"\n",
"    # Titles and labels (this notebook covers the sport companies)\n",
"    ax.set_xlabel('Company')\n",
"    ax.set_ylabel(\"Prix maximal d'un billet vendu\")\n",
"    ax.set_title(\"Prix maximal de vente observé par compagnie de sport\")\n",
"\n",
"    plt.show()"
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 60,
|
|||
|
"id": "2c7c2d26-4e35-4163-b771-fa4d3e8ca83e",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAAHGCAYAAAC7NbWGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABXWUlEQVR4nO3deVyN6f8/8NfRpvWotJLKMpGyjwiTLTGVscxYoqkYM7YxjRrLZ8ZYxs5g8DFmk7GMzBKDbNkaRlnCEPGxRKEwScmSOl2/P3y7f47KnJvTnBOv5+NxHjrXfZ37ft9355xerntTCCEEiIiIiEhj1XRdABEREVFVwwBFREREJBMDFBEREZFMDFBEREREMjFAEREREcnEAEVEREQkEwMUERERkUwMUEREREQyMUARERERycQARUT0nP7zn//AwcEBFy9e1HUpRPQvY4DSkpUrV0KhUEgPQ0ND1K5dGxEREbh27ZpG8wgPD4ebm1vlFqpDpdvo8uXL//qyL1++DIVCgZUrVz7X6/ft2weFQoF9+/ZptS5d2Lp1K6ZMmaKz5Ze+D44ePaqzGrRh27ZtWLp0KbZs2YJ69erpuhyqRAqF4l/5zLi5uSE8PLzSl6Nr9+/fx5QpU/6V79PK/N0xQGlZTEwMkpKSkJCQgGHDhmHdunXo0KED7t2794+vnTRpEjZs2PAvVKkbgYGBSEpKgpOTk65LeaVt3boVU6dO1XUZVVpmZiYiIiKwfv16vP7667ouhypZUlIS3nvvPV2X8dK4f/8+pk6dWuX/Q2qo6wJeNl5eXmjVqhUAoFOnTlCpVPjiiy+wceNGDBo0qNzX3L9/H2ZmZi/9/2Lt7OxgZ2en6zKIJKWfPblcXFyQnZ1dCRW9uOddJ6pYmzZtdF0C6SGOQFWy0g/elStXADzeTWdhYYFTp06hW7dusLS0RJcuXaRpT+7Ci42NhUKhwNKlS9XmOXnyZBgYGCAhIeGZy3Zzc0NQUBC2bNmC5s2bw9TUFI0aNcKWLVsAPN6V0qhRI5ibm6N169ZldqkcPXoUAwYMgJubG0xNTeHm5oaBAwdK6wIAQgi8+eabsLW1RUZGhtR+//59NG7cGI0aNZJG38rbhdexY0d4eXkhKSkJvr6+0nJiYmIAAPHx8WjRogXMzMzg7e2N7du3q9V44cIFREREoEGDBjAzM0OtWrUQHByMU6dOPXPbPMvZs2fRvXt3mJmZoWbNmhg+fDju3r1bbt9du3ahS5cusLKygpmZGdq1a4fdu3c/c/63bt2CsbExJk2aVO6yFQoFFi9eLLVlZ2fjgw8+QO3atWFsbAx3d3dMnToVxcXFUp/SXZTz58/HggUL4O7uDgsLC7Rt2xbJyclSv/DwcPz3v/8FALVdzqW/EyEEli1bhmbNmsHU1BTW1tZ4++23cenSJY223YEDB9ClSxdYWlrCzMwMvr6+iI+PL7dvbm4uIiIiYGNjA3NzcwQHB5dZzvHjxxEUFAR7e3uYmJjA2dkZgYGBuHr1qtRH05pL32t//PEHfH19YWZmhiFDhqBXr15wdXVFSUlJmRp9fHzQokUL2csqz5QpU6BQKHD8+HH06dMHVlZWUCqVGDx4MG7duqXWd/369ejWrRucnJykz+2ECRPKjGQ/6/ukImfPnsXAgQPh4OAAExMT1KlTB++++y4KCwulPqmpqXjrrbdgbW2N6tWro1mzZvjxxx/V5lO6W/unn37C+PHj4eTkBAsLCwQHB+PGjRu4e/cu3n//fdSsWRM1a9ZEREQECgoK1OahUCgwevRofPPNN3jttddgYmICT09PxMbGqvW7desWRo4cCU9PT1hYWMDe3h6dO3fG/v37y6zf1atX8fbbb8PS0hI1atTAoEGDcOTIkTK78Eu33YULF/Dmm2/CwsICLi4uiIqKUtsWpXU+vRtIk89lRYqKijBu3Dg4OjrCzMwM7du3x+HDh8vt+yLL2bNnDzp27AhbW1uYmpqiTp
066Nu3L+7fvw/g/39vzJ07FzNmzECdOnVQvXp1tGrVqtzvsfPnzyMkJET6PDZq1Ej6PnnSnTt3EBUVhbp168LExAT29vZ48803cfbsWVy+fFn6j/TUqVOl76DSXZdyvtOftZxneZFtqkaQVsTExAgA4siRI2rtX331lQAgvv32WyGEEGFhYcLIyEi4ubmJWbNmid27d4sdO3ZI01xdXdVeP3z4cGFsbCzNd/fu3aJatWris88++8eaXF1dRe3atYWXl5dYt26d2Lp1q/Dx8RFGRkbi888/F+3atRNxcXFiw4YN4rXXXhMODg7i/v370ut/+eUX8fnnn4sNGzaIxMREERsbK/z8/ISdnZ24deuW1O/vv/8WtWvXFj4+PuLRo0fSupiamoqTJ0+W2Ubp6elSm5+fn7C1tRUeHh7ihx9+EDt27BBBQUECgJg6darw9vaWam/Tpo0wMTER165dk16fmJgooqKixK+//ioSExPFhg0bRK9evYSpqak4e/as1C89PV0AEDExMc/cZtnZ2cLe3l7UqlVLxMTEiK1bt4pBgwaJOnXqCABi7969Ut/Vq1cLhUIhevXqJeLi4sTmzZtFUFCQMDAwELt27Xrmcnr37i1cXFyESqVSax83bpwwNjYWf//9txBCiKysLOHi4iJcXV3FN998I3bt2iW++OILYWJiIsLDw8usn5ubm+jevbvYuHGj2Lhxo/D29hbW1tbizp07QgghLly4IN5++20BQCQlJUmPhw8fCiGEGDZsmDAyMhJRUVFi+/bt4qeffhINGzYUDg4OIjs7+5nrtG/fPmFkZCRatmwp1q9fLzZu3Ci6desmFAqFiI2NlfqVvg9cXFzEkCFDxLZt28S3334r7O3thYuLi8jNzRVCCFFQUCBsbW1Fq1atxM8//ywSExPF+vXrxfDhw8WZM2ek+Wlas5+fn7CxsREuLi5iyZIlYu/evSIxMVH8/vvvAoBISEhQW5+0tDQBQCxevFj2ssozefJkAUC4urqKTz75ROzYsUMsWLBAmJubi+bNm0ufHSGE+OKLL8TChQtFfHy82Ldvn1i+fLlwd3cXnTp1Upvns75PynPixAlhYWEh3NzcxPLly8Xu3bvFmjVrRL9+/UR+fr4QQoizZ88KS0tLUa9ePbFq1SoRHx8vBg4cKACIOXPmSPPau3evtD7h4eFi+/btYvny5cLCwkJ06tRJ+Pv7i+joaLFz504xZ84cYWBgID788EO1ekrfB56enmLdunVi06ZNonv37gKA+OWXX6R+Z8+eFSNGjBCxsbFi3759YsuWLWLo0KGiWrVqap/JgoICUb9+fWFjYyP++9//ih07doiPP/5YuLu7l/n8h4WFCWNjY9GoUSMxf/58sWvXLvH5558LhUIhpk6dWqbOyZMnS881/VxWJCwsTCgUCvHJJ5+InTt3igULFohatWoJKysrERYWppXlpKeni+rVqwt/f3+xceNGsW/fPrF27VoRGhoqfcZKvzdcXFxE+/btxW+//SZ++eUX8frrrwsjIyNx8OBBaX6nT58WSqVSeHt7i1WrVomdO3eKqKgoUa1aNTFlyhSpX35+vmjcuLEwNzcX06ZNEzt27BC//fab+Oijj8SePXvEw4cPxfbt2wUAMXToUOk76MKFC0IIzb/T/2k5lfW7exIDlJaU/lFITk4WRUVF4u7du2LLli3Czs5OWFpaSl+uYWFhAoBYsWJFmXmUF6AePnwomjdvLtzd3cWZM2eEg4OD8PPzE8XFxf9Yk6urqzA1NRVXr16V2k6cOCEACCcnJ3Hv3j2pfePGjQKA2LRpU4XzKy4uFgUFBcLc3Fx89dVXatMOHDggDA0NRWRkpFixYoUAIL7//vtyt9HTAQqAOHr0qNSWk5MjDAwMhKmpqVpYKq39yT9o5dX46NEj0aBBA/Hxxx9L7ZoGqPHjxwuFQiFOnDih1u7v768WoO7duydsbGxEcHCwWj+VSiWaNm0qWrdu/czlbNq0SQ
AQO3fuVKvd2dlZ9O3bV2r74IMPhIWFhbhy5Yra6+fPny8AiNOnT6utn7e3t9p74/DhwwKAWLdundQ2atQoUd7/nZK
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"maximum_price_paid(customer_sport)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 65,
|
|||
|
"id": "597d4361-8beb-43f4-9224-8f7dc34b187c",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Statistiques Descriptives company 5\n",
|
|||
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|||
|
"count 145390.000000 68869.000000 68869.000000 \n",
|
|||
|
"mean 11.070309 65.969693 3.655202 \n",
|
|||
|
"std 16.353610 195.462869 13.119612 \n",
|
|||
|
"min 0.000000 0.000000 1.000000 \n",
|
|||
|
"25% 0.000000 20.000000 1.000000 \n",
|
|||
|
"50% 0.000000 45.000000 2.000000 \n",
|
|||
|
"75% 20.000000 79.500000 3.000000 \n",
|
|||
|
"max 500.000000 24159.405000 2139.833333 \n",
|
|||
|
"\n",
|
|||
|
" purchase_count total_price \n",
|
|||
|
"count 471598.00000 3.950770e+05 \n",
|
|||
|
"mean 0.29900 2.608544e+01 \n",
|
|||
|
"std 7.22753 2.089636e+03 \n",
|
|||
|
"min 0.00000 0.000000e+00 \n",
|
|||
|
"25% 0.00000 0.000000e+00 \n",
|
|||
|
"50% 0.00000 0.000000e+00 \n",
|
|||
|
"75% 0.00000 0.000000e+00 \n",
|
|||
|
"max 3532.00000 1.262516e+06 \n",
|
|||
|
"Statistiques Descriptives company 6\n",
|
|||
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|||
|
"count 33779.000000 33779.000000 33779.000000 \n",
|
|||
|
"mean 24.033859 56.711279 2.413530 \n",
|
|||
|
"std 21.217031 72.841926 3.763809 \n",
|
|||
|
"min -52.740000 -1046.666667 1.000000 \n",
|
|||
|
"25% 10.000000 19.000000 1.080000 \n",
|
|||
|
"50% 19.333333 39.000000 2.000000 \n",
|
|||
|
"75% 30.000000 72.990000 3.000000 \n",
|
|||
|
"max 199.990000 3922.845361 309.047619 \n",
|
|||
|
"\n",
|
|||
|
" purchase_count total_price \n",
|
|||
|
"count 79938.000000 79938.000000 \n",
|
|||
|
"mean 2.842090 102.251041 \n",
|
|||
|
"std 74.949889 4290.159858 \n",
|
|||
|
"min 0.000000 -3140.000000 \n",
|
|||
|
"25% 0.000000 0.000000 \n",
|
|||
|
"50% 0.000000 0.000000 \n",
|
|||
|
"75% 1.000000 54.980000 \n",
|
|||
|
"max 14750.000000 762695.290000 \n",
|
|||
|
"Statistiques Descriptives company 7\n",
|
|||
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|||
|
"count 39524.000000 39524.000000 39524.000000 \n",
|
|||
|
"mean 33.110568 155.618778 3.365885 \n",
|
|||
|
"std 85.221328 1085.613137 6.283143 \n",
|
|||
|
"min 0.000000 0.000000 1.000000 \n",
|
|||
|
"25% 17.250000 25.000000 1.800000 \n",
|
|||
|
"50% 25.000000 57.676364 2.000000 \n",
|
|||
|
"75% 43.054691 115.837500 3.555556 \n",
|
|||
|
"max 10770.000000 86160.000000 400.000000 \n",
|
|||
|
"\n",
|
|||
|
" purchase_count total_price \n",
|
|||
|
"count 68800.000000 68800.000000 \n",
|
|||
|
"mean 3.290029 944.593729 \n",
|
|||
|
"std 88.071870 12118.394731 \n",
|
|||
|
"min 0.000000 0.000000 \n",
|
|||
|
"25% 0.000000 0.000000 \n",
|
|||
|
"50% 1.000000 9.000000 \n",
|
|||
|
"75% 2.000000 132.000000 \n",
|
|||
|
"max 22934.000000 940874.200000 \n",
|
|||
|
"Statistiques Descriptives company 8\n",
|
|||
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|||
|
"count 129198.000000 129198.000000 129198.000000 \n",
|
|||
|
"mean 18.409977 38.492520 2.258036 \n",
|
|||
|
"std 19.159059 71.136628 5.270858 \n",
|
|||
|
"min -20.000000 -1545.000000 1.000000 \n",
|
|||
|
"25% 0.000000 0.000000 1.000000 \n",
|
|||
|
"50% 15.000000 20.000000 2.000000 \n",
|
|||
|
"75% 28.461538 52.500000 2.000000 \n",
|
|||
|
"max 390.000000 7618.227273 750.000000 \n",
|
|||
|
"\n",
|
|||
|
" purchase_count total_price \n",
|
|||
|
"count 197376.000000 197376.000000 \n",
|
|||
|
"mean 4.637448 130.336075 \n",
|
|||
|
"std 96.228665 2791.899946 \n",
|
|||
|
"min 0.000000 -36124.000000 \n",
|
|||
|
"25% 0.000000 0.000000 \n",
|
|||
|
"50% 1.000000 0.000000 \n",
|
|||
|
"75% 2.000000 75.000000 \n",
|
|||
|
"max 40272.000000 702080.290000 \n",
|
|||
|
"Statistiques Descriptives company 9\n",
|
|||
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|||
|
"count 102710.000000 102710.000000 102710.000000 \n",
|
|||
|
"mean 60.312171 62.384177 1.042402 \n",
|
|||
|
"std 50.018101 52.009984 0.268064 \n",
|
|||
|
"min -291.670000 -291.670000 1.000000 \n",
|
|||
|
"25% 41.500000 42.350000 1.000000 \n",
|
|||
|
"50% 59.000000 61.070000 1.000000 \n",
|
|||
|
"75% 74.550000 77.710000 1.000000 \n",
|
|||
|
"max 1116.500000 1216.950000 23.000000 \n",
|
|||
|
"\n",
|
|||
|
" purchase_count total_price \n",
|
|||
|
"count 181134.000000 181134.000000 \n",
|
|||
|
"mean 1.021354 63.476966 \n",
|
|||
|
"std 1.805412 129.781944 \n",
|
|||
|
"min 0.000000 -291.670000 \n",
|
|||
|
"25% 0.000000 0.000000 \n",
|
|||
|
"50% 1.000000 0.000000 \n",
|
|||
|
"75% 1.000000 80.000000 \n",
|
|||
|
"max 273.000000 14343.950000 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"for company in sport_comp:\n",
|
|||
|
" print(f'Statistiques Descriptives company {company}')\n",
|
|||
|
" company_data = customer_sport[customer_sport['number_company'] == company][['average_price', 'average_price_basket',\n",
|
|||
|
" 'average_ticket_basket', 'purchase_count', 'total_price']]\n",
|
|||
|
" print(company_data.describe())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "5058d3c9-73a0-4e01-881e-4d2423f0d291",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"customer_sport[\"already_purchased\"] = customer_sport[\"purchase_count\"] > 0"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 69,
|
|||
|
"id": "986a0e41-ae31-46c5-a009-861530d85f45",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>customer_id</th>\n",
|
|||
|
" <th>street_id</th>\n",
|
|||
|
" <th>structure_id</th>\n",
|
|||
|
" <th>mcp_contact_id</th>\n",
|
|||
|
" <th>fidelity</th>\n",
|
|||
|
" <th>tenant_id</th>\n",
|
|||
|
" <th>is_partner</th>\n",
|
|||
|
" <th>deleted_at</th>\n",
|
|||
|
" <th>gender</th>\n",
|
|||
|
" <th>is_email_true</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>purchase_count</th>\n",
|
|||
|
" <th>first_buying_date</th>\n",
|
|||
|
" <th>country</th>\n",
|
|||
|
" <th>gender_label</th>\n",
|
|||
|
" <th>gender_female</th>\n",
|
|||
|
" <th>gender_male</th>\n",
|
|||
|
" <th>gender_other</th>\n",
|
|||
|
" <th>country_fr</th>\n",
|
|||
|
" <th>has_tags</th>\n",
|
|||
|
" <th>number_company</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>[5, _, 6, 0, 0, 9, 7, 4, 5]</td>\n",
|
|||
|
" <td>1372685</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>af</td>\n",
|
|||
|
" <td>other</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>[5, _, 6, 0, 1, 1, 2, 2, 8]</td>\n",
|
|||
|
" <td>1372685</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>af</td>\n",
|
|||
|
" <td>other</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>[5, _, 6, 0, 5, 8, 9, 5, 0]</td>\n",
|
|||
|
" <td>1372685</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>af</td>\n",
|
|||
|
" <td>other</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>[5, _, 6, 0, 6, 2, 4, 0, 4]</td>\n",
|
|||
|
" <td>1372685</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>af</td>\n",
|
|||
|
" <td>other</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>[5, _, 2, 5, 0, 2, 1, 7]</td>\n",
|
|||
|
" <td>78785</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>11035.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1771</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>998841</th>\n",
|
|||
|
" <td>[9, _, 9, 9, 5, 1, 4, 6]</td>\n",
|
|||
|
" <td>607676</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1490</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2022-05-12 06:20:49+00:00</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>998842</th>\n",
|
|||
|
" <td>[9, _, 9, 7, 0, 8, 9, 1]</td>\n",
|
|||
|
" <td>587855</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1490</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2022-05-03 04:20:43+00:00</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>998843</th>\n",
|
|||
|
" <td>[9, _, 8, 4, 4, 3, 0, 2]</td>\n",
|
|||
|
" <td>484177</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1490</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2022-03-27 12:15:02+00:00</td>\n",
|
|||
|
" <td>de</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>998844</th>\n",
|
|||
|
" <td>[9, _, 9, 4, 1, 2, 6, 0]</td>\n",
|
|||
|
" <td>564032</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1490</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2022-04-20 15:12:38+00:00</td>\n",
|
|||
|
" <td>ch</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>998845</th>\n",
|
|||
|
" <td>[9, _, 8, 0, 9, 7, 4, 2]</td>\n",
|
|||
|
" <td>453747</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1490</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2022-03-07 20:42:07+00:00</td>\n",
|
|||
|
" <td>fr</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>9</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>998846 rows × 29 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" customer_id street_id structure_id mcp_contact_id \\\n",
|
|||
|
"0 [5, _, 6, 0, 0, 9, 7, 4, 5] 1372685 NaN NaN \n",
|
|||
|
"1 [5, _, 6, 0, 1, 1, 2, 2, 8] 1372685 NaN NaN \n",
|
|||
|
"2 [5, _, 6, 0, 5, 8, 9, 5, 0] 1372685 NaN NaN \n",
|
|||
|
"3 [5, _, 6, 0, 6, 2, 4, 0, 4] 1372685 NaN NaN \n",
|
|||
|
"4 [5, _, 2, 5, 0, 2, 1, 7] 78785 NaN 11035.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"998841 [9, _, 9, 9, 5, 1, 4, 6] 607676 NaN NaN \n",
|
|||
|
"998842 [9, _, 9, 7, 0, 8, 9, 1] 587855 NaN NaN \n",
|
|||
|
"998843 [9, _, 8, 4, 4, 3, 0, 2] 484177 NaN NaN \n",
|
|||
|
"998844 [9, _, 9, 4, 1, 2, 6, 0] 564032 NaN NaN \n",
|
|||
|
"998845 [9, _, 8, 0, 9, 7, 4, 2] 453747 NaN NaN \n",
|
|||
|
"\n",
|
|||
|
" fidelity tenant_id is_partner deleted_at gender is_email_true \\\n",
|
|||
|
"0 0 1771 False NaN 2 True \n",
|
|||
|
"1 0 1771 False NaN 2 True \n",
|
|||
|
"2 0 1771 False NaN 2 True \n",
|
|||
|
"3 0 1771 False NaN 2 True \n",
|
|||
|
"4 0 1771 False NaN 0 True \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"998841 1 1490 False NaN 1 True \n",
|
|||
|
"998842 1 1490 False NaN 1 True \n",
|
|||
|
"998843 1 1490 False NaN 1 True \n",
|
|||
|
"998844 1 1490 False NaN 1 True \n",
|
|||
|
"998845 1 1490 False NaN 1 True \n",
|
|||
|
"\n",
|
|||
|
" ... purchase_count first_buying_date country gender_label \\\n",
|
|||
|
"0 ... 0 NaN af other \n",
|
|||
|
"1 ... 0 NaN af other \n",
|
|||
|
"2 ... 0 NaN af other \n",
|
|||
|
"3 ... 0 NaN af other \n",
|
|||
|
"4 ... 0 NaN fr female \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"998841 ... 1 2022-05-12 06:20:49+00:00 NaN male \n",
|
|||
|
"998842 ... 1 2022-05-03 04:20:43+00:00 fr male \n",
|
|||
|
"998843 ... 1 2022-03-27 12:15:02+00:00 de male \n",
|
|||
|
"998844 ... 1 2022-04-20 15:12:38+00:00 ch male \n",
|
|||
|
"998845 ... 1 2022-03-07 20:42:07+00:00 fr male \n",
|
|||
|
"\n",
|
|||
|
" gender_female gender_male gender_other country_fr has_tags \\\n",
|
|||
|
"0 0 0 1 0.0 0 \n",
|
|||
|
"1 0 0 1 0.0 0 \n",
|
|||
|
"2 0 0 1 0.0 0 \n",
|
|||
|
"3 0 0 1 0.0 0 \n",
|
|||
|
"4 1 0 0 1.0 0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"998841 0 1 0 NaN 0 \n",
|
|||
|
"998842 0 1 0 1.0 0 \n",
|
|||
|
"998843 0 1 0 0.0 0 \n",
|
|||
|
"998844 0 1 0 0.0 0 \n",
|
|||
|
"998845 0 1 0 1.0 0 \n",
|
|||
|
"\n",
|
|||
|
" number_company \n",
|
|||
|
"0 5 \n",
|
|||
|
"1 5 \n",
|
|||
|
"2 5 \n",
|
|||
|
"3 5 \n",
|
|||
|
"4 5 \n",
|
|||
|
"... ... \n",
|
|||
|
"998841 9 \n",
|
|||
|
"998842 9 \n",
|
|||
|
"998843 9 \n",
|
|||
|
"998844 9 \n",
|
|||
|
"998845 9 \n",
|
|||
|
"\n",
|
|||
|
"[998846 rows x 29 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 69,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"customer_sport"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 67,
|
|||
|
"id": "848963c9-6129-4106-80b5-76bf814b70d1",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def mailing_consent(customer_sport):\n",
|
|||
|
" df_graph = customer_sport.groupby([\"number_company\", \"already_purchased\"])[\"opt_in\"].mean().reset_index()\n",
|
|||
|
" # Création du barplot groupé\n",
|
|||
|
" fig, ax = plt.subplots(figsize=(10, 6))\n",
|
|||
|
" \n",
|
|||
|
" categories = df_graph[\"number_company\"].unique()\n",
|
|||
|
" bar_width = 0.35\n",
|
|||
|
" bar_positions = np.arange(len(categories))\n",
|
|||
|
" \n",
|
|||
|
" # Grouper les données par label et créer les barres groupées\n",
|
|||
|
" for label in df_graph[\"already_purchased\"].unique():\n",
|
|||
|
" label_data = df_graph[df_graph['already_purchased'] == label]\n",
|
|||
|
" values = [label_data[label_data['number_company'] == category]['opt_in'].values[0]*100 for category in categories]\n",
|
|||
|
" \n",
|
|||
|
" label_printed = \"purchased\" if label else \"no purchase\"\n",
|
|||
|
" ax.bar(bar_positions, values, bar_width, label=label_printed)\n",
|
|||
|
" \n",
|
|||
|
" # Mise à jour des positions des barres pour le prochain groupe\n",
|
|||
|
" bar_positions = [pos + bar_width for pos in bar_positions]\n",
|
|||
|
" \n",
|
|||
|
" # Ajout des étiquettes, de la légende, etc.\n",
|
|||
|
" ax.set_xlabel('Numero de compagnie')\n",
|
|||
|
" ax.set_ylabel('Part de consentement (%)')\n",
|
|||
|
" ax.set_title('Part de consentement au mailing selon les compagnies')\n",
|
|||
|
" ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])\n",
|
|||
|
" ax.set_xticklabels(categories)\n",
|
|||
|
" ax.legend()\n",
|
|||
|
" \n",
|
|||
|
" # Affichage du plot\n",
|
|||
|
" plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "d8071891-e6f5-4d93-b039-9e99c20ec4b0",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def gender_bar(customer_sport):\n",
|
|||
|
" company_genders = customer_sport.groupby(\"number_company\")[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
|
|||
|
" # Création du barplot\n",
|
|||
|
" plt.bar(company_genders[\"number_company\"], company_genders[\"gender_male\"], label = \"Homme\")\n",
|
|||
|
" plt.bar(company_genders[\"number_company\"], company_genders[\"gender_female\"], \n",
|
|||
|
" bottom = company_genders[\"gender_male\"], label = \"Femme\")\n",
|
|||
|
" \n",
|
|||
|
" \n",
|
|||
|
" # Ajout de titres et d'étiquettes\n",
|
|||
|
" plt.xlabel('Company')\n",
|
|||
|
" plt.ylabel(\"Part de clients de chaque sexe\")\n",
|
|||
|
" plt.title(\"Sexe des clients de chaque compagnie de spectacle\")\n",
|
|||
|
" plt.legend()\n",
|
|||
|
" \n",
|
|||
|
" # Affichage du barplot\n",
|
|||
|
" plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "2fc30f1d-cf64-4efb-9442-4d97bb50b29f",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"gender_bar()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"id": "4b3bb641-814b-4679-9a67-4eca87a920a6",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def country_bar(customer_sport):\n",
|
|||
|
" company_country_fr = customer_sport.groupby(\"number_compagny\")[\"country_fr\"].mean().reset_index()\n",
|
|||
|
" # Création du barplot\n",
|
|||
|
" plt.bar(company_country_fr[\"number_company\"], company_country_fr[\"country_fr\"])\n",
|
|||
|
" \n",
|
|||
|
" # Ajout de titres et d'étiquettes\n",
|
|||
|
" plt.xlabel('Company')\n",
|
|||
|
" plt.ylabel(\"Part de clients français\")\n",
|
|||
|
" plt.title(\"Nationalité des clients de chaque compagnie de spectacle\")\n",
|
|||
|
" \n",
|
|||
|
" # Affichage du barplot\n",
|
|||
|
" plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "01258674-6b98-49e4-93f4-f4185964999f",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"country_bar()"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.11.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 5
|
|||
|
}
|