2024-03-04 16:55:58 +01:00
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 6,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "dd143b00-1989-44cf-8558-a30087d17f70",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"import os\n",
|
|
|
|
|
"import s3fs\n",
|
|
|
|
|
"import warnings\n",
|
|
|
|
|
"from datetime import date, timedelta, datetime\n",
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
"import matplotlib.pyplot as plt"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 2,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "08c63120-1b56-4145-9014-18a637b22876",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"exec(open('../../0_KPI_functions.py').read())"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 3,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "f8bd679d-fa76-49d4-9ec1-9f15516f16d3",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Ignore warning\n",
|
|
|
|
|
"warnings.filterwarnings('ignore')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "ec9e996d-3eae-4836-8cf5-268e5dc0d672",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# Statistiques descriptives : compagnies sport"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "43f81515-fbd0-49c0-b3f8-0e0fb663e2c1",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Importations et chargement des données"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 7,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "945c59bb-05b4-4f21-82f0-0db40d7957b3",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Create filesystem object\n",
|
|
|
|
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
|
|
|
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 5,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "41a67995-0a08-45c0-bbad-6e6cee5474c8",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_5/campaigns_information.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_5/target_information.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_6/customerplus_cleaned.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_6/campaigns_information.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_6/products_purchased_reduced.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_6/target_information.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_7/customerplus_cleaned.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_7/campaigns_information.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_7/products_purchased_reduced.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_7/target_information.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_8/customerplus_cleaned.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_8/campaigns_information.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_8/products_purchased_reduced.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_8/target_information.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_9/customerplus_cleaned.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_9/campaigns_information.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_9/products_purchased_reduced.csv\n",
|
|
|
|
|
"File path : projet-bdc2324-team1/0_Input/Company_9/target_information.csv\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Build the KPI tables for the 5 sport companies\n",
"\n",
"# Identifiers of the sport companies\n",
"nb_compagnie=['5','6','7','8','9']\n",
"\n",
"# Per-company frames are accumulated in lists and concatenated once after the\n",
"# loop, instead of growing four DataFrames with pd.concat at every iteration\n",
"customer_frames = []\n",
"campaigns_frames = []\n",
"products_frames = []\n",
"tickets_frames = []\n",
"\n",
"# Load the raw tables of each sport company and derive its KPI tables\n",
"for directory_path in nb_compagnie:\n",
"    df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n",
"    df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n",
"    df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n",
"    df_target_information = display_databases(directory_path, file_name = \"target_information\")\n",
"\n",
"    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)\n",
"    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n",
"    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n",
"\n",
"    # Add the number_company column, used later to aggregate results per company.\n",
"    # NOTE(review): df_target_information is tagged here but is not aggregated in this cell.\n",
"    df_tickets_kpi[\"number_company\"]=int(directory_path)\n",
"    df_campaigns_kpi[\"number_company\"]=int(directory_path)\n",
"    df_customerplus_clean[\"number_company\"]=int(directory_path)\n",
"    df_target_information[\"number_company\"]=int(directory_path)\n",
"\n",
"    # Prefix every customer_id with the company number (e.g. '5_191835')\n",
"    df_tickets_kpi[\"customer_id\"]= directory_path + '_' + df_tickets_kpi['customer_id'].astype('str')\n",
"    df_campaigns_kpi[\"customer_id\"]= directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str')\n",
"    df_customerplus_clean[\"customer_id\"]= directory_path + '_' + df_customerplus_clean['customer_id'].astype('str')\n",
"    df_products_purchased_reduced[\"customer_id\"]= directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str')\n",
"\n",
"    # Keep this company's frames for the final concatenation\n",
"    customer_frames.append(df_customerplus_clean)\n",
"    campaigns_frames.append(df_campaigns_kpi)\n",
"    tickets_frames.append(df_tickets_kpi)\n",
"    products_frames.append(df_products_purchased_reduced)\n",
"\n",
"# Single concatenation per table, with a fresh 0..n-1 index\n",
"customer_sport = pd.concat(customer_frames, ignore_index=True)\n",
"campaigns_sport = pd.concat(campaigns_frames, ignore_index=True)\n",
"tickets_sport = pd.concat(tickets_frames, ignore_index=True)\n",
"products_sport = pd.concat(products_frames, ignore_index=True)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "62922029-8071-402e-8115-c145a2874a2f",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Statistiques descriptives"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "d347bca9-3041-4414-b18e-19b626998a3e",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 0. Détection du client anonyme (outlier) - utile pour la section 3"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 6,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "c4d4b2ad-8a3c-477b-bc52-dd4860527bfe",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"array([5, 6, 7, 8, 9])"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 6,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"sport_comp = tickets_sport['number_company'].unique()\n",
|
|
|
|
|
"sport_comp"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 7,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "97a9e235-1c04-46bf-9f3c-5496e141cc40",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def outlier_detection(company_list, show_diagram=False):\n",
"    \"\"\"Return, for each company, the customer_id with the largest summed total_amount.\n",
"\n",
"    Reads the notebook-level `tickets_sport` frame (columns `number_company`,\n",
"    `customer_id`, `total_amount`).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    company_list : iterable\n",
"        Company numbers, compared with `tickets_sport['number_company']`.\n",
"    show_diagram : bool, default False\n",
"        If True, draw one pie chart per company comparing the top customer's\n",
"        amount with the sum of all the other customers ('Autre').\n",
"\n",
"    Returns\n",
"    -------\n",
"    list\n",
"        One customer_id per company, in the order of `company_list`.\n",
"\n",
"    Raises\n",
"    ------\n",
"    IndexError\n",
"        If a company has no row in `tickets_sport`.\n",
"    \"\"\"\n",
"    outlier_list = list()\n",
"\n",
"    for company in company_list:\n",
"        # Total amount per customer for this company, largest first\n",
"        company_tickets = tickets_sport[tickets_sport['number_company']==company]\n",
"        amount_by_customer = company_tickets.groupby('customer_id')['total_amount'].sum()\n",
"        df_circulaire = amount_by_customer.sort_values(axis = 0, ascending = False)\n",
"\n",
"        # The single biggest customer is reported as the outlier\n",
"        top = df_circulaire[:1]\n",
"        outlier_list.append(top.index[0])\n",
"\n",
"        if show_diagram:\n",
"            # Two slices only: the top customer and the sum of everyone else\n",
"            rest_sum = df_circulaire[1:].sum()\n",
"            new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n",
"\n",
"            plt.figure(figsize=(3, 3))\n",
"            plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n",
"            plt.axis('equal')  # keep the pie circular\n",
"            plt.title(f'Répartition des montants totaux pour la compagnie {company}')\n",
"            plt.show()\n",
"    return outlier_list"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 8,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "770cd3fc-bfe2-4a69-89bc-0eb946311130",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"['5_191835', '6_591412', '7_49632', '8_1942', '9_19683']"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 8,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"outlier_list = outlier_detection(sport_comp)\n",
|
|
|
|
|
"outlier_list"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 9,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "70b6e961-c303-465e-93f4-609721d38454",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Suppression Réussie\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Remove the outlier customers from every table\n",
"\n",
"def remove_elements(lst, elements_to_remove):\n",
"    \"\"\"Join the items of `lst` that are not in `elements_to_remove` into one string.\n",
"\n",
"    Kept unchanged for backward compatibility. When `lst` is a string it is\n",
"    iterated character by character, so it cannot drop a whole customer_id;\n",
"    the row filtering below therefore relies on `Series.isin` instead.\n",
"    \"\"\"\n",
"    return ''.join([x for x in lst if x not in elements_to_remove])\n",
"\n",
"databases = [customer_sport, campaigns_sport, tickets_sport, products_sport]\n",
"\n",
"for dataset in databases:\n",
"    # Drop in place so that the four notebook-level names see the filtered rows;\n",
"    # re-running the cell is harmless because the rows are already gone\n",
"    outlier_rows = dataset.index[dataset['customer_id'].isin(outlier_list)]\n",
"    dataset.drop(index=outlier_rows, inplace=True)\n",
"\n",
"# Check the column values: `in` applied to a Series tests its index labels,\n",
"# so it cannot be used to look for a customer_id\n",
"outliers_left = sum(int(dataset['customer_id'].isin(outlier_list).sum()) for dataset in databases)\n",
"assert outliers_left == 0, f\"{outliers_left} outlier rows are still present\"\n",
"print(\"Suppression Réussie\")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 10,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "b54b920a-7b46-490f-ba7e-d1859055a4e3",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>customer_id</th>\n",
|
|
|
|
|
" <th>street_id</th>\n",
|
|
|
|
|
" <th>structure_id</th>\n",
|
|
|
|
|
" <th>mcp_contact_id</th>\n",
|
|
|
|
|
" <th>fidelity</th>\n",
|
|
|
|
|
" <th>tenant_id</th>\n",
|
|
|
|
|
" <th>is_partner</th>\n",
|
|
|
|
|
" <th>deleted_at</th>\n",
|
|
|
|
|
" <th>gender</th>\n",
|
|
|
|
|
" <th>is_email_true</th>\n",
|
|
|
|
|
" <th>...</th>\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
" <th>total_price</th>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <th>purchase_count</th>\n",
|
|
|
|
|
" <th>first_buying_date</th>\n",
|
|
|
|
|
" <th>country</th>\n",
|
|
|
|
|
" <th>gender_label</th>\n",
|
|
|
|
|
" <th>gender_female</th>\n",
|
|
|
|
|
" <th>gender_male</th>\n",
|
|
|
|
|
" <th>gender_other</th>\n",
|
|
|
|
|
" <th>country_fr</th>\n",
|
|
|
|
|
" <th>number_company</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
2024-03-04 19:29:21 +01:00
|
|
|
|
" <td>5_6009745</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>1372685</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1771</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>True</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
" <td>0.0</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>af</td>\n",
|
|
|
|
|
" <td>other</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
2024-03-04 19:29:21 +01:00
|
|
|
|
" <td>5_6011228</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>1372685</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1771</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>True</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
" <td>0.0</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>af</td>\n",
|
|
|
|
|
" <td>other</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
2024-03-04 19:29:21 +01:00
|
|
|
|
" <td>5_6058950</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>1372685</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1771</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>True</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
" <td>0.0</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>af</td>\n",
|
|
|
|
|
" <td>other</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
2024-03-04 19:29:21 +01:00
|
|
|
|
" <td>5_6062404</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>1372685</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1771</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>True</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
" <td>0.0</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>af</td>\n",
|
|
|
|
|
" <td>other</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
2024-03-04 19:29:21 +01:00
|
|
|
|
" <td>5_250217</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>78785</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>11035.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1771</td>\n",
|
|
|
|
|
" <td>False</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>True</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
" <td>NaN</td>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>fr</td>\n",
|
|
|
|
|
" <td>female</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"<p>5 rows × 28 columns</p>\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
2024-03-04 19:29:21 +01:00
|
|
|
|
" customer_id street_id structure_id mcp_contact_id fidelity tenant_id \\\n",
|
|
|
|
|
"0 5_6009745 1372685 NaN NaN 0 1771 \n",
|
|
|
|
|
"1 5_6011228 1372685 NaN NaN 0 1771 \n",
|
|
|
|
|
"2 5_6058950 1372685 NaN NaN 0 1771 \n",
|
|
|
|
|
"3 5_6062404 1372685 NaN NaN 0 1771 \n",
|
|
|
|
|
"4 5_250217 78785 NaN 11035.0 0 1771 \n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
" is_partner deleted_at gender is_email_true ... total_price \\\n",
|
|
|
|
|
"0 False NaN 2 True ... 0.0 \n",
|
|
|
|
|
"1 False NaN 2 True ... 0.0 \n",
|
|
|
|
|
"2 False NaN 2 True ... 0.0 \n",
|
|
|
|
|
"3 False NaN 2 True ... 0.0 \n",
|
|
|
|
|
"4 False NaN 0 True ... NaN \n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
" purchase_count first_buying_date country gender_label gender_female \\\n",
|
|
|
|
|
"0 0 NaN af other 0 \n",
|
|
|
|
|
"1 0 NaN af other 0 \n",
|
|
|
|
|
"2 0 NaN af other 0 \n",
|
|
|
|
|
"3 0 NaN af other 0 \n",
|
|
|
|
|
"4 0 NaN fr female 1 \n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
" gender_male gender_other country_fr number_company \n",
|
|
|
|
|
"0 0 1 0.0 5 \n",
|
|
|
|
|
"1 0 1 0.0 5 \n",
|
|
|
|
|
"2 0 1 0.0 5 \n",
|
|
|
|
|
"3 0 1 0.0 5 \n",
|
|
|
|
|
"4 0 0 1.0 5 \n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"\n",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"[5 rows x 28 columns]"
|
2024-03-04 16:55:58 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 10,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"customer_sport.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "d40fe668-e1d7-4544-9db8-02498afe65fe",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1. customerplus_clean"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 11,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "eec1ac0b-2502-452b-97e6-69ffb77156d6",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def compute_nb_clients(customer_sport):\n",
"    \"\"\"Plot, per company, the number of customers with at least one purchase.\n",
"\n",
"    Counts the `customer_id` values of the rows where `purchase_count` > 0,\n",
"    grouped by `number_company`, and draws them as a bar chart in thousands.\n",
"    \"\"\"\n",
"    buyers = customer_sport[customer_sport[\"purchase_count\"]>0]\n",
"    clients_per_company = buyers.groupby(\"number_company\")[\"customer_id\"].count().reset_index()\n",
"\n",
"    # Heights are divided by 1000 to match the unit of the y-axis label\n",
"    bar_heights = clients_per_company[\"customer_id\"]/1000\n",
"    plt.bar(clients_per_company[\"number_company\"], bar_heights)\n",
"\n",
"    # Axis labels and title\n",
"    plt.xlabel('Company')\n",
"    plt.ylabel(\"Nombre de clients (milliers)\")\n",
"    plt.title(\"Nombre de clients de chaque compagnie de sport\")\n",
"\n",
"    # Render the bar plot\n",
"    plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 12,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "db4494e7-6f65-4f7e-bf8c-8ec321d0b02d",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHFCAYAAAAUpjivAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABH/ElEQVR4nO3dd3hU1f7+/XuAdEKACAmBAAFCDUhTFNSANBEQpIuHDgcFRYpSfqgEpEgREQseVJpI0UM5gkgREYTQi0iRonSI9IQaSLKeP3gyX4YkmIEJk2zer+uaC/baa/b+zM7Mzp21y9iMMUYAAAAWlc3dBQAAAGQkwg4AALA0wg4AALA0wg4AALA0wg4AALA0wg4AALA0wg4AALA0wg4AALA0wg4AALA0wk4WMm3aNNlsNnl7e+vIkSMp5tesWVMRERFuqEzq2LGjcubM6ZZ1/xObzaaoqKgHus6aNWuqZs2aD7yOJUuWPLDX6o7tWrRoUTVq1OiBrhOZ3+HDh2Wz2TRt2rQMX5c73vcP0oPchzxIhJ0sKD4+Xm+//ba7y4CT1q9fr65du2boOpYsWaKhQ4dm6DqAzKZAgQJav369GjZs6O5Ssjyr7kMIO1nQc889p1mzZum3335zdykuYYzRtWvX3F1GhnviiSdUqFAhd5cBWI6Xl5eeeOIJ5cuXz92lZFlXr151dwkZirCTBfXv31+BgYEaMGDAP/a9fv26Bg0apLCwMHl6eqpgwYLq2bOnLl686NAv+fDA4sWLValSJfn4+KhMmTJavHixpFuH0MqUKSM/Pz89/vjj2rJlS6rr2717t2rXri0/Pz/ly5dPr732WooPkc1m02uvvabPP/9cZcqUkZeXl6ZPny5JOnDggNq2bav8+fPLy8tLZcqU0aeffpqu7RIXF6du3bopMDBQOXPm1HPPPaf9+/en2vd+1pOUlKSPP/5YFStWlI+Pj3Lnzq0nnnhC33///V2fl9rwd0xMjLp3765ChQrJ09NTYWFhGjp0qBISEux9kofox40bp/HjxyssLEw5c+bUk08+qQ0bNtj7dezY0f4abDab/XH48GFJ0nfffadq1aopICBAvr6+KlasmDp37vyPrzczbtelS5eqcuXK8vHxUenSpTVlyhSH+WfOnFGPHj1UtmxZ5cyZU/nz59ezzz6rX3/9NcWyTp48qVatWsnf318BAQFq3bq1NmzYkOKwSGqHJqVb271o0aIObTdu3NDw4cNVunRpeXl5KV++fOrUqZPOnDmTrm2xceNGNW7cWIGBgfL29lbx4sXVu3dvhz5r165V7dq15e/vL19fX1WvXl0//PCDQ5/kQ98///yz/WeYK1cutW/fXleuXFFMTIxatWql3Llzq0CBAnrzzTd18+ZN+/OT33tjxozRiBEjVLhwYXl7e6tq1apauXKlw7oOHjyoTp06KTw8XL6+vipYsKAaN26s33//PcXr2717t+rVqydfX1/ly5dPPXv21A8//CCbzaZffvnFYZtHRERo8+bNevrpp+3v2/fff19JSUkp6rzzMFZW2J/80+fyl19+kc1m08yZM9W3b18FBwfLx8dHkZGR2r59e4rlff/993ryySfl6+srf39/1a1bV+vXr3foExUVJZvNpm3btqlFixbKkyePihcv/o/7kCzNIMuYOnWqkWQ2b95sPvroIyPJrFy50j4/MjLSlCtXzj6dlJRk6tevb3LkyGHeeecds3z5cjNu3Djj5+dnKlWqZK5fv27vW6RIEVOoUCETERFhZs+ebZYsWWKqVatmPDw8zLvvvmtq1Khh5s+fbxYsWGBKlixpgoKCzNWrV+3P79Chg/H09DSFCxc2I0aMMMuXLzdRUVEmR44cplGjRg6vQ5IpWLCgqVChgpk1a5b5+eefza5du8zu3btNQECAKV++vJkxY4ZZvny56devn8mWLZuJioq667ZJSkoytWrVMl5eXvb1DxkyxBQrVsxIMkOGDLH3vZ
/1GGNMu3btjM1mM127djX/+9//zI8//mhGjBhhPvroI4efRWRkZIrXfXsdp06dMqGhoaZIkSLmP//5j/npp5/Me++9Z7y8vEzHjh3t/Q4dOmQkmaJFi5rnnnvOLFy40CxcuNCUL1/e5MmTx1y8eNEYY8zBgwdNixYtjCSzfv16++P69esmOjra2Gw206ZNG7NkyRLz888/m6lTp5p27dplqe2a/D4tW7asmTFjhlm2bJlp2bKlkWRWr15t7/fHH3+YV1991cyZM8f88ssvZvHixaZLly4mW7ZsZtWqVfZ+V69eNWXKlDEBAQHm448/NsuWLTO9evUyhQsXNpLM1KlT7/ozNebWe79IkSL26cTERPPcc88ZPz8/M3ToULNixQrz5ZdfmoIFC5qyZcs6fG5Ss3TpUuPh4WEqVKhgpk2bZn7++WczZcoU06ZNG3ufX375xXh4eJgqVaqYuXPnmoULF5p69eoZm81m5syZY++XvM8ICwsz/fr1M8uXLzejR4822bNnNy+99JKpXLmyGT58uFmxYoUZMGCAkWQ++OAD+/OT33uhoaHmqaeeMvPmzTPfffedeeyxx4yHh4eJjo629129erXp16+f+e9//2tWr15tFixYYJo2bWp8fHzMH3/8Ye938uRJExgYaAoXLmymTZtmlixZYtq1a2eKFi1qJDn8fCIjI01gYKAJDw83n3/+uVmxYoXp0aOHkWSmT5+eos7bf15ZYX+Sns/lqlWr7D+DJk2amEWLFpmZM2eaEiVKmFy5cpk///zT3vebb74xkky9evXMwoULzdy5c02VKlWMp6en+fXXX+39hgwZYiSZIkWKmAEDBpgVK1aYhQsX3nUfktURdrKQ28NOfHy8KVasmKlatapJSkoyxqQMO0uXLjWSzJgxYxyWM3fuXCPJTJ482d5WpEgR4+PjY44fP25v27Fjh5FkChQoYK5cuWJvX7hwoZFkvv/+e3tbhw4djCSHX0zGGDNixAgjyaxdu9beJskEBASY8+fPO/StX7++KVSokImNjXVof+2114y3t3eK/rf78ccf77r+23dO97OeNWvWGElm8ODBafYxJn1hp3v37iZnzpzmyJEjDv3GjRtnJJndu3cbY/5vR16+fHmTkJBg77dp0yYjycyePdve1rNnT5Pa3zDJy0wORumV2bZrkSJFjLe3t8M2u3btmsmbN6/p3r17ms9LSEgwN2/eNLVr1zYvvviivX3SpElGkvnf//7n0L9bt273HHZmz55tJJl58+Y59Nu8ebORZD777LO7vsbixYub4sWLm2vXrqXZ54knnjD58+c3ly5dcniNERERplChQvZ9QvI+4/XXX3d4ftOmTY0kM378eIf2ihUrmsqVK9unk997ISEhDvXExcWZvHnzmjp16qRZY0JCgrlx44YJDw83ffr0sbe/9dZbxmaz2d/fyerXr59q2JFkNm7c6NC3bNmypn79+inqvP3nlRX2J+n5XCaHncqVK9t/rsYYc/jwYePh4WG6du1qjLkVskNCQkz58uVNYmKivd+lS5dM/vz5TfXq1e1tyWHn3XffTbG+tPYhWR2HsbIoT09PDR8+XFu2bNG3336bap+ff/5Z0q1h9tu1bNlSfn5+KYahK1asqIIFC9qny5QpI+nWULKvr2+K9tSuCHv55Zcdptu2bStJWrVqlUP7s88+qzx58tinr1+/rpUrV+rFF1+Ur6+vEhIS7I/nn39e169fdzhkc6fk5ae1flet58cff5Qk9ezZM80+6bV48WLVqlVLISEhDnU0aNBAkrR69WqH/g0bNlT27Nnt0xUqVJCU+s/hTo899pgkqVWrVvr222914sSJdNWYGbdrxYoVVbhwYfu0t7e3SpYsmWI7fP7556pcubK8vb2VI0cOeXh4aOXKldq7d6/D6/P399cLL7xw19fnjMWLFyt37txq3Lixw3aoWLGigoODHQ7T3Gn//v36888/1aVLF3l7e6fa58qVK9q4caNatG
jhcAVk9uzZ1a5dOx0/flz79u1zeM6dV7Alf4bvPKG3TJkyqb6fmjVr5lCPv7+/GjdurDVr1igxMVGSlJCQoJEjR6p
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"compute_nb_clients(customer_sport)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 13,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "a12a59a0-edfe-4e52-8037-9b875f823b33",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def maximum_price_paid(customer_sport):\n",
"    \"\"\"Plot, per company, the highest `max_price` value found in `customer_sport`.\"\"\"\n",
"    price_by_company = customer_sport.groupby(\"number_company\")[\"max_price\"]\n",
"    highest_price = price_by_company.max().reset_index()\n",
"\n",
"    # One bar per company\n",
"    companies = highest_price[\"number_company\"]\n",
"    plt.bar(companies, highest_price[\"max_price\"])\n",
"\n",
"    # Axis labels and title\n",
"    plt.xlabel('Company')\n",
"    plt.ylabel(\"Prix maximal d'un billet vendu\")\n",
"    plt.title(\"Prix maximal de vente observé par compagnie de sport\")\n",
"\n",
"    # Render the bar plot\n",
"    plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 14,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "2c7c2d26-4e35-4163-b771-fa4d3e8ca83e",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHGCAYAAABpZb/eAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVsUlEQVR4nO3dd1gUV/828HulSV2KVEXAEhTBniCowYZoAGNJLCgCGo2xPUSJJdFYYteoUR9jmhhLxBQ0KjZiIRrAghpF0ccOKqhBBLEgLOf9Iz/mdQXMrEJ2wftzXXuFOXN25jvjljtnyiqEEAJERERE9Fw1tF0AERERUVXA0EREREQkA0MTERERkQwMTUREREQyMDQRERERycDQRERERCQDQxMRERGRDAxNRERERDIwNBERERHJwNBERKRlH3/8Mezt7XHp0iVtl0JEz8HQpGPWrFkDhUIhPfT19VGnTh1ERETgxo0bspYRHh4OV1fXyi1Ui0r20dWrV//1dV+9ehUKhQJr1qx5oecfOHAACoUCBw4cqNC6tGHHjh2YPn261tZf8jo4duyY1mqoCDt37sSKFSuwfft21K9fX9vlUCVSKBT/ynvG1dUV4eHhlb4ebUlMTMT06dNx7969f33dDE06Kjo6GklJSYiPj8ewYcOwceNGtG/fHg8ePPjH506dOhWbN2/+F6rUjsDAQCQlJcHR0VHbpbzSduzYgRkzZmi7jCotIyMDERER2LRpE15//XVtl0OVLCkpCe+99562y6jyEhMTMWPGDK2EJv1/fY0ki6enJ1q3bg0A6NixI1QqFT777DNs2bIFAwcOLPM5Dx8+hImJSbX/v1VbW1vY2tpquwwiScl7T1POzs7IysqqhIpe3otuE5WvTZs22i6hSnv06BFq1qyp1Ro40lRFlLzZrl27BuDvQ3BmZmY4ffo0unbtCnNzc3Tu3Fma9/ThuZiYGCgUCqxYsUJtmdOmTYOenh7i4+Ofu25XV1cEBQVh+/btaNGiBYyNjdG4cWNs374dwN+HSRo3bgxTU1O88cYbpQ6XHDt2DP3794erqyuMjY3h6uqKAQMGSNsCAEIIvPXWW7CxsUF6errU/vDhQzRp0gSNGzeWRtnKOjzXoUMHeHp6IikpCb6+vtJ6oqOjAQBxcXFo2bIlTExM4OXlhV27dqnVePHiRURERKBhw4YwMTFB7dq1ERwcjNOnTz933zzPuXPn0K1bN5iYmKBWrVoYMWIE7t+/X2bf3377DZ07d4aFhQVMTEzQtm1b7N2797nLv3PnDgwNDTF16tQy161QKLBs2TKpLSsrC++//z7q1KkDQ0NDuLm5YcaMGSgqKpL6lBx+XLRoERYvXgw3NzeYmZnBx8cHycnJUr/w8HD897//BQC1w8kl/yZCCKxcuRLNmzeHsbExrKys8M477+Dy5cuy9t2hQ4fQuXNnmJubw8TEBL6+voiLiyuzb05ODiIiImBtbQ1TU1MEBweXWs+JEycQFBQEOzs7GBkZwcnJCYGBgbh+/brUR27NJa+133//Hb6+vjAxMcGQIUPQs2dPuLi4oLi4uFSN3t7eaNmypcbrKsv06dOhUChw4sQJ9O7dGxYWFlAqlRg0aBDu3Lmj1nfTpk3o2rUrHB0dpfftpEmTSo1YP+/zpDznzp3DgAEDYG9vDyMjI9StWxeDBw9GQUGB1Cc1NRVvv/02rKysULNmTTRv3hzff/+92nJKDln/8MMPmDhxIhwdHWFmZobg4GDcunUL9+/fx/Dhw1GrVi3UqlULERERyM/PV1uGQqHA6NGj8dVXX+G1116DkZERPDw8EBMTo9bvzp07GDlyJDw8PGBmZgY7Ozt06tQJBw8eLLV9169fxzvvvANzc3NYWlpi4MCBOHr0aKnD8yX77uLFi3jrrbdgZmYGZ2dnjB8/Xm1flNT57OE5Oe/L8hQWFmLChAlwcHCAiYkJ2rVrhyNHjpTZ92XWs2/fPnTo0AE2Nj
YwNjZG3bp10adPHzx8+BDA///cWLBgAWbPno26deuiZs2aaN26dZmfY3Le3yWf83v27MGQIUNga2sLExMTTJ48GR999BEAwM3NTfrs+ddOeRCkU6KjowUAcfToUbX2L774QgAQX3/9tRBCiLCwMGFgYCBcXV3F3Llzxd69e8Xu3buleS4uLmrPHzFihDA0NJSWu3fvXlGjRg0xZcqUf6zJxcVF1KlTR3h6eoqNGzeKHTt2CG9vb2FgYCA+/fRT0bZtWxEbGys2b94sXnvtNWFvby8ePnwoPf+nn34Sn376qdi8ebNISEgQMTExws/PT9ja2oo7d+5I/f766y9Rp04d4e3tLZ48eSJti7GxsTh16lSpfXTlyhWpzc/PT9jY2Ah3d3fx3Xffid27d4ugoCABQMyYMUN4eXlJtbdp00YYGRmJGzduSM9PSEgQ48ePFz///LNISEgQmzdvFj179hTGxsbi3LlzUr8rV64IACI6Ovq5+ywrK0vY2dmJ2rVri+joaLFjxw4xcOBAUbduXQFA7N+/X+q7bt06oVAoRM+ePUVsbKzYtm2bCAoKEnp6euK333577np69eolnJ2dhUqlUmufMGGCMDQ0FH/99ZcQQojMzEzh7OwsXFxcxFdffSV+++038dlnnwkjIyMRHh5eavtcXV1Ft27dxJYtW8SWLVuEl5eXsLKyEvfu3RNCCHHx4kXxzjvvCAAiKSlJejx+/FgIIcSwYcOEgYGBGD9+vNi1a5f44YcfRKNGjYS9vb3Iysp67jYdOHBAGBgYiFatWolNmzaJLVu2iK5duwqFQiFiYmKkfiWvA2dnZzFkyBCxc+dO8fXXXws7Ozvh7OwscnJyhBBC5OfnCxsbG9G6dWvx448/ioSEBLFp0yYxYsQIcfbsWWl5cmv28/MT1tbWwtnZWSxfvlzs379fJCQkiF9//VUAEPHx8Wrbk5aWJgCIZcuWabyuskybNk0AEC4uLuKjjz4Su3fvFosXLxampqaiRYsW0ntHCCE+++wzsWTJEhEXFycOHDggVq1aJdzc3ETHjh3Vlvm8z5OynDx5UpiZmQlXV1exatUqsXfvXrF+/XrRt29fkZeXJ4QQ4ty5c8Lc3FzUr19frF27VsTFxYkBAwYIAGL+/PnSsvbv3y9tT3h4uNi1a5dYtWqVMDMzEx07dhT+/v4iKipK7NmzR8yfP1/o6emJMWPGqNVT8jrw8PAQGzduFFu3bhXdunUTAMRPP/0k9Tt37pz44IMPRExMjDhw4IDYvn27GDp0qKhRo4baezI/P180aNBAWFtbi//+979i9+7d4sMPPxRubm6l3v9hYWHC0NBQNG7cWCxatEj89ttv4tNPPxUKhULMmDGjVJ3Tpk2TpuW+L8sTFhYmFAqF+Oijj8SePXvE4sWLRe3atYWFhYUICwurkPVcuXJF1KxZU/j7+4stW7aIAwcOiA0bNojQ0FDpPVbyueHs7CzatWsnfvnlF/HTTz+J119/XRgYGIjExERpeZq+v2vXri2GDx8udu7cKX7++Wdx9epVMWbMGAFAxMbGSp89ubm5/7i/KgJDk44peaEkJyeLwsJCcf/+fbF9+3Zha2srzM3NpQ/UsLAwAUCsXr261DLKCk2PHz8WLVq0EG5ubuLs2bPC3t5e+Pn5iaKion+sycXFRRgbG4vr169LbSdPnhQAhKOjo3jw4IHUvmXLFgFAbN26tdzlFRUVifz8fGFqaiq++OILtXmHDh0S+vr6IjIyUqxevVoAEN9++22Z++jZ0ARAHDt2TGrLzs4Wenp6wtjYWC0gldT+9JdYWTU+efJENGzYUHz44YdSu9zQNHHiRKFQKMTJkyfV2v39/dVC04MHD4S1tbUIDg5W66dSqUSzZs3EG2+88dz1bN26VQAQe/bsUavdyclJ9OnTR2p7//33hZmZmbh27Zra8xctWiQAiDNnzqhtn5eXl9pr48iRIwKA2Lhxo9Q2atQoUdb/dyUlJQkA4vPPP1
drz8jIEMbGxmLChAnP3aY2bdoIOzs7cf/+fbVt8vT0FHXq1BHFxcVCiP//OujVq5fa8//44w8BQMyaNUsIIcSxY8c
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"maximum_price_paid(customer_sport)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 15,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "597d4361-8beb-43f4-9224-8f7dc34b187c",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Statistiques Descriptives company 5\n",
|
|
|
|
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|
|
|
|
"count 145390.000000 68869.000000 68869.000000 \n",
|
|
|
|
|
"mean 11.070309 65.969693 3.655202 \n",
|
|
|
|
|
"std 16.353610 195.462869 13.119612 \n",
|
|
|
|
|
"min 0.000000 0.000000 1.000000 \n",
|
|
|
|
|
"25% 0.000000 20.000000 1.000000 \n",
|
|
|
|
|
"50% 0.000000 45.000000 2.000000 \n",
|
|
|
|
|
"75% 20.000000 79.500000 3.000000 \n",
|
|
|
|
|
"max 500.000000 24159.405000 2139.833333 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" purchase_count total_price \n",
|
|
|
|
|
"count 471598.00000 3.950770e+05 \n",
|
|
|
|
|
"mean 0.29900 2.608544e+01 \n",
|
|
|
|
|
"std 7.22753 2.089636e+03 \n",
|
|
|
|
|
"min 0.00000 0.000000e+00 \n",
|
|
|
|
|
"25% 0.00000 0.000000e+00 \n",
|
|
|
|
|
"50% 0.00000 0.000000e+00 \n",
|
|
|
|
|
"75% 0.00000 0.000000e+00 \n",
|
|
|
|
|
"max 3532.00000 1.262516e+06 \n",
|
|
|
|
|
"Statistiques Descriptives company 6\n",
|
|
|
|
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|
|
|
|
"count 33779.000000 33779.000000 33779.000000 \n",
|
|
|
|
|
"mean 24.033859 56.711279 2.413530 \n",
|
|
|
|
|
"std 21.217031 72.841926 3.763809 \n",
|
|
|
|
|
"min -52.740000 -1046.666667 1.000000 \n",
|
|
|
|
|
"25% 10.000000 19.000000 1.080000 \n",
|
|
|
|
|
"50% 19.333333 39.000000 2.000000 \n",
|
|
|
|
|
"75% 30.000000 72.990000 3.000000 \n",
|
|
|
|
|
"max 199.990000 3922.845361 309.047619 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" purchase_count total_price \n",
|
|
|
|
|
"count 79938.000000 79938.000000 \n",
|
|
|
|
|
"mean 2.842090 102.251041 \n",
|
|
|
|
|
"std 74.949889 4290.159858 \n",
|
|
|
|
|
"min 0.000000 -3140.000000 \n",
|
|
|
|
|
"25% 0.000000 0.000000 \n",
|
|
|
|
|
"50% 0.000000 0.000000 \n",
|
|
|
|
|
"75% 1.000000 54.980000 \n",
|
|
|
|
|
"max 14750.000000 762695.290000 \n",
|
|
|
|
|
"Statistiques Descriptives company 7\n",
|
|
|
|
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|
|
|
|
"count 39524.000000 39524.000000 39524.000000 \n",
|
|
|
|
|
"mean 33.110568 155.618778 3.365885 \n",
|
|
|
|
|
"std 85.221328 1085.613137 6.283143 \n",
|
|
|
|
|
"min 0.000000 0.000000 1.000000 \n",
|
|
|
|
|
"25% 17.250000 25.000000 1.800000 \n",
|
|
|
|
|
"50% 25.000000 57.676364 2.000000 \n",
|
|
|
|
|
"75% 43.054691 115.837500 3.555556 \n",
|
|
|
|
|
"max 10770.000000 86160.000000 400.000000 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" purchase_count total_price \n",
|
|
|
|
|
"count 68800.000000 68800.000000 \n",
|
|
|
|
|
"mean 3.290029 944.593729 \n",
|
|
|
|
|
"std 88.071870 12118.394731 \n",
|
|
|
|
|
"min 0.000000 0.000000 \n",
|
|
|
|
|
"25% 0.000000 0.000000 \n",
|
|
|
|
|
"50% 1.000000 9.000000 \n",
|
|
|
|
|
"75% 2.000000 132.000000 \n",
|
|
|
|
|
"max 22934.000000 940874.200000 \n",
|
|
|
|
|
"Statistiques Descriptives company 8\n",
|
|
|
|
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|
|
|
|
"count 129198.000000 129198.000000 129198.000000 \n",
|
|
|
|
|
"mean 18.409977 38.492520 2.258036 \n",
|
|
|
|
|
"std 19.159059 71.136628 5.270858 \n",
|
|
|
|
|
"min -20.000000 -1545.000000 1.000000 \n",
|
|
|
|
|
"25% 0.000000 0.000000 1.000000 \n",
|
|
|
|
|
"50% 15.000000 20.000000 2.000000 \n",
|
|
|
|
|
"75% 28.461538 52.500000 2.000000 \n",
|
|
|
|
|
"max 390.000000 7618.227273 750.000000 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" purchase_count total_price \n",
|
|
|
|
|
"count 197376.000000 197376.000000 \n",
|
|
|
|
|
"mean 4.637448 130.336075 \n",
|
|
|
|
|
"std 96.228665 2791.899946 \n",
|
|
|
|
|
"min 0.000000 -36124.000000 \n",
|
|
|
|
|
"25% 0.000000 0.000000 \n",
|
|
|
|
|
"50% 1.000000 0.000000 \n",
|
|
|
|
|
"75% 2.000000 75.000000 \n",
|
|
|
|
|
"max 40272.000000 702080.290000 \n",
|
|
|
|
|
"Statistiques Descriptives company 9\n",
|
|
|
|
|
" average_price average_price_basket average_ticket_basket \\\n",
|
|
|
|
|
"count 102710.000000 102710.000000 102710.000000 \n",
|
|
|
|
|
"mean 60.312171 62.384177 1.042402 \n",
|
|
|
|
|
"std 50.018101 52.009984 0.268064 \n",
|
|
|
|
|
"min -291.670000 -291.670000 1.000000 \n",
|
|
|
|
|
"25% 41.500000 42.350000 1.000000 \n",
|
|
|
|
|
"50% 59.000000 61.070000 1.000000 \n",
|
|
|
|
|
"75% 74.550000 77.710000 1.000000 \n",
|
|
|
|
|
"max 1116.500000 1216.950000 23.000000 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" purchase_count total_price \n",
|
|
|
|
|
"count 181134.000000 181134.000000 \n",
|
|
|
|
|
"mean 1.021354 63.476966 \n",
|
|
|
|
|
"std 1.805412 129.781944 \n",
|
|
|
|
|
"min 0.000000 -291.670000 \n",
|
|
|
|
|
"25% 0.000000 0.000000 \n",
|
|
|
|
|
"50% 1.000000 0.000000 \n",
|
|
|
|
|
"75% 1.000000 80.000000 \n",
|
|
|
|
|
"max 273.000000 14343.950000 \n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"for company in sport_comp:\n",
|
|
|
|
|
" print(f'Statistiques Descriptives company {company}')\n",
|
|
|
|
|
" company_data = customer_sport[customer_sport['number_company'] == company][['average_price', 'average_price_basket',\n",
|
|
|
|
|
" 'average_ticket_basket', 'purchase_count', 'total_price']]\n",
|
|
|
|
|
" print(company_data.describe())"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 16,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "5058d3c9-73a0-4e01-881e-4d2423f0d291",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"customer_sport[\"already_purchased\"] = customer_sport[\"purchase_count\"] > 0"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 17,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "848963c9-6129-4106-80b5-76bf814b70d1",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def mailing_consent(customer_sport):\n",
|
|
|
|
|
" df_graph = customer_sport.groupby([\"number_company\", \"already_purchased\"])[\"opt_in\"].mean().reset_index()\n",
|
|
|
|
|
" # Création du barplot groupé\n",
|
|
|
|
|
" fig, ax = plt.subplots(figsize=(10, 6))\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" categories = df_graph[\"number_company\"].unique()\n",
|
|
|
|
|
" bar_width = 0.35\n",
|
|
|
|
|
" bar_positions = np.arange(len(categories))\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Grouper les données par label et créer les barres groupées\n",
|
|
|
|
|
" for label in df_graph[\"already_purchased\"].unique():\n",
|
|
|
|
|
" label_data = df_graph[df_graph['already_purchased'] == label]\n",
|
|
|
|
|
" values = [label_data[label_data['number_company'] == category]['opt_in'].values[0]*100 for category in categories]\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" label_printed = \"purchased\" if label else \"no purchase\"\n",
|
|
|
|
|
" ax.bar(bar_positions, values, bar_width, label=label_printed)\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Mise à jour des positions des barres pour le prochain groupe\n",
|
|
|
|
|
" bar_positions = [pos + bar_width for pos in bar_positions]\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Ajout des étiquettes, de la légende, etc.\n",
|
|
|
|
|
" ax.set_xlabel('Numero de compagnie')\n",
|
|
|
|
|
" ax.set_ylabel('Part de consentement (%)')\n",
|
|
|
|
|
" ax.set_title('Part de consentement au mailing selon les compagnies')\n",
|
|
|
|
|
" ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])\n",
|
|
|
|
|
" ax.set_xticklabels(categories)\n",
|
|
|
|
|
" ax.legend()\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Affichage du plot\n",
|
|
|
|
|
" plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 18,
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"id": "b78ef715-c645-4625-a128-4f5b49e5339d",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAIhCAYAAABwnkrAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABjxklEQVR4nO3dd1yV9f//8ecR2SAqKsNQUHHvLBU/iXtbZuXOVampGak5shIXjgr9mOWoBLJcfTIzc+89cqdGam5FzIUbgev3Rz/Ot3NwcBQ8iI/77XZuN6/3tV7X4c14+r6u9zEZhmEIAAAAAGCWw94FAAAAAEBWQ1ACAAAAACsEJQAAAACwQlACAAAAACsEJQAAAACwQlACAAAAACsEJQAAAACwQlACAAAAACsEJQAAAACwQlACMkF0dLRMJpP5lTNnTj3zzDPq0qWLTp8+naHnioiI0Pz58x/pGMeOHZPJZFJ0dHSG1PS0+fLLL7Ple5cRfetJFh4eLpPJZNFWq1Yt1apVy6LNZDIpPDz88RWWSex1Hak/L48dO/bYzw3bZJe+DqQXQQnIRFFRUdq8ebOWL1+ut956S7NmzdILL7yg69evZ9g5nvY/ZrMCglL29Oabb2rz5s0P3G7z5s168803H0NFgH3R1/G0yWnvAoDsrGzZsqpSpYokqXbt2kpOTtaIESM0f/58tW/f/pGOffPmTbm6umZEmQDu4plnntEzzzzzwO2qVav2GKoB7I++jqcNI0rAY5T6S+b48eOSpGHDhqlq1arKmzevcuXKpcqVK+ubb76RYRgW+wUGBqpZs2aaN2+eKlWqJBcXFw0bNkwmk0nXr19XTEyM+TY/69uCrJ05c0atWrWSp6envLy81Lp1a8XFxd11299++00vvvii8ubNKxcXF1WqVElz585N17Xevn1bw4cPV6lSpeTi4iJvb2/Vrl1bmzZtMm9z69YtDR48WEFBQXJyclLBggXVq1cvXb58+a7Xv2TJElWuXFmurq4qWbKkpk+fbrHdjRs31L9/fwUFBcnFxUV58+ZVlSpVNGvWLJuvK/V2oNWrV+vtt99Wvnz55O3trZYtW+rMmTMWte3fv19r1641fw0CAwPN6xMSEsw1pV5jWFhYmlFFk8mk3r17KyoqSiVKlJCrq6uqVKmiLVu2yDAMffLJJwoKCpKHh4fq1Kmjw4cPp3nPV6xYobp16ypXrlxyc3NTjRo1tHLlSottUm8n279/v9q2bSsvLy/5+Pioa9euunLlikU9tvat9Pbne92+ExgYqM6dO9/3HKm3iX7yyScaO3asAgMD5erqqlq1aunPP//UnTt3NGjQIPn7+8vLy0svv/yy4uPjLY4xZ84cNWjQQH5+fnJ1dVWpUqU0aNCgNF+Tu916dzfW15PeviP9833Sr18/+fr6ys3NTTVr1tSOHTvS9V5I0uTJk1WhQgV5eHjI09NTJUuW1AcffGCxTVxcnLp3765nnnlGTk5OCgoK0rBhw5SUlPTA4//+++966aWXlCdPHrm4uKhixYqKiYmx2GbNmjUymUyaNWuWhgwZIn9/f+XKlUv16tVTbGzsA89xL+npz+fPn1e3bt0UEBAgZ2dn5c+fXzVq1NCKFSseePw//vhDbdu2lY+Pj5ydnVWoUCF17NhRt2/ffqjrnzlzpgYOHCg/Pz95eHioefPmOnfunK5evapu3bopX758ypcvn7p06aJr165ZHCP1+3/q1KkqXry4nJ2dVbp0ac2ePTvN9fbs2VOlS5eWh4eHChQooDp16mj9+vVpru/UqVN69dVX5enpqdy5c6t9+/bavn17mtusO3fuLA8PDx0+fFhNmjSRh4eHAgIC1K9fP4v3IrVO6+/d9Pav9PRVIKthRAl4jFL/uM2fP7+kf/7o6969uwoVKiRJ2rJli9555x2dPn1aH3/8scW+O3fu1MGDB/Xhhx8qKChI7u7uatGiherUqaPatWvro48+kiTlypXrnu
e/efOm6tWrpzNnzmj06NEqXry4fv31V7Vu3TrNtqtXr1ajRo1UtWpVTZkyRV5eXpo9e7Zat26tGzdu3PePuKSkJDVu3Fjr169XWFiY6tSpo6SkJG3ZskUnTpxQSEiIDMNQixYttHLlSg0ePFgvvPCC9u7dq6FDh2rz5s3avHmznJ2dzcfcs2eP+vXrp0GDBsnHx0dff/213njjDRUrVkw1a9aUJPXt21czZszQyJEjValSJV2/fl2///67Lly48NDX9eabb6pp06aaOXOmTp48qffff18dOnTQqlWrJEk//fSTXn31VXl5eenLL7+UJHPdN27cUGhoqE6dOqUPPvhA5cuX1/79+/Xxxx9r3759WrFihcUf4gsXLtSuXbs0ZswYmUwmDRw4UE2bNlWnTp30119/adKkSbpy5Yr69u2rV155Rbt37zbv/91336ljx4566aWXFBMTI0dHR02dOlUNGzbU0qVLVbduXYvreuWVV9S6dWu98cYb2rdvnwYPHixJ5vC5efNmm/qWZFt/flRffPGFypcvry+++EKXL19Wv3791Lx5c1WtWlWOjo6aPn26jh8/rv79++vNN9/UggULzPseOnRITZo0UVhYmNzd3fXHH39o7Nix2rZtm/nrmhEe1HckqUuXLpozZ44GDBigOnXq6MCBA3r55ZeVkJDwwOPPnj1bPXv21DvvvKNPP/1UOXLk0OHDh3XgwAHzNnFxcXr++eeVI0cOffzxxypatKg2b96skSNH6tixY4qKirrn8WNjYxUSEqICBQpo4sSJ8vb21nfffafOnTvr3LlzGjBggMX2H3zwgWrUqKGvv/5aCQkJGjhwoJo3b66DBw/KwcHBpvcuvf359ddf186dOzVq1CgVL15cly9f1s6dOy2+5+9mz549+s9//qN8+fJp+PDhCg4O1tmzZ7VgwQIlJibK2dn5oa6/du3aio6O1rFjx9S/f3+1bdtWOXPmVIUKFTRr1izt2rVLH3zwgTw9PTVx4kSL/RcsWKDVq1dr+PDhcnd315dffmne/9VXX5UkXbx4UZI0dOhQ+fr66tq1a/rpp59Uq1YtrVy50vyfGdevX1ft2rV18eJFjR07VsWKFdOSJUvu+rNeku7cuaMXX3xRb7zxhvr166d169ZpxIgR8vLyuu/3bnr7V3r6KpAlGQAyXFRUlCHJ2LJli3Hnzh3j6tWrxsKFC438+fMbnp6eRlxcXJp9kpOTjTt37hjDhw83vL29jZSUFPO6woULGw4ODkZsbGya/dzd3Y1OnTqlq67Jkycbkoyff/7Zov2tt94yJBlRUVHmtpIlSxqVKlUy7ty5Y7Fts2bNDD8/PyM5Ofme5/n2228NScZXX311z22WLFliSDLGjRtn0T5nzhxDkjFt2jRzW+HChQ0XFxfj+PHj5rabN28aefPmNbp3725uK1u2rNGiRYt7ntOW60r9Gvbs2dNiu3HjxhmSjLNnz5rbypQpY4SGhqY51+jRo40cOXIY27dvt2j/3//+Z0gyFi1aZG6TZPj6+hrXrl0zt82fP9+QZFSsWNGiP0yYMMGQZOzdu9cwDMO4fv26kTdvXqN58+YW50lOTjYqVKhgPP/88+a2oUOH3vV979mzp+Hi4mJxHlv6lrX79WdJxtChQ9PsU7hw4Qee7+jRo4Yko0KFChZ9MPU9efHFFy22DwsLMyQZV65cuevxUlJSjDt37hhr1641JBl79uwxr0t9r/4tNDQ0zdfa+nrS23f2799vSDIGDhxosd2sWbMMSQ98L3r37m3kzp37vtt0797d8PDwsPjeMQzD+PTTTw1Jxv79++95HW3atDGcnZ2NEydOWOzbuHFjw83Nzbh8+bJhGIaxevVqQ5LRpEkTi+3mzp1rSDI2b9583xpT36+jR48ahmFbf/bw8DDCwsLue/y7qVOnjpE7d24jPj7+ntvYev3W9ab2vT59+li0t2jRwsibN69FmyTD1dXV4ndDUlKSUbJkSaNYsWL3rD
EpKcm4c+eOUbduXePll182t3/xxReGJGPx4sUW23fv3j3Nz/pOnToZkoy5c+dabNukSROjRIkSaer8dx9Jb/9KT18
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 1000x600 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"mailing_consent(customer_sport)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 19,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "d8071891-e6f5-4d93-b039-9e99c20ec4b0",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def gender_bar(customer_sport):\n",
|
|
|
|
|
" company_genders = customer_sport.groupby(\"number_company\")[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n",
|
|
|
|
|
" # Création du barplot\n",
|
|
|
|
|
" plt.bar(company_genders[\"number_company\"], company_genders[\"gender_male\"], label = \"Homme\")\n",
|
|
|
|
|
" plt.bar(company_genders[\"number_company\"], company_genders[\"gender_female\"], \n",
|
|
|
|
|
" bottom = company_genders[\"gender_male\"], label = \"Femme\")\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Ajout de titres et d'étiquettes\n",
|
|
|
|
|
" plt.xlabel('Company')\n",
|
|
|
|
|
" plt.ylabel(\"Part de clients de chaque sexe\")\n",
|
2024-03-04 19:29:21 +01:00
|
|
|
|
" plt.title(\"Sexe des clients de chaque compagnie de sport\")\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" plt.legend()\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Affichage du barplot\n",
|
|
|
|
|
" plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 20,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "2fc30f1d-cf64-4efb-9442-4d97bb50b29f",
|
|
|
|
|
"metadata": {},
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHFCAYAAAAOmtghAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVPElEQVR4nO3deVhU5f8+8HsY9l1EEBVZ3BdUBE0wNXJBRTP3JUVFyyVTRHOJVNzCrBTrk7gkImZFplkaieRC7imiLaC54wLiCq4oM8/vD3/Mt3FA58DgyPF+Xddccp55zjnvOTMMt8/ZFEIIASIiIiKZMDF2AURERESGxHBDREREssJwQ0RERLLCcENERESywnBDREREssJwQ0RERLLCcENERESywnBDREREssJwQ0RERLLCcFPBHTx4ED179kTNmjVhYWEBV1dXBAQEYNKkScYuTTKFQoGoqChjl6Hjybp27doFhUKBXbt2let6ly5divj4+HJdB/D8Xs9/nTt3DgqFAp9++ulzWydVDPHx8VAoFDh37ly5rscYn/vn7Xl9h7yIGG4qsF9++QWBgYHIz8/HwoULsW3bNixZsgStW7dGYmKiscuTrebNm2P//v1o3rx5ua7nZf5iopdXSEgI9u/fDzc3N2OXUuG9zN8hpsYugEpv4cKF8PLyQnJyMkxN/++tHDBgABYuXGjEyuTN3t4erVq1MnYZRLJUpUoVVKlSxdhlVGj37t2DtbW1scswKo7cVGDXr1+Hs7OzVrApYmKi+9YmJiYiICAANjY2sLW1RXBwMNLT0zXP79mzB2ZmZpg8ebLWfEXDxKtWrdK0nTx5EoMGDYKLiwssLCzQoEEDfPnll3rVnZ+fj7fffhuVK1eGra0tOnfujH///bfYvvqsR61WY968eahXrx6srKzg6OiIJk2aYMmSJc+s5datW5g0aRK8vb1hYWEBFxcXdO3aFcePHy9xnpKGsw8fPow33ngDTk5OsLS0hK+vL77//nutPkXbcufOnRgzZgycnZ1RuXJl9OrVC5cvX9b08/T0xD///IPU1FQoFAooFAp4enqW+fUeP34cnTt3hrW1NZydnTF69Gjcvn272L6//fYb2rdvD3t7e1hbW6N169bYvn37M9cBSNuuixYtgpeXF2xtbREQEIADBw5oPX/48GEMGDAAnp6esLKygqenJwYOHIjz58/rLOvAgQNo3bo1LC0tUa1aNUyfPh0rV67U2c1R0i5QT09PDBs2TKstJycHo0aNQo0aNWBubg4vLy/Mnj0bhYWFem2Lb775BgEBAbC1tYWtrS2aNWum9bsEAHFxcWjatCksLS3h5OSEnj17IjMzU6vPsGHDYGtri+PHjyM4OBg2NjZwc3PDggULNK/91VdfhY2NDerWrYs1a9ZozV/02UtJScHw4cPh5OQEGxsbdO/eHWfOnNHqm5KSgh49eqBGjRqwtLRE7dq1MWrUKFy7dk3n9f30009o0qQJLCws4O3tjSVLliAqKgoKhUKrn0KhwLhx47B27Vo0aNAA1tbWaNq0KbZs2VJsnU/ulirL5/F5fO71+b0s2i7p6eno1asX7O3t4eDggMGDB+Pq1as6y1u4cCHq16+v+R0KDQ3FxYsXtfq99tpraNy4MX7//XcEBgbC2toaYWFhT/0OeSkIqrBGjhwpAIj33ntPHDhwQDx8+LDEvvPnzxcKhUKEhYWJLVu2iI0bN4qAgABhY2Mj/vnnH02/BQsWCADip59+EkII8ffffwtra2sxePBgTZ9//vlHODg4CB8fH5GQkCC2bdsmJk2aJExMTERUVNRTa1ar1SIoKEhYWFiI+fPni23btolZs2YJb29vAUDMmjVL8nqio6OFUqkUs2bNEtu3bxdbt24VMTExz6wlPz9fNGrUSNjY2Ig5c+aI5ORksWHDBjFhwgSxY8cOTb8n69q5c6cAIHbu3Klp27FjhzA3Nxdt2rQRiYmJYuvWrWLYsGECgF
i9erWm3+rVqwUA4e3tLd577z2RnJwsvvrqK1GpUiURFBSk6XfkyBHh7e0tfH19xf79+8X+/fvFkSNHyvR6c3JyhIuLi6hevbpYvXq1SEpKEm+99ZaoWbOmzutZu3atUCgU4s033xQbN24UmzdvFt26dRNKpVL89ttvZd6uZ8+eFQCEp6en6Ny5s9i0aZPYtGmT8PHxEZUqVRK3bt3SLG/9+vVi5syZ4scffxSpqaniu+++E+3atRNVqlQRV69e1fT7559/hLW1tWjYsKH49ttvxU8//SSCg4M1r+/s2bMlvqdFPDw8xNChQzXT2dnZwt3dXXh4eIjly5eL3377TcydO1dYWFiIYcOGPXU7CCHEjBkzBADRq1cvsX79erFt2zaxaNEiMWPGDE2fjz76SAAQAwcOFL/88otISEgQ3t7ewsHBQfz777+afkOHDhXm5uaiQYMGYsmSJSIlJUUMHz5cABDTp08XdevWFatWrRLJycmiW7duAoA4fPiwZv6iz567u7sICwsTv/76q1ixYoVwcXER7u7u4ubNm5q+sbGxIjo6Wvz8888iNTVVrFmzRjRt2lTUq1dP63vm119/FSYmJuK1114TP/74o1i/fr145ZVXhKenp3jyz0vR+92yZUvx/fffi6SkJPHaa68JU1NTcfr0aZ06//t+leXz+Lw+9/r8Xs6aNUsAEB4eHuL9998XycnJYtGiRcLGxkb4+vpqbdt33nlHABDjxo0TW7duFcuWLRNVqlQR7u7uWp/7du3aCScnJ+Hu7i6++OILsXPnTpGamvrU75CXAcNNBXbt2jXx6quvCgACgDAzMxOBgYEiOjpa3L59W9MvKytLmJqaivfee09r/tu3b4uqVauKfv36adrUarXo2rWrcHR0FH///bdo2LChqF+/vrhz546mT3BwsKhRo4bIy8vTWt64ceOEpaWluHHjRok1//rrrwKAWLJkiVb7/Pnzdf7g6Luebt26iWbNmj1ja+maM2eOACBSUlKe2k+fcFO/fn3h6+srHj16pDVvt27dhJubm1CpVEKI//viHjt2rFa/hQsXCgAiOztb09aoUSPRrl07nXpK+3qnTp0qFAqFOHr0qFZ7x44dtV7P3bt3hZOTk+jevbtWP5VKJZo2bSpatmz51PXos12Lwo2Pj48oLCzUtP/xxx8CgPj2229LnLewsFDcuXNH2NjYaH2O+vfvL6ysrEROTo5W3/r165c63IwaNUrY2tqK8+fPa/X79NNPBQCt/xg86cyZM0KpVIq33nqrxD43b94UVlZWomvXrlrtWVlZwsLCQgwaNEjTNnToUAFAbNiwQdP26NEjUaVKFQFA6w/X9evXhVKpFBEREZq2os9ez549tda1d+9eAUDMmzev2BrVarV49OiROH/+vNZ/fIQQokWLFsLd3V0UFBRo2m7fvi0qV65cbLhxdXUV+fn5mracnBxhYmIioqOjdeoser/K+nl8Xp97fX4vi8LNxIkTtdrXrVsnAIivv/5aCCFEZmZmsd8TBw8eFADEBx98oGlr166dACC2b9+us76SvkNeBtwtVYFVrlwZu3fvxqFDh7BgwQL06NED//77L6ZPnw4fHx/NEHJycjIKCwsRGhqKwsJCzcPS0hLt2rXT2r2iUCiQkJAAOzs7+Pv74+zZs/j+++9hY2MDAHjw4AG2b9+Onj17wtraWmt5Xbt2xYMHD3R2K/zXzp07AQBvvfWWVvugQYO0pqWsp2XLljh27BjGjh2L5ORk5Ofn67X9fv31V9StWxcdOnTQq39JTp06hePHj2te05O1Zmdn48SJE1rzvPHGG1rTTZo0AYBid7U8qbSvd+fOnWjUqBGaNm2q1f7ktt+3bx9u3LiBoUOHar0WtVqNzp0749ChQ7h7926J65GyXUNCQqBUKjXTxW2HO3fuYOrUqahduzZMTU1hamoKW1tb3L17V2vXzc6dO9G+fXu4urpq2pRKJfr37//MOkqyZcsWBAUFoVq1al
rbokuXLgCA1NTUEudNSUmBSqXCu+++W2Kf/fv34/79+zq7wtzd3fH666/r7A5RKBTo2rWrZtrU1BS1a9eGm5sbfH1
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"source": [
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"gender_bar(customer_sport)"
|
2024-03-04 16:55:58 +01:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 21,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "4b3bb641-814b-4679-9a67-4eca87a920a6",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def country_bar(customer_sport):\n",
|
2024-03-04 19:29:21 +01:00
|
|
|
|
" company_country_fr = customer_sport.groupby(\"number_company\")[\"country_fr\"].mean().reset_index()\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" # Création du barplot\n",
|
|
|
|
|
" plt.bar(company_country_fr[\"number_company\"], company_country_fr[\"country_fr\"])\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Ajout de titres et d'étiquettes\n",
|
|
|
|
|
" plt.xlabel('Company')\n",
|
|
|
|
|
" plt.ylabel(\"Part de clients français\")\n",
|
2024-03-04 19:29:21 +01:00
|
|
|
|
" plt.title(\"Nationalité des clients de chaque compagnie de sport\")\n",
|
2024-03-04 16:55:58 +01:00
|
|
|
|
" \n",
|
|
|
|
|
" # Affichage du barplot\n",
|
|
|
|
|
" plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 22,
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"id": "01258674-6b98-49e4-93f4-f4185964999f",
|
|
|
|
|
"metadata": {},
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHGCAYAAACIDqqPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABHeklEQVR4nO3dd3QU9f7/8deSSkhBAgkBQxKk96ZAEJESEBFQkarSvSB4EUIRRAS5IIKK6FcBC1VQEUEURCEXkQsCUqRKBGkJJQFDC81Akvn9wcn+XDeB3WTDhuH5OCfnsJ/9zHzeMzu7+2LaWgzDMAQAAGAShdxdAAAAgCsRbgAAgKkQbgAAgKkQbgAAgKkQbgAAgKkQbgAAgKkQbgAAgKkQbgAAgKkQbgAAgKkQbnBHuHTpkipVqqSOHTsqMzPT3eUAAAowwk0+mDt3riwWi3x9fZWQkGD3/MMPP6xq1arlat6fffaZpk2blu1zFotF48aNy9V8XS1rHRw9etTa1rNnT0VGRtr0e/3117Vs2bJbzu+5555TaGioFixYoEKFcr/ZRkZGqmfPnrmePr/8s66jR4/KYrFo7ty5+TruzbYnV7pdy/NPFotFL7zwwm0dEwXfTz/9JIvFop9++ilfx3HXdn873a7PEGcRbvJRWlqaXnnlFZfO82Yb0qZNm9S3b1+XjudKY8aM0ddff23T5ki4+eCDD7R7925988038vHxyccKC46wsDBt2rRJbdq0yddxCuoHE5Cf6tSpo02bNqlOnTruLuWOV1A/Qwg3+eiRRx7RZ599pl27dt2W8Ro0aKB77733toyVG/fdd59q167t9HQDBw7Ub7/9pqJFi7q+qALKx8dHDRo0UIkSJdxdCmA6gYGBatCggQIDA91dyh3rypUr7i7hpgg3+WjEiBEKDg7WSy+9dMu+H3zwgR566CGFhISoSJEiql69uqZMmaLr169b+zz88MP67rvvlJCQIIvFYv3Lkt1hqb1796p9+/a655575Ovrq1q1amnevHk2fbJ20X7++ecaPXq0SpUqpcDAQLVo0UL79++36RsXF6f27dvr3nvvla+vr8qVK6d+/fopJSXllsv4z8NSFotFly9f1rx586zL8vDDD1ufT05OVr9+/XTvvffK29tbUVFReu2115Senn7Lsa5fv64RI0aoZMmS8vPz04MPPqgtW7Zk29fRcWbMmKGaNWvK399fAQEBqlSpkl5++eVb1pKWlqbx48ercuXK8vX1VXBwsJo2baqNGzfmOE1Ou7P/+OMPdevWTSEhIfLx8VHlypX1wQcf2PRx9PW81faU2+U9efKkOnXqpICAAAUFBalz585KTk7Otu+2bdvUrl07FStWTL6+vqpdu7a+/PLLW44hObdeP/30U1WuXFl+fn6qWbOmVqxYYfP8wYMH1atXL5UvX15+fn4qXbq02rZtqz179tjN6/fff9cjjzwiPz8/FS9eXP3799fy5cvtDnPkdAj04YcfttnOJSk1NVXDhg1TVFSUvL29Vbp0aQ0ePFiXL192aF388MMPat68uYKCguTn56fKlStr0qRJNn2+/fZbNWzYUH5+fgoICFBMTIw2bdpk02fcuHGyWCzavXu3OnbsqKCgIBUrVkyxsbFKT0/X/v379cgjjyggIECRkZGaMmWKzfRZ296CBQsUGxurkiVLqnDhwmrSpIl27Nhh03fbtm3q0qWLIiMjVbhwYUVGRqpr167ZHsrfsGGDGjZsKF9fX5UuXVpjxozRJ598YnfoOzIyUo899ph++OEH1alTR4ULF1alSpU0e/bsbOv852GpvGyPt2u7v9X7MuuUgLi4OPXq1UvFihVTkSJF1LZtWx0+fNhufrNnz1bNmjXl6+urYsWK6YknnlB8fLxNn549e8rf31979uxRy5YtFRAQoObNm9/yM8SdPN1dgJkFBATolVde0Ysvvqgff/xRzZo1y7HvoUOH1K1bN+uH265duzRx4k
T9/vvv1jfm9OnT9a9//UuHDh2yO7yTnf379ys6OlohISF67733FBwcrAULFqhnz546deqURowYYdP/5ZdfVqNGjfTJJ58oNTVVL730ktq2bav4+Hh5eHhY62zYsKH69u2roKAgHT16VFOnTtWDDz6oPXv2yMvLy+H1s2nTJjVr1kxNmzbVmDFjJMn6P6nk5GQ98MADKlSokF599VXdd9992rRpkyZMmKCjR49qzpw5N533c889p/nz52vYsGGKiYnR3r179eSTT+rixYs2/Rwd54svvtCAAQP073//W2+99ZYKFSqkgwcPat++fTetIz09Xa1bt9b69es1ePBgNWvWTOnp6dq8ebMSExMVHR3t8Prat2+foqOjVaZMGb399tsqWbKkVq1apUGDBiklJUVjx4616X+r1/Nm21Nul/fq1atq0aKFTp48qUmTJqlChQr67rvv1LlzZ7u+a9eu1SOPPKL69etr5syZCgoK0hdffKHOnTvrypUrNz03ypn1+t1332nr1q0aP368/P39NWXKFD3xxBPav3+/ypYtK+nGF1NwcLDeeOMNlShRQmfPntW8efNUv3597dixQxUrVpQknTp1Sk2aNJGXl5emT5+u0NBQLVy4ME/n9Vy5ckVNmjTR8ePH9fLLL6tGjRr67bff9Oqrr2rPnj3673//e9MvjFmzZum5555TkyZNNHPmTIWEhOjAgQPau3evtc9nn32mp59+Wi1bttTnn3+utLQ0TZkyRQ8//LDWrFmjBx980GaenTp10jPPPKN+/fopLi7O+h+t//73vxowYICGDRumzz77TC+99JLKlSunJ5980mb6l19+WXXq1NEnn3yiCxcuaNy4cXr44Ye1Y8cO6zo/evSoKlasqC5duqhYsWJKSkrSjBkzdP/992vfvn0qXry4JGn37t2KiYlRhQoVNG/ePPn5+WnmzJlasGBBtutj165dGjp0qEaOHKnQ0FB98skn6tOnj8qVK6eHHnoox/WYl+3xdm33zrwv+/Tpo5iYGH322Wc6duyYXnnlFT388MPavXu3dS/4pEmT9PLLL6tr166aNGmSzpw5o3Hjxqlhw4baunWrypcvb53ftWvX1K5dO/Xr108jR45Uenq67r33Xqe+k24rAy43Z84cQ5KxdetWIy0tzShbtqxRr149IzMz0zAMw2jSpIlRtWrVHKfPyMgwrl+/bsyfP9/w8PAwzp49a32uTZs2RkRERLbTSTLGjh1rfdylSxfDx8fHSExMtOnXunVrw8/Pzzh//rxhGIaxdu1aQ5Lx6KOP2vT78ssvDUnGpk2bsh0vMzPTuH79upGQkGBIMr755hu7dXDkyBFrW48ePexqL1KkiNGjRw+7effr18/w9/c3EhISbNrfeustQ5Lx22+/ZVuTYRhGfHy8IckYMmSITfvChQsNSTbjOTrOCy+8YBQtWjTHMXMyf/58Q5Lx8ccf37RfRESETV1HjhwxJBlz5syxtrVq1cq49957jQsXLthM+8ILLxi+vr7W7cSZ1zOn7Sm3yztjxgy7bcEwDOO5556zW55KlSoZtWvXNq5fv27T97HHHjPCwsKMjIyMHMdxdL1KMkJDQ43U1FRrW3JyslGoUCFj0qRJOU6Xnp5uXLt2zShfvrzNdvTSSy8ZFovF2Llzp03/mJgYQ5Kxdu1aa9s/X9MsTZo0MZo0aWJ9PGnSJKNQoULG1q1bbfp99dVXhiRj5cqVOdZ58eJFIzAw0HjwwQetny//lJGRYZQqVcqoXr26zTq9ePGiERISYkRHR1vbxo4da0gy3n77bZt51KpVy5BkLF261Np2/fp1o0SJEsaTTz5pbcva9urUqWNTz9GjRw0vLy+jb9++OS5Lenq6cenSJaNIkSLGu+++a23v2LGjUaRIEePPP/+0WaYqVarYfcZEREQYvr6+Nu/nq1evGsWKFTP69etnV+ffX6+8bI+3a7t35H2Z9dn7xBNP2LT//PPPhiRjwoQJhmEYxrlz54
zChQvbfU4kJiYaPj4+Rrdu3axtPXr0MCQZs2fPthvvZt9J7sRhqXzm7e2tCRMmaNu2bTfd7bhjxw61a9dOwcHB8vD
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"country_bar(customer_sport)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "43d63ea3-75f4-4356-a7e9-35905d86baa5",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 2. campaigns_information"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 23,
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"id": "8d116e34-cdd6-4ef9-8622-474da79f79ef",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Nombre de lignes de la table : 463098\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"customer_id 0\n",
|
|
|
|
|
"nb_campaigns 0\n",
|
|
|
|
|
"nb_campaigns_opened 0\n",
|
|
|
|
|
"time_to_open 178826\n",
|
|
|
|
|
"number_company 0\n",
|
|
|
|
|
"dtype: int64"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 23,
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"print(\"Nombre de lignes de la table : \",campaigns_sport.shape[0])\n",
|
|
|
|
|
"campaigns_sport.isna().sum()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 24,
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"id": "724d3c33-c219-4212-b8b6-dd78481674cb",
|
|
|
|
|
"metadata": {},
|
2024-03-04 16:55:58 +01:00
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"campaigns_sport[\"no_campaign_opened\"] = pd.isna(campaigns_sport[\"time_to_open\"])\n",
|
|
|
|
|
"company_lazy_customers = campaigns_sport.groupby(\"number_company\")[\"no_campaign_opened\"].mean().reset_index()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def lazy_customer_plot(campaigns_sport):\n",
|
|
|
|
|
" company_lazy_customers = campaigns_sport.groupby(\"number_company\")[\"no_campaign_opened\"].mean().reset_index()\n",
|
|
|
|
|
" # Création du barplot\n",
|
|
|
|
|
" plt.bar(company_lazy_customers[\"number_company\"], company_lazy_customers[\"no_campaign_opened\"])\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Ajout de titres et d'étiquettes\n",
|
|
|
|
|
" plt.xlabel('Company')\n",
|
|
|
|
|
" plt.ylabel(\"Part de clients n'ayant ouvert aucun mail\")\n",
|
|
|
|
|
" plt.title(\"Part de clients n'ayant ouvert aucun mail pour les compagnies de sport\")\n",
|
|
|
|
|
" \n",
|
|
|
|
|
" # Affichage du barplot\n",
|
|
|
|
|
" plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-03-06 11:56:52 +01:00
|
|
|
|
"execution_count": 25,
|
2024-03-04 19:29:21 +01:00
|
|
|
|
"id": "e513f308-3a9c-40ed-99d5-ed420bd67384",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmkAAAHFCAYAAACpR27aAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYlElEQVR4nO3deVwU9f8H8NdyLDcoyKUiRwqigOKRoinhRYpKHnlfoBYeeWeaeYAaauZVqVlepBGZZB544Jml5oVaad6CB4jigSfo8vn94Zf9uSzoDuy6m/t6Ph48aj87x3tmZ2dfzsxnRiaEECAiIiIig2Ki7wKIiIiISB1DGhEREZEBYkgjIiIiMkAMaUREREQGiCGNiIiIyAAxpBEREREZIIY0IiIiIgPEkEZERERkgBjSiIiIiAyQ5JC2YsUKyGQy5Z+ZmRkqV66MqKgoXL16VavFffbZZ1i3bl2ZpnHp0iXIZDKsWLFCKzVpysvLC/369Xvldfzwww+YN2+eTudRkilTpsDLy0sv8y6thw8fYsqUKdi9e7e+S3klUlJSMGXKFH2XYZRkMpnKut+9ezdkMtlrt+0V3feRYZoyZQpkMpm+ywAA9OvX7z/32yHFtWvXMGXKFBw7dkzyuKU+krZ8+XLs378fqampGDhwIBITE9GkSRM8ePCgtJNUo42QZijc3d2xf/9+RERE6HQ++gxp/0UPHz5EbGzsa/dDWZKUlBTExsbquwyjtH//fgwYMEDfZRABAAYMGID9+/fruwyjcO3aNcTGxpYqpJmVdqYBAQGoV68eACAsLAwKhQJTp07FunXr0LNnz9JOFgDw6NEjWFlZlWkahsbCwgINGzbUdxlkpB4+fAhra2t9l2HUXpfvv0KhwNOnT2FhYaHvUqgMKleujMqVK+u7jNda4XelLLR2TVrhDig9PR0AEBsbiwYNGsDR0RH29vaoU6cOli5diqLPc/fy8kLbtm2RnJyM4OBgWFpaIjY2FjKZDA8ePMDKlSuVp1bffvvtF9Zw7do1dOnSBXZ2dnBwcEDXrl2RlZVV7LCHDx9G+/bt4ejoCEtLSwQHB+Onn37SaFnz8vIQFxcHf39/WFpawsnJCWFhYdi3b1+J45R0uvPs2bPo0aMHXFxcYGFhAX9/f3z99dcqwxSeFklMTMSECRNQsWJF2Nvbo0WLFjh9+rRyuLfffhubNm1Cenq6yinpQosWLUKtWrVga2sLOzs7VK9eHZ988skLl7Ww7tmzZ2POnDnw9vaGra0tQkJCcODAgZeuq6+//hpNmzaFi4sLbGxsEBgYiFmzZuHJkyfKYaZOnQozMzNcvnxZbfzo6Gg4OTnh8ePHAICkpCS0atUK7u7usLKygr+/P8aNG6d2BLdfv36wtbXFuXPn0KZNG9ja2sLDwwOjR49GXl6ectmcnZ0BQLnNyWSyl56qycjIQK9evVQ+sy+++AIFBQXKYUo6lVV0O5g3bx5kMhnOnTunNp+PP/4YcrkcN2/eVLZt374dzZs3h729PaytrdG4cWPs2LFDZbzC0xhHjx5F586dUb58ebzxxhvo16+fctt6fvu4dOlSicuampqKyMhIVK5cGZaWlqhatSo++OADlZoK13dxpyuKO6VSUFCAL7/8ErVr14aVlRXKlSuHhg0bYv369cphip4aLFT0VFrh5Re7du3CoEGDUKFCBTg5OaFjx464du1aicv1fN22trb4999/ER4eDhsbG7i7u2PGjBkAgAMHDuCtt96CjY0NfH19sXLlSpXxb9y4gcGDB6NGjRqwtbWFi4sLmjVrhr1796rNq6RlepnCZUxNTUVUVBQcHR1hY2ODdu3a4cKFC2rDL1u2DLVq1YKlpSUcHR3RoUMHnDp1SmWYt99+u9j9adHPsXB7nTVrFqZNmwZvb29YWFhg165dkpYhNzcXY8aMgbe3N+RyOSpVqoQRI0aofW/XrFmDBg0awMHBAdbW1vDx8UF0dPRLp6/JNl
VQUIBZs2ahevXqsLCwgIuLC/r06YMrV66orZuAgADs378fjRo1gpWVFby8vLB8+XIAwKZNm1CnTh1YW1sjMDAQW7ZsURm/cJtPS0tDx44dYW9vDwcHB/Tq1Qs3btxQGVbT/RkAfPvtt/D19YWFhQVq1KiBH374ocTPS5P9dUmnO5OSkhASEgIbGxvY2toiPDwcaWlpKsNcuHAB3bp1Q8WKFWFhYQFXV1c0b95coyNFK1asgJ+fn3LfmZCQUOxw+fn5mDZtmvLzcnZ2RlRUlNo6LI4m9RVmj19++QVBQUGwtLSEj48PFixYoDY9Tfb5L/qu1K9fHwAQFRWl3O9qvC8QEi1fvlwAEIcOHVJpnz9/vgAglixZIoQQol+/fmLp0qUiNTVVpKamiqlTpworKysRGxurMp6np6dwd3cXPj4+YtmyZWLXrl3i4MGDYv/+/cLKykq0adNG7N+/X+zfv1/8888/Jdb18OFD4e/vLxwcHMSXX34ptm7dKoYNGyaqVKkiAIjly5crh925c6eQy+WiSZMmIikpSWzZskX069dPbbjiPHnyRISFhQkzMzMxZswYkZKSItavXy8++eQTkZiYqLJcffv2Vb6+ePGi2vT/+ecf4eDgIAIDA0VCQoLYtm2bGD16tDAxMRFTpkxRDrdr1y4BQHh5eYmePXuKTZs2icTERFGlShVRrVo18fTpU+X0GjduLNzc3JTrbP/+/UIIIRITEwUA8eGHH4pt27aJ7du3i8WLF4thw4a9cHkL6/by8hLvvPOOWLdunVi3bp0IDAwU5cuXF3fu3Hnh+CNHjhSLFi0SW7ZsETt37hRz584VFSpUEFFRUcphrl+/LiwsLMSECRNUxs3JyRFWVlbio48+UrZNnTpVzJ07V2zatEns3r1bLF68WHh7e4uwsDCVcfv27Svkcrnw9/cXs2fPFtu3bxeTJk0SMplMuQ0+fvxYbNmyRQAQ/fv3V66vc+fOlbg82dnZolKlSsLZ2VksXrxYbNmyRQwdOlQAEIMGDVIOV/iZ7dq1q9j1Wbgd3LhxQ8jlcrVlf/r0qahYsaLo2LGjsu37778XMplMvPvuuyI5OVls2LBBtG3bVpiamort27crh5s8ebIAIDw9PcXHH38sUlNTxbp168S5c+dE586dBQCV7ePx48clLu+iRYtEfHy8WL9+vdizZ49YuXKlqFWrlvDz8xP5+fkq69vT01Nt/MJante7d28hk8nEgAEDxK+//io2b94spk+fLubPn68cBoCYPHmy2vSKfq8K90c+Pj7iww8/FFu3bhXfffedKF++vNo2UZznt5P58+eL1NRUERUVJQCI8ePHC19fX7F06VKxdetW0bZtWwFAHD58WDn+v//+KwYNGiR+/PFHsXv3brFx40bRv39/YWJiovbZF12mkraRogqX0cPDQ0RHR4vNmzeLJUuWCBcXF+Hh4SFu376tHPazzz4TAET37t3Fpk2bREJCgvDx8REODg7izJkzyuFCQ0NFaGhosevj+c+xcHutVKmSCAsLEz///LPYtm2buHjxYon1Fv2MHjx4IGrXri0qVKgg5syZI7Zv3y7mz58vHBwcRLNmzURBQYEQQoh9+/YJmUwmunXrJlJSUsTOnTvF8uXLRe/evV+4foTQbJt6//33BQAxdOhQsWXLFrF48WLh7OwsPDw8xI0bN1TWjZOTk/Dz81P77GNjY0VgYKBITEwUKSkpomHDhsLCwkJcvXpVOf7z37+PPvpIbN26VcyZM0fY2NiI4OBgle+Npvuzb775RgAQnTp1Ehs3bhSrV68Wvr6+wtPTs9jPS5P9dXHfzenTpwuZTCaio6PFxo0bRXJysggJCRE2NjYqv79+fn6iatWq4vvvvxd79uwRa9euFaNHj9Z4W46MjBQbNmwQq1atElWrVhUeHh4qy6FQKMQ777wjbGxsRGxsrEhNTRXfffedqFSpkqhRo4Z4+PDhC+ejSX2enp6iUqVKokqVKmLZsmUiJS
VF9OzZUwAQn3/+uXI4Tff5JX1Xjh8/rlzuTz/9VLnfvXz58guXoVCpQ9qBAwfEkydPxL1798TGjRuFs7OzsLOzE1l
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"lazy_customer_plot(campaigns_sport)"
|
2024-03-04 16:55:58 +01:00
|
|
|
|
]
|
2024-03-06 11:56:52 +01:00
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "038423ec-d095-4297-8ea8-42d205da510b",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"id": "264dd0f3-721b-4ddb-9e7c-0d21c6c0ddeb",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def display_databases(directory_path, file_name):\n",
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
" This function returns the file from s3 storage \n",
|
|
|
|
|
" \"\"\"\n",
|
|
|
|
|
" file_path = \"projet-bdc2324-team1\" + \"/Generalization/\" + directory_path + \"/\" + file_name + \".csv\"\n",
|
|
|
|
|
" print(\"File path : \", file_path)\n",
|
|
|
|
|
" with fs.open(file_path, mode=\"rb\") as file_in:\n",
|
|
|
|
|
" df = pd.read_csv(file_in, sep=\",\") \n",
|
|
|
|
|
" return df "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 17,
|
|
|
|
|
"id": "f0cfdd97-5ba2-4209-b827-d10ef0e80262",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"File path : projet-bdc2324-team1/Generalization/musique/Test_set.csv\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"name": "stderr",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"/tmp/ipykernel_439/3124665301.py:8: DtypeWarning: Columns (20,29,39) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
|
|
|
|
" df = pd.read_csv(file_in, sep=\",\")\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>customer_id</th>\n",
|
|
|
|
|
" <th>nb_tickets</th>\n",
|
|
|
|
|
" <th>nb_purchases</th>\n",
|
|
|
|
|
" <th>total_amount</th>\n",
|
|
|
|
|
" <th>nb_suppliers</th>\n",
|
|
|
|
|
" <th>vente_internet_max</th>\n",
|
|
|
|
|
" <th>purchase_date_min</th>\n",
|
|
|
|
|
" <th>purchase_date_max</th>\n",
|
|
|
|
|
" <th>time_between_purchase</th>\n",
|
|
|
|
|
" <th>nb_tickets_internet</th>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <th>gender_label</th>\n",
|
|
|
|
|
" <th>gender_female</th>\n",
|
|
|
|
|
" <th>gender_male</th>\n",
|
|
|
|
|
" <th>gender_other</th>\n",
|
|
|
|
|
" <th>country_fr</th>\n",
|
|
|
|
|
" <th>has_tags</th>\n",
|
|
|
|
|
" <th>nb_campaigns</th>\n",
|
|
|
|
|
" <th>nb_campaigns_opened</th>\n",
|
|
|
|
|
" <th>time_to_open</th>\n",
|
|
|
|
|
" <th>y_has_purchased</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>10_1</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>other</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>10_2</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>other</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>10_3</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>other</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>10_4</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>other</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>10_5</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>other</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1523683</th>\n",
|
|
|
|
|
" <td>14_6884748</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>male</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1523684</th>\n",
|
|
|
|
|
" <td>14_6884749</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>male</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1523685</th>\n",
|
|
|
|
|
" <td>14_6884750</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>male</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1523686</th>\n",
|
|
|
|
|
" <td>14_6884751</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>female</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1523687</th>\n",
|
|
|
|
|
" <td>14_6884753</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>male</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>1523688 rows × 41 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
|
|
|
|
"0 10_1 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"1 10_2 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"2 10_3 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"3 10_4 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"4 10_5 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
"1523683 14_6884748 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"1523684 14_6884749 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"1523685 14_6884750 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"1523686 14_6884751 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"1523687 14_6884753 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
|
|
|
|
"0 0.0 NaN NaN \n",
|
|
|
|
|
"1 0.0 NaN NaN \n",
|
|
|
|
|
"2 0.0 NaN NaN \n",
|
|
|
|
|
"3 0.0 NaN NaN \n",
|
|
|
|
|
"4 0.0 NaN NaN \n",
|
|
|
|
|
"... ... ... ... \n",
|
|
|
|
|
"1523683 0.0 NaN NaN \n",
|
|
|
|
|
"1523684 0.0 NaN NaN \n",
|
|
|
|
|
"1523685 0.0 NaN NaN \n",
|
|
|
|
|
"1523686 0.0 NaN NaN \n",
|
|
|
|
|
"1523687 0.0 NaN NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
|
|
|
|
|
"0 NaN 0.0 ... other \n",
|
|
|
|
|
"1 NaN 0.0 ... other \n",
|
|
|
|
|
"2 NaN 0.0 ... other \n",
|
|
|
|
|
"3 NaN 0.0 ... other \n",
|
|
|
|
|
"4 NaN 0.0 ... other \n",
|
|
|
|
|
"... ... ... ... ... \n",
|
|
|
|
|
"1523683 NaN 0.0 ... male \n",
|
|
|
|
|
"1523684 NaN 0.0 ... male \n",
|
|
|
|
|
"1523685 NaN 0.0 ... male \n",
|
|
|
|
|
"1523686 NaN 0.0 ... female \n",
|
|
|
|
|
"1523687 NaN 0.0 ... male \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" gender_female gender_male gender_other country_fr has_tags \\\n",
|
|
|
|
|
"0 0 0 1 NaN 0 \n",
|
|
|
|
|
"1 0 0 1 NaN 0 \n",
|
|
|
|
|
"2 0 0 1 NaN 0 \n",
|
|
|
|
|
"3 0 0 1 NaN 0 \n",
|
|
|
|
|
"4 0 0 1 NaN 0 \n",
|
|
|
|
|
"... ... ... ... ... ... \n",
|
|
|
|
|
"1523683 0 1 0 1.0 0 \n",
|
|
|
|
|
"1523684 0 1 0 1.0 0 \n",
|
|
|
|
|
"1523685 0 1 0 1.0 0 \n",
|
|
|
|
|
"1523686 1 0 0 1.0 0 \n",
|
|
|
|
|
"1523687 0 1 0 1.0 0 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" nb_campaigns nb_campaigns_opened time_to_open y_has_purchased \n",
|
|
|
|
|
"0 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"1 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"2 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"3 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"4 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"... ... ... ... ... \n",
|
|
|
|
|
"1523683 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"1523684 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"1523685 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"1523686 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"1523687 0.0 0.0 NaN NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[1523688 rows x 41 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 17,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"train = display_databases('musique', 'Test_set')\n",
|
|
|
|
|
"train"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 18,
|
|
|
|
|
"id": "b6a6feb7-2557-4932-8038-24cd9b363665",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"array([nan])"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 18,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"train['y_has_purchased'].unique()"
|
|
|
|
|
]
|
2024-03-04 16:55:58 +01:00
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.11.6"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
}
|