1240 lines
		
	
	
		
			100 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			1240 lines
		
	
	
		
			100 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|  | { | |||
|  |  "cells": [ | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 1, | |||
|  |    "id": "dd143b00-1989-44cf-8558-a30087d17f70", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "import pandas as pd\n", | |||
|  |     "import os\n", | |||
|  |     "import s3fs\n", | |||
|  |     "import warnings\n", | |||
|  |     "from datetime import date, timedelta, datetime\n", | |||
|  |     "import numpy as np\n", | |||
|  |     "import matplotlib.pyplot as plt" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 2, | |||
|  |    "id": "08c63120-1b56-4145-9014-18a637b22876", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "exec(open('../../0_KPI_functions.py').read())" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 3, | |||
|  |    "id": "f8bd679d-fa76-49d4-9ec1-9f15516f16d3", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "# Ignore warning\n", | |||
|  |     "warnings.filterwarnings('ignore')" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "markdown", | |||
|  |    "id": "ec9e996d-3eae-4836-8cf5-268e5dc0d672", | |||
|  |    "metadata": {}, | |||
|  |    "source": [ | |||
|  |     "# Statistiques descriptives : compagnies sport" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "markdown", | |||
|  |    "id": "43f81515-fbd0-49c0-b3f8-0e0fb663e2c1", | |||
|  |    "metadata": {}, | |||
|  |    "source": [ | |||
|  |     "## Importations et chargement des données" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 4, | |||
|  |    "id": "945c59bb-05b4-4f21-82f0-0db40d7957b3", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "# Create filesystem object\n", | |||
|  |     "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", | |||
|  |     "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 5, | |||
|  |    "id": "41a67995-0a08-45c0-bbad-6e6cee5474c8", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [ | |||
|  |     { | |||
|  |      "name": "stdout", | |||
|  |      "output_type": "stream", | |||
|  |      "text": [ | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_5/customerplus_cleaned.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_5/campaigns_information.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_5/products_purchased_reduced.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_5/target_information.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_6/customerplus_cleaned.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_6/campaigns_information.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_6/products_purchased_reduced.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_6/target_information.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_7/customerplus_cleaned.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_7/campaigns_information.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_7/products_purchased_reduced.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_7/target_information.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_8/customerplus_cleaned.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_8/campaigns_information.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_8/products_purchased_reduced.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_8/target_information.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_9/customerplus_cleaned.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_9/campaigns_information.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_9/products_purchased_reduced.csv\n", | |||
|  |       "File path :  projet-bdc2324-team1/0_Input/Company_9/target_information.csv\n" | |||
|  |      ] | |||
|  |     } | |||
|  |    ], | |||
|  |    "source": [ | |||
|  |     "# création des bases contenant les KPI pour les 5 compagnies de spectacle\n", | |||
|  |     "\n", | |||
|  |     "# liste des compagnies de spectacle\n", | |||
|  |     "nb_compagnie=['5','6','7','8','9']\n", | |||
|  |     "\n", | |||
|  |     "customer_sport = pd.DataFrame()\n", | |||
|  |     "campaigns_sport = pd.DataFrame()\n", | |||
|  |     "products_sport = pd.DataFrame()\n", | |||
|  |     "tickets_sport = pd.DataFrame()\n", | |||
|  |     "\n", | |||
|  |     "# début de la boucle permettant de générer des datasets agrégés pour les 5 compagnies de spectacle\n", | |||
|  |     "for directory_path in nb_compagnie:\n", | |||
|  |     "    df_customerplus_clean_0 = display_databases(directory_path, file_name = \"customerplus_cleaned\")\n", | |||
|  |     "    df_campaigns_information = display_databases(directory_path, file_name = \"campaigns_information\", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])\n", | |||
|  |     "    df_products_purchased_reduced = display_databases(directory_path, file_name = \"products_purchased_reduced\", datetime_col = ['purchase_date'])\n", | |||
|  |     "    df_target_information = display_databases(directory_path, file_name = \"target_information\")\n", | |||
|  |     "    \n", | |||
|  |     "    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) \n", | |||
|  |     "    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)\n", | |||
|  |     "    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)\n", | |||
|  |     "\n", | |||
|  |     "        \n", | |||
|  |     "# creation de la colonne Number compagnie, qui permettra d'agréger les résultats\n", | |||
|  |     "    df_tickets_kpi[\"number_company\"]=int(directory_path)\n", | |||
|  |     "    df_campaigns_kpi[\"number_company\"]=int(directory_path)\n", | |||
|  |     "    df_customerplus_clean[\"number_company\"]=int(directory_path)\n", | |||
|  |     "    df_target_information[\"number_company\"]=int(directory_path)\n", | |||
|  |     "\n", | |||
|  |     "# Traitement des index\n", | |||
|  |     "    df_tickets_kpi[\"customer_id\"]= directory_path + '_' +  df_tickets_kpi['customer_id'].astype('str')\n", | |||
|  |     "    df_campaigns_kpi[\"customer_id\"]= directory_path + '_' +  df_campaigns_kpi['customer_id'].astype('str') \n", | |||
|  |     "    df_customerplus_clean[\"customer_id\"]= directory_path + '_' +  df_customerplus_clean['customer_id'].astype('str') \n", | |||
|  |     "    df_products_purchased_reduced[\"customer_id\"]= directory_path + '_' +  df_products_purchased_reduced['customer_id'].astype('str') \n", | |||
|  |     "\n", | |||
|  |     "# Concaténation\n", | |||
|  |     "    customer_sport = pd.concat([customer_sport, df_customerplus_clean], ignore_index=True)\n", | |||
|  |     "    campaigns_sport = pd.concat([campaigns_sport, df_campaigns_kpi], ignore_index=True)\n", | |||
|  |     "    tickets_sport = pd.concat([tickets_sport, df_tickets_kpi], ignore_index=True)\n", | |||
|  |     "    products_sport = pd.concat([products_sport, df_products_purchased_reduced], ignore_index=True)\n", | |||
|  |     "    " | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "markdown", | |||
|  |    "id": "62922029-8071-402e-8115-c145a2874a2f", | |||
|  |    "metadata": {}, | |||
|  |    "source": [ | |||
|  |     "## Statistiques descriptives" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "markdown", | |||
|  |    "id": "d347bca9-3041-4414-b18e-19b626998a3e", | |||
|  |    "metadata": {}, | |||
|  |    "source": [ | |||
|  |     "### 0. Détection du client anonyme (outlier) - utile pour la section 3" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 6, | |||
|  |    "id": "c4d4b2ad-8a3c-477b-bc52-dd4860527bfe", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [ | |||
|  |     { | |||
|  |      "data": { | |||
|  |       "text/plain": [ | |||
|  |        "array([5, 6, 7, 8, 9])" | |||
|  |       ] | |||
|  |      }, | |||
|  |      "execution_count": 6, | |||
|  |      "metadata": {}, | |||
|  |      "output_type": "execute_result" | |||
|  |     } | |||
|  |    ], | |||
|  |    "source": [ | |||
|  |     "sport_comp = tickets_sport['number_company'].unique()\n", | |||
|  |     "sport_comp" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 7, | |||
|  |    "id": "97a9e235-1c04-46bf-9f3c-5496e141cc40", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "def outlier_detection(company_list, show_diagram=False):\n", | |||
|  |     "\n", | |||
|  |     "    outlier_list = list()\n", | |||
|  |     "    \n", | |||
|  |     "    for company in company_list:\n", | |||
|  |     "        total_amount_share = tickets_sport[tickets_sport['number_company']==company].groupby('customer_id')['total_amount'].sum().reset_index()\n", | |||
|  |     "        total_amount_share['CA'] = total_amount_share['total_amount'].sum()\n", | |||
|  |     "        total_amount_share['share_total_amount'] = total_amount_share['total_amount']/total_amount_share['CA']\n", | |||
|  |     "        \n", | |||
|  |     "        total_amount_share_index = total_amount_share.set_index('customer_id')\n", | |||
|  |     "        df_circulaire = total_amount_share_index['total_amount'].sort_values(axis = 0, ascending = False)\n", | |||
|  |     "        top = df_circulaire[:1]\n", | |||
|  |     "        outlier_list.append(top.index[0])\n", | |||
|  |     "        rest = df_circulaire[1:]\n", | |||
|  |     "    \n", | |||
|  |     "        # Calculez la somme du reste\n", | |||
|  |     "        rest_sum = rest.sum()\n", | |||
|  |     "        \n", | |||
|  |     "        # Créez une nouvelle série avec les cinq plus grandes parts et 'Autre'\n", | |||
|  |     "        new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])\n", | |||
|  |     "        \n", | |||
|  |     "        # Créez le graphique circulaire\n", | |||
|  |     "        if show_diagram:\n", | |||
|  |     "            plt.figure(figsize=(3, 3))\n", | |||
|  |     "            plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)\n", | |||
|  |     "            plt.axis('equal')  # Assurez-vous que le graphique est un cercle\n", | |||
|  |     "            plt.title(f'Répartition des montants totaux pour la compagnie {company}')\n", | |||
|  |     "            plt.show()\n", | |||
|  |     "    return outlier_list\n", | |||
|  |     "    " | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 8, | |||
|  |    "id": "770cd3fc-bfe2-4a69-89bc-0eb946311130", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [ | |||
|  |     { | |||
|  |      "data": { | |||
|  |       "text/plain": [ | |||
|  |        "['5_191835', '6_591412', '7_49632', '8_1942', '9_19683']" | |||
|  |       ] | |||
|  |      }, | |||
|  |      "execution_count": 8, | |||
|  |      "metadata": {}, | |||
|  |      "output_type": "execute_result" | |||
|  |     } | |||
|  |    ], | |||
|  |    "source": [ | |||
|  |     "outlier_list = outlier_detection(sport_comp)\n", | |||
|  |     "outlier_list" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 9, | |||
|  |    "id": "70b6e961-c303-465e-93f4-609721d38454", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [ | |||
|  |     { | |||
|  |      "name": "stdout", | |||
|  |      "output_type": "stream", | |||
|  |      "text": [ | |||
|  |       "Suppression Réussie\n" | |||
|  |      ] | |||
|  |     } | |||
|  |    ], | |||
|  |    "source": [ | |||
|  |     "# On filtre les outliers\n", | |||
|  |     "\n", | |||
|  |     "def remove_elements(lst, elements_to_remove):\n", | |||
|  |     "    return [x for x in lst if x not in elements_to_remove]\n", | |||
|  |     "    \n", | |||
|  |     "databases = [customer_sport, campaigns_sport, tickets_sport, products_sport]\n", | |||
|  |     "\n", | |||
|  |     "for dataset in databases:\n", | |||
|  |     "    dataset['customer_id'] = dataset['customer_id'].apply(lambda x: remove_elements(x, outlier_list))\n", | |||
|  |     "\n", | |||
|  |     "# On test\n", | |||
|  |     "\n", | |||
|  |     "bool = '5_191835' in customer_sport['customer_id']\n", | |||
|  |     "if not bool:\n", | |||
|  |     "    print(\"Suppression Réussie\")" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 10, | |||
|  |    "id": "b54b920a-7b46-490f-ba7e-d1859055a4e3", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [ | |||
|  |     { | |||
|  |      "data": { | |||
|  |       "text/html": [ | |||
|  |        "<div>\n", | |||
|  |        "<style scoped>\n", | |||
|  |        "    .dataframe tbody tr th:only-of-type {\n", | |||
|  |        "        vertical-align: middle;\n", | |||
|  |        "    }\n", | |||
|  |        "\n", | |||
|  |        "    .dataframe tbody tr th {\n", | |||
|  |        "        vertical-align: top;\n", | |||
|  |        "    }\n", | |||
|  |        "\n", | |||
|  |        "    .dataframe thead th {\n", | |||
|  |        "        text-align: right;\n", | |||
|  |        "    }\n", | |||
|  |        "</style>\n", | |||
|  |        "<table border=\"1\" class=\"dataframe\">\n", | |||
|  |        "  <thead>\n", | |||
|  |        "    <tr style=\"text-align: right;\">\n", | |||
|  |        "      <th></th>\n", | |||
|  |        "      <th>customer_id</th>\n", | |||
|  |        "      <th>street_id</th>\n", | |||
|  |        "      <th>structure_id</th>\n", | |||
|  |        "      <th>mcp_contact_id</th>\n", | |||
|  |        "      <th>fidelity</th>\n", | |||
|  |        "      <th>tenant_id</th>\n", | |||
|  |        "      <th>is_partner</th>\n", | |||
|  |        "      <th>deleted_at</th>\n", | |||
|  |        "      <th>gender</th>\n", | |||
|  |        "      <th>is_email_true</th>\n", | |||
|  |        "      <th>...</th>\n", | |||
|  |        "      <th>purchase_count</th>\n", | |||
|  |        "      <th>first_buying_date</th>\n", | |||
|  |        "      <th>country</th>\n", | |||
|  |        "      <th>gender_label</th>\n", | |||
|  |        "      <th>gender_female</th>\n", | |||
|  |        "      <th>gender_male</th>\n", | |||
|  |        "      <th>gender_other</th>\n", | |||
|  |        "      <th>country_fr</th>\n", | |||
|  |        "      <th>has_tags</th>\n", | |||
|  |        "      <th>number_company</th>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "  </thead>\n", | |||
|  |        "  <tbody>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>0</th>\n", | |||
|  |        "      <td>[5, _, 6, 0, 0, 9, 7, 4, 5]</td>\n", | |||
|  |        "      <td>1372685</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>2</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>af</td>\n", | |||
|  |        "      <td>other</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>1</th>\n", | |||
|  |        "      <td>[5, _, 6, 0, 1, 1, 2, 2, 8]</td>\n", | |||
|  |        "      <td>1372685</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>2</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>af</td>\n", | |||
|  |        "      <td>other</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>2</th>\n", | |||
|  |        "      <td>[5, _, 6, 0, 5, 8, 9, 5, 0]</td>\n", | |||
|  |        "      <td>1372685</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>2</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>af</td>\n", | |||
|  |        "      <td>other</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>3</th>\n", | |||
|  |        "      <td>[5, _, 6, 0, 6, 2, 4, 0, 4]</td>\n", | |||
|  |        "      <td>1372685</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>2</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>af</td>\n", | |||
|  |        "      <td>other</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>4</th>\n", | |||
|  |        "      <td>[5, _, 2, 5, 0, 2, 1, 7]</td>\n", | |||
|  |        "      <td>78785</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>11035.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>fr</td>\n", | |||
|  |        "      <td>female</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "  </tbody>\n", | |||
|  |        "</table>\n", | |||
|  |        "<p>5 rows × 29 columns</p>\n", | |||
|  |        "</div>" | |||
|  |       ], | |||
|  |       "text/plain": [ | |||
|  |        "                   customer_id  street_id  structure_id  mcp_contact_id  \\\n", | |||
|  |        "0  [5, _, 6, 0, 0, 9, 7, 4, 5]    1372685           NaN             NaN   \n", | |||
|  |        "1  [5, _, 6, 0, 1, 1, 2, 2, 8]    1372685           NaN             NaN   \n", | |||
|  |        "2  [5, _, 6, 0, 5, 8, 9, 5, 0]    1372685           NaN             NaN   \n", | |||
|  |        "3  [5, _, 6, 0, 6, 2, 4, 0, 4]    1372685           NaN             NaN   \n", | |||
|  |        "4     [5, _, 2, 5, 0, 2, 1, 7]      78785           NaN         11035.0   \n", | |||
|  |        "\n", | |||
|  |        "   fidelity  tenant_id  is_partner  deleted_at  gender  is_email_true  ...  \\\n", | |||
|  |        "0         0       1771       False         NaN       2           True  ...   \n", | |||
|  |        "1         0       1771       False         NaN       2           True  ...   \n", | |||
|  |        "2         0       1771       False         NaN       2           True  ...   \n", | |||
|  |        "3         0       1771       False         NaN       2           True  ...   \n", | |||
|  |        "4         0       1771       False         NaN       0           True  ...   \n", | |||
|  |        "\n", | |||
|  |        "   purchase_count first_buying_date  country  gender_label  gender_female  \\\n", | |||
|  |        "0               0               NaN       af         other              0   \n", | |||
|  |        "1               0               NaN       af         other              0   \n", | |||
|  |        "2               0               NaN       af         other              0   \n", | |||
|  |        "3               0               NaN       af         other              0   \n", | |||
|  |        "4               0               NaN       fr        female              1   \n", | |||
|  |        "\n", | |||
|  |        "   gender_male  gender_other  country_fr  has_tags  number_company  \n", | |||
|  |        "0            0             1         0.0         0               5  \n", | |||
|  |        "1            0             1         0.0         0               5  \n", | |||
|  |        "2            0             1         0.0         0               5  \n", | |||
|  |        "3            0             1         0.0         0               5  \n", | |||
|  |        "4            0             0         1.0         0               5  \n", | |||
|  |        "\n", | |||
|  |        "[5 rows x 29 columns]" | |||
|  |       ] | |||
|  |      }, | |||
|  |      "execution_count": 10, | |||
|  |      "metadata": {}, | |||
|  |      "output_type": "execute_result" | |||
|  |     } | |||
|  |    ], | |||
|  |    "source": [ | |||
|  |     "customer_sport.head()" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "markdown", | |||
|  |    "id": "d40fe668-e1d7-4544-9db8-02498afe65fe", | |||
|  |    "metadata": {}, | |||
|  |    "source": [ | |||
|  |     "### 1. customerplus_clean" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 55, | |||
|  |    "id": "eec1ac0b-2502-452b-97e6-69ffb77156d6", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "def compute_nb_clients(customer_sport):\n", | |||
|  |     "    company_nb_clients = customer_sport[customer_sport[\"purchase_count\"]>0].groupby(\"number_company\")[\"customer_id\"].count().reset_index()\n", | |||
|  |     "    plt.bar(company_nb_clients[\"number_company\"], company_nb_clients[\"customer_id\"]/1000)\n", | |||
|  |     "\n", | |||
|  |     "    # Ajout de titres et d'étiquettes\n", | |||
|  |     "    plt.xlabel('Company')\n", | |||
|  |     "    plt.ylabel(\"Nombre de clients (milliers)\")\n", | |||
|  |     "    plt.title(\"Nombre de clients de chaque compagnie de spectacle\")\n", | |||
|  |     "    \n", | |||
|  |     "    # Affichage du barplot\n", | |||
|  |     "    plt.show()" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 56, | |||
|  |    "id": "db4494e7-6f65-4f7e-bf8c-8ec321d0b02d", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [ | |||
|  |     { | |||
|  |      "data": { | |||
|  |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHFCAYAAAAUpjivAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJuElEQVR4nO3dd3zNd///8edBtghSEiEIYoZarZa2oVZTlKKqepmlWlq1alzaCjVqVHVy6TCqRnsVV6lZNKpRe9So0dqkdmKGJO/fH345X0dCc+TEiY/H/XY7t5vz/rzP5/M67zPy9P6MYzPGGAEAAFhUDncXAAAAkJUIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIOwAAwNIIO1lsypQpstls8vb21sGDB9Msr127tiIiItxQmdShQwflzp3bLdv+JzabTdHR0Xd1m7Vr11bt2rXveh0LFy68a8/VHeNavHhxNW7c+K5uE9nfgQMHZLPZNGXKlCzfljve9+5w7NgxRUdHa8uWLVm6nbv52rkKYecuSUxM1FtvveXuMuCkNWvWqHPnzlm6jYULF2rIkCFZug0guylUqJDWrFmjRo0aubsUyzh27JiGDBmS5WHnXkTYuUueeuopzZgxQ1u3bnV3KS5hjNHly5fdXUaWe+SRR1SkSBF3lwFYjpeXlx555BEVKFDA3aXgPkDYuUv69eunwMBA9e/f/x/7XrlyRQMHDlRYWJg8PT1VuHBhde/eXefOnXPol7p7YMGCBapSpYp8fHxUrlw5LViwQNL1XWjlypWTn5+fHn74YW3YsCHd7e3YsUN169aVn5+fChQooNdee02XLl1y6GOz2fTaa69p4sSJKleunLy8vDR16lRJ0t69e9WmTRsVLFhQXl5eKleunD799NMMjUtCQoK6dOmiwMBA5c6dW0899ZT27NmTbt/MbCclJUUff/yxKleuLB8fH+XNm1ePPPKIfvjhh9s+Lr3p77i4OHXt2lVFihSRp6enwsLCNGTIECUlJdn7pE7zjh07VuPGjVNYWJhy586tRx99VL/99pu9X4cOHezPwWaz2W8HDhyQJH333XeqUaOGAgIC5OvrqxIlSqhTp07/+Hyz47guXrxYVatWlY+Pj8qWLauvvvrKYfnJkyfVrVs3lS9fXrlz51bBggX15JNP6pdffkmzrmPHjqlVq1by9/dXQECAnn/+ef32229pptbT2zUpXR/34sWLO7RdvXpVw4YNU9myZeXl5aUCBQqoY8eOOnnyZIbGYu3atWrSpIkCAwPl7e2tkiVLqmfPng59Vq9erbp168rf31++vr6qWbOmfvzxR4c+qbu+V6xYYX8N8+TJo3bt2unixYuKi4tTq1atlDdvXhUqVEh9+/bVtWvX7I9Pfe+NHj1aw4cPV9GiReXt7a3q1atr+fLlDtvat2+fOnbsqPDwcPn6+qpw4cJq0qSJfv/99zTPb8eOHWrQoIF8fX1VoEABde/eXT/++KNsNpt+/vlnhzGPiIjQ+vXr9fjjj9vft++9955SUlLS1HnzrpB74fvknz6XP//8s2w2m6ZPn67evXsrODhYPj4+ioyM1ObNm9Osb8OGDXrmmWeUP39+eXt7q0qVKvr222/T9Dt69KhefvllhYaGytPTUyEhIWrZsqX+/vtv/fzzz3rooYckSR07drR/l6R+f23YsEGtW7dW8eLF5ePjo+LFi+uFF15I9/CK223ndjIzplnOIEtNnjzZSDLr1683H374oZFkli9fbl8eGRlpKlSoYL+fkpJiGjZsaHLlymXefvtts3TpUjN27Fjj5+dnqlSpYq5cuWLvW6xYMVOkSBETERFhZs6caRYuXGhq1KhhPDw8zDvvvGNq1apl5syZY+bOnWtKly5tgoKCzKVLl+yPb9++vfH09DRFixY1w4cPN0uXLjXR0dEmV65cpnHjxg7PQ5IpXLiwqVS
pkpkxY4ZZsWKF2b59u9mxY4cJCAgwFStWNNOmTTNLly41ffr0MTly5DDR0dG3HZuUlBRTp04d4+XlZd/+4MGDTYkSJYwkM3jwYHvfzGzHGGPatm1rbDab6dy5s/nf//5nFi1aZIYPH24+/PBDh9ciMjIyzfO+sY7jx4+b0NBQU6xYMfOf//zH/PTTT+bdd981Xl5epkOHDvZ++/fvN5JM8eLFzVNPPWXmzZtn5s2bZypWrGjy5ctnzp07Z4wxZt++faZly5ZGklmzZo39duXKFRMbG2tsNptp3bq1WbhwoVmxYoWZPHmyadu27T01rqnv0/Lly5tp06aZJUuWmOeee85IMjExMfZ+f/zxh3n11VfNrFmzzM8//2wWLFhgXnrpJZMjRw6zcuVKe79Lly6ZcuXKmYCAAPPxxx+bJUuWmB49epiiRYsaSWby5Mm3fU2Nuf7eL1asmP1+cnKyeeqpp4yfn58ZMmSIWbZsmfniiy9M4cKFTfny5R0+N+lZvHix8fDwMJUqVTJTpkwxK1asMF999ZVp3bq1vc/PP/9sPDw8TLVq1czs2bPNvHnzTIMGDYzNZjOzZs2y90v9zggLCzN9+vQxS5cuNaNGjTI5c+Y0L7zwgqlataoZNmyYWbZsmenfv7+RZN5//33741Pfe6Ghoeaxxx4z33//vfnuu+/MQw89ZDw8PExsbKy9b0xMjOnTp4/573//a2JiYszcuXNNs2bNjI+Pj/njjz/s/Y4dO2YCAwNN0aJFzZQpU8zChQtN27ZtTfHixY0kh9cnMjLSBAYGmvDwcDNx4kSzbNky061bNyPJTJ06NU2dN75e98L3SUY+lytXrrS/Bk2bNjXz588306dPN6VKlTJ58uQxf/75p73vihUrjKenp3n88cfN7NmzzeLFi02HDh3SjM2RI0dMoUKFzAMPPGDGjRtnfvrpJzN79mzTqVMns2vXLhMfH29/77z11lv275LDhw8bY4z57rvvzDvvvGPmzp1rYmJizKxZs0xkZKQpUKCAOXnyZIa3kxWv3d1A2MliN4adxMREU6JECVO9enWTkpJijEkbdhYvXmwkmdGjRzusZ/bs2UaSmTRpkr2tWLFixsfHxxw5csTetmXLFiPJFCpUyFy8eNHePm/ePCPJ/PDDD/a29u3bG0kOf5iMMWb48OFGklm9erW9TZIJCAgwZ86ccejbsGFDU6RIERMfH+/Q/tprrxlvb+80/W+0aNGi227/xi+nzGxn1apVRpIZNGjQLfsYk7Gw07VrV5M7d25z8OBBh35jx441ksyOHTuMMf/3ZVCxYkWTlJRk77du3TojycycOdPe1r17d5Pe/ztS15kajDIqu41rsWLFjLe3t8OYXb582eTPn9907dr1lo9LSkoy165dM3Xr1jXPPvusvX3ChAlGkvnf//7n0L9Lly53HHZmzpxpJJnvv//eod/69euNJPPZZ5/d9jmWLFnSlCxZ0ly+fPmWfR555BFTsGBBc/78eYfnGBERYYoUKWL/Tkj9znj99dcdHt+sWTMjyYwbN86hvXLlyqZq1ar2+6nvvZCQEId6EhISTP78+U29evVuWWNSUpK5evWqCQ8PN7169bK3v/nmm8Zms9nf36kaNmyYbtiRZNauXevQt3z58qZhw4Zp6rzx9boXvk8y8rlMDTtVq1a1v67GGHPgwAHj4eFhOnfubG8rW7asqVKlirl27ZrDOho3bmwKFSpkkpOTjTHGdOrUyXh4eJidO3fecrup79cbx/RWkpKSzIULF4yfn5/DmGVkO65+7e4GdmPdRZ6enho2bJg2bNiQ7hSlJK1YsULS9Wn2Gz333HPy8/NLMw1duXJlFS5c2H6/XLlykq5PJfv6+qZpT2/K8sUXX3S436ZNG0nSypUrHdqffPJJ5cuXz37/ypUrWr58uZ599ln5+voqKSnJfnv66ad15coVh102N0td/62276rtLFq0SJLUvXv3W/bJqAULFqhOnToKCQlxqCM
qKkqSFBMT49C/UaNGypkzp/1+pUqVJKX/OtwsdUq6VatW+vbbb3X06NEM1Zgdx7Vy5coqWrSo/b63t7dKly6dZhwmTpyoqlWrytv | |||
|  |       "text/plain": [ | |||
|  |        "<Figure size 640x480 with 1 Axes>" | |||
|  |       ] | |||
|  |      }, | |||
|  |      "metadata": {}, | |||
|  |      "output_type": "display_data" | |||
|  |     } | |||
|  |    ], | |||
|  |    "source": [ | |||
|  |     "compute_nb_clients(customer_sport)" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 59, | |||
|  |    "id": "a12a59a0-edfe-4e52-8037-9b875f823b33", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "def maximum_price_paid(customer_sport):\n", | |||
|  |     "    company_max_price = customer_sport.groupby(\"number_company\")[\"max_price\"].max().reset_index()\n", | |||
|  |     "    # Création du barplot\n", | |||
|  |     "    plt.bar(company_max_price[\"number_company\"], company_max_price[\"max_price\"])\n", | |||
|  |     "    \n", | |||
|  |     "    # Ajout de titres et d'étiquettes\n", | |||
|  |     "    plt.xlabel('Company')\n", | |||
|  |     "    plt.ylabel(\"Prix maximal d'un billet vendu\")\n", | |||
|  |     "    plt.title(\"Prix maximal de vente observé par compagnie de spectacle\")\n", | |||
|  |     "    \n", | |||
|  |     "    # Affichage du barplot\n", | |||
|  |     "    plt.show()" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 60, | |||
|  |    "id": "2c7c2d26-4e35-4163-b771-fa4d3e8ca83e", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [ | |||
|  |     { | |||
|  |      "data": { | |||
|  |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAAHGCAYAAAC7NbWGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABXWUlEQVR4nO3deVyN6f8/8NfRpvWotJLKMpGyjwiTLTGVscxYoqkYM7YxjRrLZ8ZYxs5g8DFmk7GMzBKDbNkaRlnCEPGxRKEwScmSOl2/P3y7f47KnJvTnBOv5+NxHjrXfZ37ft9355xerntTCCEEiIiIiEhj1XRdABEREVFVwwBFREREJBMDFBEREZFMDFBEREREMjFAEREREcnEAEVEREQkEwMUERERkUwMUEREREQyMUARERERycQARUT0nP7zn//AwcEBFy9e1HUpRPQvY4DSkpUrV0KhUEgPQ0ND1K5dGxEREbh27ZpG8wgPD4ebm1vlFqpDpdvo8uXL//qyL1++DIVCgZUrVz7X6/ft2weFQoF9+/ZptS5d2Lp1K6ZMmaKz5Ze+D44ePaqzGrRh27ZtWLp0KbZs2YJ69erpuhyqRAqF4l/5zLi5uSE8PLzSl6Nr9+/fx5QpU/6V79PK/N0xQGlZTEwMkpKSkJCQgGHDhmHdunXo0KED7t2794+vnTRpEjZs2PAvVKkbgYGBSEpKgpOTk65LeaVt3boVU6dO1XUZVVpmZiYiIiKwfv16vP7667ouhypZUlIS3nvvPV2X8dK4f/8+pk6dWuX/Q2qo6wJeNl5eXmjVqhUAoFOnTlCpVPjiiy+wceNGDBo0qNzX3L9/H2ZmZi/9/2Lt7OxgZ2en6zKIJKWfPblcXFyQnZ1dCRW9uOddJ6pYmzZtdF0C6SGOQFWy0g/elStXADzeTWdhYYFTp06hW7dusLS0RJcuXaRpT+7Ci42NhUKhwNKlS9XmOXnyZBgYGCAhIeGZy3Zzc0NQUBC2bNmC5s2bw9TUFI0aNcKWLVsAPN6V0qhRI5ibm6N169ZldqkcPXoUAwYMgJubG0xNTeHm5oaBAwdK6wIAQgi8+eabsLW1RUZGhtR+//59NG7cGI0aNZJG38rbhdexY0d4eXkhKSkJvr6+0nJiYmIAAPHx8WjRogXMzMzg7e2N7du3q9V44cIFREREoEGDBjAzM0OtWrUQHByMU6dOPXPbPMvZs2fRvXt3mJmZoWbNmhg+fDju3r1bbt9du3ahS5cusLKygpmZGdq1a4fdu3c/c/63bt2CsbExJk2aVO6yFQoFFi9eLLVlZ2fjgw8+QO3atWFsbAx3d3dMnToVxcXFUp/SXZTz58/HggUL4O7uDgsLC7Rt2xbJyclSv/DwcPz3v/8FALVdzqW/EyEEli1bhmbNmsHU1BTW1tZ4++23cenSJY223YEDB9ClSxdYWlrCzMwMvr6+iI+PL7dvbm4uIiIiYGNjA3NzcwQHB5dZzvHjxxEUFAR7e3uYmJjA2dkZgYGBuHr1qtRH05pL32t//PEHfH19YWZmhiFDhqBXr15wdXVFSUlJmRp9fHzQokUL2csqz5QpU6BQKHD8+HH06dMHVlZWUCqVGDx4MG7duqXWd/369ejWrRucnJykz+2ECRPKjGQ/6/ukImfPnsXAgQPh4OAAExMT1KlTB++++y4KCwulPqmpqXjrrbdgbW2N6tWro1mzZvjxxx/V5lO6W/unn37C+PHj4eTkBAsLCwQHB+PGjRu4e/cu3n//fdSsWRM1a9ZEREQECgoK1OahUCgwevRofPPNN3jttddgYmICT09PxMbGqvW7desWRo4cCU9PT1hYWMDe3h6dO3fG/v37y6zf1atX8fbbb8PS0hI1atTAoEGDcOTIkTK78Eu33YULF/Dmm2/CwsICLi4uiIqKUtsWpXU+vRtIk89lRYqKijBu3Dg4OjrCzMwM7du3x+HDh8vt+yLL2bNnDzp27Ah
bW1uYmpqiTp066Nu3L+7fvw/g/39vzJ07FzNmzECdOnVQvXp1tGrVqtzvsfPnzyMkJET6PDZq1Ej6PnnSnTt3EBUVhbp168LExAT29vZ48803cfbsWVy+fFn6j/TUqVOl76DSXZdyvtOftZxneZFtqkaQVsTExAgA4siRI2rtX331lQAgvv32WyGEEGFhYcLIyEi4ubmJWbNmid27d4sdO3ZI01xdXdVeP3z4cGFsbCzNd/fu3aJatWris88++8eaXF1dRe3atYWXl5dYt26d2Lp1q/Dx8RFGRkbi888/F+3atRNxcXFiw4YN4rXXXhMODg7i/v370ut/+eUX8fnnn4sNGzaIxMREERsbK/z8/ISdnZ24deuW1O/vv/8WtWvXFj4+PuLRo0fSupiamoqTJ0+W2Ubp6elSm5+fn7C1tRUeHh7ihx9+EDt27BBBQUECgJg6darw9vaWam/Tpo0wMTER165dk16fmJgooqKixK+//ioSExPFhg0bRK9evYSpqak4e/as1C89PV0AEDExMc/cZtnZ2cLe3l7UqlVLxMTEiK1bt4pBgwaJOnXqCABi7969Ut/Vq1cLhUIhevXqJeLi4sTmzZtFUFCQMDAwELt27Xrmcnr37i1cXFyESqVSax83bpwwNjYWf//9txBCiKysLOHi4iJcXV3FN998I3bt2iW++OILYWJiIsLDw8usn5ubm+jevbvYuHGj2Lhxo/D29hbW1tbizp07QgghLly4IN5++20BQCQlJUmPhw8fCiGEGDZsmDAyMhJRUVFi+/bt4qeffhINGzYUDg4OIjs7+5nrtG/fPmFkZCRatmwp1q9fLzZu3Ci6desmFAqFiI2NlfqVvg9cXFzEkCFDxLZt28S3334r7O3thYuLi8jNzRVCCFFQUCBsbW1Fq1atxM8//ywSExPF+vXrxfDhw8WZM2ek+Wlas5+fn7CxsREuLi5iyZIlYu/evSIxMVH8/vvvAoBISEhQW5+0tDQBQCxevFj2ssozefJkAUC4urqKTz75ROzYsUMsWLBAmJubi+bNm0ufHSGE+OKLL8TChQtFfHy82Ldvn1i+fLlwd3cXnTp1Upvns75PynPixAlhYWEh3NzcxPLly8Xu3bvFmjVrRL9+/UR+fr4QQoizZ88KS0tLUa9ePbFq1SoRHx8vBg4cKACIOXPmSPPau3evtD7h4eFi+/btYvny5cLCwkJ06tRJ+Pv7i+joaLFz504xZ84cYWBgID788EO1ekrfB56enmLdunVi06ZNonv37gKA+OWXX6R+Z8+eFSNGjBCxsbFi3759YsuWLWLo0KGiWrVqap/JgoICUb9+fWFjYyP++9//ih07doiPP/5YuLu7l/n8h4WFCWNjY9GoUSMxf/58sWvXLvH5558LhUIhpk6dWqbOyZMnS881/VxWJCwsTCgUCvHJJ5+InTt3igULFohatWoJKysrERYWppXlpKeni+rVqwt/f3+xceNGsW/fPrF27VoRGhoqfcZKvzdcXFxE+/btxW+//SZ++eUX8frrrwsjIyNx8OBBaX6nT58WSqVSeHt7i1WrVomdO3eKqKgoUa1aNTFlyhSpX35+vmjcuLEwNzcX06ZNEzt27BC//fab+Oijj8SePXvEw4cPxfbt2wUAMXToUOk76MKFC0IIzb/T/2k5lfW7exIDlJaU/lFITk4WRUVF4u7du2LLli3Czs5OWFpaSl+uYWFhAoBYsWJFmXmUF6AePnwomjdvLtzd3cWZM2eEg4OD8PPzE8XFxf9Yk6urqzA1NRVXr16V2k6cOCEACCcnJ3Hv3j2pfePGjQKA2LRpU4XzKy4uFgUFBcLc3Fx89dVXatMOHDggDA0NRWRkpFixYoUAIL7//vtyt9HTAQqAOHr0qNSWk5MjDAwMhKmpqVpYKq39yT9o5dX46NEj0aBBA/Hxxx9L7ZoGqPHjxwuFQiFOnDih1u7v768WoO7duydsbGxEcHCwWj+VSiWaNm0qWrd
u/czlbNq0SQAQO3fuVKvd2dlZ9O3bV2r74IMPhIWFhbhy5Yra6+fPny8AiNOnT6utn7e3t9p74/DhwwKAWLdundQ2atQoUd7/nZK | |||
|  |       "text/plain": [ | |||
|  |        "<Figure size 640x480 with 1 Axes>" | |||
|  |       ] | |||
|  |      }, | |||
|  |      "metadata": {}, | |||
|  |      "output_type": "display_data" | |||
|  |     } | |||
|  |    ], | |||
|  |    "source": [ | |||
|  |     "maximum_price_paid(customer_sport)" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 65, | |||
|  |    "id": "597d4361-8beb-43f4-9224-8f7dc34b187c", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [ | |||
|  |     { | |||
|  |      "name": "stdout", | |||
|  |      "output_type": "stream", | |||
|  |      "text": [ | |||
|  |       "Statistiques Descriptives company 5\n", | |||
|  |       "       average_price  average_price_basket  average_ticket_basket  \\\n", | |||
|  |       "count  145390.000000          68869.000000           68869.000000   \n", | |||
|  |       "mean       11.070309             65.969693               3.655202   \n", | |||
|  |       "std        16.353610            195.462869              13.119612   \n", | |||
|  |       "min         0.000000              0.000000               1.000000   \n", | |||
|  |       "25%         0.000000             20.000000               1.000000   \n", | |||
|  |       "50%         0.000000             45.000000               2.000000   \n", | |||
|  |       "75%        20.000000             79.500000               3.000000   \n", | |||
|  |       "max       500.000000          24159.405000            2139.833333   \n", | |||
|  |       "\n", | |||
|  |       "       purchase_count   total_price  \n", | |||
|  |       "count    471598.00000  3.950770e+05  \n", | |||
|  |       "mean          0.29900  2.608544e+01  \n", | |||
|  |       "std           7.22753  2.089636e+03  \n", | |||
|  |       "min           0.00000  0.000000e+00  \n", | |||
|  |       "25%           0.00000  0.000000e+00  \n", | |||
|  |       "50%           0.00000  0.000000e+00  \n", | |||
|  |       "75%           0.00000  0.000000e+00  \n", | |||
|  |       "max        3532.00000  1.262516e+06  \n", | |||
|  |       "Statistiques Descriptives company 6\n", | |||
|  |       "       average_price  average_price_basket  average_ticket_basket  \\\n", | |||
|  |       "count   33779.000000          33779.000000           33779.000000   \n", | |||
|  |       "mean       24.033859             56.711279               2.413530   \n", | |||
|  |       "std        21.217031             72.841926               3.763809   \n", | |||
|  |       "min       -52.740000          -1046.666667               1.000000   \n", | |||
|  |       "25%        10.000000             19.000000               1.080000   \n", | |||
|  |       "50%        19.333333             39.000000               2.000000   \n", | |||
|  |       "75%        30.000000             72.990000               3.000000   \n", | |||
|  |       "max       199.990000           3922.845361             309.047619   \n", | |||
|  |       "\n", | |||
|  |       "       purchase_count    total_price  \n", | |||
|  |       "count    79938.000000   79938.000000  \n", | |||
|  |       "mean         2.842090     102.251041  \n", | |||
|  |       "std         74.949889    4290.159858  \n", | |||
|  |       "min          0.000000   -3140.000000  \n", | |||
|  |       "25%          0.000000       0.000000  \n", | |||
|  |       "50%          0.000000       0.000000  \n", | |||
|  |       "75%          1.000000      54.980000  \n", | |||
|  |       "max      14750.000000  762695.290000  \n", | |||
|  |       "Statistiques Descriptives company 7\n", | |||
|  |       "       average_price  average_price_basket  average_ticket_basket  \\\n", | |||
|  |       "count   39524.000000          39524.000000           39524.000000   \n", | |||
|  |       "mean       33.110568            155.618778               3.365885   \n", | |||
|  |       "std        85.221328           1085.613137               6.283143   \n", | |||
|  |       "min         0.000000              0.000000               1.000000   \n", | |||
|  |       "25%        17.250000             25.000000               1.800000   \n", | |||
|  |       "50%        25.000000             57.676364               2.000000   \n", | |||
|  |       "75%        43.054691            115.837500               3.555556   \n", | |||
|  |       "max     10770.000000          86160.000000             400.000000   \n", | |||
|  |       "\n", | |||
|  |       "       purchase_count    total_price  \n", | |||
|  |       "count    68800.000000   68800.000000  \n", | |||
|  |       "mean         3.290029     944.593729  \n", | |||
|  |       "std         88.071870   12118.394731  \n", | |||
|  |       "min          0.000000       0.000000  \n", | |||
|  |       "25%          0.000000       0.000000  \n", | |||
|  |       "50%          1.000000       9.000000  \n", | |||
|  |       "75%          2.000000     132.000000  \n", | |||
|  |       "max      22934.000000  940874.200000  \n", | |||
|  |       "Statistiques Descriptives company 8\n", | |||
|  |       "       average_price  average_price_basket  average_ticket_basket  \\\n", | |||
|  |       "count  129198.000000         129198.000000          129198.000000   \n", | |||
|  |       "mean       18.409977             38.492520               2.258036   \n", | |||
|  |       "std        19.159059             71.136628               5.270858   \n", | |||
|  |       "min       -20.000000          -1545.000000               1.000000   \n", | |||
|  |       "25%         0.000000              0.000000               1.000000   \n", | |||
|  |       "50%        15.000000             20.000000               2.000000   \n", | |||
|  |       "75%        28.461538             52.500000               2.000000   \n", | |||
|  |       "max       390.000000           7618.227273             750.000000   \n", | |||
|  |       "\n", | |||
|  |       "       purchase_count    total_price  \n", | |||
|  |       "count   197376.000000  197376.000000  \n", | |||
|  |       "mean         4.637448     130.336075  \n", | |||
|  |       "std         96.228665    2791.899946  \n", | |||
|  |       "min          0.000000  -36124.000000  \n", | |||
|  |       "25%          0.000000       0.000000  \n", | |||
|  |       "50%          1.000000       0.000000  \n", | |||
|  |       "75%          2.000000      75.000000  \n", | |||
|  |       "max      40272.000000  702080.290000  \n", | |||
|  |       "Statistiques Descriptives company 9\n", | |||
|  |       "       average_price  average_price_basket  average_ticket_basket  \\\n", | |||
|  |       "count  102710.000000         102710.000000          102710.000000   \n", | |||
|  |       "mean       60.312171             62.384177               1.042402   \n", | |||
|  |       "std        50.018101             52.009984               0.268064   \n", | |||
|  |       "min      -291.670000           -291.670000               1.000000   \n", | |||
|  |       "25%        41.500000             42.350000               1.000000   \n", | |||
|  |       "50%        59.000000             61.070000               1.000000   \n", | |||
|  |       "75%        74.550000             77.710000               1.000000   \n", | |||
|  |       "max      1116.500000           1216.950000              23.000000   \n", | |||
|  |       "\n", | |||
|  |       "       purchase_count    total_price  \n", | |||
|  |       "count   181134.000000  181134.000000  \n", | |||
|  |       "mean         1.021354      63.476966  \n", | |||
|  |       "std          1.805412     129.781944  \n", | |||
|  |       "min          0.000000    -291.670000  \n", | |||
|  |       "25%          0.000000       0.000000  \n", | |||
|  |       "50%          1.000000       0.000000  \n", | |||
|  |       "75%          1.000000      80.000000  \n", | |||
|  |       "max        273.000000   14343.950000  \n" | |||
|  |      ] | |||
|  |     } | |||
|  |    ], | |||
|  |    "source": [ | |||
|  |     "# Summary statistics of the purchase KPIs, printed once per sport company\n", | |||
|  |     "kpi_columns = ['average_price', 'average_price_basket', 'average_ticket_basket',\n", | |||
|  |     "               'purchase_count', 'total_price']\n", | |||
|  |     "for company in sport_comp:\n", | |||
|  |     "    print(f'Statistiques Descriptives company {company}')\n", | |||
|  |     "    company_rows = customer_sport['number_company'] == company\n", | |||
|  |     "    company_data = customer_sport.loc[company_rows, kpi_columns]\n", | |||
|  |     "    print(company_data.describe())" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": null, | |||
|  |    "id": "5058d3c9-73a0-4e01-881e-4d2423f0d291", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "# Flag the customers whose purchase_count is strictly positive\n", | |||
|  |     "customer_sport[\"already_purchased\"] = customer_sport[\"purchase_count\"].gt(0)" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 69, | |||
|  |    "id": "986a0e41-ae31-46c5-a009-861530d85f45", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [ | |||
|  |     { | |||
|  |      "data": { | |||
|  |       "text/html": [ | |||
|  |        "<div>\n", | |||
|  |        "<style scoped>\n", | |||
|  |        "    .dataframe tbody tr th:only-of-type {\n", | |||
|  |        "        vertical-align: middle;\n", | |||
|  |        "    }\n", | |||
|  |        "\n", | |||
|  |        "    .dataframe tbody tr th {\n", | |||
|  |        "        vertical-align: top;\n", | |||
|  |        "    }\n", | |||
|  |        "\n", | |||
|  |        "    .dataframe thead th {\n", | |||
|  |        "        text-align: right;\n", | |||
|  |        "    }\n", | |||
|  |        "</style>\n", | |||
|  |        "<table border=\"1\" class=\"dataframe\">\n", | |||
|  |        "  <thead>\n", | |||
|  |        "    <tr style=\"text-align: right;\">\n", | |||
|  |        "      <th></th>\n", | |||
|  |        "      <th>customer_id</th>\n", | |||
|  |        "      <th>street_id</th>\n", | |||
|  |        "      <th>structure_id</th>\n", | |||
|  |        "      <th>mcp_contact_id</th>\n", | |||
|  |        "      <th>fidelity</th>\n", | |||
|  |        "      <th>tenant_id</th>\n", | |||
|  |        "      <th>is_partner</th>\n", | |||
|  |        "      <th>deleted_at</th>\n", | |||
|  |        "      <th>gender</th>\n", | |||
|  |        "      <th>is_email_true</th>\n", | |||
|  |        "      <th>...</th>\n", | |||
|  |        "      <th>purchase_count</th>\n", | |||
|  |        "      <th>first_buying_date</th>\n", | |||
|  |        "      <th>country</th>\n", | |||
|  |        "      <th>gender_label</th>\n", | |||
|  |        "      <th>gender_female</th>\n", | |||
|  |        "      <th>gender_male</th>\n", | |||
|  |        "      <th>gender_other</th>\n", | |||
|  |        "      <th>country_fr</th>\n", | |||
|  |        "      <th>has_tags</th>\n", | |||
|  |        "      <th>number_company</th>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "  </thead>\n", | |||
|  |        "  <tbody>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>0</th>\n", | |||
|  |        "      <td>[5, _, 6, 0, 0, 9, 7, 4, 5]</td>\n", | |||
|  |        "      <td>1372685</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>2</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>af</td>\n", | |||
|  |        "      <td>other</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>1</th>\n", | |||
|  |        "      <td>[5, _, 6, 0, 1, 1, 2, 2, 8]</td>\n", | |||
|  |        "      <td>1372685</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>2</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>af</td>\n", | |||
|  |        "      <td>other</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>2</th>\n", | |||
|  |        "      <td>[5, _, 6, 0, 5, 8, 9, 5, 0]</td>\n", | |||
|  |        "      <td>1372685</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>2</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>af</td>\n", | |||
|  |        "      <td>other</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>3</th>\n", | |||
|  |        "      <td>[5, _, 6, 0, 6, 2, 4, 0, 4]</td>\n", | |||
|  |        "      <td>1372685</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>2</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>af</td>\n", | |||
|  |        "      <td>other</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>4</th>\n", | |||
|  |        "      <td>[5, _, 2, 5, 0, 2, 1, 7]</td>\n", | |||
|  |        "      <td>78785</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>11035.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1771</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>fr</td>\n", | |||
|  |        "      <td>female</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>5</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>...</th>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>998841</th>\n", | |||
|  |        "      <td>[9, _, 9, 9, 5, 1, 4, 6]</td>\n", | |||
|  |        "      <td>607676</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>1490</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>2022-05-12 06:20:49+00:00</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>male</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>9</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>998842</th>\n", | |||
|  |        "      <td>[9, _, 9, 7, 0, 8, 9, 1]</td>\n", | |||
|  |        "      <td>587855</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>1490</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>2022-05-03 04:20:43+00:00</td>\n", | |||
|  |        "      <td>fr</td>\n", | |||
|  |        "      <td>male</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>9</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>998843</th>\n", | |||
|  |        "      <td>[9, _, 8, 4, 4, 3, 0, 2]</td>\n", | |||
|  |        "      <td>484177</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>1490</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>2022-03-27 12:15:02+00:00</td>\n", | |||
|  |        "      <td>de</td>\n", | |||
|  |        "      <td>male</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>9</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>998844</th>\n", | |||
|  |        "      <td>[9, _, 9, 4, 1, 2, 6, 0]</td>\n", | |||
|  |        "      <td>564032</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>1490</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>2022-04-20 15:12:38+00:00</td>\n", | |||
|  |        "      <td>ch</td>\n", | |||
|  |        "      <td>male</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>0.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>9</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "    <tr>\n", | |||
|  |        "      <th>998845</th>\n", | |||
|  |        "      <td>[9, _, 8, 0, 9, 7, 4, 2]</td>\n", | |||
|  |        "      <td>453747</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>1490</td>\n", | |||
|  |        "      <td>False</td>\n", | |||
|  |        "      <td>NaN</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>True</td>\n", | |||
|  |        "      <td>...</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>2022-03-07 20:42:07+00:00</td>\n", | |||
|  |        "      <td>fr</td>\n", | |||
|  |        "      <td>male</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>1.0</td>\n", | |||
|  |        "      <td>0</td>\n", | |||
|  |        "      <td>9</td>\n", | |||
|  |        "    </tr>\n", | |||
|  |        "  </tbody>\n", | |||
|  |        "</table>\n", | |||
|  |        "<p>998846 rows × 29 columns</p>\n", | |||
|  |        "</div>" | |||
|  |       ], | |||
|  |       "text/plain": [ | |||
|  |        "                        customer_id  street_id  structure_id  mcp_contact_id  \\\n", | |||
|  |        "0       [5, _, 6, 0, 0, 9, 7, 4, 5]    1372685           NaN             NaN   \n", | |||
|  |        "1       [5, _, 6, 0, 1, 1, 2, 2, 8]    1372685           NaN             NaN   \n", | |||
|  |        "2       [5, _, 6, 0, 5, 8, 9, 5, 0]    1372685           NaN             NaN   \n", | |||
|  |        "3       [5, _, 6, 0, 6, 2, 4, 0, 4]    1372685           NaN             NaN   \n", | |||
|  |        "4          [5, _, 2, 5, 0, 2, 1, 7]      78785           NaN         11035.0   \n", | |||
|  |        "...                             ...        ...           ...             ...   \n", | |||
|  |        "998841     [9, _, 9, 9, 5, 1, 4, 6]     607676           NaN             NaN   \n", | |||
|  |        "998842     [9, _, 9, 7, 0, 8, 9, 1]     587855           NaN             NaN   \n", | |||
|  |        "998843     [9, _, 8, 4, 4, 3, 0, 2]     484177           NaN             NaN   \n", | |||
|  |        "998844     [9, _, 9, 4, 1, 2, 6, 0]     564032           NaN             NaN   \n", | |||
|  |        "998845     [9, _, 8, 0, 9, 7, 4, 2]     453747           NaN             NaN   \n", | |||
|  |        "\n", | |||
|  |        "        fidelity  tenant_id  is_partner  deleted_at  gender  is_email_true  \\\n", | |||
|  |        "0              0       1771       False         NaN       2           True   \n", | |||
|  |        "1              0       1771       False         NaN       2           True   \n", | |||
|  |        "2              0       1771       False         NaN       2           True   \n", | |||
|  |        "3              0       1771       False         NaN       2           True   \n", | |||
|  |        "4              0       1771       False         NaN       0           True   \n", | |||
|  |        "...          ...        ...         ...         ...     ...            ...   \n", | |||
|  |        "998841         1       1490       False         NaN       1           True   \n", | |||
|  |        "998842         1       1490       False         NaN       1           True   \n", | |||
|  |        "998843         1       1490       False         NaN       1           True   \n", | |||
|  |        "998844         1       1490       False         NaN       1           True   \n", | |||
|  |        "998845         1       1490       False         NaN       1           True   \n", | |||
|  |        "\n", | |||
|  |        "        ...  purchase_count          first_buying_date  country  gender_label  \\\n", | |||
|  |        "0       ...               0                        NaN       af         other   \n", | |||
|  |        "1       ...               0                        NaN       af         other   \n", | |||
|  |        "2       ...               0                        NaN       af         other   \n", | |||
|  |        "3       ...               0                        NaN       af         other   \n", | |||
|  |        "4       ...               0                        NaN       fr        female   \n", | |||
|  |        "...     ...             ...                        ...      ...           ...   \n", | |||
|  |        "998841  ...               1  2022-05-12 06:20:49+00:00      NaN          male   \n", | |||
|  |        "998842  ...               1  2022-05-03 04:20:43+00:00       fr          male   \n", | |||
|  |        "998843  ...               1  2022-03-27 12:15:02+00:00       de          male   \n", | |||
|  |        "998844  ...               1  2022-04-20 15:12:38+00:00       ch          male   \n", | |||
|  |        "998845  ...               1  2022-03-07 20:42:07+00:00       fr          male   \n", | |||
|  |        "\n", | |||
|  |        "        gender_female  gender_male  gender_other  country_fr  has_tags  \\\n", | |||
|  |        "0                   0            0             1         0.0         0   \n", | |||
|  |        "1                   0            0             1         0.0         0   \n", | |||
|  |        "2                   0            0             1         0.0         0   \n", | |||
|  |        "3                   0            0             1         0.0         0   \n", | |||
|  |        "4                   1            0             0         1.0         0   \n", | |||
|  |        "...               ...          ...           ...         ...       ...   \n", | |||
|  |        "998841              0            1             0         NaN         0   \n", | |||
|  |        "998842              0            1             0         1.0         0   \n", | |||
|  |        "998843              0            1             0         0.0         0   \n", | |||
|  |        "998844              0            1             0         0.0         0   \n", | |||
|  |        "998845              0            1             0         1.0         0   \n", | |||
|  |        "\n", | |||
|  |        "        number_company  \n", | |||
|  |        "0                    5  \n", | |||
|  |        "1                    5  \n", | |||
|  |        "2                    5  \n", | |||
|  |        "3                    5  \n", | |||
|  |        "4                    5  \n", | |||
|  |        "...                ...  \n", | |||
|  |        "998841               9  \n", | |||
|  |        "998842               9  \n", | |||
|  |        "998843               9  \n", | |||
|  |        "998844               9  \n", | |||
|  |        "998845               9  \n", | |||
|  |        "\n", | |||
|  |        "[998846 rows x 29 columns]" | |||
|  |       ] | |||
|  |      }, | |||
|  |      "execution_count": 69, | |||
|  |      "metadata": {}, | |||
|  |      "output_type": "execute_result" | |||
|  |     } | |||
|  |    ], | |||
|  |    "source": [ | |||
|  |     "customer_sport" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 67, | |||
|  |    "id": "848963c9-6129-4106-80b5-76bf814b70d1", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "def mailing_consent(customer_sport):\n", | |||
|  |     "    \"\"\"Plot the mailing opt-in rate per company, split by purchase status.\n", | |||
|  |     "\n", | |||
|  |     "    Parameters\n", | |||
|  |     "    ----------\n", | |||
|  |     "    customer_sport : pandas.DataFrame\n", | |||
|  |     "        Needs the columns 'number_company', 'already_purchased' and 'opt_in'.\n", | |||
|  |     "\n", | |||
|  |     "    Returns\n", | |||
|  |     "    -------\n", | |||
|  |     "    None\n", | |||
|  |     "        A grouped bar chart is shown with plt.show().\n", | |||
|  |     "    \"\"\"\n", | |||
|  |     "    # Mean opt-in rate in percent: one row per company, one column per\n", | |||
|  |     "    # already_purchased value. A (company, status) pair without customers\n", | |||
|  |     "    # becomes NaN here instead of raising IndexError on `.values[0]`.\n", | |||
|  |     "    consent_rates = (\n", | |||
|  |     "        customer_sport.groupby([\"number_company\", \"already_purchased\"])[\"opt_in\"]\n", | |||
|  |     "        .mean()\n", | |||
|  |     "        .unstack(\"already_purchased\")\n", | |||
|  |     "        * 100\n", | |||
|  |     "    )\n", | |||
|  |     "    categories = consent_rates.index\n", | |||
|  |     "    statuses = consent_rates.columns\n", | |||
|  |     "    bar_width = 0.35\n", | |||
|  |     "    base_positions = np.arange(len(categories))\n", | |||
|  |     "\n", | |||
|  |     "    fig, ax = plt.subplots(figsize=(10, 6))\n", | |||
|  |     "\n", | |||
|  |     "    # One bar series per purchase status, each shifted right by one bar width\n", | |||
|  |     "    for offset, label in enumerate(statuses):\n", | |||
|  |     "        label_printed = \"purchased\" if label else \"no purchase\"\n", | |||
|  |     "        values = consent_rates[label].fillna(0).to_numpy()  # missing pair -> zero-height bar\n", | |||
|  |     "        ax.bar(base_positions + offset * bar_width, values, bar_width, label=label_printed)\n", | |||
|  |     "\n", | |||
|  |     "    # Axis labels, title and legend\n", | |||
|  |     "    ax.set_xlabel('Numero de compagnie')\n", | |||
|  |     "    ax.set_ylabel('Part de consentement (%)')\n", | |||
|  |     "    ax.set_title('Part de consentement au mailing selon les compagnies')\n", | |||
|  |     "    # Centre each tick under its group, whatever the number of bar series\n", | |||
|  |     "    ax.set_xticks(base_positions + bar_width * (len(statuses) - 1) / 2)\n", | |||
|  |     "    ax.set_xticklabels(categories)\n", | |||
|  |     "    ax.legend()\n", | |||
|  |     "\n", | |||
|  |     "    # Render the figure\n", | |||
|  |     "    plt.show()" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": null, | |||
|  |    "id": "d8071891-e6f5-4d93-b039-9e99c20ec4b0", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "def gender_bar(customer_sport):\n", | |||
|  |     "    company_genders = customer_sport.groupby(\"number_company\")[[\"gender_male\", \"gender_female\", \"gender_other\"]].mean().reset_index()\n", | |||
|  |     "    # Création du barplot\n", | |||
|  |     "    plt.bar(company_genders[\"number_company\"], company_genders[\"gender_male\"], label = \"Homme\")\n", | |||
|  |     "    plt.bar(company_genders[\"number_company\"], company_genders[\"gender_female\"], \n", | |||
|  |     "            bottom = company_genders[\"gender_male\"], label = \"Femme\")\n", | |||
|  |     "    \n", | |||
|  |     "    \n", | |||
|  |     "    # Ajout de titres et d'étiquettes\n", | |||
|  |     "    plt.xlabel('Company')\n", | |||
|  |     "    plt.ylabel(\"Part de clients de chaque sexe\")\n", | |||
|  |     "    plt.title(\"Sexe des clients de chaque compagnie de spectacle\")\n", | |||
|  |     "    plt.legend()\n", | |||
|  |     "    \n", | |||
|  |     "    # Affichage du barplot\n", | |||
|  |     "    plt.show()" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": null, | |||
|  |    "id": "2fc30f1d-cf64-4efb-9442-4d97bb50b29f", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "gender_bar()" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": 11, | |||
|  |    "id": "4b3bb641-814b-4679-9a67-4eca87a920a6", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "def country_bar(customer_sport):\n", | |||
|  |     "    company_country_fr = customer_sport.groupby(\"number_compagny\")[\"country_fr\"].mean().reset_index()\n", | |||
|  |     "    # Création du barplot\n", | |||
|  |     "    plt.bar(company_country_fr[\"number_company\"], company_country_fr[\"country_fr\"])\n", | |||
|  |     "    \n", | |||
|  |     "    # Ajout de titres et d'étiquettes\n", | |||
|  |     "    plt.xlabel('Company')\n", | |||
|  |     "    plt.ylabel(\"Part de clients français\")\n", | |||
|  |     "    plt.title(\"Nationalité des clients de chaque compagnie de spectacle\")\n", | |||
|  |     "    \n", | |||
|  |     "    # Affichage du barplot\n", | |||
|  |     "    plt.show()" | |||
|  |    ] | |||
|  |   }, | |||
|  |   { | |||
|  |    "cell_type": "code", | |||
|  |    "execution_count": null, | |||
|  |    "id": "01258674-6b98-49e4-93f4-f4185964999f", | |||
|  |    "metadata": {}, | |||
|  |    "outputs": [], | |||
|  |    "source": [ | |||
|  |     "country_bar()" | |||
|  |    ] | |||
|  |   } | |||
|  |  ], | |||
|  |  "metadata": { | |||
|  |   "kernelspec": { | |||
|  |    "display_name": "Python 3 (ipykernel)", | |||
|  |    "language": "python", | |||
|  |    "name": "python3" | |||
|  |   }, | |||
|  |   "language_info": { | |||
|  |    "codemirror_mode": { | |||
|  |     "name": "ipython", | |||
|  |     "version": 3 | |||
|  |    }, | |||
|  |    "file_extension": ".py", | |||
|  |    "mimetype": "text/x-python", | |||
|  |    "name": "python", | |||
|  |    "nbconvert_exporter": "python", | |||
|  |    "pygments_lexer": "ipython3", | |||
|  |    "version": "3.11.6" | |||
|  |   } | |||
|  |  }, | |||
|  |  "nbformat": 4, | |||
|  |  "nbformat_minor": 5 | |||
|  | } |