{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "84b6e27e-4bda-4d38-8689-ec7fc0da1848",
   "metadata": {},
   "source": [
    "# Define segment and predict sales associated"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ec059482-45d3-4ae6-99bc-9b4ced115db3",
   "metadata": {},
   "source": [
    "## Importations of packages "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9771bf29-d08e-4674-8c23-9a2672fbef8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from pandas import DataFrame\n",
    "import numpy as np\n",
    "import os\n",
    "import s3fs\n",
    "import re\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
    "from sklearn.utils import class_weight\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.compose import ColumnTransformer\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "from sklearn.impute import SimpleImputer\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
    "from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
    "from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from scipy.optimize import fsolve\n",
    "import io\n",
    "\n",
    "import pickle\n",
    "import warnings"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "048fcd7c-800a-4a6b-b725-faf8410f924a",
   "metadata": {},
   "source": [
    "## load databases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "539ccbdf-f29f-4f04-99c1-8c88d0efe514",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create filesystem object\n",
    "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
    "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d6017ed0-6233-4888-85a7-05dec50a255b",
   "metadata": {},
   "outputs": [],
   "source": [
    "type_of_activity = \"sport\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0c3a6ddc-9345-4a42-b6bf-a20a95de3028",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_train_test(type_of_activity):\n",
    "    # BUCKET = f\"projet-bdc2324-team1/Generalization/{type_of_activity}\"\n",
    "    BUCKET = f\"projet-bdc2324-team1/1_Temp/1_0_Modelling_Datasets/{type_of_activity}\"\n",
    "    File_path_train = BUCKET + \"/Train_set.csv\"\n",
    "    File_path_test = BUCKET + \"/Test_set.csv\"\n",
    "    \n",
    "    with fs.open( File_path_train, mode=\"rb\") as file_in:\n",
    "        dataset_train = pd.read_csv(file_in, sep=\",\")\n",
    "        # dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n",
    "\n",
    "    with fs.open(File_path_test, mode=\"rb\") as file_in:\n",
    "        dataset_test = pd.read_csv(file_in, sep=\",\")\n",
    "        # dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n",
    "    \n",
    "    return dataset_train, dataset_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2831d546-b365-498b-8248-c618bd9c3057",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_427/290017524.py:8: DtypeWarning: Columns (10,24) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  dataset_train = pd.read_csv(file_in, sep=\",\")\n",
      "/tmp/ipykernel_427/290017524.py:12: DtypeWarning: Columns (10,24) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  dataset_test = pd.read_csv(file_in, sep=\",\")\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "customer_id              0\n",
       "street_id                0\n",
       "structure_id        222819\n",
       "mcp_contact_id       70845\n",
       "fidelity                 0\n",
       "                     ...  \n",
       "purchases_8_2021         0\n",
       "purchases_8_2022         0\n",
       "purchases_9_2021         0\n",
       "purchases_9_2022         0\n",
       "y_has_purchased          0\n",
       "Length: 87, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset_train, dataset_test = load_train_test(type_of_activity)\n",
    "dataset_train.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b8827f7b-b304-4f51-9814-c7a98ed88cf0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def features_target_split(dataset_train, dataset_test):\n",
    "    \n",
    "    features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'purchase_date_min', 'purchase_date_max', \n",
    "            'time_between_purchase', 'fidelity',  'is_email_true', 'opt_in', #'is_partner', 'nb_tickets_internet',, 'vente_internet_max'\n",
    "            'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
    "\n",
    "    # we suppress fidelity, time between purchase, and gender other (colinearity issue)\n",
    "    \"\"\"\n",
    "    features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', \n",
    "                  'purchase_date_min', 'purchase_date_max', 'nb_tickets_internet',  'is_email_true', \n",
    "                  'opt_in', 'gender_female', 'gender_male', 'nb_campaigns', 'nb_campaigns_opened']\n",
    "    \"\"\"\n",
    "    \n",
    "    X_train = dataset_train # [features_l]\n",
    "    y_train = dataset_train[['y_has_purchased']]\n",
    "\n",
    "    X_test = dataset_test # [features_l]\n",
    "    y_test = dataset_test[['y_has_purchased']]\n",
    "    return X_train, X_test, y_train, y_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c18195fc-ed40-4e39-a59e-c9ecc5a8e6c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape train :  (224213, 87)\n",
      "Shape test :  (96096, 87)\n"
     ]
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)\n",
    "print(\"Shape train : \", X_train.shape)\n",
    "print(\"Shape test : \", X_test.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "74eda066-5e01-43aa-b0cf-cc6d9bbf770e",
   "metadata": {},
   "source": [
    "## get results from the logit cross validated model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7c81390e-598c-4f02-bd56-dd03b00dcb33",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      customer_id \n",
       "      street_id \n",
       "      structure_id \n",
       "      mcp_contact_id \n",
       "      fidelity \n",
       "      tenant_id \n",
       "      is_partner \n",
       "      deleted_at \n",
       "      is_email_true \n",
       "      opt_in \n",
       "      ... \n",
       "      purchases_5_2022 \n",
       "      purchases_6_2021 \n",
       "      purchases_6_2022 \n",
       "      purchases_7_2021 \n",
       "      purchases_7_2022 \n",
       "      purchases_8_2021 \n",
       "      purchases_8_2022 \n",
       "      purchases_9_2021 \n",
       "      purchases_9_2022 \n",
       "      y_has_purchased \n",
       "     \n",
       "   \n",
       "  \n",
       "    \n",
       "      0 \n",
       "      5_4317407 \n",
       "      969908 \n",
       "      NaN \n",
       "      6156473.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "     \n",
       "    \n",
       "      1 \n",
       "      5_477635 \n",
       "      109121 \n",
       "      NaN \n",
       "      6213652.0 \n",
       "      2 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "     \n",
       "    \n",
       "      2 \n",
       "      5_411639 \n",
       "      92929 \n",
       "      NaN \n",
       "      6160271.0 \n",
       "      4 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "     \n",
       "    \n",
       "      3 \n",
       "      5_326623 \n",
       "      79862 \n",
       "      NaN \n",
       "      6140109.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "     \n",
       "    \n",
       "      4 \n",
       "      5_383915 \n",
       "      85421 \n",
       "      NaN \n",
       "      6149409.0 \n",
       "      2 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "     \n",
       "    \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "     \n",
       "    \n",
       "      96091 \n",
       "      9_91205 \n",
       "      76215 \n",
       "      NaN \n",
       "      47280.0 \n",
       "      0 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "     \n",
       "    \n",
       "      96092 \n",
       "      9_369887 \n",
       "      815891 \n",
       "      NaN \n",
       "      30764537.0 \n",
       "      4 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "     \n",
       "    \n",
       "      96093 \n",
       "      9_1007562 \n",
       "      1 \n",
       "      NaN \n",
       "      NaN \n",
       "      0 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "     \n",
       "    \n",
       "      96094 \n",
       "      9_15037 \n",
       "      12992 \n",
       "      NaN \n",
       "      2213448.0 \n",
       "      0 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "     \n",
       "    \n",
       "      96095 \n",
       "      9_135370 \n",
       "      76215 \n",
       "      NaN \n",
       "      2164740.0 \n",
       "      0 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "     \n",
       "   \n",
       "
\n",
       "
96096 rows × 87 columns
\n",
       "
GridSearchCV(cv=3, error_score='raise',\n",
       "             estimator=Pipeline(steps=[('preprocessor',\n",
       "                                        ColumnTransformer(transformers=[('num',\n",
       "                                                                         Pipeline(steps=[('imputer',\n",
       "                                                                                          SimpleImputer(fill_value=0,\n",
       "                                                                                                        strategy='constant')),\n",
       "                                                                                         ('scaler',\n",
       "                                                                                          StandardScaler())]),\n",
       "                                                                         ['nb_campaigns',\n",
       "                                                                          'taux_ouverture_mail',\n",
       "                                                                          'prop_purchases_internet',\n",
       "                                                                          'nb_tickets',\n",
       "                                                                          'nb_purchases',\n",
       "                                                                          'total_amount',\n",
       "                                                                          'nb_suppliers',\n",
       "                                                                          'pu...\n",
       "       1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
       "       2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
       "       4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
       "       6.400000e+01]),\n",
       "                         'LogisticRegression_cv__class_weight': ['balanced',\n",
       "                                                                 {0.0: 0.5834990214856762,\n",
       "                                                                  1.0: 3.49404706249026}],\n",
       "                         'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
       "             scoring=make_scorer(recall_score, response_method='predict')) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.    GridSearchCV?Documentation for GridSearchCV  iFitted   GridSearchCV(cv=3, error_score='raise',\n",
       "             estimator=Pipeline(steps=[('preprocessor',\n",
       "                                        ColumnTransformer(transformers=[('num',\n",
       "                                                                         Pipeline(steps=[('imputer',\n",
       "                                                                                          SimpleImputer(fill_value=0,\n",
       "                                                                                                        strategy='constant')),\n",
       "                                                                                         ('scaler',\n",
       "                                                                                          StandardScaler())]),\n",
       "                                                                         ['nb_campaigns',\n",
       "                                                                          'taux_ouverture_mail',\n",
       "                                                                          'prop_purchases_internet',\n",
       "                                                                          'nb_tickets',\n",
       "                                                                          'nb_purchases',\n",
       "                                                                          'total_amount',\n",
       "                                                                          'nb_suppliers',\n",
       "                                                                          'pu...\n",
       "       1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
       "       2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
       "       4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
       "       6.400000e+01]),\n",
       "                         'LogisticRegression_cv__class_weight': ['balanced',\n",
       "                                                                 {0.0: 0.5834990214856762,\n",
       "                                                                  1.0: 3.49404706249026}],\n",
       "                         'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
       "             scoring=make_scorer(recall_score, response_method='predict')) estimator: Pipeline Pipeline(steps=[('preprocessor',\n",
       "                 ColumnTransformer(transformers=[('num',\n",
       "                                                  Pipeline(steps=[('imputer',\n",
       "                                                                   SimpleImputer(fill_value=0,\n",
       "                                                                                 strategy='constant')),\n",
       "                                                                  ('scaler',\n",
       "                                                                   StandardScaler())]),\n",
       "                                                  ['nb_campaigns',\n",
       "                                                   'taux_ouverture_mail',\n",
       "                                                   'prop_purchases_internet',\n",
       "                                                   'nb_tickets', 'nb_purchases',\n",
       "                                                   'total_amount',\n",
       "                                                   'nb_suppliers',\n",
       "                                                   'purchases_10_2021',\n",
       "                                                   'purchases_10_2022',\n",
       "                                                   'purchases_...\n",
       "                                                   'categorie_age_0_10',\n",
       "                                                   'categorie_age_10_20',\n",
       "                                                   'categorie_age_20_30',\n",
       "                                                   'categorie_age_30_40',\n",
       "                                                   'categorie_age_40_50',\n",
       "                                                   'categorie_age_50_60',\n",
       "                                                   'categorie_age_60_70',\n",
       "                                                   'categorie_age_70_80',\n",
       "                                                   'categorie_age_plus_80',\n",
       "                                                   'categorie_age_inconnue',\n",
       "                                                   'country_fr',\n",
       "                                                   'is_profession_known',\n",
       "                                                   'is_zipcode_known',\n",
       "                                                   'opt_in'])])),\n",
       "                ('LogisticRegression_cv',\n",
       "                 LogisticRegression(max_iter=5000, solver='saga'))])  preprocessor: ColumnTransformer?Documentation for preprocessor: ColumnTransformer   ColumnTransformer(transformers=[('num',\n",
       "                                 Pipeline(steps=[('imputer',\n",
       "                                                  SimpleImputer(fill_value=0,\n",
       "                                                                strategy='constant')),\n",
       "                                                 ('scaler', StandardScaler())]),\n",
       "                                 ['nb_campaigns', 'taux_ouverture_mail',\n",
       "                                  'prop_purchases_internet', 'nb_tickets',\n",
       "                                  'nb_purchases', 'total_amount',\n",
       "                                  'nb_suppliers', 'purchases_10_2021',\n",
       "                                  'purchases_10_2022', 'purchases_11_2021',\n",
       "                                  'purchases_12_2021', 'pu...\n",
       "                                                  SimpleImputer(strategy='most_frequent'))]),\n",
       "                                 ['gender_female', 'gender_male',\n",
       "                                  'achat_internet', 'categorie_age_0_10',\n",
       "                                  'categorie_age_10_20', 'categorie_age_20_30',\n",
       "                                  'categorie_age_30_40', 'categorie_age_40_50',\n",
       "                                  'categorie_age_50_60', 'categorie_age_60_70',\n",
       "                                  'categorie_age_70_80',\n",
       "                                  'categorie_age_plus_80',\n",
       "                                  'categorie_age_inconnue', 'country_fr',\n",
       "                                  'is_profession_known', 'is_zipcode_known',\n",
       "                                  'opt_in'])]) num ['nb_campaigns', 'taux_ouverture_mail', 'prop_purchases_internet', 'nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'purchases_10_2021', 'purchases_10_2022', 'purchases_11_2021', 'purchases_12_2021', 'purchases_1_2022', 'purchases_2_2022', 'purchases_3_2022', 'purchases_4_2022', 'purchases_5_2021', 'purchases_5_2022', 'purchases_6_2021', 'purchases_6_2022', 'purchases_7_2021', 'purchases_7_2022', 'purchases_8_2021', 'purchases_8_2022', 'purchases_9_2021', 'purchases_9_2022', 'purchase_date_min', 'purchase_date_max', 'nb_targets', 'time_to_open'] bin ['gender_female', 'gender_male', 'achat_internet', 'categorie_age_0_10', 'categorie_age_10_20', 'categorie_age_20_30', 'categorie_age_30_40', 'categorie_age_40_50', 'categorie_age_50_60', 'categorie_age_60_70', 'categorie_age_70_80', 'categorie_age_plus_80', 'categorie_age_inconnue', 'country_fr', 'is_profession_known', 'is_zipcode_known', 'opt_in'] \n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      customer_id \n",
       "      street_id \n",
       "      structure_id \n",
       "      mcp_contact_id \n",
       "      fidelity \n",
       "      tenant_id \n",
       "      is_partner \n",
       "      deleted_at \n",
       "      is_email_true \n",
       "      opt_in \n",
       "      ... \n",
       "      purchases_7_2022 \n",
       "      purchases_8_2021 \n",
       "      purchases_8_2022 \n",
       "      purchases_9_2021 \n",
       "      purchases_9_2022 \n",
       "      y_has_purchased \n",
       "      has_purchased \n",
       "      has_purchased_estim \n",
       "      score \n",
       "      quartile \n",
       "     \n",
       "   \n",
       "  \n",
       "    \n",
       "      0 \n",
       "      5_4317407 \n",
       "      969908 \n",
       "      NaN \n",
       "      6156473.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.445019 \n",
       "      2 \n",
       "     \n",
       "    \n",
       "      1 \n",
       "      5_477635 \n",
       "      109121 \n",
       "      NaN \n",
       "      6213652.0 \n",
       "      2 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.382586 \n",
       "      2 \n",
       "     \n",
       "    \n",
       "      2 \n",
       "      5_411639 \n",
       "      92929 \n",
       "      NaN \n",
       "      6160271.0 \n",
       "      4 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.916747 \n",
       "      4 \n",
       "     \n",
       "    \n",
       "      3 \n",
       "      5_326623 \n",
       "      79862 \n",
       "      NaN \n",
       "      6140109.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.090534 \n",
       "      1 \n",
       "     \n",
       "    \n",
       "      4 \n",
       "      5_383915 \n",
       "      85421 \n",
       "      NaN \n",
       "      6149409.0 \n",
       "      2 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.346571 \n",
       "      2 \n",
       "     \n",
       "    \n",
       "      5 \n",
       "      5_233172 \n",
       "      141401 \n",
       "      NaN \n",
       "      3324.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.924684 \n",
       "      4 \n",
       "     \n",
       "    \n",
       "      6 \n",
       "      5_389999 \n",
       "      95759 \n",
       "      NaN \n",
       "      6151025.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.569031 \n",
       "      3 \n",
       "     \n",
       "    \n",
       "      7 \n",
       "      5_4292211 \n",
       "      78897 \n",
       "      NaN \n",
       "      4729841.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.125622 \n",
       "      1 \n",
       "     \n",
       "    \n",
       "      8 \n",
       "      5_353553 \n",
       "      84189 \n",
       "      NaN \n",
       "      6146995.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.229432 \n",
       "      1 \n",
       "     \n",
       "    \n",
       "      9 \n",
       "      5_401296 \n",
       "      3491 \n",
       "      NaN \n",
       "      6155457.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.503987 \n",
       "      3 \n",
       "     \n",
       "   \n",
       "
\n",
       "
10 rows × 91 columns
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      nb_tickets \n",
       "      nb_purchases \n",
       "      total_amount \n",
       "      nb_suppliers \n",
       "      vente_internet_max \n",
       "      purchase_date_min \n",
       "      purchase_date_max \n",
       "      time_between_purchase \n",
       "      nb_tickets_internet \n",
       "      fidelity \n",
       "      ... \n",
       "      opt_in \n",
       "      gender_female \n",
       "      gender_male \n",
       "      gender_other \n",
       "      nb_campaigns \n",
       "      nb_campaigns_opened \n",
       "      has_purchased \n",
       "      has_purchased_estim \n",
       "      score \n",
       "      quartile \n",
       "     \n",
       "   \n",
       "  \n",
       "    \n",
       "      0 \n",
       "      4.0 \n",
       "      1.0 \n",
       "      100.00 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      5.177187 \n",
       "      5.177187 \n",
       "      0.000000 \n",
       "      0.0 \n",
       "      1 \n",
       "      ... \n",
       "      False \n",
       "      1 \n",
       "      0 \n",
       "      0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.006066 \n",
       "      1 \n",
       "     \n",
       "    \n",
       "      1 \n",
       "      1.0 \n",
       "      1.0 \n",
       "      55.00 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      426.265613 \n",
       "      426.265613 \n",
       "      0.000000 \n",
       "      0.0 \n",
       "      2 \n",
       "      ... \n",
       "      True \n",
       "      0 \n",
       "      1 \n",
       "      0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      0.288847 \n",
       "      2 \n",
       "     \n",
       "    \n",
       "      2 \n",
       "      17.0 \n",
       "      1.0 \n",
       "      80.00 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      436.033437 \n",
       "      436.033437 \n",
       "      0.000000 \n",
       "      0.0 \n",
       "      2 \n",
       "      ... \n",
       "      True \n",
       "      1 \n",
       "      0 \n",
       "      0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.103264 \n",
       "      1 \n",
       "     \n",
       "    \n",
       "      3 \n",
       "      4.0 \n",
       "      1.0 \n",
       "      120.00 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      5.196412 \n",
       "      5.196412 \n",
       "      0.000000 \n",
       "      0.0 \n",
       "      1 \n",
       "      ... \n",
       "      False \n",
       "      1 \n",
       "      0 \n",
       "      0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.008928 \n",
       "      1 \n",
       "     \n",
       "    \n",
       "      4 \n",
       "      34.0 \n",
       "      2.0 \n",
       "      416.00 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      478.693148 \n",
       "      115.631470 \n",
       "      363.061678 \n",
       "      0.0 \n",
       "      4 \n",
       "      ... \n",
       "      False \n",
       "      1 \n",
       "      0 \n",
       "      0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      1.0 \n",
       "      0.992809 \n",
       "      4 \n",
       "     \n",
       "    \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "     \n",
       "    \n",
       "      96091 \n",
       "      1.0 \n",
       "      1.0 \n",
       "      67.31 \n",
       "      1.0 \n",
       "      1.0 \n",
       "      278.442257 \n",
       "      278.442257 \n",
       "      0.000000 \n",
       "      1.0 \n",
       "      2 \n",
       "      ... \n",
       "      False \n",
       "      0 \n",
       "      1 \n",
       "      0 \n",
       "      15.0 \n",
       "      5.0 \n",
       "      1.0 \n",
       "      0.0 \n",
       "      0.351762 \n",
       "      2 \n",
       "     \n",
       "    \n",
       "      96092 \n",
       "      1.0 \n",
       "      1.0 \n",
       "      61.41 \n",
       "      1.0 \n",
       "      1.0 \n",
       "      189.207373 \n",
       "      189.207373 \n",
       "      0.000000 \n",
       "      1.0 \n",
       "      1 \n",
       "      ... \n",
       "      False \n",
       "      0 \n",
       "      1 \n",
       "      0 \n",
       "      12.0 \n",
       "      9.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.567814 \n",
       "      3 \n",
       "     \n",
       "    \n",
       "      96093 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.00 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      550.000000 \n",
       "      550.000000 \n",
       "      -1.000000 \n",
       "      0.0 \n",
       "      1 \n",
       "      ... \n",
       "      True \n",
       "      1 \n",
       "      0 \n",
       "      0 \n",
       "      29.0 \n",
       "      3.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.004652 \n",
       "      1 \n",
       "     \n",
       "    \n",
       "      96094 \n",
       "      1.0 \n",
       "      1.0 \n",
       "      79.43 \n",
       "      1.0 \n",
       "      1.0 \n",
       "      279.312905 \n",
       "      279.312905 \n",
       "      0.000000 \n",
       "      1.0 \n",
       "      1 \n",
       "      ... \n",
       "      False \n",
       "      0 \n",
       "      1 \n",
       "      0 \n",
       "      20.0 \n",
       "      4.0 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.293042 \n",
       "      2 \n",
       "     \n",
       "    \n",
       "      96095 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      0.00 \n",
       "      0.0 \n",
       "      0.0 \n",
       "      550.000000 \n",
       "      550.000000 \n",
       "      -1.000000 \n",
       "      0.0 \n",
       "      2 \n",
       "      ... \n",
       "      False \n",
       "      0 \n",
       "      1 \n",
       "      0 \n",
       "      31.0 \n",
       "      4.0 \n",
       "      0.0 \n",
       "      1.0 \n",
       "      0.787852 \n",
       "      4 \n",
       "     \n",
       "   \n",
       "
\n",
       "
96096 rows × 21 columns
\n",
       "
"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Draw the score histograms for X_test_segment (columns \"score\" and \"score_adjusted\")\n",
    "plot_hist_scores(\n",
    "    X_test_segment,\n",
    "    score=\"score\",\n",
    "    score_adjusted=\"score_adjusted\",\n",
    "    type_of_activity=type_of_activity,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "add631d7-0757-45a5-bb5b-f7f4b4baa961",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "projet-bdc2324-team1/Output_expected_CA/sport/\n"
     ]
    }
   ],
   "source": [
    "# define path so save graphics\n",
    "\n",
    "# define type of activity \n",
    "type_of_activity = \"sport\"\n",
    "PATH = f\"projet-bdc2324-team1/Output_expected_CA/{type_of_activity}/\"\n",
    "print(PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "3a5b5bd9-e033-4436-8c56-bf5fb61df87f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the adjusted-score histogram as a PNG file to S3.\n",
    "#\n",
    "# The figure is rendered into an in-memory buffer and written through `fs`\n",
    "# (presumably the s3fs filesystem created earlier in the notebook), so no\n",
    "# local file is created.\n",
    "# NOTE(review): this cell was marked \"to be tested\". If plot_hist_scores()\n",
    "# calls plt.show() itself, the current figure may already be empty when it is\n",
    "# saved here -- confirm against its definition.\n",
    "\n",
    "# Destination object: <PATH>hist_score_adjusted_<type_of_activity>.png\n",
    "file_name = \"hist_score_adjusted_\"\n",
    "FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".png\"\n",
    "\n",
    "plot_hist_scores(X_test_segment, score = \"score\", score_adjusted = \"score_adjusted\", type_of_activity = type_of_activity)\n",
    "try:\n",
    "    image_buffer = io.BytesIO()\n",
    "    plt.savefig(image_buffer, format='png')\n",
    "    # rewind so that read() returns the whole image\n",
    "    image_buffer.seek(0)\n",
    "    with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
    "        s3_file.write(image_buffer.read())\n",
    "finally:\n",
    "    # always release the figure, even when rendering or the upload fails\n",
    "    plt.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e6fae260-fab8-4f51-90dc-9b6d7314c77b",
   "metadata": {},
   "source": [
    "## Compute the number of tickets and the revenue (CA) by segment with the recalibrated score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "90c4c2b5-0ede-4001-889f-749cfbd9df04",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      quartile \n",
       "      score (%) \n",
       "      score adjusted (%) \n",
       "      has purchased (%) \n",
       "     \n",
       "   \n",
       "  \n",
       "    \n",
       "      0 \n",
       "      1 \n",
       "      10.20 \n",
       "      1.94 \n",
       "      1.19 \n",
       "     \n",
       "    \n",
       "      1 \n",
       "      2 \n",
       "      37.08 \n",
       "      9.12 \n",
       "      10.62 \n",
       "     \n",
       "    \n",
       "      2 \n",
       "      3 \n",
       "      62.07 \n",
       "      22.00 \n",
       "      28.67 \n",
       "     \n",
       "    \n",
       "      3 \n",
       "      4 \n",
       "      90.35 \n",
       "      67.16 \n",
       "      63.09 \n",
       "     \n",
       "   \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      customer_id \n",
       "      street_id \n",
       "      structure_id \n",
       "      mcp_contact_id \n",
       "      fidelity \n",
       "      tenant_id \n",
       "      is_partner \n",
       "      deleted_at \n",
       "      is_email_true \n",
       "      opt_in \n",
       "      ... \n",
       "      score \n",
       "      quartile \n",
       "      score_adjusted \n",
       "      nb_tickets_projected \n",
       "      total_amount_projected \n",
       "      nb_tickets_expected \n",
       "      total_amount_expected \n",
       "      pace_purchase \n",
       "      avg_ticket_price \n",
       "      total_amount_corrected \n",
       "     \n",
       "   \n",
       "  \n",
       "    \n",
       "      0 \n",
       "      5_4317407 \n",
       "      969908 \n",
       "      NaN \n",
       "      6156473.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.445019 \n",
       "      2 \n",
       "      0.117551 \n",
       "      1.411765 \n",
       "      17.647059 \n",
       "      0.165955 \n",
       "      2.074432 \n",
       "      17.000000 \n",
       "      12.500 \n",
       "      25.00 \n",
       "     \n",
       "    \n",
       "      1 \n",
       "      5_477635 \n",
       "      109121 \n",
       "      NaN \n",
       "      6213652.0 \n",
       "      2 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.382586 \n",
       "      2 \n",
       "      0.093333 \n",
       "      1.411765 \n",
       "      64.941176 \n",
       "      0.131765 \n",
       "      6.061181 \n",
       "      8.500000 \n",
       "      46.000 \n",
       "      92.00 \n",
       "     \n",
       "    \n",
       "      2 \n",
       "      5_411639 \n",
       "      92929 \n",
       "      NaN \n",
       "      6160271.0 \n",
       "      4 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.916747 \n",
       "      4 \n",
       "      0.646556 \n",
       "      2.117647 \n",
       "      31.764706 \n",
       "      1.369178 \n",
       "      20.537670 \n",
       "      5.666667 \n",
       "      15.000 \n",
       "      45.00 \n",
       "     \n",
       "    \n",
       "      3 \n",
       "      5_326623 \n",
       "      79862 \n",
       "      NaN \n",
       "      6140109.0 \n",
       "      1 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.090534 \n",
       "      1 \n",
       "      0.016268 \n",
       "      1.000000 \n",
       "      10.000000 \n",
       "      0.016268 \n",
       "      0.162683 \n",
       "      17.000000 \n",
       "      10.000 \n",
       "      10.00 \n",
       "     \n",
       "    \n",
       "      4 \n",
       "      5_383915 \n",
       "      85421 \n",
       "      NaN \n",
       "      6149409.0 \n",
       "      2 \n",
       "      1771 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.346571 \n",
       "      2 \n",
       "      0.080976 \n",
       "      5.647059 \n",
       "      89.647059 \n",
       "      0.457279 \n",
       "      7.259298 \n",
       "      8.500000 \n",
       "      15.875 \n",
       "      127.00 \n",
       "     \n",
       "    \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "      ... \n",
       "     \n",
       "    \n",
       "      96091 \n",
       "      9_91205 \n",
       "      76215 \n",
       "      NaN \n",
       "      47280.0 \n",
       "      0 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.014966 \n",
       "      1 \n",
       "      0.002518 \n",
       "      1.000000 \n",
       "      48.713098 \n",
       "      0.002518 \n",
       "      0.122642 \n",
       "      NaN \n",
       "      NaN \n",
       "      0.00 \n",
       "     \n",
       "    \n",
       "      96092 \n",
       "      9_369887 \n",
       "      815891 \n",
       "      NaN \n",
       "      30764537.0 \n",
       "      4 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.834257 \n",
       "      4 \n",
       "      0.455392 \n",
       "      1.411765 \n",
       "      71.216471 \n",
       "      0.642906 \n",
       "      32.431379 \n",
       "      8.500000 \n",
       "      50.445 \n",
       "      100.89 \n",
       "     \n",
       "    \n",
       "      96093 \n",
       "      9_1007562 \n",
       "      1 \n",
       "      NaN \n",
       "      NaN \n",
       "      0 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      0 \n",
       "      ... \n",
       "      0.062886 \n",
       "      1 \n",
       "      0.011025 \n",
       "      1.000000 \n",
       "      48.713098 \n",
       "      0.011025 \n",
       "      0.537071 \n",
       "      NaN \n",
       "      NaN \n",
       "      0.00 \n",
       "     \n",
       "    \n",
       "      96094 \n",
       "      9_15037 \n",
       "      12992 \n",
       "      NaN \n",
       "      2213448.0 \n",
       "      0 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.068998 \n",
       "      1 \n",
       "      0.012162 \n",
       "      1.000000 \n",
       "      48.713098 \n",
       "      0.012162 \n",
       "      0.592451 \n",
       "      NaN \n",
       "      NaN \n",
       "      0.00 \n",
       "     \n",
       "    \n",
       "      96095 \n",
       "      9_135370 \n",
       "      76215 \n",
       "      NaN \n",
       "      2164740.0 \n",
       "      0 \n",
       "      1490 \n",
       "      False \n",
       "      NaN \n",
       "      True \n",
       "      1 \n",
       "      ... \n",
       "      0.018486 \n",
       "      1 \n",
       "      0.003119 \n",
       "      1.000000 \n",
       "      48.713098 \n",
       "      0.003119 \n",
       "      0.151938 \n",
       "      NaN \n",
       "      NaN \n",
       "      0.00 \n",
       "     \n",
       "   \n",
       "
\n",
       "
96096 rows × 99 columns
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      quartile \n",
       "      size \n",
       "      size_perct \n",
       "      nb_tickets_expected \n",
       "      total_amount_expected \n",
       "      revenue_recovered_perct \n",
       "      share_future_revenue_perct \n",
       "      pace_purchase \n",
       "     \n",
       "   \n",
       "  \n",
       "    \n",
       "      0 \n",
       "      1 \n",
       "      54123 \n",
       "      56.32 \n",
       "      1480.36 \n",
       "      55345.21 \n",
       "      11.99 \n",
       "      0.37 \n",
       "      15.33 \n",
       "     \n",
       "    \n",
       "      1 \n",
       "      2 \n",
       "      18181 \n",
       "      18.92 \n",
       "      4381.84 \n",
       "      130503.26 \n",
       "      11.65 \n",
       "      0.88 \n",
       "      15.31 \n",
       "     \n",
       "    \n",
       "      2 \n",
       "      3 \n",
       "      11111 \n",
       "      11.56 \n",
       "      8827.97 \n",
       "      285945.50 \n",
       "      24.00 \n",
       "      1.93 \n",
       "      14.03 \n",
       "     \n",
       "    \n",
       "      3 \n",
       "      4 \n",
       "      12681 \n",
       "      13.20 \n",
       "      239758.61 \n",
       "      10313321.91 \n",
       "      85.74 \n",
       "      69.67 \n",
       "      8.56 \n",
       "     \n",
       "   \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      quartile \n",
       "      score (%) \n",
       "      score adjusted (%) \n",
       "      has purchased (%) \n",
       "     \n",
       "   \n",
       "  \n",
       "    \n",
       "      0 \n",
       "      1 \n",
       "      13.25 \n",
       "      2.51 \n",
       "      1.57 \n",
       "     \n",
       "    \n",
       "      1 \n",
       "      2 \n",
       "      33.89 \n",
       "      8.00 \n",
       "      9.85 \n",
       "     \n",
       "    \n",
       "      2 \n",
       "      3 \n",
       "      63.06 \n",
       "      22.58 \n",
       "      21.47 \n",
       "     \n",
       "    \n",
       "      3 \n",
       "      4 \n",
       "      90.52 \n",
       "      66.20 \n",
       "      65.01 \n",
       "     \n",
       "   \n",
       "
\n",
       "
\n",
       "\n",
       "
\n",
       "  \n",
       "    \n",
       "      quartile \n",
       "      size \n",
       "      size_perct \n",
       "      nb_tickets_expected \n",
       "      total_amount_expected \n",
       "      perct_revenue_recovered \n",
       "     \n",
       "   \n",
       "  \n",
       "    \n",
       "      0 \n",
       "      1 \n",
       "      37410 \n",
       "      38.93 \n",
       "      419.76 \n",
       "      9245.08 \n",
       "      21.71 \n",
       "     \n",
       "    \n",
       "      1 \n",
       "      2 \n",
       "      29517 \n",
       "      30.72 \n",
       "      11549.06 \n",
       "      296522.02 \n",
       "      39.24 \n",
       "     \n",
       "    \n",
       "      2 \n",
       "      3 \n",
       "      20137 \n",
       "      20.96 \n",
       "      29997.85 \n",
       "      954751.91 \n",
       "      63.34 \n",
       "     \n",
       "    \n",
       "      3 \n",
       "      4 \n",
       "      9032 \n",
       "      9.40 \n",
       "      244655.82 \n",
       "      10736011.95 \n",
       "      97.72 \n",
       "     \n",
       "   \n",
       "
\n",
       "