diff --git a/Sport/Modelization/CA_segment_sport.ipynb b/Sport/Modelization/CA_segment_sport.ipynb
new file mode 100644
index 0000000..c958e46
--- /dev/null
+++ b/Sport/Modelization/CA_segment_sport.ipynb
@@ -0,0 +1,4538 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "84b6e27e-4bda-4d38-8689-ec7fc0da1848",
+ "metadata": {},
+ "source": [
+ "# Define segments and predict the associated sales"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ec059482-45d3-4ae6-99bc-9b4ced115db3",
+ "metadata": {},
+ "source": [
+ "## Package imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "id": "9771bf29-d08e-4674-8c23-9a2672fbef8f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import s3fs\n",
+ "import re\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
+ "from sklearn.utils import class_weight\n",
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.compose import ColumnTransformer\n",
+ "from sklearn.preprocessing import OneHotEncoder\n",
+ "from sklearn.impute import SimpleImputer\n",
+ "from sklearn.model_selection import GridSearchCV\n",
+ "from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
+ "from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt\n",
+ "from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
+ "from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
+ "from sklearn.naive_bayes import GaussianNB\n",
+ "from scipy.optimize import fsolve\n",
+ "\n",
+ "import pickle\n",
+ "import warnings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "048fcd7c-800a-4a6b-b725-faf8410f924a",
+ "metadata": {},
+ "source": [
+ "## Load datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "539ccbdf-f29f-4f04-99c1-8c88d0efe514",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# S3 filesystem client, pointed at the endpoint exposed through the environment\n",
+ "S3_ENDPOINT_URL = \"https://{}\".format(os.environ[\"AWS_S3_ENDPOINT\"])\n",
+ "fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0c3a6ddc-9345-4a42-b6bf-a20a95de3028",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def load_train_test():\n",
+ "    \"\"\"Read the sport train and test sets from S3 and return them as DataFrames.\n",
+ "\n",
+ "    Both CSV files are opened through the notebook-level ``fs`` filesystem\n",
+ "    object. Missing values are left untouched (no ``fillna`` is applied).\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    tuple\n",
+ "        ``(dataset_train, dataset_test)``.\n",
+ "    \"\"\"\n",
+ "    bucket = \"projet-bdc2324-team1/Generalization/sport\"\n",
+ "\n",
+ "    loaded = []\n",
+ "    for suffix in (\"/Train_set.csv\", \"/Test_set.csv\"):\n",
+ "        with fs.open(bucket + suffix, mode=\"rb\") as file_in:\n",
+ "            loaded.append(pd.read_csv(file_in, sep=\",\"))\n",
+ "\n",
+ "    dataset_train, dataset_test = loaded\n",
+ "    return dataset_train, dataset_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2831d546-b365-498b-8248-c618bd9c3057",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset_train, dataset_test = load_train_test()\n",
+ "dataset_train.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "id": "b8827f7b-b304-4f51-9814-c7a98ed88cf0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def features_target_split(dataset_train, dataset_test):\n",
+ "    \"\"\"Split the train and test sets into feature matrices and target frames.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    dataset_train, dataset_test : pandas.DataFrame\n",
+ "        Frames holding every column of the feature list below plus the\n",
+ "        'y_has_purchased' column.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    tuple\n",
+ "        ``(X_train, X_test, y_train, y_test)``; the targets are returned as\n",
+ "        single-column DataFrames.\n",
+ "    \"\"\"\n",
+ "    # 'is_partner' is currently excluded from the features.\n",
+ "    # A shorter list was also considered, dropping fidelity,\n",
+ "    # time_between_purchase and gender_other (collinearity issue).\n",
+ "    feature_names = [\n",
+ "        'nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers',\n",
+ "        'vente_internet_max', 'purchase_date_min', 'purchase_date_max',\n",
+ "        'time_between_purchase', 'nb_tickets_internet', 'fidelity',\n",
+ "        'is_email_true', 'opt_in',\n",
+ "        'gender_female', 'gender_male', 'gender_other',\n",
+ "        'nb_campaigns', 'nb_campaigns_opened',\n",
+ "    ]\n",
+ "    target_columns = ['y_has_purchased']\n",
+ "\n",
+ "    train_features, train_target = dataset_train[feature_names], dataset_train[target_columns]\n",
+ "    test_features, test_target = dataset_test[feature_names], dataset_test[target_columns]\n",
+ "\n",
+ "    return train_features, test_features, train_target, test_target"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "id": "c18195fc-ed40-4e39-a59e-c9ecc5a8e6c3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape train : (224213, 17)\n",
+ "Shape test : (96096, 17)\n"
+ ]
+ }
+ ],
+ "source": [
+ "X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)\n",
+ "print(\"Shape train : \", X_train.shape)\n",
+ "print(\"Shape test : \", X_test.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "74eda066-5e01-43aa-b0cf-cc6d9bbf770e",
+ "metadata": {},
+ "source": [
+ "## Get results from the cross-validated logit model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "id": "7c81390e-598c-4f02-bd56-dd03b00dcb33",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " nb_tickets \n",
+ " nb_purchases \n",
+ " total_amount \n",
+ " nb_suppliers \n",
+ " vente_internet_max \n",
+ " purchase_date_min \n",
+ " purchase_date_max \n",
+ " time_between_purchase \n",
+ " nb_tickets_internet \n",
+ " fidelity \n",
+ " is_email_true \n",
+ " opt_in \n",
+ " gender_female \n",
+ " gender_male \n",
+ " gender_other \n",
+ " nb_campaigns \n",
+ " nb_campaigns_opened \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 100.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.177187 \n",
+ " 5.177187 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " True \n",
+ " False \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 55.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 426.265613 \n",
+ " 426.265613 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " True \n",
+ " True \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 17.0 \n",
+ " 1.0 \n",
+ " 80.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 436.033437 \n",
+ " 436.033437 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " True \n",
+ " True \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 120.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.196412 \n",
+ " 5.196412 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " True \n",
+ " False \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 34.0 \n",
+ " 2.0 \n",
+ " 416.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 478.693148 \n",
+ " 115.631470 \n",
+ " 363.061678 \n",
+ " 0.0 \n",
+ " 4 \n",
+ " True \n",
+ " False \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 96091 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 67.31 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 278.442257 \n",
+ " 278.442257 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 2 \n",
+ " True \n",
+ " False \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 15.0 \n",
+ " 5.0 \n",
+ " \n",
+ " \n",
+ " 96092 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 61.41 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 189.207373 \n",
+ " 189.207373 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 1 \n",
+ " True \n",
+ " False \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 12.0 \n",
+ " 9.0 \n",
+ " \n",
+ " \n",
+ " 96093 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.00 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 550.000000 \n",
+ " 550.000000 \n",
+ " -1.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " True \n",
+ " True \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 29.0 \n",
+ " 3.0 \n",
+ " \n",
+ " \n",
+ " 96094 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 79.43 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 279.312905 \n",
+ " 279.312905 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 1 \n",
+ " True \n",
+ " False \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 20.0 \n",
+ " 4.0 \n",
+ " \n",
+ " \n",
+ " 96095 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.00 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 550.000000 \n",
+ " 550.000000 \n",
+ " -1.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " True \n",
+ " False \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 31.0 \n",
+ " 4.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
96096 rows × 17 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " nb_tickets nb_purchases total_amount nb_suppliers \\\n",
+ "0 4.0 1.0 100.00 1.0 \n",
+ "1 1.0 1.0 55.00 1.0 \n",
+ "2 17.0 1.0 80.00 1.0 \n",
+ "3 4.0 1.0 120.00 1.0 \n",
+ "4 34.0 2.0 416.00 1.0 \n",
+ "... ... ... ... ... \n",
+ "96091 1.0 1.0 67.31 1.0 \n",
+ "96092 1.0 1.0 61.41 1.0 \n",
+ "96093 0.0 0.0 0.00 0.0 \n",
+ "96094 1.0 1.0 79.43 1.0 \n",
+ "96095 0.0 0.0 0.00 0.0 \n",
+ "\n",
+ " vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "0 0.0 5.177187 5.177187 \n",
+ "1 0.0 426.265613 426.265613 \n",
+ "2 0.0 436.033437 436.033437 \n",
+ "3 0.0 5.196412 5.196412 \n",
+ "4 0.0 478.693148 115.631470 \n",
+ "... ... ... ... \n",
+ "96091 1.0 278.442257 278.442257 \n",
+ "96092 1.0 189.207373 189.207373 \n",
+ "96093 0.0 550.000000 550.000000 \n",
+ "96094 1.0 279.312905 279.312905 \n",
+ "96095 0.0 550.000000 550.000000 \n",
+ "\n",
+ " time_between_purchase nb_tickets_internet fidelity is_email_true \\\n",
+ "0 0.000000 0.0 1 True \n",
+ "1 0.000000 0.0 2 True \n",
+ "2 0.000000 0.0 2 True \n",
+ "3 0.000000 0.0 1 True \n",
+ "4 363.061678 0.0 4 True \n",
+ "... ... ... ... ... \n",
+ "96091 0.000000 1.0 2 True \n",
+ "96092 0.000000 1.0 1 True \n",
+ "96093 -1.000000 0.0 1 True \n",
+ "96094 0.000000 1.0 1 True \n",
+ "96095 -1.000000 0.0 2 True \n",
+ "\n",
+ " opt_in gender_female gender_male gender_other nb_campaigns \\\n",
+ "0 False 1 0 0 0.0 \n",
+ "1 True 0 1 0 0.0 \n",
+ "2 True 1 0 0 0.0 \n",
+ "3 False 1 0 0 0.0 \n",
+ "4 False 1 0 0 0.0 \n",
+ "... ... ... ... ... ... \n",
+ "96091 False 0 1 0 15.0 \n",
+ "96092 False 0 1 0 12.0 \n",
+ "96093 True 1 0 0 29.0 \n",
+ "96094 False 0 1 0 20.0 \n",
+ "96095 False 0 1 0 31.0 \n",
+ "\n",
+ " nb_campaigns_opened \n",
+ "0 0.0 \n",
+ "1 0.0 \n",
+ "2 0.0 \n",
+ "3 0.0 \n",
+ "4 0.0 \n",
+ "... ... \n",
+ "96091 5.0 \n",
+ "96092 9.0 \n",
+ "96093 3.0 \n",
+ "96094 4.0 \n",
+ "96095 4.0 \n",
+ "\n",
+ "[96096 rows x 17 columns]"
+ ]
+ },
+ "execution_count": 87,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "id": "c708f439-bb75-4688-bf4f-4c04e13deaae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def load_model(type_of_activity, model):\n",
+ "    \"\"\"Download a pickled model from S3 and return the deserialized object.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    type_of_activity : str\n",
+ "        Sub-folder of ``Output_model`` (e.g. \"sport\").\n",
+ "    model : str\n",
+ "        Model name; used both as the folder name and as the stem of the\n",
+ "        ``<model>.pkl`` file.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    object\n",
+ "        Whatever object was pickled in the file.\n",
+ "\n",
+ "    Notes\n",
+ "    -----\n",
+ "    NOTE(review): unpickling can execute arbitrary code, so this must only be\n",
+ "    pointed at buckets whose content is trusted.\n",
+ "    \"\"\"\n",
+ "    model_dir = f\"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/\"\n",
+ "    pickle_path = model_dir + (model + '.pkl')\n",
+ "\n",
+ "    # The file is read through the notebook-level `fs` filesystem object\n",
+ "    with fs.open(pickle_path, mode=\"rb\") as pickle_file:\n",
+ "        serialized = pickle_file.read()\n",
+ "\n",
+ "    return pickle.loads(serialized)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "id": "5261a803-05b8-41a0-968c-dc7bde48ddd3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Exception ignored in: \n",
+ "Traceback (most recent call last):\n",
+ " File \"/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py\", line 1952, in __del__\n",
+ " self.close()\n",
+ " File \"/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py\", line 1929, in close\n",
+ " if not self.forced:\n",
+ " ^^^^^^^^^^^\n",
+ "AttributeError: 'S3File' object has no attribute 'forced'\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "GridSearchCV(cv=3, error_score='raise',\n",
+ " estimator=Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler',\n",
+ " StandardScaler())]),\n",
+ " ['nb_tickets',\n",
+ " 'nb_purchases',\n",
+ " 'total_amount',\n",
+ " 'nb_suppliers',\n",
+ " 'vente_internet_max',\n",
+ " 'purchase_date_min',\n",
+ " 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet',\n",
+ " 'nb_campaigns',\n",
+ " 'nb_...\n",
+ " 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
+ " 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
+ " 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
+ " 6.400000e+01]),\n",
+ " 'LogisticRegression_cv__class_weight': ['balanced',\n",
+ " {0.0: 0.5837086520288036,\n",
+ " 1.0: 3.486549107420539}],\n",
+ " 'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
+ " scoring=make_scorer(recall_score, response_method='predict')) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. GridSearchCV?Documentation for GridSearchCV iFitted GridSearchCV(cv=3, error_score='raise',\n",
+ " estimator=Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler',\n",
+ " StandardScaler())]),\n",
+ " ['nb_tickets',\n",
+ " 'nb_purchases',\n",
+ " 'total_amount',\n",
+ " 'nb_suppliers',\n",
+ " 'vente_internet_max',\n",
+ " 'purchase_date_min',\n",
+ " 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet',\n",
+ " 'nb_campaigns',\n",
+ " 'nb_...\n",
+ " 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
+ " 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
+ " 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
+ " 6.400000e+01]),\n",
+ " 'LogisticRegression_cv__class_weight': ['balanced',\n",
+ " {0.0: 0.5837086520288036,\n",
+ " 1.0: 3.486549107420539}],\n",
+ " 'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
+ " scoring=make_scorer(recall_score, response_method='predict')) estimator: Pipeline Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler',\n",
+ " StandardScaler())]),\n",
+ " ['nb_tickets', 'nb_purchases',\n",
+ " 'total_amount',\n",
+ " 'nb_suppliers',\n",
+ " 'vente_internet_max',\n",
+ " 'purchase_date_min',\n",
+ " 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet',\n",
+ " 'nb_campaigns',\n",
+ " 'nb_campaigns_opened']),\n",
+ " ('cat',\n",
+ " Pipeline(steps=[('onehot',\n",
+ " OneHotEncoder(handle_unknown='ignore',\n",
+ " sparse_output=False))]),\n",
+ " ['opt_in', 'gender_male',\n",
+ " 'gender_female'])])),\n",
+ " ('LogisticRegression_cv',\n",
+ " LogisticRegression(max_iter=5000, solver='saga'))]) preprocessor: ColumnTransformer?Documentation for preprocessor: ColumnTransformer ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler', StandardScaler())]),\n",
+ " ['nb_tickets', 'nb_purchases', 'total_amount',\n",
+ " 'nb_suppliers', 'vente_internet_max',\n",
+ " 'purchase_date_min', 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet', 'nb_campaigns',\n",
+ " 'nb_campaigns_opened']),\n",
+ " ('cat',\n",
+ " Pipeline(steps=[('onehot',\n",
+ " OneHotEncoder(handle_unknown='ignore',\n",
+ " sparse_output=False))]),\n",
+ " ['opt_in', 'gender_male', 'gender_female'])]) num ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet', 'nb_campaigns', 'nb_campaigns_opened'] "
+ ],
+ "text/plain": [
+ "GridSearchCV(cv=3, error_score='raise',\n",
+ " estimator=Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler',\n",
+ " StandardScaler())]),\n",
+ " ['nb_tickets',\n",
+ " 'nb_purchases',\n",
+ " 'total_amount',\n",
+ " 'nb_suppliers',\n",
+ " 'vente_internet_max',\n",
+ " 'purchase_date_min',\n",
+ " 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet',\n",
+ " 'nb_campaigns',\n",
+ " 'nb_...\n",
+ " 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
+ " 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
+ " 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
+ " 6.400000e+01]),\n",
+ " 'LogisticRegression_cv__class_weight': ['balanced',\n",
+ " {0.0: 0.5837086520288036,\n",
+ " 1.0: 3.486549107420539}],\n",
+ " 'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
+ " scoring=make_scorer(recall_score, response_method='predict'))"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "logit_cv = load_model(\"sport\", \"LogisticRegression_cv\")\n",
+ "logit_cv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "6f3e584d-c70d-4b45-b947-4414ff416e17",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "GridSearchCV(cv=3, error_score='raise',\n",
+ " estimator=Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler',\n",
+ " StandardScaler())]),\n",
+ " ['nb_tickets',\n",
+ " 'nb_purchases',\n",
+ " 'total_amount',\n",
+ " 'nb_suppliers',\n",
+ " 'vente_internet_max',\n",
+ " 'purchase_date_min',\n",
+ " 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet',\n",
+ " 'nb_campaigns',\n",
+ " 'nb_...\n",
+ " 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
+ " 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
+ " 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
+ " 6.400000e+01]),\n",
+ " 'LogisticRegression_cv__class_weight': ['balanced',\n",
+ " {0.0: 0.5837086520288036,\n",
+ " 1.0: 3.486549107420539}],\n",
+ " 'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
+ " scoring=make_scorer(recall_score, response_method='predict')) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. GridSearchCV?Documentation for GridSearchCV iFitted GridSearchCV(cv=3, error_score='raise',\n",
+ " estimator=Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler',\n",
+ " StandardScaler())]),\n",
+ " ['nb_tickets',\n",
+ " 'nb_purchases',\n",
+ " 'total_amount',\n",
+ " 'nb_suppliers',\n",
+ " 'vente_internet_max',\n",
+ " 'purchase_date_min',\n",
+ " 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet',\n",
+ " 'nb_campaigns',\n",
+ " 'nb_...\n",
+ " 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
+ " 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
+ " 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
+ " 6.400000e+01]),\n",
+ " 'LogisticRegression_cv__class_weight': ['balanced',\n",
+ " {0.0: 0.5837086520288036,\n",
+ " 1.0: 3.486549107420539}],\n",
+ " 'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
+ " scoring=make_scorer(recall_score, response_method='predict')) estimator: Pipeline Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler',\n",
+ " StandardScaler())]),\n",
+ " ['nb_tickets', 'nb_purchases',\n",
+ " 'total_amount',\n",
+ " 'nb_suppliers',\n",
+ " 'vente_internet_max',\n",
+ " 'purchase_date_min',\n",
+ " 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet',\n",
+ " 'nb_campaigns',\n",
+ " 'nb_campaigns_opened']),\n",
+ " ('cat',\n",
+ " Pipeline(steps=[('onehot',\n",
+ " OneHotEncoder(handle_unknown='ignore',\n",
+ " sparse_output=False))]),\n",
+ " ['opt_in', 'gender_male',\n",
+ " 'gender_female'])])),\n",
+ " ('LogisticRegression_cv',\n",
+ " LogisticRegression(max_iter=5000, solver='saga'))]) preprocessor: ColumnTransformer?Documentation for preprocessor: ColumnTransformer ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler', StandardScaler())]),\n",
+ " ['nb_tickets', 'nb_purchases', 'total_amount',\n",
+ " 'nb_suppliers', 'vente_internet_max',\n",
+ " 'purchase_date_min', 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet', 'nb_campaigns',\n",
+ " 'nb_campaigns_opened']),\n",
+ " ('cat',\n",
+ " Pipeline(steps=[('onehot',\n",
+ " OneHotEncoder(handle_unknown='ignore',\n",
+ " sparse_output=False))]),\n",
+ " ['opt_in', 'gender_male', 'gender_female'])]) num ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet', 'nb_campaigns', 'nb_campaigns_opened'] "
+ ],
+ "text/plain": [
+ "GridSearchCV(cv=3, error_score='raise',\n",
+ " estimator=Pipeline(steps=[('preprocessor',\n",
+ " ColumnTransformer(transformers=[('num',\n",
+ " Pipeline(steps=[('scaler',\n",
+ " StandardScaler())]),\n",
+ " ['nb_tickets',\n",
+ " 'nb_purchases',\n",
+ " 'total_amount',\n",
+ " 'nb_suppliers',\n",
+ " 'vente_internet_max',\n",
+ " 'purchase_date_min',\n",
+ " 'purchase_date_max',\n",
+ " 'time_between_purchase',\n",
+ " 'nb_tickets_internet',\n",
+ " 'nb_campaigns',\n",
+ " 'nb_...\n",
+ " 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
+ " 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
+ " 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
+ " 6.400000e+01]),\n",
+ " 'LogisticRegression_cv__class_weight': ['balanced',\n",
+ " {0.0: 0.5837086520288036,\n",
+ " 1.0: 3.486549107420539}],\n",
+ " 'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
+ " scoring=make_scorer(recall_score, response_method='predict'))"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "logit_cv"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "006819e7-e9c5-48d9-85ee-aa43d5e4c9c2",
+ "metadata": {},
+ "source": [
+ "## Quartile clustering"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "id": "018d8ff4-3436-4eec-8507-d1a265cbabf1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_pred = logit_cv.predict(X_test)\n",
+ "y_pred_prob = logit_cv.predict_proba(X_test)[:, 1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "id": "846f53b9-73c2-4a8b-9d9e-f11bf59ce9ba",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_620/375041546.py:3: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " X_test_segment[\"has_purchased\"] = y_test\n",
+ "/tmp/ipykernel_620/375041546.py:4: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " X_test_segment[\"has_purchased_estim\"] = y_pred\n",
+ "/tmp/ipykernel_620/375041546.py:5: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " X_test_segment[\"score\"] = y_pred_prob\n",
+ "/tmp/ipykernel_620/375041546.py:6: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " X_test_segment[\"quartile\"] = np.where(X_test['score']<0.25, '1',\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " nb_tickets \n",
+ " nb_purchases \n",
+ " total_amount \n",
+ " nb_suppliers \n",
+ " vente_internet_max \n",
+ " purchase_date_min \n",
+ " purchase_date_max \n",
+ " time_between_purchase \n",
+ " nb_tickets_internet \n",
+ " fidelity \n",
+ " ... \n",
+ " opt_in \n",
+ " gender_female \n",
+ " gender_male \n",
+ " gender_other \n",
+ " nb_campaigns \n",
+ " nb_campaigns_opened \n",
+ " has_purchased \n",
+ " has_purchased_estim \n",
+ " score \n",
+ " quartile \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 100.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.177187 \n",
+ " 5.177187 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " False \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657671 \n",
+ " 3 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 55.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 426.265613 \n",
+ " 426.265613 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " True \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 0.266538 \n",
+ " 2 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 17.0 \n",
+ " 1.0 \n",
+ " 80.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 436.033437 \n",
+ " 436.033437 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " True \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.214668 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 120.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.196412 \n",
+ " 5.196412 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " False \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657770 \n",
+ " 3 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 34.0 \n",
+ " 2.0 \n",
+ " 416.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 478.693148 \n",
+ " 115.631470 \n",
+ " 363.061678 \n",
+ " 0.0 \n",
+ " 4 \n",
+ " ... \n",
+ " False \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 0.894173 \n",
+ " 4 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2.0 \n",
+ " 1.0 \n",
+ " 60.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.140069 \n",
+ " 5.140069 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " False \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.717482 \n",
+ " 3 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 5.0 \n",
+ " 1.0 \n",
+ " 61.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 105.053773 \n",
+ " 105.053773 \n",
+ " 0.000000 \n",
+ " 5.0 \n",
+ " 1 \n",
+ " ... \n",
+ " False \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.541855 \n",
+ " 3 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 80.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 63.206030 \n",
+ " 63.206030 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " True \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.461164 \n",
+ " 2 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 10.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 44.698090 \n",
+ " 44.698090 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " True \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.310828 \n",
+ " 2 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 3.0 \n",
+ " 3.0 \n",
+ " 165.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 266.012106 \n",
+ " 258.012106 \n",
+ " 8.000000 \n",
+ " 3.0 \n",
+ " 2 \n",
+ " ... \n",
+ " False \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.452877 \n",
+ " 2 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
10 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " nb_tickets nb_purchases total_amount nb_suppliers vente_internet_max \\\n",
+ "0 4.0 1.0 100.0 1.0 0.0 \n",
+ "1 1.0 1.0 55.0 1.0 0.0 \n",
+ "2 17.0 1.0 80.0 1.0 0.0 \n",
+ "3 4.0 1.0 120.0 1.0 0.0 \n",
+ "4 34.0 2.0 416.0 1.0 0.0 \n",
+ "5 2.0 1.0 60.0 1.0 0.0 \n",
+ "6 5.0 1.0 61.0 1.0 1.0 \n",
+ "7 4.0 1.0 80.0 1.0 0.0 \n",
+ "8 1.0 1.0 10.0 1.0 0.0 \n",
+ "9 3.0 3.0 165.0 1.0 1.0 \n",
+ "\n",
+ " purchase_date_min purchase_date_max time_between_purchase \\\n",
+ "0 5.177187 5.177187 0.000000 \n",
+ "1 426.265613 426.265613 0.000000 \n",
+ "2 436.033437 436.033437 0.000000 \n",
+ "3 5.196412 5.196412 0.000000 \n",
+ "4 478.693148 115.631470 363.061678 \n",
+ "5 5.140069 5.140069 0.000000 \n",
+ "6 105.053773 105.053773 0.000000 \n",
+ "7 63.206030 63.206030 0.000000 \n",
+ "8 44.698090 44.698090 0.000000 \n",
+ "9 266.012106 258.012106 8.000000 \n",
+ "\n",
+ " nb_tickets_internet fidelity ... opt_in gender_female gender_male \\\n",
+ "0 0.0 1 ... False 1 0 \n",
+ "1 0.0 2 ... True 0 1 \n",
+ "2 0.0 2 ... True 1 0 \n",
+ "3 0.0 1 ... False 1 0 \n",
+ "4 0.0 4 ... False 1 0 \n",
+ "5 0.0 1 ... False 0 1 \n",
+ "6 5.0 1 ... False 0 0 \n",
+ "7 0.0 1 ... True 0 1 \n",
+ "8 0.0 1 ... True 0 0 \n",
+ "9 3.0 2 ... False 0 0 \n",
+ "\n",
+ " gender_other nb_campaigns nb_campaigns_opened has_purchased \\\n",
+ "0 0 0.0 0.0 0.0 \n",
+ "1 0 0.0 0.0 1.0 \n",
+ "2 0 0.0 0.0 0.0 \n",
+ "3 0 0.0 0.0 0.0 \n",
+ "4 0 0.0 0.0 1.0 \n",
+ "5 0 0.0 0.0 0.0 \n",
+ "6 1 0.0 0.0 0.0 \n",
+ "7 0 0.0 0.0 0.0 \n",
+ "8 1 0.0 0.0 0.0 \n",
+ "9 1 0.0 0.0 0.0 \n",
+ "\n",
+ " has_purchased_estim score quartile \n",
+ "0 1.0 0.657671 3 \n",
+ "1 0.0 0.266538 2 \n",
+ "2 0.0 0.214668 1 \n",
+ "3 1.0 0.657770 3 \n",
+ "4 1.0 0.894173 4 \n",
+ "5 1.0 0.717482 3 \n",
+ "6 1.0 0.541855 3 \n",
+ "7 0.0 0.461164 2 \n",
+ "8 0.0 0.310828 2 \n",
+ "9 0.0 0.452877 2 \n",
+ "\n",
+ "[10 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Work on an explicit copy: X_test keeps only the model's input features, the\n",
+ "# cell can be re-run safely and pandas no longer emits SettingWithCopyWarning.\n",
+ "# Downstream cells must read the added columns from X_test_segment, not X_test.\n",
+ "X_test_segment = X_test.copy()\n",
+ "\n",
+ "X_test_segment[\"has_purchased\"] = y_test[\"y_has_purchased\"]\n",
+ "X_test_segment[\"has_purchased_estim\"] = y_pred\n",
+ "X_test_segment[\"score\"] = y_pred_prob\n",
+ "\n",
+ "# Note: despite its name, \"quartile\" is a fixed score band (thresholds 0.25,\n",
+ "# 0.5 and 0.75), not an empirical quartile of the score distribution.\n",
+ "segment_score = X_test_segment[\"score\"]\n",
+ "X_test_segment[\"quartile\"] = np.select(\n",
+ "    [segment_score < 0.25, segment_score < 0.5, segment_score < 0.75],\n",
+ "    ['1', '2', '3'],\n",
+ "    default='4',\n",
+ ")\n",
+ "X_test_segment.head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0916f099-3faa-4c47-9b60-d1ee797b3c9d",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ad16b8ab-7e01-404b-971e-866e9b9d5aa4",
+ "metadata": {},
+ "source": [
+ "## Definition of functions to compute the bias of the scores and adjust them\n",
+ "\n",
+ "Le biais est calculé de la façon suivante. \n",
+ "En notant $\\hat{p(x_i)}$ le score calculé (estimé par la modélisation) et $p(x_i)$ le vrai score (sans biais), et $\\beta$ le logarithme du biais, on a : \\\n",
+ "$\\ln{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}} = \\beta + \\ln{\\frac{p(x_i)}{1-p(x_i)}}$ \\\n",
+ "$ \\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}} = \\exp(\\beta) \\cdot \\frac{p(x_i)}{1-p(x_i)} $ , soit : \\\\\n",
+ "$p(x_i) = {\\frac{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}{B+\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}}$ \\\n",
+ "Ce qu'on appelle biais et qu'on estime dans le code par la suite est : $B=\\exp(\\beta) $. Les probabilités ne sont donc pas biaisées si $B=1$. Il y a surestimation si $B>1$. \n",
+ "\n",
+ "On cherche le B qui permette d'ajuster les probabilités de telle sorte que la somme des scores soit égale à la somme des y_has_purchased. Cela revient à résoudre : \n",
+ "\n",
+ "\\begin{equation}\n",
+ "\\sum_{i}{\\frac{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}{B+\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}} = \\sum_{i}{Y_i}\n",
+ "\\end{equation}\n",
+ "\n",
+ "C'est ce que fait la fonction find_bias. \n",
+ "\n",
+ "Note sur les notations : \\\n",
+ "$\\hat{p(x_i)}$ correspond à ce qu'on appelle le score et $p(x_i)$ à ce qu'on appellera le score adjusted"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "id": "f0379536-a6c5-4b16-bde5-d0319ec1b140",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# compute adjusted score from odd ratios (cf formula above)\n",
+ "def adjusted_score(odd_ratio, bias) :\n",
+ " adjusted_score = odd_ratio/(bias+odd_ratio)\n",
+ " return adjusted_score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "id": "32a0dfd0-f49d-4785-a56f-706d381bfe41",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# when the score is 1 we cannot compute the odd ratio, so we adjust scores equal to 1\n",
+ "# we set the second best score instead\n",
+ "\n",
+ "def adjust_score_1(score) :\n",
+ " second_best_score = np.array([element for element in score if element !=1]).max()\n",
+ " new_score = np.array([element if element!=1 else second_best_score for element in score]) \n",
+ " return new_score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "id": "2dff1def-02df-413e-afce-b4aeaf7752b6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def odd_ratio(score) :\n",
+ " return score / (1 - score)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "id": "683d71fc-7442-4028-869c-49c57592d6e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# definition of a function that automatically detects the bias\n",
+    "\n",
+    "def find_bias(odd_ratios, y_objective, initial_guess=6) :\n",
+    "    \"\"\"Estimate the bias B for which the adjusted scores sum to y_objective.\n",
+    "\n",
+    "    Solves sum_i adjusted_score(odd_ratios[i], B) - y_objective = 0 for B with scipy's fsolve.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    odd_ratios : array-like holding the odds of the raw scores\n",
+    "    y_objective : value the sum of the adjusted scores must reach\n",
+    "    initial_guess : starting point handed to fsolve (default 6)\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    the estimated bias B (first component of the fsolve solution)\n",
+    "    \"\"\"\n",
+    "    odds = np.asarray(odd_ratios, dtype=float)\n",
+    "\n",
+    "    def gap(bias) :\n",
+    "        # adjusted_score is pure arithmetic, so it is applied to the whole array at once\n",
+    "        return adjusted_score(odds, bias).sum() - y_objective\n",
+    "\n",
+    "    # the starting point used to be hard-coded to 6, which silently ignored initial_guess\n",
+    "    bias_estimated = fsolve(gap, x0=initial_guess)\n",
+    "\n",
+    "    return bias_estimated[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "id": "781b0d40-c954-4c54-830a-e709c8667328",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6.172331113516847"
+ ]
+ },
+ "execution_count": 98,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# bias estimated on the test set (scores equal to 1 are replaced before taking the odds)\n",
+    "\n",
+    "bias_test_set = find_bias(\n",
+    "    odd_ratios=odd_ratio(adjust_score_1(X_test_segment[\"score\"])),\n",
+    "    y_objective=y_test[\"y_has_purchased\"].sum(),\n",
+    "    initial_guess=6,\n",
+    ")\n",
+    "bias_test_set"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "id": "248cb862-418e-4767-9933-70c4885ecf40",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6.070461139075353"
+ ]
+ },
+ "execution_count": 102,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# same estimation on the train set, to compare it with the test-set bias\n",
+    "X_train_score = logit_cv.predict_proba(X_train)[:, 1]\n",
+    "\n",
+    "bias_train_set = find_bias(\n",
+    "    odd_ratios=odd_ratio(adjust_score_1(X_train_score)),\n",
+    "    y_objective=y_train[\"y_has_purchased\"].sum(),\n",
+    "    initial_guess=6,\n",
+    ")\n",
+    "bias_train_set"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "id": "fff6cbe6-7bb3-4732-9b81-b9ac5383bbcf",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "betâ test - betâ train = 0.016642008368292337\n"
+ ]
+ }
+ ],
+ "source": [
+    "# gap between the log-biases (beta = ln B) estimated on the test and on the train set\n",
+    "print(\"betâ test - betâ train = \", np.log(bias_test_set / bias_train_set))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 116,
+ "id": "f506870d-4a8a-4b2c-8f0b-e0789080b20c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "mean absolute erreur 0.001409799678121875\n"
+ ]
+ }
+ ],
+ "source": [
+    "# impact of using the bias computed on the train set instead of the test set - totally negligible\n",
+    "\n",
+    "# both arrays hold adjusted scores of the *test* set; the suffix only tells which bias\n",
+    "# estimate (test or train) was applied. The odds are computed once and shared.\n",
+    "test_odds = odd_ratio(adjust_score_1(X_test_segment[\"score\"]))\n",
+    "score_adjusted_test = adjusted_score(test_odds, bias = bias_test_set)\n",
+    "score_adjusted_train = adjusted_score(test_odds, bias = bias_train_set)\n",
+    "\n",
+    "print(\"mean absolute erreur\",abs(score_adjusted_test-score_adjusted_train).mean())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 117,
+ "id": "8213d0e4-063b-49fa-90b7-677fc34f4c01",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_620/1825363704.py:7: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " X_test_segment[\"score_adjusted\"] = score_adjusted_train\n"
+ ]
+ }
+ ],
+ "source": [
+    "# adjust scores accordingly\n",
+    "\n",
+    "# the test set is not supposed to be available, so the bias estimated on the train set\n",
+    "# (already applied in score_adjusted_train) is the one stored in the frame\n",
+    "X_test_segment[\"score_adjusted\"] = score_adjusted_train"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 118,
+ "id": "834d3723-2e72-4c65-9c62-e2d595c69461",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MSE for score : 0.15494387585189107\n",
+ "MSE for ajusted score : 0.08851697393139933\n",
+ "sum of y_has_purchased : 13690.0\n",
+ "sum of adjusted scores : 13825.476109871417\n"
+ ]
+ }
+ ],
+ "source": [
+    "# check: mean squared error of raw vs adjusted scores, then observed vs expected number of buyers\n",
+    "\n",
+    "MSE_score = (X_test_segment[\"score\"] - X_test_segment[\"has_purchased\"]).pow(2).mean()\n",
+    "MSE_ajusted_score = (X_test_segment[\"score_adjusted\"] - X_test_segment[\"has_purchased\"]).pow(2).mean()\n",
+    "print(f\"MSE for score : {MSE_score}\")\n",
+    "print(f\"MSE for ajusted score : {MSE_ajusted_score}\")\n",
+    "\n",
+    "print(\"sum of y_has_purchased :\", y_test[\"y_has_purchased\"].sum())\n",
+    "print(\"sum of adjusted scores :\", X_test_segment[\"score_adjusted\"].sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 130,
+ "id": "ed27a165-68d2-44f8-8cec-b12dad2cca5d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "29169.0"
+ ]
+ },
+ "execution_count": 130,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# number of customers labelled as buyers by the classifier (sum of the 0/1 predictions)\n",
+    "X_test_segment[\"has_purchased_estim\"].sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "761146b7-3d0d-44b1-8b91-87e6d54f1626",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 119,
+ "id": "9f30a4dd-a9d8-405a-a7d5-5324ae88cf70",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MAE for score : 0.32116357895490416\n",
+ "MAE for adjusted score : 0.17359227315595824\n"
+ ]
+ }
+ ],
+ "source": [
+    "# mean absolute error - roughly halved with our method\n",
+    "\n",
+    "MAE_score = (X_test_segment[\"score\"] - X_test_segment[\"has_purchased\"]).abs().mean()\n",
+    "MAE_ajusted_score = (X_test_segment[\"score_adjusted\"] - X_test_segment[\"has_purchased\"]).abs().mean()\n",
+    "print(f\"MAE for score : {MAE_score}\")\n",
+    "print(f\"MAE for adjusted score : {MAE_ajusted_score}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 208,
+ "id": "6f9396db-e213-408c-a596-eaeec3bc79f3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "# visualization: histograms of the probabilities and of the adjusted probabilities\n",
+    "\n",
+    "def plot_comp_scores(df, score, score_adjusted) :\n",
+    "    \"\"\"Overlay the histograms of two columns of df on a new figure and show it.\n",
+    "\n",
+    "    score and score_adjusted are the names of the columns holding the raw and the\n",
+    "    adjusted scores; they are drawn in that order with the same transparency.\n",
+    "    \"\"\"\n",
+    "    fig, ax = plt.subplots()\n",
+    "    for column, legend_label in ((score, \"score\"), (score_adjusted, \"adjusted score\")) :\n",
+    "        ax.hist(df[column], label=legend_label, alpha=0.6)\n",
+    "    ax.legend()\n",
+    "    ax.set_xlabel(\"probability of a future purchase\")\n",
+    "    ax.set_ylabel(\"count\")\n",
+    "    ax.set_title(\"Comparison between score and adjusted score\")\n",
+    "    plt.show()\n",
+    "\n",
+    "plot_comp_scores(X_test_segment, score = \"score\", score_adjusted = \"score_adjusted\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e6fae260-fab8-4f51-90dc-9b6d7314c77b",
+ "metadata": {},
+ "source": [
+ "## Compute number of tickets and CA by segment with the recalibrated score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "id": "c618cebc-c295-47f7-bd76-b7e18778a17c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " nb_tickets \n",
+ " nb_purchases \n",
+ " total_amount \n",
+ " nb_suppliers \n",
+ " vente_internet_max \n",
+ " purchase_date_min \n",
+ " purchase_date_max \n",
+ " time_between_purchase \n",
+ " nb_tickets_internet \n",
+ " fidelity \n",
+ " ... \n",
+ " gender_female \n",
+ " gender_male \n",
+ " gender_other \n",
+ " nb_campaigns \n",
+ " nb_campaigns_opened \n",
+ " has_purchased \n",
+ " has_purchased_estim \n",
+ " score \n",
+ " quartile \n",
+ " score_adjusted \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 100.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.177187 \n",
+ " 5.177187 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657671 \n",
+ " 3 \n",
+ " 0.240397 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 55.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 426.265613 \n",
+ " 426.265613 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 0.266538 \n",
+ " 2 \n",
+ " 0.056482 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 17.0 \n",
+ " 1.0 \n",
+ " 80.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 436.033437 \n",
+ " 436.033437 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.214668 \n",
+ " 1 \n",
+ " 0.043089 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 120.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.196412 \n",
+ " 5.196412 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657770 \n",
+ " 3 \n",
+ " 0.240478 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 34.0 \n",
+ " 2.0 \n",
+ " 416.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 478.693148 \n",
+ " 115.631470 \n",
+ " 363.061678 \n",
+ " 0.0 \n",
+ " 4 \n",
+ " ... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 0.894173 \n",
+ " 4 \n",
+ " 0.581920 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 22 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " nb_tickets nb_purchases total_amount nb_suppliers vente_internet_max \\\n",
+ "0 4.0 1.0 100.0 1.0 0.0 \n",
+ "1 1.0 1.0 55.0 1.0 0.0 \n",
+ "2 17.0 1.0 80.0 1.0 0.0 \n",
+ "3 4.0 1.0 120.0 1.0 0.0 \n",
+ "4 34.0 2.0 416.0 1.0 0.0 \n",
+ "\n",
+ " purchase_date_min purchase_date_max time_between_purchase \\\n",
+ "0 5.177187 5.177187 0.000000 \n",
+ "1 426.265613 426.265613 0.000000 \n",
+ "2 436.033437 436.033437 0.000000 \n",
+ "3 5.196412 5.196412 0.000000 \n",
+ "4 478.693148 115.631470 363.061678 \n",
+ "\n",
+ " nb_tickets_internet fidelity ... gender_female gender_male \\\n",
+ "0 0.0 1 ... 1 0 \n",
+ "1 0.0 2 ... 0 1 \n",
+ "2 0.0 2 ... 1 0 \n",
+ "3 0.0 1 ... 1 0 \n",
+ "4 0.0 4 ... 1 0 \n",
+ "\n",
+ " gender_other nb_campaigns nb_campaigns_opened has_purchased \\\n",
+ "0 0 0.0 0.0 0.0 \n",
+ "1 0 0.0 0.0 1.0 \n",
+ "2 0 0.0 0.0 0.0 \n",
+ "3 0 0.0 0.0 0.0 \n",
+ "4 0 0.0 0.0 1.0 \n",
+ "\n",
+ " has_purchased_estim score quartile score_adjusted \n",
+ "0 1.0 0.657671 3 0.240397 \n",
+ "1 0.0 0.266538 2 0.056482 \n",
+ "2 0.0 0.214668 1 0.043089 \n",
+ "3 1.0 0.657770 3 0.240478 \n",
+ "4 1.0 0.894173 4 0.581920 \n",
+ "\n",
+ "[5 rows x 22 columns]"
+ ]
+ },
+ "execution_count": 121,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# preview of the first rows, now including the score_adjusted column\n",
+    "X_test_segment.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 156,
+ "id": "29633dd2-8b4b-48dc-be02-52f4015e686d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " score \n",
+ " score_adjusted \n",
+ " has_purchased \n",
+ " \n",
+ " \n",
+ " quartile \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 0.132457 \n",
+ " 0.025105 \n",
+ " 0.015691 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 0.338914 \n",
+ " 0.079990 \n",
+ " 0.098486 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 0.630647 \n",
+ " 0.225757 \n",
+ " 0.214729 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 0.905216 \n",
+ " 0.661997 \n",
+ " 0.650133 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " score score_adjusted has_purchased\n",
+ "quartile \n",
+ "1 0.132457 0.025105 0.015691\n",
+ "2 0.338914 0.079990 0.098486\n",
+ "3 0.630647 0.225757 0.214729\n",
+ "4 0.905216 0.661997 0.650133"
+ ]
+ },
+ "execution_count": 156,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# average raw score, adjusted score and observed purchase rate within each score bucket\n",
+    "X_test_segment.groupby(\"quartile\")[[\"score\", \"score_adjusted\", \"has_purchased\"]].agg(\"mean\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9c64085e-51f2-4bad-8a37-274905bbed2e",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e29be2a8-ef9f-4004-ae67-cab66eea0013",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# ratio between the reference duration (1.5) and the projection horizon (1), i.e. the values\n",
+    "# passed to project_tickets_CA as duration_ref / duration_projection\n",
+    "DURATION_RATIO = 1.5\n",
+    "\n",
+    "X_test_segment[\"nb_tickets_projected\"] = X_test_segment[\"nb_tickets\"] / DURATION_RATIO\n",
+    "X_test_segment[\"total_amount_projected\"] = X_test_segment[\"total_amount\"] / DURATION_RATIO\n",
+    "\n",
+    "# expected value = probability of purchase (adjusted score) x projected volume\n",
+    "X_test_segment[\"nb_tickets_expected\"] = X_test_segment[\"score_adjusted\"] * X_test_segment[\"nb_tickets_projected\"]\n",
+    "X_test_segment[\"total_amount_expected\"] = X_test_segment[\"score_adjusted\"] * X_test_segment[\"total_amount_projected\"]\n",
+    "\n",
+    "# preview only: displaying the whole frame bloats the saved notebook\n",
+    "X_test_segment.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "id": "a974589f-7952-4db2-bebf-7b69c6b09372",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def project_tickets_CA (df, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :\n",
+ " \n",
+ " duration_ratio = duration_ref/duration_projection\n",
+ "\n",
+ " df_output = df\n",
+ "\n",
+ " df_output[\"nb_tickets_projected\"] = df_output[nb_tickets] / duration_ratio\n",
+ " df_output[\"total_amount_projected\"] = df_output[total_amount] / duration_ratio\n",
+ " \n",
+ " df_output[\"nb_tickets_expected\"] = df_output[score_adjusted] * df_output[\"nb_tickets_projected\"]\n",
+ " df_output[\"total_amount_expected\"] = df_output[score_adjusted] * df_output[\"total_amount_projected\"]\n",
+ "\n",
+ " return df_output\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 124,
+ "id": "1e000901-717d-4851-9db2-df90998d35ed",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " nb_tickets \n",
+ " nb_purchases \n",
+ " total_amount \n",
+ " nb_suppliers \n",
+ " vente_internet_max \n",
+ " purchase_date_min \n",
+ " purchase_date_max \n",
+ " time_between_purchase \n",
+ " nb_tickets_internet \n",
+ " fidelity \n",
+ " ... \n",
+ " gender_female \n",
+ " gender_male \n",
+ " gender_other \n",
+ " nb_campaigns \n",
+ " nb_campaigns_opened \n",
+ " has_purchased \n",
+ " has_purchased_estim \n",
+ " score \n",
+ " quartile \n",
+ " score_adjusted \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 100.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.177187 \n",
+ " 5.177187 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657671 \n",
+ " 3 \n",
+ " 0.240397 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 55.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 426.265613 \n",
+ " 426.265613 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 0.266538 \n",
+ " 2 \n",
+ " 0.056482 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 17.0 \n",
+ " 1.0 \n",
+ " 80.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 436.033437 \n",
+ " 436.033437 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.214668 \n",
+ " 1 \n",
+ " 0.043089 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 120.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.196412 \n",
+ " 5.196412 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657770 \n",
+ " 3 \n",
+ " 0.240478 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 34.0 \n",
+ " 2.0 \n",
+ " 416.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 478.693148 \n",
+ " 115.631470 \n",
+ " 363.061678 \n",
+ " 0.0 \n",
+ " 4 \n",
+ " ... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 0.894173 \n",
+ " 4 \n",
+ " 0.581920 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 96091 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 67.31 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 278.442257 \n",
+ " 278.442257 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 15.0 \n",
+ " 5.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 0.623551 \n",
+ " 3 \n",
+ " 0.214369 \n",
+ " \n",
+ " \n",
+ " 96092 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 61.41 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 189.207373 \n",
+ " 189.207373 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 12.0 \n",
+ " 9.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.682521 \n",
+ " 3 \n",
+ " 0.261526 \n",
+ " \n",
+ " \n",
+ " 96093 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.00 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 550.000000 \n",
+ " 550.000000 \n",
+ " -1.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 29.0 \n",
+ " 3.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.117192 \n",
+ " 1 \n",
+ " 0.021400 \n",
+ " \n",
+ " \n",
+ " 96094 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 79.43 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 279.312905 \n",
+ " 279.312905 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 20.0 \n",
+ " 4.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.625185 \n",
+ " 3 \n",
+ " 0.215545 \n",
+ " \n",
+ " \n",
+ " 96095 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.00 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 550.000000 \n",
+ " 550.000000 \n",
+ " -1.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 31.0 \n",
+ " 4.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.319585 \n",
+ " 2 \n",
+ " 0.071817 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
96096 rows × 22 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " nb_tickets nb_purchases total_amount nb_suppliers \\\n",
+ "0 4.0 1.0 100.00 1.0 \n",
+ "1 1.0 1.0 55.00 1.0 \n",
+ "2 17.0 1.0 80.00 1.0 \n",
+ "3 4.0 1.0 120.00 1.0 \n",
+ "4 34.0 2.0 416.00 1.0 \n",
+ "... ... ... ... ... \n",
+ "96091 1.0 1.0 67.31 1.0 \n",
+ "96092 1.0 1.0 61.41 1.0 \n",
+ "96093 0.0 0.0 0.00 0.0 \n",
+ "96094 1.0 1.0 79.43 1.0 \n",
+ "96095 0.0 0.0 0.00 0.0 \n",
+ "\n",
+ " vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "0 0.0 5.177187 5.177187 \n",
+ "1 0.0 426.265613 426.265613 \n",
+ "2 0.0 436.033437 436.033437 \n",
+ "3 0.0 5.196412 5.196412 \n",
+ "4 0.0 478.693148 115.631470 \n",
+ "... ... ... ... \n",
+ "96091 1.0 278.442257 278.442257 \n",
+ "96092 1.0 189.207373 189.207373 \n",
+ "96093 0.0 550.000000 550.000000 \n",
+ "96094 1.0 279.312905 279.312905 \n",
+ "96095 0.0 550.000000 550.000000 \n",
+ "\n",
+ " time_between_purchase nb_tickets_internet fidelity ... \\\n",
+ "0 0.000000 0.0 1 ... \n",
+ "1 0.000000 0.0 2 ... \n",
+ "2 0.000000 0.0 2 ... \n",
+ "3 0.000000 0.0 1 ... \n",
+ "4 363.061678 0.0 4 ... \n",
+ "... ... ... ... ... \n",
+ "96091 0.000000 1.0 2 ... \n",
+ "96092 0.000000 1.0 1 ... \n",
+ "96093 -1.000000 0.0 1 ... \n",
+ "96094 0.000000 1.0 1 ... \n",
+ "96095 -1.000000 0.0 2 ... \n",
+ "\n",
+ " gender_female gender_male gender_other nb_campaigns \\\n",
+ "0 1 0 0 0.0 \n",
+ "1 0 1 0 0.0 \n",
+ "2 1 0 0 0.0 \n",
+ "3 1 0 0 0.0 \n",
+ "4 1 0 0 0.0 \n",
+ "... ... ... ... ... \n",
+ "96091 0 1 0 15.0 \n",
+ "96092 0 1 0 12.0 \n",
+ "96093 1 0 0 29.0 \n",
+ "96094 0 1 0 20.0 \n",
+ "96095 0 1 0 31.0 \n",
+ "\n",
+ " nb_campaigns_opened has_purchased has_purchased_estim score \\\n",
+ "0 0.0 0.0 1.0 0.657671 \n",
+ "1 0.0 1.0 0.0 0.266538 \n",
+ "2 0.0 0.0 0.0 0.214668 \n",
+ "3 0.0 0.0 1.0 0.657770 \n",
+ "4 0.0 1.0 1.0 0.894173 \n",
+ "... ... ... ... ... \n",
+ "96091 5.0 1.0 1.0 0.623551 \n",
+ "96092 9.0 0.0 1.0 0.682521 \n",
+ "96093 3.0 0.0 0.0 0.117192 \n",
+ "96094 4.0 0.0 1.0 0.625185 \n",
+ "96095 4.0 0.0 0.0 0.319585 \n",
+ "\n",
+ " quartile score_adjusted \n",
+ "0 3 0.240397 \n",
+ "1 2 0.056482 \n",
+ "2 1 0.043089 \n",
+ "3 3 0.240478 \n",
+ "4 4 0.581920 \n",
+ "... ... ... \n",
+ "96091 3 0.214369 \n",
+ "96092 3 0.261526 \n",
+ "96093 1 0.021400 \n",
+ "96094 3 0.215545 \n",
+ "96095 2 0.071817 \n",
+ "\n",
+ "[96096 rows x 22 columns]"
+ ]
+ },
+ "execution_count": 124,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# preview only: displaying the whole frame bloats the saved notebook\n",
+    "X_test_segment.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 127,
+ "id": "dd8a52e1-d06e-4790-8687-8e58e3e6b84e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_620/3599949626.py:7: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df_output[\"nb_tickets_projected\"] = df_output[nb_tickets] / duration_ratio\n",
+ "/tmp/ipykernel_620/3599949626.py:8: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df_output[\"total_amount_projected\"] = df_output[total_amount] / duration_ratio\n",
+ "/tmp/ipykernel_620/3599949626.py:10: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df_output[\"nb_tickets_expected\"] = df_output[score_adjusted] * df_output[\"nb_tickets_projected\"]\n",
+ "/tmp/ipykernel_620/3599949626.py:11: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df_output[\"total_amount_expected\"] = df_output[score_adjusted] * df_output[\"total_amount_projected\"]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " nb_tickets \n",
+ " nb_purchases \n",
+ " total_amount \n",
+ " nb_suppliers \n",
+ " vente_internet_max \n",
+ " purchase_date_min \n",
+ " purchase_date_max \n",
+ " time_between_purchase \n",
+ " nb_tickets_internet \n",
+ " fidelity \n",
+ " ... \n",
+ " nb_campaigns_opened \n",
+ " has_purchased \n",
+ " has_purchased_estim \n",
+ " score \n",
+ " quartile \n",
+ " score_adjusted \n",
+ " nb_tickets_projected \n",
+ " total_amount_projected \n",
+ " nb_tickets_expected \n",
+ " total_amount_expected \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 100.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.177187 \n",
+ " 5.177187 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657671 \n",
+ " 3 \n",
+ " 0.240397 \n",
+ " 2.666667 \n",
+ " 66.666667 \n",
+ " 0.641059 \n",
+ " 16.026472 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 55.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 426.265613 \n",
+ " 426.265613 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 0.266538 \n",
+ " 2 \n",
+ " 0.056482 \n",
+ " 0.666667 \n",
+ " 36.666667 \n",
+ " 0.037655 \n",
+ " 2.071006 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 17.0 \n",
+ " 1.0 \n",
+ " 80.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 436.033437 \n",
+ " 436.033437 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.214668 \n",
+ " 1 \n",
+ " 0.043089 \n",
+ " 11.333333 \n",
+ " 53.333333 \n",
+ " 0.488340 \n",
+ " 2.298068 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 120.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.196412 \n",
+ " 5.196412 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657770 \n",
+ " 3 \n",
+ " 0.240478 \n",
+ " 2.666667 \n",
+ " 80.000000 \n",
+ " 0.641273 \n",
+ " 19.238202 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 34.0 \n",
+ " 2.0 \n",
+ " 416.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 478.693148 \n",
+ " 115.631470 \n",
+ " 363.061678 \n",
+ " 0.0 \n",
+ " 4 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 0.894173 \n",
+ " 4 \n",
+ " 0.581920 \n",
+ " 22.666667 \n",
+ " 277.333333 \n",
+ " 13.190183 \n",
+ " 161.385771 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 96091 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 67.31 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 278.442257 \n",
+ " 278.442257 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 5.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 0.623551 \n",
+ " 3 \n",
+ " 0.214369 \n",
+ " 0.666667 \n",
+ " 44.873333 \n",
+ " 0.142913 \n",
+ " 9.619467 \n",
+ " \n",
+ " \n",
+ " 96092 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 61.41 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 189.207373 \n",
+ " 189.207373 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 9.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.682521 \n",
+ " 3 \n",
+ " 0.261526 \n",
+ " 0.666667 \n",
+ " 40.940000 \n",
+ " 0.174351 \n",
+ " 10.706885 \n",
+ " \n",
+ " \n",
+ " 96093 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.00 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 550.000000 \n",
+ " 550.000000 \n",
+ " -1.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 3.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.117192 \n",
+ " 1 \n",
+ " 0.021400 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " \n",
+ " \n",
+ " 96094 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 79.43 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 279.312905 \n",
+ " 279.312905 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 4.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.625185 \n",
+ " 3 \n",
+ " 0.215545 \n",
+ " 0.666667 \n",
+ " 52.953333 \n",
+ " 0.143697 \n",
+ " 11.413840 \n",
+ " \n",
+ " \n",
+ " 96095 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.00 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 550.000000 \n",
+ " 550.000000 \n",
+ " -1.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 4.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.319585 \n",
+ " 2 \n",
+ " 0.071817 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
96096 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " nb_tickets nb_purchases total_amount nb_suppliers \\\n",
+ "0 4.0 1.0 100.00 1.0 \n",
+ "1 1.0 1.0 55.00 1.0 \n",
+ "2 17.0 1.0 80.00 1.0 \n",
+ "3 4.0 1.0 120.00 1.0 \n",
+ "4 34.0 2.0 416.00 1.0 \n",
+ "... ... ... ... ... \n",
+ "96091 1.0 1.0 67.31 1.0 \n",
+ "96092 1.0 1.0 61.41 1.0 \n",
+ "96093 0.0 0.0 0.00 0.0 \n",
+ "96094 1.0 1.0 79.43 1.0 \n",
+ "96095 0.0 0.0 0.00 0.0 \n",
+ "\n",
+ " vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "0 0.0 5.177187 5.177187 \n",
+ "1 0.0 426.265613 426.265613 \n",
+ "2 0.0 436.033437 436.033437 \n",
+ "3 0.0 5.196412 5.196412 \n",
+ "4 0.0 478.693148 115.631470 \n",
+ "... ... ... ... \n",
+ "96091 1.0 278.442257 278.442257 \n",
+ "96092 1.0 189.207373 189.207373 \n",
+ "96093 0.0 550.000000 550.000000 \n",
+ "96094 1.0 279.312905 279.312905 \n",
+ "96095 0.0 550.000000 550.000000 \n",
+ "\n",
+ " time_between_purchase nb_tickets_internet fidelity ... \\\n",
+ "0 0.000000 0.0 1 ... \n",
+ "1 0.000000 0.0 2 ... \n",
+ "2 0.000000 0.0 2 ... \n",
+ "3 0.000000 0.0 1 ... \n",
+ "4 363.061678 0.0 4 ... \n",
+ "... ... ... ... ... \n",
+ "96091 0.000000 1.0 2 ... \n",
+ "96092 0.000000 1.0 1 ... \n",
+ "96093 -1.000000 0.0 1 ... \n",
+ "96094 0.000000 1.0 1 ... \n",
+ "96095 -1.000000 0.0 2 ... \n",
+ "\n",
+ " nb_campaigns_opened has_purchased has_purchased_estim score \\\n",
+ "0 0.0 0.0 1.0 0.657671 \n",
+ "1 0.0 1.0 0.0 0.266538 \n",
+ "2 0.0 0.0 0.0 0.214668 \n",
+ "3 0.0 0.0 1.0 0.657770 \n",
+ "4 0.0 1.0 1.0 0.894173 \n",
+ "... ... ... ... ... \n",
+ "96091 5.0 1.0 1.0 0.623551 \n",
+ "96092 9.0 0.0 1.0 0.682521 \n",
+ "96093 3.0 0.0 0.0 0.117192 \n",
+ "96094 4.0 0.0 1.0 0.625185 \n",
+ "96095 4.0 0.0 0.0 0.319585 \n",
+ "\n",
+ " quartile score_adjusted nb_tickets_projected total_amount_projected \\\n",
+ "0 3 0.240397 2.666667 66.666667 \n",
+ "1 2 0.056482 0.666667 36.666667 \n",
+ "2 1 0.043089 11.333333 53.333333 \n",
+ "3 3 0.240478 2.666667 80.000000 \n",
+ "4 4 0.581920 22.666667 277.333333 \n",
+ "... ... ... ... ... \n",
+ "96091 3 0.214369 0.666667 44.873333 \n",
+ "96092 3 0.261526 0.666667 40.940000 \n",
+ "96093 1 0.021400 0.000000 0.000000 \n",
+ "96094 3 0.215545 0.666667 52.953333 \n",
+ "96095 2 0.071817 0.000000 0.000000 \n",
+ "\n",
+ " nb_tickets_expected total_amount_expected \n",
+ "0 0.641059 16.026472 \n",
+ "1 0.037655 2.071006 \n",
+ "2 0.488340 2.298068 \n",
+ "3 0.641273 19.238202 \n",
+ "4 13.190183 161.385771 \n",
+ "... ... ... \n",
+ "96091 0.142913 9.619467 \n",
+ "96092 0.174351 10.706885 \n",
+ "96093 0.000000 0.000000 \n",
+ "96094 0.143697 11.413840 \n",
+ "96095 0.000000 0.000000 \n",
+ "\n",
+ "[96096 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 127,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# volumes observed over duration_ref = 1.5 are rescaled to a horizon of duration_projection = 1\n",
+    "# (both expressed in the same time unit)\n",
+    "X_test_segment = project_tickets_CA(\n",
+    "    X_test_segment, \"nb_tickets\", \"total_amount\", \"score_adjusted\",\n",
+    "    duration_ref=1.5, duration_projection=1,\n",
+    ")\n",
+    "# preview only: displaying the whole frame bloats the saved notebook\n",
+    "X_test_segment.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 135,
+ "id": "5bf8def7-d6f3-4b5b-a656-d61f6dca9536",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " nb_tickets \n",
+ " nb_purchases \n",
+ " total_amount \n",
+ " nb_suppliers \n",
+ " vente_internet_max \n",
+ " purchase_date_min \n",
+ " purchase_date_max \n",
+ " time_between_purchase \n",
+ " nb_tickets_internet \n",
+ " fidelity \n",
+ " ... \n",
+ " nb_campaigns_opened \n",
+ " has_purchased \n",
+ " has_purchased_estim \n",
+ " score \n",
+ " quartile \n",
+ " score_adjusted \n",
+ " nb_tickets_projected \n",
+ " total_amount_projected \n",
+ " nb_tickets_expected \n",
+ " total_amount_expected \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 100.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.177187 \n",
+ " 5.177187 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657671 \n",
+ " 3 \n",
+ " 0.240397 \n",
+ " 2.666667 \n",
+ " 66.666667 \n",
+ " 0.641059 \n",
+ " 16.026472 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 55.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 426.265613 \n",
+ " 426.265613 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 0.266538 \n",
+ " 2 \n",
+ " 0.056482 \n",
+ " 0.666667 \n",
+ " 36.666667 \n",
+ " 0.037655 \n",
+ " 2.071006 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 17.0 \n",
+ " 1.0 \n",
+ " 80.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 436.033437 \n",
+ " 436.033437 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.214668 \n",
+ " 1 \n",
+ " 0.043089 \n",
+ " 11.333333 \n",
+ " 53.333333 \n",
+ " 0.488340 \n",
+ " 2.298068 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4.0 \n",
+ " 1.0 \n",
+ " 120.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 5.196412 \n",
+ " 5.196412 \n",
+ " 0.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.657770 \n",
+ " 3 \n",
+ " 0.240478 \n",
+ " 2.666667 \n",
+ " 80.000000 \n",
+ " 0.641273 \n",
+ " 19.238202 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 34.0 \n",
+ " 2.0 \n",
+ " 416.00 \n",
+ " 1.0 \n",
+ " 0.0 \n",
+ " 478.693148 \n",
+ " 115.631470 \n",
+ " 363.061678 \n",
+ " 0.0 \n",
+ " 4 \n",
+ " ... \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 0.894173 \n",
+ " 4 \n",
+ " 0.581920 \n",
+ " 22.666667 \n",
+ " 277.333333 \n",
+ " 13.190183 \n",
+ " 161.385771 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 96091 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 67.31 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 278.442257 \n",
+ " 278.442257 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 5.0 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 0.623551 \n",
+ " 3 \n",
+ " 0.214369 \n",
+ " 0.666667 \n",
+ " 44.873333 \n",
+ " 0.142913 \n",
+ " 9.619467 \n",
+ " \n",
+ " \n",
+ " 96092 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 61.41 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 189.207373 \n",
+ " 189.207373 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 9.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.682521 \n",
+ " 3 \n",
+ " 0.261526 \n",
+ " 0.666667 \n",
+ " 40.940000 \n",
+ " 0.174351 \n",
+ " 10.706885 \n",
+ " \n",
+ " \n",
+ " 96093 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.00 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 550.000000 \n",
+ " 550.000000 \n",
+ " -1.000000 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 3.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.117192 \n",
+ " 1 \n",
+ " 0.021400 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " \n",
+ " \n",
+ " 96094 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 79.43 \n",
+ " 1.0 \n",
+ " 1.0 \n",
+ " 279.312905 \n",
+ " 279.312905 \n",
+ " 0.000000 \n",
+ " 1.0 \n",
+ " 1 \n",
+ " ... \n",
+ " 4.0 \n",
+ " 0.0 \n",
+ " 1.0 \n",
+ " 0.625185 \n",
+ " 3 \n",
+ " 0.215545 \n",
+ " 0.666667 \n",
+ " 52.953333 \n",
+ " 0.143697 \n",
+ " 11.413840 \n",
+ " \n",
+ " \n",
+ " 96095 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.00 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 550.000000 \n",
+ " 550.000000 \n",
+ " -1.000000 \n",
+ " 0.0 \n",
+ " 2 \n",
+ " ... \n",
+ " 4.0 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0.319585 \n",
+ " 2 \n",
+ " 0.071817 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " 0.000000 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
96096 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " nb_tickets nb_purchases total_amount nb_suppliers \\\n",
+ "0 4.0 1.0 100.00 1.0 \n",
+ "1 1.0 1.0 55.00 1.0 \n",
+ "2 17.0 1.0 80.00 1.0 \n",
+ "3 4.0 1.0 120.00 1.0 \n",
+ "4 34.0 2.0 416.00 1.0 \n",
+ "... ... ... ... ... \n",
+ "96091 1.0 1.0 67.31 1.0 \n",
+ "96092 1.0 1.0 61.41 1.0 \n",
+ "96093 0.0 0.0 0.00 0.0 \n",
+ "96094 1.0 1.0 79.43 1.0 \n",
+ "96095 0.0 0.0 0.00 0.0 \n",
+ "\n",
+ " vente_internet_max purchase_date_min purchase_date_max \\\n",
+ "0 0.0 5.177187 5.177187 \n",
+ "1 0.0 426.265613 426.265613 \n",
+ "2 0.0 436.033437 436.033437 \n",
+ "3 0.0 5.196412 5.196412 \n",
+ "4 0.0 478.693148 115.631470 \n",
+ "... ... ... ... \n",
+ "96091 1.0 278.442257 278.442257 \n",
+ "96092 1.0 189.207373 189.207373 \n",
+ "96093 0.0 550.000000 550.000000 \n",
+ "96094 1.0 279.312905 279.312905 \n",
+ "96095 0.0 550.000000 550.000000 \n",
+ "\n",
+ " time_between_purchase nb_tickets_internet fidelity ... \\\n",
+ "0 0.000000 0.0 1 ... \n",
+ "1 0.000000 0.0 2 ... \n",
+ "2 0.000000 0.0 2 ... \n",
+ "3 0.000000 0.0 1 ... \n",
+ "4 363.061678 0.0 4 ... \n",
+ "... ... ... ... ... \n",
+ "96091 0.000000 1.0 2 ... \n",
+ "96092 0.000000 1.0 1 ... \n",
+ "96093 -1.000000 0.0 1 ... \n",
+ "96094 0.000000 1.0 1 ... \n",
+ "96095 -1.000000 0.0 2 ... \n",
+ "\n",
+ " nb_campaigns_opened has_purchased has_purchased_estim score \\\n",
+ "0 0.0 0.0 1.0 0.657671 \n",
+ "1 0.0 1.0 0.0 0.266538 \n",
+ "2 0.0 0.0 0.0 0.214668 \n",
+ "3 0.0 0.0 1.0 0.657770 \n",
+ "4 0.0 1.0 1.0 0.894173 \n",
+ "... ... ... ... ... \n",
+ "96091 5.0 1.0 1.0 0.623551 \n",
+ "96092 9.0 0.0 1.0 0.682521 \n",
+ "96093 3.0 0.0 0.0 0.117192 \n",
+ "96094 4.0 0.0 1.0 0.625185 \n",
+ "96095 4.0 0.0 0.0 0.319585 \n",
+ "\n",
+ " quartile score_adjusted nb_tickets_projected total_amount_projected \\\n",
+ "0 3 0.240397 2.666667 66.666667 \n",
+ "1 2 0.056482 0.666667 36.666667 \n",
+ "2 1 0.043089 11.333333 53.333333 \n",
+ "3 3 0.240478 2.666667 80.000000 \n",
+ "4 4 0.581920 22.666667 277.333333 \n",
+ "... ... ... ... ... \n",
+ "96091 3 0.214369 0.666667 44.873333 \n",
+ "96092 3 0.261526 0.666667 40.940000 \n",
+ "96093 1 0.021400 0.000000 0.000000 \n",
+ "96094 3 0.215545 0.666667 52.953333 \n",
+ "96095 2 0.071817 0.000000 0.000000 \n",
+ "\n",
+ " nb_tickets_expected total_amount_expected \n",
+ "0 0.641059 16.026472 \n",
+ "1 0.037655 2.071006 \n",
+ "2 0.488340 2.298068 \n",
+ "3 0.641273 19.238202 \n",
+ "4 13.190183 161.385771 \n",
+ "... ... ... \n",
+ "96091 0.142913 9.619467 \n",
+ "96092 0.174351 10.706885 \n",
+ "96093 0.000000 0.000000 \n",
+ "96094 0.143697 11.413840 \n",
+ "96095 0.000000 0.000000 \n",
+ "\n",
+ "[96096 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 135,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_test_segment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 169,
+ "id": "78d12889-b310-4eca-8a2a-8f2535c7b2e5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " quartile \n",
+ " size \n",
+ " size_perct \n",
+ " nb_tickets_expected \n",
+ " total_amount_expected \n",
+ " perct_revenue_recovered \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 37410 \n",
+ " 38.929820 \n",
+ " 84.764915 \n",
+ " 1.867190e+03 \n",
+ " 4.384354 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2 \n",
+ " 29517 \n",
+ " 30.716159 \n",
+ " 2899.288091 \n",
+ " 7.446102e+04 \n",
+ " 9.854069 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 3 \n",
+ " 20137 \n",
+ " 20.955087 \n",
+ " 10876.786661 \n",
+ " 3.442867e+05 \n",
+ " 22.842135 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4 \n",
+ " 9032 \n",
+ " 9.398934 \n",
+ " 215194.829104 \n",
+ " 9.899418e+06 \n",
+ " 90.107285 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " quartile size size_perct nb_tickets_expected total_amount_expected \\\n",
+ "0 1 37410 38.929820 84.764915 1.867190e+03 \n",
+ "1 2 29517 30.716159 2899.288091 7.446102e+04 \n",
+ "2 3 20137 20.955087 10876.786661 3.442867e+05 \n",
+ "3 4 9032 9.398934 215194.829104 9.899418e+06 \n",
+ "\n",
+ " perct_revenue_recovered \n",
+ "0 4.384354 \n",
+ "1 9.854069 \n",
+ "2 22.842135 \n",
+ "3 90.107285 "
+ ]
+ },
+ "execution_count": 169,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# compute nb tickets estimated and total amount expected\n",
+ "X_test_expected_CA = X_test_segment.groupby(\"quartile\")[[\"nb_tickets_expected\", \"total_amount_expected\"]].sum().reset_index()\n",
+ "\n",
+ "# number of customers by segment\n",
+ "X_test_expected_CA.insert(1, \"size\", X_test_segment.groupby(\"quartile\").size().values)\n",
+ "\n",
+ "# size in percent of all customers\n",
+ "X_test_expected_CA.insert(2, \"size_perct\", 100 * X_test_expected_CA[\"size\"]/X_test_expected_CA[\"size\"].sum())\n",
+ "\n",
+ "# compute share of revenue (CA) recovered\n",
+ "duration_ref=1.5\n",
+ "duration_projection=1\n",
+ "duration_ratio=duration_ref/duration_projection\n",
+ "\n",
+ "X_test_expected_CA[\"perct_revenue_recovered\"] = 100 * duration_ratio * X_test_expected_CA[\"total_amount_expected\"] / \\\n",
+ "X_test_segment.groupby(\"quartile\")[\"total_amount\"].sum().values\n",
+ "\n",
+ "X_test_expected_CA"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9c471bdd-25c2-420a-a8a1-3add9f003cbc",
+ "metadata": {},
+ "source": [
+ "## Sanity check: same computation with `score` instead of `score_adjusted`\n",
+ "\n",
+ "The result seems overestimated: if only 14% of customers come back, how can we recover 22% of the revenue from the segment that is least likely to buy?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 201,
+ "id": "53684a24-1809-465f-8e21-b9295e34582a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_620/3599949626.py:7: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df_output[\"nb_tickets_projected\"] = df_output[nb_tickets] / duration_ratio\n",
+ "/tmp/ipykernel_620/3599949626.py:8: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df_output[\"total_amount_projected\"] = df_output[total_amount] / duration_ratio\n",
+ "/tmp/ipykernel_620/3599949626.py:10: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df_output[\"nb_tickets_expected\"] = df_output[score_adjusted] * df_output[\"nb_tickets_projected\"]\n",
+ "/tmp/ipykernel_620/3599949626.py:11: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df_output[\"total_amount_expected\"] = df_output[score_adjusted] * df_output[\"total_amount_projected\"]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " quartile \n",
+ " size \n",
+ " size_perct \n",
+ " nb_tickets_expected \n",
+ " total_amount_expected \n",
+ " perct_revenue_recovered \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 37410 \n",
+ " 38.929820 \n",
+ " 419.757918 \n",
+ " 9.245081e+03 \n",
+ " 21.708404 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2 \n",
+ " 29517 \n",
+ " 30.716159 \n",
+ " 11549.060736 \n",
+ " 2.965220e+05 \n",
+ " 39.241320 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 3 \n",
+ " 20137 \n",
+ " 20.955087 \n",
+ " 29997.854731 \n",
+ " 9.547519e+05 \n",
+ " 63.344224 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4 \n",
+ " 9032 \n",
+ " 9.398934 \n",
+ " 244655.821195 \n",
+ " 1.073601e+07 \n",
+ " 97.722201 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " quartile size size_perct nb_tickets_expected total_amount_expected \\\n",
+ "0 1 37410 38.929820 419.757918 9.245081e+03 \n",
+ "1 2 29517 30.716159 11549.060736 2.965220e+05 \n",
+ "2 3 20137 20.955087 29997.854731 9.547519e+05 \n",
+ "3 4 9032 9.398934 244655.821195 1.073601e+07 \n",
+ "\n",
+ " perct_revenue_recovered \n",
+ "0 21.708404 \n",
+ "1 39.241320 \n",
+ "2 63.344224 \n",
+ "3 97.722201 "
+ ]
+ },
+ "execution_count": 201,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_test_segment_bis = project_tickets_CA (X_test_segment, \"nb_tickets\", \"total_amount\", \"score\", duration_ref=1.5, duration_projection=1)\n",
+ "\n",
+ "# compute nb tickets estimated and total amount expected\n",
+ "X_test_expected_CA_bis = X_test_segment_bis.groupby(\"quartile\")[[\"nb_tickets_expected\", \"total_amount_expected\"]].sum().reset_index()\n",
+ "\n",
+ "# number of customers by segment\n",
+ "X_test_expected_CA_bis.insert(1, \"size\", X_test_segment_bis.groupby(\"quartile\").size().values)\n",
+ "\n",
+ "# size in percent of all customers\n",
+ "X_test_expected_CA_bis.insert(2, \"size_perct\", 100 * X_test_expected_CA_bis[\"size\"]/X_test_expected_CA_bis[\"size\"].sum())\n",
+ "\n",
+ "# compute share of revenue (CA) recovered\n",
+ "duration_ref=1.5\n",
+ "duration_projection=1\n",
+ "duration_ratio=duration_ref/duration_projection\n",
+ "\n",
+ "X_test_expected_CA_bis[\"perct_revenue_recovered\"] = 100 * duration_ratio * X_test_expected_CA_bis[\"total_amount_expected\"] / \\\n",
+ "X_test_segment_bis.groupby(\"quartile\")[\"total_amount\"].sum().values\n",
+ "\n",
+ "X_test_expected_CA_bis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 203,
+ "id": "7dc66d1e-da03-4513-96e4-d9a43ac0a2c8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "overall share of revenue recovered : 90.26 %\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"overall share of revenue recovered : \", round(100 * duration_ratio * X_test_expected_CA_bis[\"total_amount_expected\"].sum() / \\\n",
+ "X_test_segment_bis[\"total_amount\"].sum(),2), \"%\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67cc9c5c-fff2-4d3c-8bfc-b59e06fa6e3a",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aab045f6-81a1-4c02-9724-eec32b30a355",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "673f2969-7b9a-44c1-abf5-5679fca877ce",
+ "metadata": {},
+ "source": [
+ "## Last pieces of analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 161,
+ "id": "2365bb13-0f3f-49d5-bf91-52c92abebcee",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "overall share of revenue recovered : 77.64%\n"
+ ]
+ }
+ ],
+ "source": [
+ "# global revenue recovered\n",
+ "global_revenue_recovered = round(100 * duration_ratio * X_test_expected_CA[\"total_amount_expected\"].sum() / \\\n",
+ "X_test_segment[\"total_amount\"].sum(),2)\n",
+ "print(f\"overall share of revenue recovered : {global_revenue_recovered}%\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 163,
+ "id": "16b17f35-57dd-459a-8989-129143dc0952",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 0.018093\n",
+ "1 0.721519\n",
+ "2 3.336101\n",
+ "3 95.924287\n",
+ "Name: total_amount_expected, dtype: float64"
+ ]
+ },
+ "execution_count": 163,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "100 * X_test_expected_CA[\"total_amount_expected\"]/X_test_expected_CA[\"total_amount_expected\"].sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 166,
+ "id": "dee4a200-eefe-4377-8e80-59ad33edd3c0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "quartile\n",
+ "1 0.320407\n",
+ "2 5.685020\n",
+ "3 11.339715\n",
+ "4 82.654858\n",
+ "Name: total_amount, dtype: float64"
+ ]
+ },
+ "execution_count": 166,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# segment 4 accounts for 83% of current revenue (CA) and 96% of next year's expected revenue from past customers\n",
+ "100 * X_test_segment.groupby(\"quartile\")[\"total_amount\"].sum()/X_test_segment[\"total_amount\"].sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6a30506c-2175-4efd-b3cb-349ad3aaa3e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# chart - Pareto principle on the revenue (CA) generated\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 177,
+ "id": "c1e6f020-ef18-40b4-bfc1-19f98cb2796e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 96096.000000\n",
+ "mean 207.475735\n",
+ "std 4720.046248\n",
+ "min -48831.800000\n",
+ "25% 0.000000\n",
+ "50% 0.000000\n",
+ "75% 60.000000\n",
+ "max 624890.000000\n",
+ "Name: total_amount, dtype: float64"
+ ]
+ },
+ "execution_count": 177,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_test_segment[\"total_amount\"].describe() # negative total amount ???\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 184,
+ "id": "d301a50e-7c68-40f0-9245-a4eea64c387b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 -4.883180e+04\n",
+ "1 -6.483180e+04\n",
+ "2 -7.683860e+04\n",
+ "3 -8.683860e+04\n",
+ "4 -9.683860e+04\n",
+ " ... \n",
+ "96091 1.802247e+07\n",
+ "96092 1.839238e+07\n",
+ "96093 1.877219e+07\n",
+ "96094 1.931270e+07\n",
+ "96095 1.993759e+07\n",
+ "Name: total_amount, Length: 96096, dtype: float64"
+ ]
+ },
+ "execution_count": 184,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "np.cumsum(X_test_segment[\"total_amount\"].sort_values()).reset_index()[\"total_amount\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 200,
+ "id": "864d0206-7f5e-4d33-8f4b-fe685c3bd916",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGwCAYAAABVdURTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABHRElEQVR4nO3de1xUdf4/8NdwmeE+yh0EQRQMNDVhvWBmWmBabuq2umveLxvdvJBarLtZVl9LV8Xylq2XdTPTTFvrRyZp3lBLELQUFQVBZJBA5SJym/n8/iDGRkBmcIbDDK/n4zGPmjPn8p7j1Hn5+XzO58iEEAJEREREFsJK6gKIiIiIjInhhoiIiCwKww0RERFZFIYbIiIisigMN0RERGRRGG6IiIjIojDcEBERkUWxkbqAlqbRaJCXlwdnZ2fIZDKpyyEiIiI9CCFQWloKX19fWFndv22mzYWbvLw8+Pv7S10GERERNcPVq1fh5+d333XaXLhxdnYGUHtyXFxcJK6GiIiI9FFSUgJ/f3/tdfx+2ly4qeuKcnFxYbghIiIyM/oMKeGAYiIiIrIoDDdERERkURhuiIiIyKIw3BAREZFFYbghIiIii8JwQ0RERBaF4YaIiIgsCsMNERERWRSGGyIiIrIoDDdERERkURhuiIiIyKIw3BAREZFFaXMPziQi/QghUFmjQWW1BhU1alSrNVKX1KimHqTX1GP29HgOH2RN7KWpfTR5iCa3b7rIB63B1OdRn+/Q5HmQ+DvqV8OD/VZMfXz99tHU9o2vIQNgZfWAX/IBSRpuDh8+jKVLlyIlJQUqlQq7d+/GyJEj77vNoUOHEBsbi7Nnz8LX1xfz589HTExMyxRMZCaq1RrcuF2FW+XVKL5zz6u8qt6yssoaVFRrUFGtRmXN3X8SERlKJgNmDgnGnKgQyWqQNNzcvn0bPXv2xJQpU/CnP/2pyfWzsrIwfPhwzJgxA59++imSkpLw0ksvwcPDQ6/ticxdjVqDottVuF5SgYKSSlwvrcD1kkoUlFTULiutxPWSShTdroQQxjuuTAbYWlvp9bfalqbX12xiJaHHXpo6n/rUIZrYiX770GMlIgkJAfwv7VrbDTfDhg3DsGHD9F5/3bp16NixI+Lj4wEAoaGhSE5Oxr/+9a9Gw01lZSUqKyu170tKSh6oZiJTEkLg17JKXL1Rjqs37iDnRjmu3ihHzo1y5N68A1XxHWj0vLhZyQClve3dl4P8t3+30V1ubwtnO1vY2VpDYWOl8087WysobKxhay1rsjmfzEeTIUuP31hTqzR1DP32oU8dD/5dmjxGWwrHD3jOf7lWgvEbfkRFtbQtv2Y15ub48eOIjo7WWTZ06FBs2LAB1dXVsLW1rbfN4sWL8fbbb7dUiUR6qahW40rRbVwuuI3MX8tw+dcyZBbeRuavt1FWWXPfba1kgIezAl4udvB0toOniwJeznbwcqldVveZq6Mc1hL3e1Pr1OS4E6P8bPjba4tcHeUAALXETYxmFW7y8/Ph5eWls8zLyws1NTUoLCyEj49PvW3i4uIQGxurfV9SUgJ/f3+T10oE1IaYi9dLcV5VivP5pb+FmDLk3rzT6N+grGSAj9Ie/q728G/vgI6uDujo5gC/9g7wb28PNycFQwsRtUr+rvb4ZGIE7GylvRnbrMINUP9vHHXNdI39TUShUEChUJi8LmrbhBBQFVfgfH4J0lWlSFeVIF1VgqzC2412I7nY2aCzpxOC3J0Q5OGIzh5O6OzhiI5uDlDYWLfsFyAiMgJnO1tEhXk1vaKJmVW48fb2Rn5+vs6ygoIC2NjYwM3NTaKqqK0RQuDarTs4k1uM01dv4UxuMc6pSlB8p7rB9V0d5Qj1cUZXLxcEezkhyN0RnT2d4OYo5zgWIiITMKtw079/f3z99dc6y/bt24eIiIgGx9sQGcON21U4nXtLG2ROX72FottV9daztpKhs4cjQn1c8JC3C0J9nBHm4wIP
ZwVDDBFRC5I03JSVleHSpUva91lZWUhLS4Orqys6duyIuLg4XLt2DVu2bAEAxMTEYNWqVYiNjcWMGTNw/PhxbNiwAdu2bZPqK5CFqVFrcD6/FKdybiIl+yZO5dzE1Rt36q1nYyVDV29n9PRvh55+SnTzVaKLpxPsbNmdREQkNUnDTXJyMgYPHqx9Xzfwd9KkSdi8eTNUKhVycnK0n3fq1AkJCQmYM2cOVq9eDV9fX3z44Yec44aa7VZ5FVJzbiEluzbMnM69hfIqdb31gtwd0dO/HXr4KdHTvx3CfFwYZIiIWimZ0GcyAgtSUlICpVKJ4uJiuLi4SF0OtbAbt6twIrMIxy8X4URmETIKyuqt46ywwSMB7RHesT16B7RDD792UNqz25OISEqGXL/NaswNkaFulVfhROYNnMisDTPn80vrrdPJ3RG9O7ZHeEDtK9jTSfLnohARUfMx3JBFKa+qwfHLRTh2ubZ1Jj2/pN58Ml29nNG/sxv6BbniD4GucHPiVAFERJaE4YbMmhAClwrKcOjirzh44Vf8lHUDVfc8vbqLpxP6B7mhX5Ab+ga5wp1hhojIojHckNkpq6xB0qVCHLr4Kw5d+BXXbunezeTX3h4Dgz20rTOeznYSVUpERFJguKFWTwiBjIIyHDhfgEMXfkVy9g1Uq+/2NcltrNAvyA2DQjzweFcPBLk7cl4ZIqI2jOGGWqUatQbJ2Tfx/bnrSEy/juyicp3PA90c8HhXTwwK8UC/IDfYy3lbNhER1WK4oVajolqNIxmF+PYXFX44X4Cb5XcfZyC3sUJkZzcM/i3QBLo7SlgpERG1Zgw3JKnKGjWOZhTi/51RIfHcdZRW1mg/a+dgiyEPeSI6zAsDgz3gqODPlYiImsarBbW4arUGRzMK8c0ZFfady0dpxd1A4+1ih2EPe2NoN29EBLSHjbWVhJUSEZE5YrihFnM2rxhfplzDntPXUFh298GTXi4KDH/YB8/08MEj/u05gR4RET0QhhsyqYLSCvwvNQ9fnsrVmR3Y3UmOpx/2wTM9fRHekYGGiIiMh+GGjK5GrcH+8wX4/KccHM4ohFpTe9u23NoKUWFeGN27Ax4L8YAtu5yIiMgEGG7IaPJu3cHnJ69i+8kcXC+p1C7v3bEdRvf2w4gevlA68AGURERkWgw39EDUGoHDF3/F1h+zceB8AX5rpIGboxx/jvDHmAg/BHk4SVskERG1KQw31CylFdXYkZyLzceycPXG3ccf9A9yw7i+HTG0mzfkNux2IiKilsdwQwa5eqMcm49dwfaTV1H225w0SntbPBfuh7/26YgunmylISIiaTHckF5Ssm/ik8OZ2HcuX9v11NnDEVMf7YTRj/jx8QdERNRqMNxQo4QQOH65CB8duITjmUXa5QOD3THt0U54LNiDt3ATEVGrw3BD9Qgh8MOFAqw6cAmncm4BAGytZRj9iB+mDeyEEC9naQskIiK6D4Yb0hJC4ODFX7F830X8fK0YAKCwscJf+3TE3x4Lgm87e4krJCIiahrDDQEATmQW4V/fXUBy9k0AgIPcGhP6BWDawE7wdLaTuDoiIiL9Mdy0cT/nFmPJd+dxJKMQQG1LzcT+AYgZ1BluTgqJqyMiIjIcw00bdb2kAku/u4AvT+VCCMDGSoa/9PHHK4OD4a1kSw0REZkvhps2pqJajQ1Hs7D6h0sor1IDAJ7t5YvXorqio5uDxNURERE9OIabNkIIgb2/5OPd/5eOa7dqZxTu5d8Ob44IQ++O7SWujoiIyHgYbtqAvFt38M+vfsH+8wUAAG8XO7wx7CH8sacv56khIiKLw3BjwdQagU9PZGPJ3vO4XaWGrbUMLw7qjJjHO8NBzj96IiKyTLzCWaiL10vxxpdntJPwhQe0x/ujH0YwJ+AjIiILx3BjYTQagY1JWViy9wKq1Bo4KWzw+lNd8XzfAHZBERFRm8BwY0Gul1Rg7hentXPWDHnIE++N6g4fJWcWJiKitoPhxkLsO5uP
1788g5vl1bCztcI/nwnDuD4dIZOxtYaIiNoWhhszV6PWYOl3F/Dx4UwAQDdfF6z8yyPo4ukkcWVERETSYLgxY4VllXj1s1QczywCAMwY2Anzhj4EuY2VxJURERFJh+HGTP1yrRgztiRDVVwBR7k1lv65J4Y/7CN1WURERJJjuDFDB85fxyufpaK8So0gD0esnxCOLp68xZuIiAhguDE7/z2RjYX/+wUaAQwMdsfq53vDxc5W6rKIiIhaDYYbMyGEQPz3GVi5PwMA8OdwP/zf6Idha83xNURERL/HcGMGhBBY8t0FrD14GQAw58kQzHyiC2/zJiIiagDDTSsnhMB7/y8d/z6aBQD45zNhmPZoJ4mrIiIiar0Yblq5Jd9d0Aabd57thgn9A6UtiIiIqJVjuGnFNhzN0nZFvTeqO57vGyBxRURERK0fR6O2Uv9Lu4Z3vjkHAJg3tCuDDRERkZ4Yblqh1JybmPfFGQDAlAGBeOnxzhJXREREZD4YblqZX0sr8eKnp1Cl1mBoNy/88+kw3hVFRERkAIabVqRarcHLW08hv6QCXTydsGxML1hZMdgQEREZguGmFVmeeBE/XbkBZ4UNPp4QDicFx3sTEREZiuGmlfgxswjrDtXeGbX0zz3Q2cNJ4oqIiIjME8NNK1BSUY3YHachBDA2wh9PdefTvYmIiJqL4aYVWLL3PK7duoMANwe8OSJM6nKIiIjMGsONxE5fvYWtP+YAAN4f3QOOHGdDRET0QBhuJKTWCPzjq18gBDD6kQ7o39lN6pKIiIjMHsONhL5KvYafrxXD2c4GccNDpS6HiIjIIjDcSKSqRoMV318EALz0eBd4OCskroiIiMgyMNxIZPvJHOTevAMPZwUmRfK5UURERMbCcCOBarUGa3572vcrg7vAQc5BxERERMbCcCOBhJ9VUBVXwN1JjrF/8Je6HCIiIovCcNPChBDYcDQLADChXyDsbK0lroiIiMiyMNy0sNSrt3AmtxhyGyuM79dR6nKIiIgsjuThZs2aNejUqRPs7OwQHh6OI0eO3Hf9rVu3omfPnnBwcICPjw+mTJmCoqKiFqr2wX2RnAsAeOZhH7g58Q4pIiIiY5M03Gzfvh2zZ8/GggULkJqaioEDB2LYsGHIyclpcP2jR49i4sSJmDZtGs6ePYsvvvgCJ0+exPTp01u48uapqFbjmzN5AIDnIvwkroaIiMgySRpuli9fjmnTpmH69OkIDQ1FfHw8/P39sXbt2gbXP3HiBAIDAzFz5kx06tQJjz76KF544QUkJyc3eozKykqUlJTovKSy79x1lFbUoEM7e/TrxNmIiYiITEGycFNVVYWUlBRER0frLI+OjsaxY8ca3CYyMhK5ublISEiAEALXr1/Hzp078fTTTzd6nMWLF0OpVGpf/v7S3Z30v9RrAIDRvTvAykomWR1ERESWTLJwU1hYCLVaDS8vL53lXl5eyM/Pb3CbyMhIbN26FWPHjoVcLoe3tzfatWuHjz76qNHjxMXFobi4WPu6evWqUb+HvsqranDkUiEA4JkevpLUQERE1BZIPqBYJtNtwRBC1FtW59y5c5g5cybefPNNpKSkYO/evcjKykJMTEyj+1coFHBxcdF5SeHwxUJU1Wjg72qPEC8nSWogIiJqCySbGtfd3R3W1tb1WmkKCgrqtebUWbx4MQYMGIB58+YBAHr06AFHR0cMHDgQ7777Lnx8fExed3N9n34dABAV6t1oeCMiIqIHJ1nLjVwuR3h4OBITE3WWJyYmIjIyssFtysvLYWWlW7K1de0keEII0xRqBBqNwIHzBQCAJ8M8Ja6GiIjIsknaLRUbG4t///vf2LhxI9LT0zFnzhzk5ORou5ni4uIwceJE7fojRozArl27sHbtWmRmZiIpKQkzZ85Enz594OvbesexnFOV4MbtKjgpbPCHQFepyyEiIrJokj6xcezYsSgqKsKiRYugUqnQvXt3JCQkICCg9inZKpVKZ86byZMno7S0
FKtWrcJrr72Gdu3aYciQIfjggw+k+gp6SfptIHHfTq6wtZZ8mBMREZFFk4nW3J9jAiUlJVAqlSguLm6xwcWTNv6EQxd/xT+fCcO0Rzu1yDGJiIgsiSHXbzYjmFhVjQY/Zd0AAER25sR9REREpsZwY2I/XyvGnWo1XB3l6OrlLHU5REREFo/hxsTSrt4CAPTu2J6zEhMREbUAhhsTO/1buOnlr5S2ECIiojaC4cbE6lpuevq3k7QOIiKitoLhxoRu3K5Czo1yAEAPv3bSFkNERNRGMNyY0OncWwCAIA9HKO1tpS2GiIiojWC4MaGz14oBAD06cLwNERFRS2G4MaEL18sAAA/5SPMkciIioraI4caELuaXAgDntyEiImpBDDcmUlWjweVfa1tuQrwZboiIiFoKw42JXCm6jRqNgJPCBr5KO6nLISIiajMYbkwk89fbAIDOHo6QyTgzMRERUUthuDGR3Ju189v4uzpIXAkREVHbwnBjIrk37wAA/Noz3BAREbUkhhsTqWu58WtvL3ElREREbQvDjYncbblhuCEiImpJDDcmUhduOOaGiIioZTHcmEB5VQ3KKmsAAF4uvA2ciIioJTHcmEBRWRUAQG5jBUe5tcTVEBERtS0MNyZw43ZtuHFzlHOOGyIiohbGcGMC2nDjJJe4EiIioraH4cYEin4LN66OCokrISIianuaFW5qamrw/fff4+OPP0Zpae2Tr/Py8lBWVmbU4sxVUVklgNpuKSIiImpZNoZukJ2djaeeego5OTmorKxEVFQUnJ2dsWTJElRUVGDdunWmqNOs3NC23DDcEBERtTSDW25mzZqFiIgI3Lx5E/b2dyeoGzVqFPbv32/U4sxVEcMNERGRZAxuuTl69CiSkpIgl+teuAMCAnDt2jWjFWbObpUz3BAREUnF4JYbjUYDtVpdb3lubi6cnZ2NUpS5K6moncDP2c7g7EhEREQPyOBwExUVhfj4eO17mUyGsrIyLFy4EMOHDzdmbWarVBtubCWuhIiIqO0xuGlhxYoVGDx4MMLCwlBRUYFx48YhIyMD7u7u2LZtmylqNDulFdUA2HJDREQkBYOvvr6+vkhLS8O2bdtw6tQpaDQaTJs2Dc8//7zOAOO2rK7lxoXhhoiIqMU16+prb2+PqVOnYurUqcaux+wJIbQPzWS3FBERUcszONxs2bLlvp9PnDix2cVYgvIqNdQaAYDdUkRERFIw+Oo7a9YsnffV1dUoLy+HXC6Hg4NDmw83dV1S1lYy2NvyieBEREQtzeC7pW7evKnzKisrw4ULF/Doo49yQDGAssq7g4n5RHAiIqKWZ5QHZwYHB+P999+v16rTFnGOGyIiImkZ7ang1tbWyMvLM9buzJZ2jhsFBxMTERFJweDmhT179ui8F0JApVJh1apVGDBggNEKM1ec44aIiEhaBl+BR44cqfNeJpPBw8MDQ4YMwbJly4xVl9kqr6x9NIWjguGGiIhICgZfgTUajSnqsBjlVbXdUvZy3ilFREQkBaONuaFad6prw58DbwMnIiKShMEtN2q1Gps3b8b+/ftRUFBQryXnwIEDRivOHN1hyw0REZGkmjWJ3+bNm/H000+je/funMvlHneqa8fcMNwQERFJw+Bw8/nnn2PHjh0YPny4Keoxe+VVv4UbdksRERFJwuAxN3K5HF26dDFFLRahruXGgS03REREkjA43Lz22mtYuXIlhBCmqMfs3WHLDRERkaQM7pY6evQofvjhB3z77bfo1q0bbG11Z+LdtWuX0YozR3fH3HCeGyIiIikYfAVu164dRo0aZYpaLALH3BAREUnL4HCzadMmU9RhMSq0LTecQoiIiEgKzboC19TU4Pvvv8fHH3+M0tJSAEBeXh7KysqMWpw5uttyw24pIiIiKRh8Bc7OzsZTTz2FnJwcVFZWIioqCs7OzliyZAkqKiqwbt06U9RpNrQDinm3FBERkSQMbrmZNWsWIiIicPPmTdjb22uXjxo1Cvv3
7zdqceZI2y3FMTdERESSaNbdUklJSZDL5TrLAwICcO3aNaMVZq6qamofRyG34ZgbIiIiKRh8BdZoNFCr1fWW5+bmwtnZ2ShFmbMqdW24sbXmYymIiIikYHC4iYqKQnx8vPa9TCZDWVkZFi5c2OYfySCE0IYbttwQERFJw+BuqRUrVmDw4MEICwtDRUUFxo0bh4yMDLi7u2Pbtm2mqNFs1GgE6iZullsz3BAREUnB4HDj6+uLtLQ0bNu2DadOnYJGo8G0adPw/PPP6wwwbouqf2u1AdhyQ0REJBWDr8Dl5eWwt7fH1KlTsWrVKqxZswbTp09vdrBZs2YNOnXqBDs7O4SHh+PIkSP3Xb+yshILFixAQEAAFAoFOnfujI0bNzbr2MZWN5gYAGzZckNERCQJg6/Anp6eGD9+PL777jtoNJqmN7iP7du3Y/bs2ViwYAFSU1MxcOBADBs2DDk5OY1uM2bMGOzfvx8bNmzAhQsXsG3bNjz00EMPVIex1I23kckAGysOKCYiIpKCweFmy5YtqKysxKhRo+Dr64tZs2bh5MmTzTr48uXLMW3aNEyfPh2hoaGIj4+Hv78/1q5d2+D6e/fuxaFDh5CQkIAnn3wSgYGB6NOnDyIjI5t1fGOra7mxtbaCTMZwQ0REJAWDw83o0aPxxRdf4Pr161i8eDHS09MRGRmJkJAQLFq0SO/9VFVVISUlBdHR0TrLo6OjcezYsQa32bNnDyIiIrBkyRJ06NABISEhmDt3Lu7cudPocSorK1FSUqLzMpVqde1oYgW7pIiIiCTT7Kuws7MzpkyZgn379uH06dNwdHTE22+/rff2hYWFUKvV8PLy0lnu5eWF/Pz8BrfJzMzE0aNH8csvv2D37t2Ij4/Hzp078fLLLzd6nMWLF0OpVGpf/v7+etdoKG3LDQcTExERSabZV+GKigrs2LEDI0eORO/evVFUVIS5c+cavJ97u2+EEI126Wg0GshkMmzduhV9+vTB8OHDsXz5cmzevLnR1pu4uDgUFxdrX1evXjW4Rn1pZydmyw0REZFkDL4VfN++fdi6dSu++uorWFtb47nnnsN3332HQYMGGbQfd3d3WFtb12ulKSgoqNeaU8fHxwcdOnSAUqnULgsNDYUQArm5uQgODq63jUKhgEKhMKi25uIEfkRERNIz+Co8cuRIlJeX4z//+Q+uX7+O9evXGxxsAEAulyM8PByJiYk6yxMTExsdIDxgwADk5eWhrKxMu+zixYuwsrKCn5+fwTUY290BxRxMTEREJBWDW27y8/Ph4uJilIPHxsZiwoQJiIiIQP/+/bF+/Xrk5OQgJiYGQG2X0rVr17BlyxYAwLhx4/DOO+9gypQpePvtt1FYWIh58+Zh6tSprWICwWptyw2fCE5ERCQVg8ONi4sLLl++jE2bNuHy5ctYuXIlPD09sXfvXvj7+6Nbt25672vs2LEoKirCokWLoFKp0L17dyQkJCAgIAAAoFKpdOa8cXJyQmJiIl599VVERETAzc0NY8aMwbvvvmvo1zCJu2Nu2HJDREQkFZkQdU9D0s+hQ4cwbNgwDBgwAIcPH0Z6ejqCgoKwZMkS/PTTT9i5c6epajWKkpISKJVKFBcXG60Fqs63P6vw4tZTiAhoj50vto65d4iIiCyBIddvg8fcvPHGG3j33XeRmJgIuVyuXT548GAcP37c8GotSI2mNidac3ZiIiIiyRgcbn7++WeMGjWq3nIPDw8UFRUZpShzpREMN0RERFIzONy0a9cOKpWq3vLU1FR06NDBKEWZKzVbboiIiCRncLgZN24cXn/9deTn50Mmk0Gj0SApKQlz587FxIkTTVGj2WC3FBERkfQMDjfvvfceOnbsiA4dOqCsrAxhYWF47LHHEBkZiX/84x+mqNFsaOrCDR+aSUREJBmDbgUXQiAvLw+ffPIJ3nnnHZw6dQoajQaPPPJIg7MDtzVqjrkhIiKSnMHhJjg4GGfPnkVwcDCCgoJMVZdZ4pgb
IiIi6RnULWVlZYXg4OA2f1dUY+rCjRXDDRERkWQMHnOzZMkSzJs3D7/88osp6jFrdeHGhuGGiIhIMgY/fmH8+PEoLy9Hz549IZfL6z3T6caNG0YrztyoOaCYiIhIcgaHm/j4eBOUYRk4oJiIiEh6BoebSZMmmaIOi6BWM9wQERFJzeAxN9S4upYbDigmIiKSDsONEWk4oJiIiEhyDDdGVPf4BSsOKCYiIpIMw40RcUAxERGR9BhujKhuQDG7pYiIiKSj191So0eP1nuHu3btanYx5o4DiomIiKSnV8uNUqnUvlxcXLB//34kJydrP09JScH+/fuhVCpNVqg54IBiIiIi6enVcrNp0ybtv7/++usYM2YM1q1bB2trawCAWq3GSy+9BBcXF9NUaSY4oJiIiEh6Bo+52bhxI+bOnasNNgBgbW2N2NhYbNy40ajFmRsNBxQTERFJzuBwU1NTg/T09HrL09PTodFojFKUudI+W4rhhoiISDIGP35hypQpmDp1Ki5duoR+/foBAE6cOIH3338fU6ZMMXqB5qSG4YaIiEhyBoebf/3rX/D29saKFSugUqkAAD4+Ppg/fz5ee+01oxdoTjR8KjgREZHkDA43VlZWmD9/PubPn4+SkhIAaPMDiev8Ns0NbwUnIiKSULMm8aupqcH333+Pbdu2QfZbK0VeXh7KysqMWpy5Eb8NKGa0ISIiko7BLTfZ2dl46qmnkJOTg8rKSkRFRcHZ2RlLlixBRUUF1q1bZ4o6zcJvDTdgrxQREZF0DG65mTVrFiIiInDz5k3Y29trl48aNQr79+83anFmp65biumGiIhIMga33Bw9ehRJSUmQy+U6ywMCAnDt2jWjFWaO6ua5YbYhIiKSjsEtNxqNBmq1ut7y3NxcODs7G6Uoc/VbtuGYGyIiIgkZHG6ioqIQHx+vfS+TyVBWVoaFCxdi+PDhxqzN7Ii6fik23RAREUnG4G6pFStWYPDgwQgLC0NFRQXGjRuHjIwMuLu7Y9u2baao0WwI7ZgbaesgIiJqywwON76+vkhLS8Pnn3+OlJQUaDQaTJs2Dc8//7zOAOO2SKPtlmK6ISIikorB4ebw4cOIjIzElClTdB63UFNTg8OHD+Oxxx4zaoHmhQOKiYiIpGbwmJvBgwfjxo0b9ZYXFxdj8ODBRinKXLFbioiISHoGhxshhHZW4t8rKiqCo6OjUYoyV9pJ/NgtRUREJBm9u6VGjx4NoPbuqMmTJ0OhUGg/U6vVOHPmDCIjI41foRnR8F5wIiIiyekdbpRKJYDalhtnZ2edwcNyuRz9+vXDjBkzjF+hGWG2ISIikp7e4WbTpk0AgMDAQMydO7fNd0E1pK5bio9fICIiko7Bd0stXLjQFHVYBMHHLxAREUnO4HADADt37sSOHTuQk5ODqqoqnc9OnTpllMLMkeAExURERJIz+G6pDz/8EFOmTIGnpydSU1PRp08fuLm5ITMzE8OGDTNFjWaj7vELvFuKiIhIOgaHmzVr1mD9+vVYtWoV5HI55s+fj8TERMycORPFxcWmqNFssOWGiIhIegaHm5ycHO0t3/b29igtLQUATJgwgc+W0oYbphsiIiKpGBxuvL29UVRUBAAICAjAiRMnAABZWVnaAbVtVd08N4w2RERE0jE43AwZMgRff/01AGDatGmYM2cOoqKiMHbsWIwaNcroBZoT3gpOREQkPYPvllq/fj00Gg0AICYmBq6urjh69ChGjBiBmJgYoxdoVjjmhoiISHIGhxsrKytYWd1t8BkzZgzGjBlj1KLMFbuliIiIpKdXuDlz5ozeO+zRo0ezizF32gdnMt0QERFJRq9w06tXL8hksiYHDMtkMqjVaqMUZo7uzlDMdENERCQVvcJNVlaWqeuwCNqWG0mrICIiatv0CjcBAQGmrsMiaDjPDRERkeQMHlC8ZcuW+34+ceLEZhdj9n7rlrJitiEiIpKMweFm1qxZOu+rq6tRXl4OuVwOBweHNh1uOKCY
iIhIegZP4nfz5k2dV1lZGS5cuIBHH32Uj1+o65biqBsiIiLJGBxuGhIcHIz333+/XqtOW6O5m26IiIhIIkYJNwBgbW2NvLw8Y+3OLNVlGz5+gYiISDoGj7nZs2ePznshBFQqFVatWoUBAwYYrTBzxFvBiYiIpGdwuBk5cqTOe5lMBg8PDwwZMgTLli0zuIA1a9Zg6dKlUKlU6NatG+Lj4zFw4MAmt0tKSsKgQYPQvXt3pKWlGXxcU7g7iZ/EhRAREbVhBoebuodmGsP27dsxe/ZsrFmzBgMGDMDHH3+MYcOG4dy5c+jYsWOj2xUXF2PixIl44okncP36daPV86A4oJiIiEh6Rhtz0xzLly/HtGnTMH36dISGhiI+Ph7+/v5Yu3btfbd74YUXMG7cOPTv37+FKtWPAOe5ISIikprBLTdCCOzcuRM//PADCgoK6rXk7Nq1S6/9VFVVISUlBW+88YbO8ujoaBw7dqzR7TZt2oTLly/j008/xbvvvtvkcSorK1FZWal9X1JSold9zSE46IaIiEhyBrfczJo1CxMmTEBWVhacnJygVCp1XvoqLCyEWq2Gl5eXznIvLy/k5+c3uE1GRgbeeOMNbN26FTY2+uWyxYsX69Tn7++vd42GupttmG6IiIikYnDLzaeffopdu3Zh+PDhRing3ucwCSEafDaTWq3GuHHj8PbbbyMkJETv/cfFxSE2Nlb7vqSkxGQBR8PHLxAREUnO4HCjVCoRFBT0wAd2d3eHtbV1vVaagoKCeq05AFBaWork5GSkpqbilVdeAVA7uFkIARsbG+zbtw9Dhgypt51CoYBCoXjgevXCB2cSERFJzuBuqbfeegtvv/027ty580AHlsvlCA8PR2Jios7yxMREREZG1lvfxcUFP//8M9LS0rSvmJgYdO3aFWlpaejbt+8D1WMMfLYUERGR9Axuufnzn/+Mbdu2wdPTE4GBgbC1tdX5/NSpU3rvKzY2FhMmTEBERAT69++P9evXIycnBzExMQBqu5SuXbuGLVu2wMrKCt27d9fZ3tPTE3Z2dvWWS0U7z43EdRAREbVlBoebyZMnIyUlBePHj4eXl9cDdcGMHTsWRUVFWLRoEVQqFbp3746EhAQEBAQAAFQqFXJycpq9/5amYbcUERGR5GRCaG9g1oujoyO+++47PProo6aqyaRKSkqgVCpRXFwMFxcXo+574JIDuHrjDna9FIneHdsbdd9ERERtmSHXb4PH3Pj7+xs9FFgKPhSciIhIegaHm2XLlmH+/Pm4cuWKCcoxb3wqOBERkfQMHnMzfvx4lJeXo3PnznBwcKg3oPjGjRtGK87c8MGZRERE0jM43MTHx5ugDMvAGYqJiIikZ3C4mTRpkinqsAjaMTfMNkRERJIxONw0dWt2x44dm12MudOwW4qIiEhyBoebwMDA+87jolarH6ggc8ZuKSIiIukZHG5SU1N13ldXVyM1NRXLly/He++9Z7TCzBG7pYiIiKRncLjp2bNnvWURERHw9fXF0qVLMXr0aKMUZp7YLUVERCQ1g+e5aUxISAhOnjxprN2ZJQ3nuSEiIpKcwS03JSUlOu+FEFCpVHjrrbcQHBxstMLMER+cSUREJD2Dw027du3qDSgWQsDf3x+ff/650QozR9oBxUw3REREkjE43Bw4cEAn3FhZWcHDwwNdunSBjY3Bu7Modx9BynRDREQkFYPTyOOPP26CMixDXbeUFbMNERGRZAweULx48WJs3Lix3vKNGzfigw8+MEpRRERERM1lcLj5+OOP8dBDD9Vb3q1bN6xbt84oRZkr0fQqREREZGIGh5v8/Hz4+PjUW+7h4QGVSmWUoszd/WZwJiIiItMyONz4+/sjKSmp3vKkpCT4+voapSgiIiKi5jJ4QPH06dMxe/ZsVFdXY8iQIQCA/fv3Y/78+XjttdeMXqBZYb8UERGR5AwON/Pnz8eNGzfw0ksvoaqqCgBgZ2eH119/HXFxcUYv0ByxU4qIiEg6
BocbmUyGDz74AP/85z+Rnp4Oe3t7BAcHQ6FQmKI+s8KGGyIiIuk1e9Y9Jycn/OEPfzBmLRaD44mJiIikY7QHZxIRERG1Bgw3RiQEO6aIiIikxnBjAjIOKSYiIpIMw40Rsd2GiIhIegw3JsABxURERNJhuCEiIiKLwnBjRBxPTEREJD2GGyIiIrIoDDdGJDikmIiISHIMNybAAcVERETSYbghIiIii8JwY0QcUExERCQ9hhsTkLFfioiISDIMN0RERGRRGG6MiL1SRERE0mO4MQF2ShEREUmH4caY2HRDREQkOYYbE+B4YiIiIukw3BAREZFFYbgxIj5+gYiISHoMNyYg45BiIiIiyTDcGBFnKCYiIpIew40JcEAxERGRdBhuiIiIyKIw3BgRe6WIiIikx3BjAuyVIiIikg7DjREJjigmIiKSHMONKbDphoiISDIMN0RERGRRGG6MiJ1SRERE0mO4MQHOUExERCQdhhsiIiKyKAw3RsSbpYiIiKTHcGMCfPwCERGRdBhuiIiIyKIw3JgAG26IiIikI3m4WbNmDTp16gQ7OzuEh4fjyJEjja67a9cuREVFwcPDAy4uLujfvz++++67FqyWiIiIWjtJw8327dsxe/ZsLFiwAKmpqRg4cCCGDRuGnJycBtc/fPgwoqKikJCQgJSUFAwePBgjRoxAampqC1deHx+9QERE1DrIhIRX5b59+6J3795Yu3atdlloaChGjhyJxYsX67WPbt26YezYsXjzzTf1Wr+kpARKpRLFxcVwcXFpVt0NEUKgU1wCAODUP6Pg6ig32r6JiIjaOkOu35K13FRVVSElJQXR0dE6y6Ojo3Hs2DG99qHRaFBaWgpXV9dG16msrERJSYnOyxTYcENERNQ6SBZuCgsLoVar4eXlpbPcy8sL+fn5eu1j2bJluH37NsaMGdPoOosXL4ZSqdS+/P39H6hufXBAMRERkXQkH1Asu2dSGCFEvWUN2bZtG9566y1s374dnp6eja4XFxeH4uJi7evq1asPXDMRERG1XjZSHdjd3R3W1tb1WmkKCgrqtebca/v27Zg2bRq++OILPPnkk/ddV6FQQKFQPHC9TWGvFBERUesgWcuNXC5HeHg4EhMTdZYnJiYiMjKy0e22bduGyZMn47PPPsPTTz9t6jKbhTMUExERSUeylhsAiI2NxYQJExAREYH+/ftj/fr1yMnJQUxMDIDaLqVr165hy5YtAGqDzcSJE7Fy5Ur069dP2+pjb28PpVIp2fcAeCs4ERFRayFpuBk7diyKioqwaNEiqFQqdO/eHQkJCQgICAAAqFQqnTlvPv74Y9TU1ODll1/Gyy+/rF0+adIkbN68uaXLb5SMQ4qJiIgkI+k8N1Iw1Tw3NWoNuiz4FgBw+s1oKB1sjbZvIiKits4s5rmxNG0qIRIREbViDDemwF4pIiIiyTDcEBERkUVhuDGStjVyiYiIqPViuDEBznNDREQkHYYbIxEcUkxERNQqMNyYABtuiIiIpMNwQ0RERBaF4cZIOKCYiIiodWC4MQEZRxQTERFJhuGGiIiILArDjQmw3YaIiEg6DDdERERkURhujIQDiomIiFoHhhsT4HhiIiIi6TDcGAlnKCYiImodGG5MQMYhxURERJJhuCEiIiKLwnBjJBxQTERE1Dow3JgABxQTERFJh+GGiIiILArDjZGwV4qIiKh1YLghIiIii8JwYySCI4qJiIhaBYYbE+CAYiIiIukw3BAREZFFYbgxEnZKERERtQ4MNybAxy8QERFJh+HGSDiemIiIqHVguDEBDigmIiKSDsMNERERWRSGG2NhtxQREVGrwHBjAuyVIiIikg7DjZEINt0QERG1Cgw3JiDjiGIiIiLJMNwQERGRRWG4MRLOc0NERNQ6MNyYADuliIiIpMNwQ0RERBaF4cZI2CtFRETUOjDcmABvliIiIpIOw42RCI4oJiIiahUYbkyA89wQERFJh+GGiIiILArDjZGwU4qIiKh1
YLghIiIii8JwYyQcT0xERNQ6MNwYGccSExERSYvhhoiIiCwKw42RCA4pJiIiahUYboyMvVJERETSspG6AEtiZ2sFKw66ISIikhTDjZF4Otvh/DvDpC6DiIiozWO3FBEREVkUhhsiIiKyKAw3REREZFEYboiIiMiiMNwQERGRRZE83KxZswadOnWCnZ0dwsPDceTIkfuuf+jQIYSHh8POzg5BQUFYt25dC1VKRERE5kDScLN9+3bMnj0bCxYsQGpqKgYOHIhhw4YhJyenwfWzsrIwfPhwDBw4EKmpqfj73/+OmTNn4ssvv2zhyomIiKi1kgkh3fOs+/bti969e2Pt2rXaZaGhoRg5ciQWL15cb/3XX38de/bsQXp6unZZTEwMTp8+jePHj+t1zJKSEiiVShQXF8PFxeXBvwQRERGZnCHXb8labqqqqpCSkoLo6Gid5dHR0Th27FiD2xw/frze+kOHDkVycjKqq6sb3KayshIlJSU6LyIiIrJckoWbwsJCqNVqeHl56Sz38vJCfn5+g9vk5+c3uH5NTQ0KCwsb3Gbx4sVQKpXal7+/v3G+ABEREbVKkg8olt3zLCYhRL1lTa3f0PI6cXFxKC4u1r6uXr36gBUTERFRaybZs6Xc3d1hbW1dr5WmoKCgXutMHW9v7wbXt7GxgZubW4PbKBQKKBQK4xRNRERErZ5kLTdyuRzh4eFITEzUWZ6YmIjIyMgGt+nfv3+99fft24eIiAjY2tqarFYiIiIyH5J2S8XGxuLf//43Nm7ciPT0dMyZMwc5OTmIiYkBUNulNHHiRO36MTExyM7ORmxsLNLT07Fx40Zs2LABc+fOleorEBERUSsjWbcUAIwdOxZFRUVYtGgRVCoVunfvjoSEBAQEBAAAVCqVzpw3nTp1QkJCAubMmYPVq1fD19cXH374If70pz/pfcy6MTq8a4qIiMh81F239ZnBRtJ5bqSQm5vLO6aIiIjM1NWrV+Hn53ffddpcuNFoNMjLy4Ozs/N978pqjpKSEvj7++Pq1aucINCEeJ5bBs9zy+B5bjk81y3DVOdZCIHS0lL4+vrCyur+o2ok7ZaSgpWVVZOJ70G5uLjwP5wWwPPcMnieWwbPc8vhuW4ZpjjPSqVSr/Ukn+eGiIiIyJgYboiIiMiiMNwYkUKhwMKFCzlpoInxPLcMnueWwfPccniuW0ZrOM9tbkAxERERWTa23BAREZFFYbghIiIii8JwQ0RERBaF4YaIiIgsCsONgdasWYNOnTrBzs4O4eHhOHLkyH3XP3ToEMLDw2FnZ4egoCCsW7euhSo1b4ac5127diEqKgoeHh5wcXFB//798d1337VgtebL0N9znaSkJNjY2KBXr16mLdBCGHqeKysrsWDBAgQEBEChUKBz587YuHFjC1Vrvgw9z1u3bkXPnj3h4OAAHx8fTJkyBUVFRS1UrXk6fPgwRowYAV9fX8hkMnz11VdNbiPJdVCQ3j7//HNha2srPvnkE3Hu3Dkxa9Ys4ejoKLKzsxtcPzMzUzg4OIhZs2aJc+fOiU8++UTY2tqKnTt3tnDl5sXQ8zxr1izxwQcfiJ9++klcvHhRxMXFCVtbW3Hq1KkWrty8GHqe69y6dUsEBQWJ6Oho0bNnz5Yp1ow15zz/8Y9/FH379hWJiYkiKytL/PjjjyIpKakFqzY/hp7nI0eOCCsrK7Fy5UqRmZkpjhw5Irp16yZGjhzZwpWbl4SEBLFgwQLx5ZdfCgBi9+7d911fqusgw40B+vTpI2JiYnSWPfTQQ+KNN95ocP358+eLhx56SGfZCy+8IPr162eyGi2Boee5IWFhYeLtt982dmkWpbnneezYseIf//iHWLhwIcONHgw9z99++61QKpWiqKioJcqzGIae56VLl4qgoCCdZR9++KHw8/MzWY2WRp9wI9V1kN1SeqqqqkJKSgqio6N1lkdHR+PYsWMNbnP8+PF66w8dOhTJycmorq42Wa3mrDnn+V4ajQalpaVw
dXU1RYkWobnnedOmTbh8+TIWLlxo6hItQnPO8549exAREYElS5agQ4cOCAkJwdy5c3Hnzp2WKNksNec8R0ZGIjc3FwkJCRBC4Pr169i5cyeefvrplii5zZDqOtjmHpzZXIWFhVCr1fDy8tJZ7uXlhfz8/Aa3yc/Pb3D9mpoaFBYWwsfHx2T1mqvmnOd7LVu2DLdv38aYMWNMUaJFaM55zsjIwBtvvIEjR47Axob/69BHc85zZmYmjh49Cjs7O+zevRuFhYV46aWXcOPGDY67aURzznNkZCS2bt2KsWPHoqKiAjU1NfjjH/+Ijz76qCVKbjOkug6y5cZAMplM570Qot6yptZvaDnpMvQ819m2bRveeustbN++HZ6enqYqz2Loe57VajXGjRuHt99+GyEhIS1VnsUw5Pes0Wggk8mwdetW9OnTB8OHD8fy5cuxefNmtt40wZDzfO7cOcycORNvvvkmUlJSsHfvXmRlZSEmJqYlSm1TpLgO8q9fenJ3d4e1tXW9vwUUFBTUS6V1vL29G1zfxsYGbm5uJqvVnDXnPNfZvn07pk2bhi+++AJPPvmkKcs0e4ae59LSUiQnJyM1NRWvvPIKgNqLsBACNjY22LdvH4YMGdIitZuT5vyefXx80KFDByiVSu2y0NBQCCGQm5uL4OBgk9ZsjppznhcvXowBAwZg3rx5AIAePXrA0dERAwcOxLvvvsuWdSOR6jrIlhs9yeVyhIeHIzExUWd5YmIiIiMjG9ymf//+9dbft28fIiIiYGtra7JazVlzzjNQ22IzefJkfPbZZ+wz14Oh59nFxQU///wz0tLStK+YmBh07doVaWlp6Nu3b0uVblaa83seMGAA8vLyUFZWpl128eJFWFlZwc/Pz6T1mqvmnOfy8nJYWeleAq2trQHcbVmgByfZddCkw5UtTN2thhs2bBDnzp0Ts2fPFo6OjuLKlStCCCHeeOMNMWHCBO36dbfAzZkzR5w7d05s2LCBt4LrwdDz/NlnnwkbGxuxevVqoVKptK9bt25J9RXMgqHn+V68W0o/hp7n0tJS4efnJ5577jlx9uxZcejQIREcHCymT58u1VcwC4ae502bNgkbGxuxZs0acfnyZXH06FEREREh+vTpI9VXMAulpaUiNTVVpKamCgBi+fLlIjU1VXvLfWu5DjLcGGj16tUiICBAyOVy0bt3b3Ho0CHtZ5MmTRKDBg3SWf/gwYPikUceEXK5XAQGBoq1a9e2cMXmyZDzPGjQIAGg3mvSpEktX7iZMfT3/HsMN/oz9Dynp6eLJ598Utjb2ws/Pz8RGxsrysvLW7hq82Poef7www9FWFiYsLe3Fz4+PuL5558Xubm5LVy1efnhhx/u+//b1nIdlAnB9jciIiKyHBxzQ0RERBaF4YaIiIgsCsMNERERWRSGGyIiIrIoDDdERERkURhuiIiIyKIw3BAREZFFYbghIiIii8JwQ/Q7Qgj87W9/g6urK2QyGdLS0lr0+AcPHoRMJsOtW7da9LgNSUpKwsMPPwxbW1uMHDlS6nLICK5cudLk71rq32BgYCDi4+MlOTZZDoYbot/Zu3cvNm/ejG+++QYqlQrdu3c32bEef/xxzJ49W2dZZGQkVCqVzhOhpRIbG4tevXohKysLmzdvbvHjb968Ge3atWvx41oyf39/k/+uiVoDhhui37l8+TJ8fHwQGRkJb29v2NjY1FunqqrKZMeXy+Xw9vaGTCYz2TH0dfnyZQwZMgR+fn4MGQ1Qq9XQaDRSl2EQa2vrRn/XxmTK/0aI9MFwQ/SbyZMn49VXX0VOTg5kMhkCAwMB1LawvPLKK4iNjYW7uzuioqIAAMuXL8fDDz8MR0dH+Pv746WXXkJZWZnOPpOSkjBo0CA4ODigffv2GDp0KG7evInJkyfj0KFDWLlyJWQyGWQyGa5cudJgl8CXX36Jbt26QaFQIDAwEMuWLdM5RmBgIP7v//4PU6dOhbOzMzp27Ij169ff97tWVlZi
5syZ8PT0hJ2dHR599FGcPHkSwN2ui6KiIkydOhUymazRlpvKykrMnz8f/v7+UCgUCA4OxoYNGwA03PLy1Vdf6QS306dPY/DgwXB2doaLiwvCw8ORnJyMgwcPYsqUKSguLtaen7feegsAcPPmTUycOBHt27eHg4MDhg0bhoyMDO0+6477zTffoGvXrnBwcMBzzz2H27dv4z//+Q8CAwPRvn17vPrqq1Cr1drtqqqqMH/+fHTo0AGOjo7o27cvDh482OB+w8LCoFAokJ2djYMHD6JPnz5wdHREu3btMGDAAGRnZzd67nNzc/GXv/wFrq6ucHR0REREBH788Uft52vXrkXnzp0hl8vRtWtX/Pe//9XZXiaT4d///jdGjRoFBwcHBAcHY8+ePdrPb968ieeffx4eHh6wt7dHcHAwNm3apPNn+/tuqYSEBISEhMDe3h6DBw/GlStX6tV87NgxPPbYY7C3t4e/vz9mzpyJ27dvaz8PDAzEu+++i8mTJ0OpVGLGjBl6bVdQUIARI0bA3t4enTp1wtatWxs9b0QGMfmjOYnMxK1bt8SiRYuEn5+fUKlUoqCgQAhR+9RxJycnMW/ePHH+/HmRnp4uhBBixYoV4sCBAyIzM1Ps379fdO3aVbz44ova/aWmpgqFQiFefPFFkZaWJn755Rfx0UcfiV9//VXcunVL9O/fX8yYMUOoVCqhUqlETU2N9om7N2/eFEIIkZycLKysrMSiRYvEhQsXxKZNm4S9vb3YtGmT9jgBAQHC1dVVrF69WmRkZIjFixcLKysrbZ0NmTlzpvD19RUJCQni7NmzYtKkSaJ9+/aiqKhI1NTUCJVKJVxcXER8fLxQqVSNPpF6zJgxwt/fX+zatUtcvnxZfP/99+Lzzz8XQgixadMmoVQqddbfvXu3+P3/drp16ybGjx8v0tPTxcWLF8WOHTtEWlqaqKysFPHx8cLFxUV7fkpLS4UQQvzxj38UoaGh4vDhwyItLU0MHTpUdOnSRVRVVWmPa2trK6KiosSpU6fEoUOHhJubm4iOjhZjxowRZ8+eFV9//bWQy+XaWoUQYty4cSIyMlIcPnxYXLp0SSxdulQoFApx8eJFnf1GRkaKpKQkcf78eXHr1i2hVCrF3LlzxaVLl8S5c+fE5s2bRXZ2doPnq7S0VAQFBYmBAweKI0eOiIyMDLF9+3Zx7NgxIYQQu3btEra2tmL16tXiwoULYtmyZcLa2locOHBAuw8Aws/PT3z22WciIyNDzJw5Uzg5OYmioiIhhBAvv/yy6NWrlzh58qTIysoSiYmJYs+ePUIIIbKysgQAkZqaKoQQIicnRygUCjFr1ixx/vx58emnnwovLy+d3+CZM2eEk5OTWLFihbh48aJISkoSjzzyiJg8ebK2poCAAOHi4iKWLl0qMjIyREZGhl7bDRs2THTv3l0cO3ZMJCcni8jISGFvby9WrFjR4Pkj0hfDDdHvrFixQgQEBOgsGzRokOjVq1eT2+7YsUO4ublp3//1r38VAwYMaHT9QYMGiVmzZuksuzfcjBs3TkRFRemsM2/ePBEWFqZ9HxAQIMaPH699r9FohKenp1i7dm2Dxy0rKxO2trZi69at2mVVVVXC19dXLFmyRLtMqVTqhKh7XbhwQQAQiYmJDX6uT7hxdnYWmzdv1nv7ixcvCgAiKSlJu6ywsFDY29uLHTt2aLcDIC5duqRd54UXXhAODg7agCSEEEOHDhUvvPCCEEKIS5cuCZlMJq5du6ZzvCeeeELExcXp7DctLU37eVFRkQAgDh482OB3uNfHH38snJ2dtUHkXpGRkWLGjBk6y/785z+L4cOHa98DEP/4xz+078vKyoRMJhPffvutEEKIESNGiClTpjS4/3vDTVxcnAgNDRUajUa7zuuvv67zG5wwYYL429/+prOfI0eOCCsrK3Hnzh0hRO1vcOTIkTrrNLVd3e/nxIkT2s/T09MFAIYbemDsliLSQ0RERL1lP/zwA6KiotCh
Qwc4Oztj4sSJKCoq0ja7p6Wl4Yknnnig46anp2PAgAE6ywYMGICMjAydLpUePXpo/10mk8Hb2xsFBQUN7vPy5cuorq7W2a+trS369OmD9PR0vWtLS0uDtbU1Bg0apPc294qNjcX06dPx5JNP4v3338fly5fvu356ejpsbGzQt29f7TI3Nzd07dpVp3YHBwd07txZ+97LywuBgYFwcnLSWVZ3jk6dOgUhBEJCQuDk5KR9HTp0SKcmuVyuc65dXV0xefJkDB06FCNGjMDKlSuhUqkarT8tLQ2PPPIIXF1dG/1+Df153/vn8vsaHB0d4ezsrP0uL774Ij7//HP06tUL8+fPx7FjxxqtJz09Hf369dPpKuzfv7/OOikpKdi8ebPOeRk6dCg0Gg2ysrK0693730hT29X9Wf5+u4ceeojju8goGG6I9ODo6KjzPjs7G8OHD0f37t3x5ZdfIiUlBatXrwYAVFdXAwDs7e0f+LhCiHqDi4UQ9daztbXVeS+TyRod7Fq3fUP7NWQgc1Pfz8rKql6tdeemzltvvYWzZ8/i6aefxoEDBxAWFobdu3c3us+Gvnvd8t/X3tD5uN850mg0sLa2RkpKCtLS0rSv9PR0rFy5UruNvb19vXO0adMmHD9+HJGRkdi+fTtCQkJw4sSJBuvU5zehz5/L/b7LsGHDkJ2djdmzZyMvLw9PPPEE5s6d2+CxGjufv6fRaPDCCy/onJfTp08jIyNDJ0De+99IU9s19jskMgaGG6JmSE5ORk1NDZYtW4Z+/fohJCQEeXl5Ouv06NED+/fvb3Qfcrlcp/WlIWFhYTh69KjOsmPHjiEkJATW1tbNqr1Lly6Qy+U6+62urkZycjJCQ0P13s/DDz8MjUaDQ4cONfi5h4cHSktLdQaQNjS/SkhICObMmYN9+/Zh9OjR2sGvDZ2fsLAw1NTU6AzALSoqwsWLFw2q/V6PPPII1Go1CgoK0KVLF52Xt7e3XtvHxcXh2LFj6N69Oz777LMG1+vRowfS0tJw48aNBj8PDQ1t8M/b0O/m4eGByZMn49NPP0V8fHyjA8zDwsLqBbF73/fu3Rtnz56td17qfkeNaWq70NBQ1NTUIDk5WbvNhQsXWsUcT2T+GG6ImqFz586oqanBRx99hMzMTPz3v//FunXrdNaJi4vDyZMn8dJLL+HMmTM4f/481q5di8LCQgC1d5j8+OOPuHLlCgoLCxtsaXnttdewf/9+vPPOO7h48SL+85//YNWqVY3+TVwfjo6OePHFFzFv3jzs3bsX586dw4wZM1BeXo5p06bpvZ/AwEBMmjQJU6dOxVdffYWsrCwcPHgQO3bsAAD07dsXDg4O+Pvf/45Lly7hs88+07nr6s6dO3jllVdw8OBBZGdnIykpCSdPntReyAMDA1FWVob9+/ejsLAQ5eXlCA4OxrPPPosZM2bg6NGjOH36NMaPH48OHTrg2WefbfY5CQkJwfPPP4+JEydi165dyMrKwsmTJ/HBBx8gISGh0e2ysrIQFxeH48ePIzs7G/v27btv0PrrX/8Kb29vjBw5EklJScjMzMSXX36J48ePAwDmzZuHzZs3Y926dcjIyMDy5cuxa9cug/6833zzTfzvf//DpUuXcPbsWXzzzTeN1hMTE4PLly8jNjYWFy5cqPdnBACvv/46jh8/jpdffhlpaWnIyMjAnj178Oqrr963jqa269q1K5566inMmDEDP/74I1JSUjB9+nSjtHgScUAx0e80NqD43oG/QgixfPly4ePjI+zt7cXQoUPFli1bdAZiCiHEwYMHRWRkpFAoFKJdu3Zi6NCh2s8vXLgg+vXrJ+zt7QUAkZWVVW9AsRBC7Ny5U4SFhQlbW1vRsWNHsXTpUp06AgIC6g3A7Nmzp1i4cGGj3/POnTvi1VdfFe7u7kKhUIgBAwaIn376SWedpgYU1+1nzpw5wsfHR8jlctGlSxexceNG7ee7d+8WXbp0EXZ2duKZZ54R69ev1w4orqys
FH/5y1+Ev7+/kMvlwtfXV7zyyivaQapCCBETEyPc3NwEAO33uXHjhpgwYYJQKpXac193R5MQDQ9EXrhwoejZs6fOskmTJolnn31W+76qqkq8+eabIjAwUNja2gpvb28xatQocebMmUb3m5+fL0aOHKn9/gEBAeLNN98UarW60XN25coV8ac//Um4uLgIBwcHERERIX788Uft52vWrBFBQUHC1tZWhISEiC1btuhsD0Ds3r1bZ9nv/6zeeecdERoaKuzt7YWrq6t49tlnRWZmphCi/oBiIYT4+uuvRZcuXYRCoRADBw4UGzdurPcb/Omnn0RUVJRwcnISjo6OokePHuK9997Tft7Qb1Cf7VQqlXj66aeFQqEQHTt2FFu2bGl0X0SGkAmhR6crERERkZlgtxQRERFZFIYbIiIisigMN0RERGRRGG6IiIjIojDcEBERkUVhuCEiIiKLwnBDREREFoXhhoiIiCwKww0RERFZFIYbIiIisigMN0RERGRR/j+ILo9RuTAcYgAAAABJRU5ErkJggg==",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "# Cumulative revenue curve: share of total revenue captured by the top\n",
+    "# fraction of customers, ranked by decreasing total_amount.\n",
+    "#\n",
+    "# The x-axis is built from rank positions (1..n)/n rather than from the\n",
+    "# DataFrame index labels: the labels match the ranks only when the index\n",
+    "# happens to be an ordered 0..n-1 range.\n",
+    "\n",
+    "amounts_desc = X_test_segment[\"total_amount\"].sort_values(ascending=False)\n",
+    "n_customers = len(amounts_desc)\n",
+    "customer_fraction = np.arange(1, n_customers + 1) / n_customers\n",
+    "revenue_share = amounts_desc.cumsum().to_numpy() / amounts_desc.sum()\n",
+    "\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.plot(customer_fraction, revenue_share)\n",
+    "ax.set_xlabel(\"fraction of customers considered\")\n",
+    "ax.set_ylabel(\"cumulated revenue\")\n",
+    "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 198,
+ "id": "67981e78-d7a5-432e-b93b-9d0d189f4e5d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "96095"
+ ]
+ },
+ "execution_count": 198,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_test_segment.index.max()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}