BDC-team-1/Sport/Modelization/CA_segment_sport.ipynb

2999 lines
154 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "84b6e27e-4bda-4d38-8689-ec7fc0da1848",
"metadata": {},
"source": [
"# Define segment and predict sales associated"
]
},
{
"cell_type": "markdown",
"id": "ec059482-45d3-4ae6-99bc-9b4ced115db3",
"metadata": {},
"source": [
"## Importations of packages "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "9771bf29-d08e-4674-8c23-9a2672fbef8f",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from pandas import DataFrame\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
"from sklearn.utils import class_weight\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
"from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from scipy.optimize import fsolve\n",
"import io\n",
"\n",
"import pickle\n",
"import warnings"
]
},
{
"cell_type": "markdown",
"id": "048fcd7c-800a-4a6b-b725-faf8410f924a",
"metadata": {},
"source": [
"## load databases"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "539ccbdf-f29f-4f04-99c1-8c88d0efe514",
"metadata": {},
"outputs": [],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0c3a6ddc-9345-4a42-b6bf-a20a95de3028",
"metadata": {},
"outputs": [],
"source": [
"def load_train_test():\n",
" BUCKET = \"projet-bdc2324-team1/Generalization/sport\"\n",
" File_path_train = BUCKET + \"/Train_set.csv\"\n",
" File_path_test = BUCKET + \"/Test_set.csv\"\n",
" \n",
" with fs.open( File_path_train, mode=\"rb\") as file_in:\n",
" dataset_train = pd.read_csv(file_in, sep=\",\")\n",
" # dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n",
"\n",
" with fs.open(File_path_test, mode=\"rb\") as file_in:\n",
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
" # dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n",
" \n",
" return dataset_train, dataset_test"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2831d546-b365-498b-8248-c618bd9c3057",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_521/2459610029.py:7: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" dataset_train = pd.read_csv(file_in, sep=\",\")\n"
]
},
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 0\n",
"purchase_date_max 0\n",
"time_between_purchase 0\n",
"nb_tickets_internet 0\n",
"street_id 0\n",
"structure_id 222825\n",
"mcp_contact_id 70874\n",
"fidelity 0\n",
"tenant_id 0\n",
"is_partner 0\n",
"deleted_at 224213\n",
"gender 0\n",
"is_email_true 0\n",
"opt_in 0\n",
"last_buying_date 66139\n",
"max_price 66139\n",
"ticket_sum 0\n",
"average_price 66023\n",
"average_purchase_delay 66139\n",
"average_price_basket 66139\n",
"average_ticket_basket 66139\n",
"total_price 116\n",
"purchase_count 0\n",
"first_buying_date 66139\n",
"country 23159\n",
"gender_label 0\n",
"gender_female 0\n",
"gender_male 0\n",
"gender_other 0\n",
"country_fr 23159\n",
"nb_campaigns 0\n",
"nb_campaigns_opened 0\n",
"time_to_open 123159\n",
"y_has_purchased 0\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_train, dataset_test = load_train_test()\n",
"dataset_train.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b8827f7b-b304-4f51-9814-c7a98ed88cf0",
"metadata": {},
"outputs": [],
"source": [
"def features_target_split(dataset_train, dataset_test):\n",
" \n",
" features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
" 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n",
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
"\n",
" # we suppress fidelity, time between purchase, and gender other (colinearity issue)\n",
" \"\"\"\n",
" features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', \n",
" 'purchase_date_min', 'purchase_date_max', 'nb_tickets_internet', 'is_email_true', \n",
" 'opt_in', 'gender_female', 'gender_male', 'nb_campaigns', 'nb_campaigns_opened']\n",
" \"\"\"\n",
" \n",
" X_train = dataset_train[features_l]\n",
" y_train = dataset_train[['y_has_purchased']]\n",
"\n",
" X_test = dataset_test[features_l]\n",
" y_test = dataset_test[['y_has_purchased']]\n",
" return X_train, X_test, y_train, y_test"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c18195fc-ed40-4e39-a59e-c9ecc5a8e6c3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Shape train : (224213, 17)\n",
"Shape test : (96096, 17)\n"
]
}
],
"source": [
"X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)\n",
"print(\"Shape train : \", X_train.shape)\n",
"print(\"Shape test : \", X_test.shape)"
]
},
{
"cell_type": "markdown",
"id": "74eda066-5e01-43aa-b0cf-cc6d9bbf770e",
"metadata": {},
"source": [
"## get results from the logit cross validated model"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7c81390e-598c-4f02-bd56-dd03b00dcb33",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>fidelity</th>\n",
" <th>is_email_true</th>\n",
" <th>opt_in</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>100.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.177187</td>\n",
" <td>5.177187</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>55.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>426.265613</td>\n",
" <td>426.265613</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>17.0</td>\n",
" <td>1.0</td>\n",
" <td>80.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>436.033437</td>\n",
" <td>436.033437</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>120.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.196412</td>\n",
" <td>5.196412</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>34.0</td>\n",
" <td>2.0</td>\n",
" <td>416.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>478.693148</td>\n",
" <td>115.631470</td>\n",
" <td>363.061678</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96091</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>67.31</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>278.442257</td>\n",
" <td>278.442257</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>15.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>61.41</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>189.207373</td>\n",
" <td>189.207373</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>12.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>29.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>79.43</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>279.312905</td>\n",
" <td>279.312905</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>20.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>31.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>96096 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 4.0 1.0 100.00 1.0 \n",
"1 1.0 1.0 55.00 1.0 \n",
"2 17.0 1.0 80.00 1.0 \n",
"3 4.0 1.0 120.00 1.0 \n",
"4 34.0 2.0 416.00 1.0 \n",
"... ... ... ... ... \n",
"96091 1.0 1.0 67.31 1.0 \n",
"96092 1.0 1.0 61.41 1.0 \n",
"96093 0.0 0.0 0.00 0.0 \n",
"96094 1.0 1.0 79.43 1.0 \n",
"96095 0.0 0.0 0.00 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 5.177187 5.177187 \n",
"1 0.0 426.265613 426.265613 \n",
"2 0.0 436.033437 436.033437 \n",
"3 0.0 5.196412 5.196412 \n",
"4 0.0 478.693148 115.631470 \n",
"... ... ... ... \n",
"96091 1.0 278.442257 278.442257 \n",
"96092 1.0 189.207373 189.207373 \n",
"96093 0.0 550.000000 550.000000 \n",
"96094 1.0 279.312905 279.312905 \n",
"96095 0.0 550.000000 550.000000 \n",
"\n",
" time_between_purchase nb_tickets_internet fidelity is_email_true \\\n",
"0 0.000000 0.0 1 True \n",
"1 0.000000 0.0 2 True \n",
"2 0.000000 0.0 2 True \n",
"3 0.000000 0.0 1 True \n",
"4 363.061678 0.0 4 True \n",
"... ... ... ... ... \n",
"96091 0.000000 1.0 2 True \n",
"96092 0.000000 1.0 1 True \n",
"96093 -1.000000 0.0 1 True \n",
"96094 0.000000 1.0 1 True \n",
"96095 -1.000000 0.0 2 True \n",
"\n",
" opt_in gender_female gender_male gender_other nb_campaigns \\\n",
"0 False 1 0 0 0.0 \n",
"1 True 0 1 0 0.0 \n",
"2 True 1 0 0 0.0 \n",
"3 False 1 0 0 0.0 \n",
"4 False 1 0 0 0.0 \n",
"... ... ... ... ... ... \n",
"96091 False 0 1 0 15.0 \n",
"96092 False 0 1 0 12.0 \n",
"96093 True 1 0 0 29.0 \n",
"96094 False 0 1 0 20.0 \n",
"96095 False 0 1 0 31.0 \n",
"\n",
" nb_campaigns_opened \n",
"0 0.0 \n",
"1 0.0 \n",
"2 0.0 \n",
"3 0.0 \n",
"4 0.0 \n",
"... ... \n",
"96091 5.0 \n",
"96092 9.0 \n",
"96093 3.0 \n",
"96094 4.0 \n",
"96095 4.0 \n",
"\n",
"[96096 rows x 17 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c708f439-bb75-4688-bf4f-4c04e13deaae",
"metadata": {},
"outputs": [],
"source": [
"def load_model(type_of_activity, model):\n",
" BUCKET = f\"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/\"\n",
" filename = model + '.pkl'\n",
" file_path = BUCKET + filename\n",
" with fs.open(file_path, mode=\"rb\") as f:\n",
" model_bytes = f.read()\n",
"\n",
" model = pickle.loads(model_bytes)\n",
" return model"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5261a803-05b8-41a0-968c-dc7bde48ddd3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {\n",
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
"#sk-container-id-1 {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"#sk-container-id-1 pre {\n",
" padding: 0;\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-hidden--visually {\n",
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-container {\n",
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item::after {\n",
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel {\n",
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item {\n",
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
"#sk-container-id-1 div.sk-serial {\n",
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
"#sk-container-id-1 div.sk-toggleable {\n",
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
"#sk-container-id-1 label.sk-toggleable__label {\n",
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content {\n",
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-1 div.sk-label label {\n",
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
"#sk-container-id-1 div.sk-label label {\n",
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label-container {\n",
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
"#sk-container-id-1 div.sk-estimator {\n",
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
"#sk-container-id-1 div.sk-estimator:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link {\n",
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=3, error_score=&#x27;raise&#x27;,\n",
" estimator=Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;,\n",
" &#x27;nb_purchases&#x27;,\n",
" &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;,\n",
" &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;,\n",
" &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;,\n",
" &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_...\n",
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
" 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
" 6.400000e+01]),\n",
" &#x27;LogisticRegression_cv__class_weight&#x27;: [&#x27;balanced&#x27;,\n",
" {0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539}],\n",
" &#x27;LogisticRegression_cv__penalty&#x27;: [&#x27;l1&#x27;, &#x27;l2&#x27;]},\n",
" scoring=make_scorer(recall_score, response_method=&#x27;predict&#x27;))</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;GridSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>GridSearchCV(cv=3, error_score=&#x27;raise&#x27;,\n",
" estimator=Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;,\n",
" &#x27;nb_purchases&#x27;,\n",
" &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;,\n",
" &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;,\n",
" &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;,\n",
" &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_...\n",
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
" 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
" 6.400000e+01]),\n",
" &#x27;LogisticRegression_cv__class_weight&#x27;: [&#x27;balanced&#x27;,\n",
" {0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539}],\n",
" &#x27;LogisticRegression_cv__penalty&#x27;: [&#x27;l1&#x27;, &#x27;l2&#x27;]},\n",
" scoring=make_scorer(recall_score, response_method=&#x27;predict&#x27;))</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">estimator: Pipeline</label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;,\n",
" &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;,\n",
" &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;,\n",
" &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;,\n",
" &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_campaigns_opened&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
" sparse_output=False))]),\n",
" [&#x27;opt_in&#x27;, &#x27;gender_male&#x27;,\n",
" &#x27;gender_female&#x27;])])),\n",
" (&#x27;LogisticRegression_cv&#x27;,\n",
" LogisticRegression(max_iter=5000, solver=&#x27;saga&#x27;))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;, StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;, &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;, &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;, &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;, &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_campaigns_opened&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
" sparse_output=False))]),\n",
" [&#x27;opt_in&#x27;, &#x27;gender_male&#x27;, &#x27;gender_female&#x27;])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">num</label><div class=\"sk-toggleable__content fitted\"><pre>[&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;, &#x27;total_amount&#x27;, &#x27;nb_suppliers&#x27;, &#x27;vente_internet_max&#x27;, &#x27;purchase_date_min&#x27;, &#x27;purchase_date_max&#x27;, &#x27;time_between_purchase&#x27;, &#x27;nb_tickets_internet&#x27;, &#x27;nb_campaigns&#x27;, &#x27;nb_campaigns_opened&#x27;]</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;StandardScaler<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">cat</label><div class=\"sk-toggleable__content fitted\"><pre>[&#x27;opt_in&#x27;, &#x27;gender_male&#x27;, &#x27;gender_female&#x27;]</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;OneHotEncoder<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;, sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(max_iter=5000, solver=&#x27;saga&#x27;)</pre></div> </div></div></div></div></div></div></div></div></div></div></div>"
],
"text/plain": [
"GridSearchCV(cv=3, error_score='raise',\n",
" estimator=Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets',\n",
" 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
" 'vente_internet_max',\n",
" 'purchase_date_min',\n",
" 'purchase_date_max',\n",
" 'time_between_purchase',\n",
" 'nb_tickets_internet',\n",
" 'nb_campaigns',\n",
" 'nb_...\n",
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
" 4.000000e+00, 8.000000e+00, 1.600000e+01, 3.200000e+01,\n",
" 6.400000e+01]),\n",
" 'LogisticRegression_cv__class_weight': ['balanced',\n",
" {0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539}],\n",
" 'LogisticRegression_cv__penalty': ['l1', 'l2']},\n",
" scoring=make_scorer(recall_score, response_method='predict'))"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"logit_cv = load_model(\"sport\", \"LogisticRegression_cv\")\n",
"logit_cv"
]
},
{
"cell_type": "markdown",
"id": "006819e7-e9c5-48d9-85ee-aa43d5e4c9c2",
"metadata": {},
"source": [
"## Quartile clustering"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "018d8ff4-3436-4eec-8507-d1a265cbabf1",
"metadata": {},
"outputs": [],
"source": [
"y_pred = logit_cv.predict(X_test)\n",
"y_pred_prob = logit_cv.predict_proba(X_test)[:, 1]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "846f53b9-73c2-4a8b-9d9e-f11bf59ce9ba",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_521/375041546.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" X_test_segment[\"has_purchased\"] = y_test\n",
"/tmp/ipykernel_521/375041546.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" X_test_segment[\"has_purchased_estim\"] = y_pred\n",
"/tmp/ipykernel_521/375041546.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" X_test_segment[\"score\"] = y_pred_prob\n",
"/tmp/ipykernel_521/375041546.py:6: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" X_test_segment[\"quartile\"] = np.where(X_test['score']<0.25, '1',\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>fidelity</th>\n",
" <th>...</th>\n",
" <th>opt_in</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>has_purchased</th>\n",
" <th>has_purchased_estim</th>\n",
" <th>score</th>\n",
" <th>quartile</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>100.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.177187</td>\n",
" <td>5.177187</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.657671</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>55.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>426.265613</td>\n",
" <td>426.265613</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.266538</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>17.0</td>\n",
" <td>1.0</td>\n",
" <td>80.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>436.033437</td>\n",
" <td>436.033437</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.214668</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>120.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.196412</td>\n",
" <td>5.196412</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.657770</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>34.0</td>\n",
" <td>2.0</td>\n",
" <td>416.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>478.693148</td>\n",
" <td>115.631470</td>\n",
" <td>363.061678</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.894173</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>60.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.140069</td>\n",
" <td>5.140069</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.717482</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>61.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>105.053773</td>\n",
" <td>105.053773</td>\n",
" <td>0.000000</td>\n",
" <td>5.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.541855</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>80.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>63.206030</td>\n",
" <td>63.206030</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.461164</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>44.698090</td>\n",
" <td>44.698090</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.310828</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>165.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>266.012106</td>\n",
" <td>258.012106</td>\n",
" <td>8.000000</td>\n",
" <td>3.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.452877</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount nb_suppliers vente_internet_max \\\n",
"0 4.0 1.0 100.0 1.0 0.0 \n",
"1 1.0 1.0 55.0 1.0 0.0 \n",
"2 17.0 1.0 80.0 1.0 0.0 \n",
"3 4.0 1.0 120.0 1.0 0.0 \n",
"4 34.0 2.0 416.0 1.0 0.0 \n",
"5 2.0 1.0 60.0 1.0 0.0 \n",
"6 5.0 1.0 61.0 1.0 1.0 \n",
"7 4.0 1.0 80.0 1.0 0.0 \n",
"8 1.0 1.0 10.0 1.0 0.0 \n",
"9 3.0 3.0 165.0 1.0 1.0 \n",
"\n",
" purchase_date_min purchase_date_max time_between_purchase \\\n",
"0 5.177187 5.177187 0.000000 \n",
"1 426.265613 426.265613 0.000000 \n",
"2 436.033437 436.033437 0.000000 \n",
"3 5.196412 5.196412 0.000000 \n",
"4 478.693148 115.631470 363.061678 \n",
"5 5.140069 5.140069 0.000000 \n",
"6 105.053773 105.053773 0.000000 \n",
"7 63.206030 63.206030 0.000000 \n",
"8 44.698090 44.698090 0.000000 \n",
"9 266.012106 258.012106 8.000000 \n",
"\n",
" nb_tickets_internet fidelity ... opt_in gender_female gender_male \\\n",
"0 0.0 1 ... False 1 0 \n",
"1 0.0 2 ... True 0 1 \n",
"2 0.0 2 ... True 1 0 \n",
"3 0.0 1 ... False 1 0 \n",
"4 0.0 4 ... False 1 0 \n",
"5 0.0 1 ... False 0 1 \n",
"6 5.0 1 ... False 0 0 \n",
"7 0.0 1 ... True 0 1 \n",
"8 0.0 1 ... True 0 0 \n",
"9 3.0 2 ... False 0 0 \n",
"\n",
" gender_other nb_campaigns nb_campaigns_opened has_purchased \\\n",
"0 0 0.0 0.0 0.0 \n",
"1 0 0.0 0.0 1.0 \n",
"2 0 0.0 0.0 0.0 \n",
"3 0 0.0 0.0 0.0 \n",
"4 0 0.0 0.0 1.0 \n",
"5 0 0.0 0.0 0.0 \n",
"6 1 0.0 0.0 0.0 \n",
"7 0 0.0 0.0 0.0 \n",
"8 1 0.0 0.0 0.0 \n",
"9 1 0.0 0.0 0.0 \n",
"\n",
" has_purchased_estim score quartile \n",
"0 1.0 0.657671 3 \n",
"1 0.0 0.266538 2 \n",
"2 0.0 0.214668 1 \n",
"3 1.0 0.657770 3 \n",
"4 1.0 0.894173 4 \n",
"5 1.0 0.717482 3 \n",
"6 1.0 0.541855 3 \n",
"7 0.0 0.461164 2 \n",
"8 0.0 0.310828 2 \n",
"9 0.0 0.452877 2 \n",
"\n",
"[10 rows x 21 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment = X_test\n",
"\n",
"X_test_segment[\"has_purchased\"] = y_test\n",
"X_test_segment[\"has_purchased_estim\"] = y_pred\n",
"X_test_segment[\"score\"] = y_pred_prob\n",
"X_test_segment[\"quartile\"] = np.where(X_test['score']<0.25, '1',\n",
" np.where(X_test['score']<0.5, '2',\n",
" np.where(X_test['score']<0.75, '3', '4')))\n",
"X_test_segment.head(10)"
]
},
{
"cell_type": "markdown",
"id": "ad16b8ab-7e01-404b-971e-866e9b9d5aa4",
"metadata": {},
"source": [
"## definition of functions to compute the bias of scores and adjust it \n",
"\n",
"Le biais est calculé de la façon suivante. \n",
"En notant $\\hat{p(x_i)}$ le score calculé (estimé par la modélisation) et $p(x_i)$ le vrai score (sans biais), et $\\beta$ le logarithme du biais, on a : \\\n",
"$\\ln{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}} = \\beta + \\ln{\\frac{p(x_i)}{1-p(x_i)}}$ \\\n",
"$ \\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}} = \\exp(\\beta) . \\frac{p(x_i)}{1-p(x_i)} $ , soit : \\\n",
"$p(x_i) = {\\frac{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}{B+\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}}$ \\\n",
"Ce qu'on appelle biais et qu'on estime dans le code par la suite est : $B=\\exp(\\beta) $. Les probabilités ne sont donc pas biaisées si $B=1$. Il y a surestimation si $B>1$. \n",
"\n",
"On cherche le B qui permette d'ajuster les probabilités de telle sorte que la somme des scores soit égale à la somme des y_has_purchased. Cela revient à résoudre : \n",
"\n",
"\\begin{equation}\n",
"\\sum_{i}{\\frac{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}{B+\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}} = \\sum_{i}{Y_i}\n",
"\\end{equation}\n",
"\n",
"C'est ce que fait la fonction find_bias. \n",
"\n",
"Note sur les notations : \\\n",
"$\\hat{p(x_i)}$ correspond à ce qu'on appelle le score et $p(x_i)$ à ce qu'on appellera le score adjusted"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f0379536-a6c5-4b16-bde5-d0319ec1b140",
"metadata": {},
"outputs": [],
"source": [
"# compute adjusted score from odd ratios (cf formula above)\n",
"def adjusted_score(odd_ratio, bias) :\n",
" adjusted_score = odd_ratio/(bias+odd_ratio)\n",
" return adjusted_score"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "32a0dfd0-f49d-4785-a56f-706d381bfe41",
"metadata": {},
"outputs": [],
"source": [
"# when the score is 1 we cannot compute the odd ratio, so we adjust scores equal to 1\n",
"# we set the second best score instead\n",
"\n",
"def adjust_score_1(score) :\n",
" second_best_score = np.array([element for element in score if element !=1]).max()\n",
" new_score = np.array([element if element!=1 else second_best_score for element in score]) \n",
" return new_score"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "2dff1def-02df-413e-afce-b4aeaf7752b6",
"metadata": {},
"outputs": [],
"source": [
"def odd_ratio(score) :\n",
" return score / (1 - score)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "683d71fc-7442-4028-869c-49c57592d6e9",
"metadata": {},
"outputs": [],
"source": [
"# definition of a function that automatically detects the bias\n",
"\n",
"def find_bias(odd_ratios, y_objective, initial_guess=6) :\n",
" \"\"\"\n",
" results = minimize(lambda bias : (sum([adjusted_score(element, bias) for element in list(odd_ratios)]) - y_objective)**2 ,\n",
" initial_guess , method = \"BFGS\")\n",
"\n",
" estimated_bias = results.x[0]\n",
" \"\"\"\n",
"\n",
" # faster method\n",
" bias_estimated = fsolve(lambda bias : sum([adjusted_score(element, bias) for element in list(odd_ratios)]) - y_objective, x0=6)\n",
" \n",
" return bias_estimated[0]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "781b0d40-c954-4c54-830a-e709c8667328",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6.172331113516847"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# computation with the function defined\n",
"\n",
"bias_test_set = find_bias(odd_ratios = odd_ratio(adjust_score_1(X_test_segment[\"score\"])), \n",
" y_objective = y_test[\"y_has_purchased\"].sum(),\n",
" initial_guess=6)\n",
"bias_test_set"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "248cb862-418e-4767-9933-70c4885ecf40",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6.070461139075353"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# comparison with bias of the train set\n",
"X_train_score = logit_cv.predict_proba(X_train)[:, 1]\n",
"\n",
"bias_train_set = find_bias(odd_ratios = odd_ratio(adjust_score_1(X_train_score)), \n",
" y_objective = y_train[\"y_has_purchased\"].sum(),\n",
" initial_guess=6)\n",
"bias_train_set"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "fff6cbe6-7bb3-4732-9b81-b9ac5383bbcf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"betâ test - betâ train = 0.016642008368292337\n"
]
}
],
"source": [
"print(\"betâ test - betâ train = \",np.log(bias_test_set/bias_train_set))"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "f506870d-4a8a-4b2c-8f0b-e0789080b20c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mean absolute erreur 0.001409799678121875\n"
]
}
],
"source": [
"# impact of considering a bias computed on train set instead of test set - totally neglectable\n",
"\n",
"score_adjusted_test = adjusted_score(odd_ratio(adjust_score_1(X_test_segment[\"score\"])), bias = bias_test_set)\n",
"score_adjusted_train = adjusted_score(odd_ratio(adjust_score_1(X_test_segment[\"score\"])), bias = bias_train_set)\n",
"\n",
"print(\"mean absolute erreur\",abs(score_adjusted_test-score_adjusted_train).mean())"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "8213d0e4-063b-49fa-90b7-677fc34f4c01",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_521/1825363704.py:7: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" X_test_segment[\"score_adjusted\"] = score_adjusted_train\n"
]
}
],
"source": [
"# adjust scores accordingly \n",
"\n",
"# X_test_segment[\"score_adjusted\"] = adjusted_score(odd_ratio(adjust_score_1(X_test_segment[\"score\"])), bias = bias_test_set)\n",
"\n",
"# actually, we are not supposed to have X_test, so the biais is estimated on X_train\n",
"# X_test_segment[\"score_adjusted\"] = adjusted_score(odd_ratio(adjust_score_1(X_test_segment[\"score\"])), bias = bias_train_set)\n",
"X_test_segment[\"score_adjusted\"] = score_adjusted_train"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "834d3723-2e72-4c65-9c62-e2d595c69461",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MSE for score : 0.15494387585189107\n",
"MSE for ajusted score : 0.08851697393139933\n",
"sum of y_has_purchased : 13690.0\n",
"sum of adjusted scores : 13825.476109871417\n"
]
}
],
"source": [
"# check \n",
"\n",
"MSE_score = ((X_test_segment[\"score\"]-X_test_segment[\"has_purchased\"])**2).mean()\n",
"MSE_ajusted_score = ((X_test_segment[\"score_adjusted\"]-X_test_segment[\"has_purchased\"])**2).mean()\n",
"print(f\"MSE for score : {MSE_score}\")\n",
"print(f\"MSE for ajusted score : {MSE_ajusted_score}\")\n",
"\n",
"print(\"sum of y_has_purchased :\",y_test[\"y_has_purchased\"].sum())\n",
"print(\"sum of adjusted scores :\", X_test_segment[\"score_adjusted\"].sum())"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "9f30a4dd-a9d8-405a-a7d5-5324ae88cf70",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MAE for score : 0.32116357895490416\n",
"MAE for adjusted score : 0.17359227315595824\n"
]
}
],
"source": [
"# mean absolute error - divided by 2 with out method\n",
"\n",
"MAE_score = abs(X_test_segment[\"score\"]-X_test_segment[\"has_purchased\"]).mean()\n",
"MAE_ajusted_score = abs(X_test_segment[\"score_adjusted\"]-X_test_segment[\"has_purchased\"]).mean()\n",
"print(f\"MAE for score : {MAE_score}\")\n",
"print(f\"MAE for adjusted score : {MAE_ajusted_score}\")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "6f9396db-e213-408c-a596-eaeec3bc79f3",
"metadata": {},
"outputs": [],
"source": [
"# visualization\n",
"\n",
"# histogramme des probas et des probas ajustées\n",
"\n",
"def plot_hist_scores(df, score, score_adjusted, type_of_activity) :\n",
"\n",
" plt.figure()\n",
" plt.hist(df[score], label = \"score\", alpha=0.6)\n",
" plt.hist(df[score_adjusted], label=\"adjusted score\", alpha=0.6)\n",
" plt.legend()\n",
" plt.xlabel(\"probability of a future purchase\")\n",
" plt.ylabel(\"count\")\n",
" plt.title(f\"Comparison between score and adjusted score for {type_of_activity} companies\")\n",
" # plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "def64c16-f4dd-493c-909c-d886d7f53947",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'projet-bdc2324-team1/Output_expected_CA/sport/hist_score_adjustedsport.png'"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"PATH + file_name + type_of_activity + \".png\""
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "b478d40d-9677-4204-87bd-16fb0bc1fe9a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAm8AAAHFCAYAAACkWR6dAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABpG0lEQVR4nO3dd1gUV/s38O/S+0pHFBUbgmDDiEgUK1iwxBiTkGDHgooolvj4GEt8NHaNJmpMLLEEk1iiUQnYiIoFUazEFlSMIEYpigoI5/3Dl/k5LCIgxTHfz3VxJXvmnpl7zs7u3J4pqxJCCBARERGRImhVdgJEREREVHws3oiIiIgUhMUbERERkYKweCMiIiJSEBZvRERERArC4o2IiIhIQVi8ERERESkIizciIiIiBWHxRkRERKQgb2Xxdu7cOQwcOBCOjo4wMDCAiYkJmjVrhnnz5uHBgweVnV65GzBgAGrVqlXZaRRq3bp1UKlUOHXqVJktMzo6GtOnT0daWlqZLZOU7dChQ1CpVDh06FCFr6e8P3+bN2/GkiVLymXZtWrVwoABA8pl2W+iGzduoFu3brCwsIBKpUJISEhlp1Sh9uzZg+nTp1d2GorUtm1btG3bttLW/9YVb6tXr4a7uztiYmIwYcIEhIeHY/v27fjggw+wcuVKDB48uLJTLHdTp07F9u3bKzuNChMdHY0ZM2aweKM3Qnl//sqzePu3GTt2LE6cOIE1a9bg2LFjGDt2bGWnVKH27NmDGTNmVHYaivTNN9/gm2++qbT161TamsvBsWPHMGLECHTq1Ak7duyAvr6+NK1Tp04IDQ1FeHh4JWZYvh4/fgwjIyPUqVOnslOht4wQAk+fPoWhoWFlp/LG4+ev/OXm5uLZs2ey7/jSuHDhAlq0aIFevXq9UXmVt/xjBZWei4tLpa7/rRp5mz17NlQqFb799ttCPzx6enro0aOH9DovLw/z5s1DgwYNoK+vDxsbG/Tr1w+3b9+Wzde2bVu4urri2LFjaNWqFQwNDVGrVi2sXbsWALB79240a9YMRkZGcHNz0ygQp0+fDpVKhTNnzqB3794wMzODWq3Gp59+inv37slit2zZAh8fH1StWhWGhoZwdnbGZ599hszMTFncgAEDYGJigvPnz8PHxwempqbo0KGDNK3gaZuff/4ZHh4eUKvVMDIyQu3atTFo0CBZzK1bt/Dpp5/CxsYG+vr6cHZ2xsKFC5GXlyfF3LhxAyqVCgsWLMCiRYvg6OgIExMTeHp64vjx40W9PTKpqakYOHAgLCwsYGxsjO7du+Ovv/7SiNu3bx86dOgAMzMzGBkZwcvLC/v375f17YQJEwAAjo6OUKlU0mmsCRMmQK1WIzc3V4ofPXo0VCoV5s+fL7Xdv38fWlpaWLZsmdSWkZGB8ePHw9HREXp6eqhWrRpCQkI03gchBL755hs0adIEhoaGMDc3R58+fTS2JX8fiomJQevWraX34Msvv5T178sU5/1LS0tDaGgoateuLe3PXbt2xZ9//inFPHjwAEFBQahWrRr09PRQu3ZtTJkyBVlZWbJlqVQqjBo1CitXroSzszP09fWxfv16AMDVq1fh7+8v20++/vrrV24DAHz99ddo06YNbGxsYGxsDDc3N8ybNw85OTml7q8///wTnTt3hpGREaysrDB8+HA8fPiwWPlcu3YNAwcORL169WBkZIRq1aqhe/fuOH/+vEZscddT8POX/5lZt26dRqxKpZKdtrp37x6GDh0KBwcH6Ovrw9raGl5eXti3b5/UL7t378bNmzelfV2lUknzZ2dnY9asWdJ3mrW1NQYOHKjxPZOTk4OJEyfCzs4ORkZGePfdd3Hy5Mli9RkArFixAo0bN4aJiQlMTU3RoEED/Oc//5HF/P3339K26Onpwd7eHn369MHdu3elmJJ858ybNw+zZs2Co6Mj9PX1cfDgQQDAqVOn0KNHD1hYWMDAwABNmzbFTz/9VGT++ae7r127hr1790r9eOPGjTLLqzCv+hzn57Vx40aMGzcOdnZ2MDQ0hLe3N86cOaOxvJ07d8LT0xNGRkYwNTVFp06dcOzYMVlM/vHn9OnT6NOnD8zNzVGnTh0MGDBA+ty+uC/l98HLhIeHo0OHDtI2ODs7Y86cOaXO69y5c/jggw+gVqthYWGBcePG4dmzZ7h8+TI6d+4MU1NT1KpVC/PmzZPNX5K+OnXqFD766CPUqlVLOn5//PHHuHnzpiwu/7KegwcPYsSIEbCysoKlpSV69+6NO3fuyGILO21a3M/fgQMH0LZtW1haWsLQ0BA1atTA+++/j8ePHxfZ9zLiLfHs2TNhZGQkPDw8ij3P0KFDBQAxatQoER4eLlauXCmsra2Fg4ODuHfvnhTn7e0tLC0thZOTk/j+++/F77//Lvz8/AQAMWPGDOHm5iZ+/PFHsWfPHtGyZUuhr68v/v77b2n+adOmCQCiZs2aYsKECeL3338XixYtEsbGxqJp06YiOztbiv3iiy/E4sWLxe7du8WhQ4fEypUrhaOjo2jXrp0s9/79+wtdXV1Rq1YtMWfOHLF//37x+++/S9Nq1qwpxUZHRwuVSiU++ugjsWfPHnHgwAGxdu1aERAQIMWkpKSIatWqCWtra7Fy5UoRHh4uRo0aJQCIESNGSHEJCQkCgKhVq5bo3Lmz2LFjh9ixY4dwc3MT5ubmIi0trcg+X7t2rQAgHBwcxKBBg8TevXvFt99+K2xsbISDg4NITU2VYjds2CBUKpXo1auX2LZtm9i1a5fw8/MT2traYt++fUIIIRITE8Xo0aMFALFt2zZx7NgxcezYMZGeni7Cw8MFABEdHS0ts0GDBsLQ0FB06tRJatuyZYsAIC5duiSEECIzM1M0adJEWFlZiUWLFol9+/aJpUuXCrVaLdq3by/y8vKkeQMDA4Wurq4IDQ0V4eHhYvPmzaJBgwbC1tZWJCcna+xD9erVEytXrhSRkZEiKChIABDr168vss+K8/5lZGSIhg0bCmNjYzFz5kzx+++/i61bt4oxY8aIAwcOCCGEePLkiWjUqJEwNjYWCxYsEBEREWLq1KlCR0dHdO3aVbZOAKJatWqiUaNGYvPmzeLAgQPiwoUL4uLFi0KtVgs3Nzfxww8/iIiICBEaGiq0tLTE9OnTi9wOIYQYO3asWLFihQgPDxcHDhwQixcvFlZWVmLgwIGyuOL2V3JysrCxsRHVqlUTa9euFXv27BGffPKJqFGjhgAgDh48WGQ+UVFRIjQ0VPzyyy8iKipKbN++XfTq1UsYGhqKP//8s1TrKfj5y//MrF27VmP9AMS0adOk176+vsLa2lp8++234tChQ2LHjh3i888/F2FhYUIIIS5evCi8vLyEnZ2dtK8fO3ZMCCFEbm6u6Ny5szA2NhYzZswQkZGR4rvvvhPVqlUTLi4u4vHjx7IcVSqVmDBhgoiIiBCLFi0S1apVE2ZmZqJ///5F9tmPP/4oAIjRo0eLiIgIsW/fPrFy5UoRHBwsxdy+fVtUrVpV9hnasmWLGDRokIiPjxdClPw7p1q1aqJdu3bil19+ERERESIhIUEcOHBA6OnpidatW4stW7aI8PBwMWDAgJf2d7709HRx7NgxYWdnJ7y8vKR+fPr0aZnkVZjifI4PHjwofT/27NlT7Nq1S2zcuFHUrVtXmJmZievXr0uxmzZtEgCEj4+P2LFjh9iyZYtwd3cXenp64vDhw1Lci8efSZMmicjISLFjxw5x7do10adPHwFAti89ffr0pf323XffCZVKJdq2bSs2b94s9u3bJ7755hsRFBRU6rycnJzEF198ISIjI8XEiROlY3KDBg3EV199JSIjI8XAgQMFALF169ZS9dXPP/8sPv/8c7F9+3YRFRUlwsLChLe3t7C2tpYd6/OPT7Vr1xajR48Wv//+u/juu++Eubm5xjHY29tbeHt7S6+L+/lLSEgQBgYGolOnTmLHjh3i0KFDYtOmTSIgIEB2/HuVt6Z4S05OFgDERx99VKz4+Ph4AUC20wkhxIkTJwQA8Z///Edq8/b2FgDEqVOnpLb79+8LbW1tYWhoKCvU4uLiBADx1VdfSW35O+nYsWNl68rfyTdu3Fhojnl5eSInJ0dERUUJAOLs2bPStP79+wsAYs2aNRrzFTx4LFiwQAAosrD67LPPBABx4sQJWfuIESOESqUSly9fFkL83xeWm5ubePbsmRR38uRJAUD8+OOPL12HEP/34Xjvvfdk7UePHhUAxKxZs4QQzwsoCwsL0b17d1lcbm6uaNy4sWjRooXUNn/+fAFA40szMzNT6OnpiZkzZwohnh9QAIhJkyYJQ0ND6UsqMDBQ2NvbS/PNmTNHaGlpiZiYGNnyfvnlFwFA7NmzRwghxLFjxwQAsXDhQllcYmKiMDQ0FBMnTpTa8vehgv3r4uIifH19i+yz4rx/M2fOFABEZGTkS2NWrlwpAIiffvpJ1j537lwBQEREREhtAIRarRYPHjyQxfr6+orq1auL9PR0WfuoUaOEgYGBRnxRcnNzRU5Ojvjhhx+Etra2bN7i9tekSZOESqUScXFxsrhOnToVq3gr6NmzZyI7O1vUq1dP9nktyXpep3gzMTERISEhRebYrVs32fLz5RdVLx7ghBAiJiZGABDffPONEOL/vvte9n30quJt1KhRokqVKkXGDBo0SOjq6kr/ICpMSb9z6tSpI/uHrhDP/zHWtGlTkZOTI2v38/MTVatWFbm5uUXmWbNmTdGtW7cyz6swxfkc5xckzZo1k/0j8caNG0JXV1cMGTJECPH8s2Nvby/c3Nxk2/jw4UNhY2MjWrVqJbXlH38+//xzjfWNHDlSFHcM5+HDh8LMzEy8++67stxeVJq8Cn5/NmnSRPrHeL6cnBxhbW0tevfuLbUVt68K8+zZM/Ho0SNhbGwsli5dKrXnH58K1gXz5s0TAERSUpLUVrB4K+7nL/84UvC7pKTeqtOmJZE/tF3wzqoWLVrA2dlZdmoOAKpWrQp3d3fptYWFBWxsbNCkSRPY29tL7c7OzgCgMRwLAJ988onsdd++faGjoyMbZv/rr7/g7+8POzs7aGtrQ1dXF97e3gCA+Ph4jWW+//77r9zWd955R1rfTz/9hL///lsj5sCBA3BxcUGLFi1k7QMGDIAQAgcOHJC1d+vWDdra2tLrRo0aASh8uwtTsC9atWqFmjVrSn0RHR2NBw8eoH///nj27Jn0l5eXh86dOyMmJkbjFGZBRkZG8PT0lE45RUZGokqVKpgwYQKys7Nx5MgRAM9PzXbs2FGa77fffoOrqyuaNGkiW7evr6/szsLffvsNKpUKn376qSzOzs4OjRs31rjT0c7OTqN/GzVq9Mo+K877t3fvXtSvX1+2HQUdOHAAxsbG6NOnj6w9/zNQcJ9v3749zM3NpddPnz7F/v378d5778HIyEi2zV27dsXTp09feer8zJkz6NGjBywtLaX9u1+/fsjNzcWVK1dkscXpr4MHD6Jhw4Zo3LixLM7f37/IPPI9e/YMs2fPhouLC/T09KCjowM9PT1cvXpV9nl73fUUV4sWLbBu3TrMmjULx48f1zidXJTffvsNVapUQffu3WXvTZMmTWBnZyftj/mfsZd9HxUnx7S0NHz88cf49ddf8c8//2jE7N27F+3atZO+DwtT0u+cHj16QFdXV3p97do1/Pnnn9J2FNwfk5KScPny5VduT1nn9TLF+Rzn8/f3l50Or1mzJlq1aiW9d5cvX8adO3cQEBAALa3/O4ybmJjg/fffx/HjxzVOwRXnWFGU6OhoZGRkICgoSJbbi0qTl5+fn+y1s7MzVCoVunTpIrXp6Oigbt26hX5XvqqvAODRo0eYNGkS6tatCx0dHejo6MDExASZmZmFHldfvLwKKN7xrbifvyZNmkBPTw9Dhw7F+vXrC71cqDjemuLNysoKRkZGSEhIKFb8/fv3ATwvygqyt7eXpuezsLDQiNPT09No19PTA/D8QFeQnZ2d7LWOjg4sLS2ldT169AitW7fGiRMnMGvWLBw6dAgxMTHYtm0bAODJkyey+Y2MjGBmZlbkdgJAmzZtsGPHDjx79gz9+vVD9erV4erqih9//FGKuX///kv7In/6iywtLWWv868xLJjjyxTsi/y2/PXkXxfTp08f6Orqyv7mzp0LIUSxHvvSsWNHHD9+HJmZmdi3bx/at28PS0tLuLu7Y9++fUhISEBCQoKs6Ll79y7OnTunsV5TU1MIIaSD1d27dyGEgK2trUbs8ePHNQ5qBfssv99e1WfFef/u3buH6tWrF7mc+/fvw87OTuOL18bGBjo6OhrvccH94f79+3j27BmWLVumsb1du3YFgEIP5Plu3bqF1q1b4++//8bSpUtx+PBhxMTESNfdFOyH4vRX/jYVVFhbYcaNG4epU6eiV69e2LVrF06cOIGYmBg0bty4TNdTXFu2bEH//v3x3XffwdPTExYWFujXrx+Sk5NfOe/du3eRlpYGPT09jfcnOTlZem/y3+eXfR+9SkBAANasWYObN2/i/fffh42NDTw8PBAZGSnFFHd/LMl3TsHY/O+I8ePHa2xvUFAQgKL3x/LK62WK8znO96rvx1cdv/Ly8pCamlqqPF8m/7qtot7X0uRV2DHUyMgIBgYGGu3FOa7mt734Pvn7+2P58uUYMmQIfv/9d5w8eRIxMTGwtrYu9Pu3NMe34n7+6tSpg3379sHGxgYjR45EnTp1UKdOHSxduvSlyy7MW3O3qba2Njp06IC9e/fi9u3br/ziyH9zkpKSNGLv3LkDKyurMs8xOTkZ1apVk14/e/YM9+/fl3I5cOAA7ty5g0OHDkmjbQBe+giMl/3rpzA9e/ZEz549kZWVhePHj2POnDnw9/dHrVq14OnpCUtLSyQlJWnMl3+RZln3R2EHo+TkZNStW1e2vmXLlqFly5aFLsPW1vaV6+nQoQOmTp2KP/74A/v378e0adOk9oiICDg6Okqv81lZWcHQ0BBr1qwpdJn5uVlZWUGlUuHw4cOF3iBTlnecver9s7a21rjRpiBLS0ucOHECQgjZvpOSkoJnz55pvMcF9y9zc3Noa2sjICAAI0eOLHQd+f1ZmB07diAzMxPbtm1DzZo1pfa4uLgi8y6KpaXlS/el4ti4cSP69euH2bNny9r/+ecfVKlSpUzWk38QKnhTSMEiAHi+Ty1ZsgRLlizBrVu3sHPnTnz22WdISUl55Z3y+RdXvyzO1NRU2pb83Av7PiqOgQMHYuDAgcjMzMQff/yBadOmwc/PD1euXEHNmjWLvT+W5Dun4P6YP33y5Mno3bt3oetwcnIq1vaUZV5FedXnON/L9rX89+7F41dheWppaclGzUuaZ2Gsra0BoMj3tTR5va5X9VV6ejp+++03TJs2DZ999pkUk5WVVabPfS3u5w8AWrdujdatWyM3NxenTp3CsmXLEBISAltbW3z00UfFWt9bM/IGPP8QCyEQGBiI7Oxsjek5OTnYtWsXgOenhIDnX94viomJQXx8vOxgXlY2bdoke/3TTz/h2bNn0h0r+R+uggf9VatWlVkO+vr68Pb2xty5cwFAuiunQ4cOuHTpEk6fPi2L/+GHH6BSqdCuXbsyywHQ7Ivo6GjcvHlT6gsvLy9UqVIFly5dQvPmzQv9yx/lLOpfRS1atICZmRmWLFmC5ORkdOrUCcDzEbkzZ87gp59+gouLi+zUt5+fH65fvw5LS8tC15t/J6Gfnx+EEPj7778LjXNzcyvTPsvf1sLevy5duuDKlSsap3Re1KFDBzx69Ag7duyQtf/www/S9KIYGRmhXbt2OHPmDBo1alToNhc1clPY/i2EwOrVq4tcb1HatWuHixcv4uzZs7L2zZs3F2t+lUql8XnbvXu3ximt11mPra0tDAwMcO7cOVn7r7/+WuR8NWrUwKhRo9CpUyfZ5/Jlo7V+fn64f/8+cnNzC31v8guZ/M/Yy76PSsLY2BhdunTBlClTkJ2djYsXLwJ4vj8ePHiwyNOWr/ud4+TkhHr16uHs2bMv/Y548YBZXBXxXfiyz3G+H3/8EUII6fXNmzcRHR0tvXdOTk6oVq0aNm/eLIvLzMzE1q1bpTs9i5MHULwzJq1atYJarcbKlStl63xRWeVVEq/qK5VKBSGExuf8u+++kz2J4HUV9/P3Im1tbXh4eEhnHwruc0V5a0beAMDT0xMrVqxAUFAQ3N3dMWLECDRs2BA5OTk4c+YMvv32W7i6uqJ79+5wcnLC0KFDsWzZMmhpaaFLly64ceMGpk6dCgcHh3J5WOO2bdugo6ODTp064eLFi5g6dSoaN26Mvn37Anj+4TA3N8fw4cMxbdo06OrqYtOmTRoHjJL6/PPPcfv2bXTo0AHVq1dHWloali5dKruebuzYsfjhhx/QrVs3zJw5EzVr1sTu3bvxzTffYMSIEahfv/5rb/+LTp06hSFDhuCDDz5AYmIipkyZgmrVqkmnO0xMTLBs2TL0798fDx48QJ8+fWBjY4N79+7h7NmzuHfvHlasWAEAUpG0dOlS9O/fH7q6unBycoKpqSm0tbXh7e2NXbt2wdHRUXoGl5eXF/T19bF//34EBwfLcgsJCcHWrVvRpk0bjB07Fo0aNUJeXh5u3bqFiIgIhIaGwsPDA15eXhg6dCgGDhyIU6dOoU2bNjA2NkZSUhKOHDkCNzc3jBgx4rX7qjjvX0hICLZs2YKePXvis88+Q4sWLfDkyRNERUXBz88P7dq1Q79+/fD111+jf//+uHHjBtzc3HDkyBHMnj0bXbt2LfJ6uXxLly7Fu+++i9atW2PEiBGoVasWHj58iGvXrmHXrl1FFo+dOnWCnp4ePv74Y0ycOBFPnz7FihUrNE6jlERISAjWrFmDbt26YdasWbC1tcWmTZtkj0cpip+fH9atW4cGDRqgUaNGiI2Nxfz58zVG419nPfnXRa5ZswZ16tRB48aNcfLkSY3CLz09He3atYO/vz8aNGgAU1NTxMTEIDw8XDay5Obmhm3btmHFihVwd3eHlpYWmjdvjo8++gibNm1C165dMWbMGLRo0QK6urq4ffs2Dh48iJ49e+K9996Ds7MzPv30UyxZsgS6urro2LEjLly4gAULFhTrMozAwEAYGhrCy8sLVatWRXJyMubMmQO1Wi1d1zVz5kzs3bsXbdq0wX/+8x+4ubkhLS0N4eHhGDduHBo0aFAm3zmrVq1Cly5d4OvriwEDBqBatWp48OAB4uPjcfr0afz888+vXEZB5fVdWJzPcb6UlBS89957CAwMRHp6OqZNmwYDAwNMnjwZAKClpYV58+bhk08+gZ+fH4YNG4asrCzMnz8faWlp+PLLL4uVU/5359y5c9GlSxdoa2ujUaNG0j+MX2RiYoKFCxdiyJAh6NixIwIDA2Fra4tr167h7NmzWL58eZnlVRKv6iszMzO0adMG8+fPh5WVFWrVqoWoqCh8//33stH111Xcz9/KlStx4MABdOvWDTVq1MDTp0+lszzF+Q6WvNbtDm+ouLg40b9/f1GjRg2hp6cnPZLj888/FykpKVJcbm6umDt3rqhfv77Q1dUVVlZW4tNPPxWJiYmy5Xl7e4uGDRtqrKewO5WEeH4H2ciRI6XX+XfVxMbGiu7duwsTExNhamoqPv74Y3H37l3ZvNHR0cLT01MYGRkJa2trMWTIEHH69GmNu9X69+8vjI2NC93+gne7/fbbb6JLly6iWrVqQk9PT9jY2IiuXbvKbtsWQoibN28Kf39/YWlpKXR1dYWTk5OYP3++7K6h/Dus5s+fX+h2v3jnXGHy7+aJiIgQAQEBokqVKsLQ0FB07dpVXL16VSM+KipKdOvWTVhYWAhdXV1RrVo10a1bN/Hzzz/L4iZPnizs7e2FlpaWxt1/S5cuFQBEYGCgbJ78OwV37typsd5Hjx6J//73v8LJyUno6elJj8cYO3as7BEgQgixZs0a4eHhIYyNjYWhoaGoU6eO6Nevn+zu5JftQwXfq8IU9/1LTU0VY8aMETVq1BC6urrCxsZGdOvWTfbIi/v374vhw4eLqlWrCh0dHVGzZk0xefJkjccDFNyHX5SQkCAGDRokqlWrJnR1dYW1tbVo1aqVdKdwUXbt2iUaN24sDAwMRLVq1cSECRPE3r17Nd6zkvTXpUuXRKdOnYSBgYGwsLAQgwcPFr/++mux7jZNTU0VgwcPFjY2NsLIyEi8++674vDhwxp3kpVkPf379xe1atWSzZueni6GDBkibG1thbGxsejevbu4ceOG7DPz9OlTMXz4cNGoUSNhZmYmDA0NhZOTk5g2bZrIzMyUlvXgwQPRp08fUaVKFaFSqWR3C+bk5IgFCxZIfWxiYiIaNGgghg0bJvt8ZWVlidDQUGFjYyMMDAxEy5YtxbFjx0TNmjVfebfp+vXrRbt27YStra3Q09MT9vb2om/fvuLcuXOyuMTERDFo0CBhZ2cndHV1pbgXv/Ne9ztHCCHOnj0r+vbtK2xsbISurq6ws7MT7du3FytXrixyO4R4+Xd4WeRVUHE+x/l3UG7YsEEEBwcLa2troa+vL1q3bi37Psm3Y8cO4eHhIQwMDISxsbHo0KGDOHr0qCwm//jz4iMx8mVlZYkhQ4YIa2traV962aNO8u3Zs0d4e3sLY2NjYWRkJFxcXMTcuXPLLK+XHdsKfieUpK9u374t3n//fWFubi5MTU1F586dxYULFzT29/zjU8EnDeSvq+B3VMHviOJ8/o4dOybee+89UbNmTaGvry8sLS2Ft7d3ocehoqiEeMn4J5WZ6dOnY8aMGbh37165XEtHRG+O9957D4mJiWX6+73073Do0CG0a9cOP//8s8Zd4ST3b++rt+qaNyKiynLr1i2EhYXh4MGDsovPiYjKGos3IqIysGbNGgwfPhzt27eX7momIioPPG1KREREpCAceSMiIiJSEBZvRERERArC4o2IiIhIQd6qh/RWtry8PNy5cwempqav/VMkREREVDGEEHj48CHs7e2hpfXmj2uxeCtDd+7cgYODQ2WnQURERKWQmJj4yt9GfxOweCtD+b+jl5iYWKyfmSEiIqLKl5GRAQcHh1L9Hm5lYPFWhvJPlZqZmbF4IyIiUhilXPL05p/YJSIiIiIJizciIiIiBWHxRkRERKQgvOaNiIjeOLm5ucjJyansNOgtoaurC21t7cpOo8yweCMiojeGEALJyclIS0ur7FToLVOlShXY2dkp5qaEorB4IyKiN0Z+4WZjYwMjI6O34kBLlUsIgcePHyMlJQUAULVq1UrO6PWxeCMiojdCbm6uVLhZWlpWdjr0FjE0NAQApKSkwMbGRvGnUHnDAhERvRHyr3EzMjKq5EzobZS/X70N11KyeCMiojcKT5VSeXib9isWb0REREQKwuKNiIiISEF4wwIREb3RJm87X6Hrm9PbrULXR1RSHHkjIiJSiLfhYnt6fSzeiIiIXtMvv/wCNzc3GBoawtLSEh07dkRmZiYAYM2aNWjYsCH09fVRtWpVjBo1Sprv1q1b6NmzJ0xMTGBmZoa+ffvi7t270vTp06ejSZMmWLNmDWrXrg19fX0IIZCeno6hQ4fCxsYGZmZmaN++Pc6ePVvh202Vg8UbERHRa0hKSsLHH3+MQYMGIT4+HocOHULv3r0hhMCKFSswcuRIDB06FOfPn8fOnTtRt25dAM8fHturVy88ePAAUVFRiIyMxPXr1/Hhhx/Kln/t2jX89NNP2Lp1K+Li4gAA3bp1Q3JyMvbs2YPY2Fg0a9YMHTp0wIMHDyp686kS8Jo3pdg1prIzKLnuSys7AyKicpeUlIRnz56hd+/eqFmzJgDAze35dXOzZs1CaGgoxoz5v+/wd955BwCwb98+nDt3DgkJCXBwcAAAbNiwAQ0bNkRMTIwUl52djQ0bNsDa2hoAcODAAZw/fx4pKSnQ19cHACxYsAA7duzAL7/8gqFDh1bMhlOlYfFGRET0Gho3bowOHTrAzc0Nvr6+8PHxQZ8+fZCTk4M7d+6gQ4cOhc4XHx8PBwcHqXADABcXF1SpUgXx8fFS8VazZk2pcAOA2NhYPHr0SONXKJ48eYLr16+XwxbSm4bFGxER0WvQ1tZGZGQkoqOjERERgWXLlmHKlCnYv39/kfMJIQp9cGzBdmNjY9n0vLw8VK1aFYcOHdKYt0qVKqXaBlIWFm9ERESvSaVSwcvLC15eXvj8889Rs2ZNREZGolatWti/fz/atWunMY+Liwtu3bqFxMREafTt0qVLSE9Ph7Oz80vX1axZMyQnJ0NHRwe1atUqr02iNxiLNyIiotdw4sQJ7N+/Hz4+PrCxscGJEydw7949ODs7Y/r06Rg+fDhsbGzQpUsXPHz4EEePHsXo0aPRsWNHNGrUCJ988gmWLFmCZ8+eISgoCN7e3mjevPlL19exY0d4enqiV69emDt3LpycnHDnzh3s2bMHvXr1KnJeejuweCMiInoNZmZm+OOPP7BkyRJkZGSgZs2aWLhwIbp06QIAePr0KRYvXozx48fDysoKffr0AfB8tG7Hjh0YPXo02rRpAy0tLXTu3BnLli0rcn0qlQp79uzBlClTMGjQINy7dw92dnZo06YNbG1ty317qfKphBCiMhP4+++/MWnSJOzduxdPnjxB/fr18f3338Pd3R3A83P/M2bMwLfffovU1FR4eHjg66+/RsOGDaVlZGVlYfz48fjxxx/x5MkTdOjQAd988w2qV68uxaSmpiI4OBg7d+4EAPTo0QPLli2TXR9w69YtjBw5EgcOHIChoSH8/f2xYMEC6OnpFWtbMjIyoFarkZ6eDjMzszLonRfwblMiess9ffoUCQkJcHR0hIGBQWWnQ2+Zovavcj1+l4NKfc5bamoqvLy8oKuri7179+LSpUtYuHChrKCaN28eFi1ahOXLlyMmJgZ2dnbo1KkTHj58KMWEhIRg+/btCAsLw5EjR/Do0SP4+fkhNzdXivH390dcXBzCw8MRHh6OuLg4BAQESNNzc3PRrVs3ZGZm4siRIwgLC8PWrVsRGhpaIX1BREREVByVOvL22Wef4ejRozh8+HCh04UQsLe3R0hICCZNmgTg+Sibra0t5s6di2HDhiE9PR3W1tbYsGGD9GDDO3fuwMHBAXv27IGvry/i4+Ph4uKC48ePw8PDAwBw/PhxeHp64s8//4STkxP27t0LPz8/JCYmwt7eHgAQFhaGAQMGICUlpViVOEfeCuDIGxGVAEfeqDxx5K2M7Ny5E82bN8cHH3wAGxsbNG3aFKtXr5amJyQkIDk5GT4+PlKbvr4+vL29ER0dDeD5825ycnJkMfb29nB1dZVijh07BrVaLRVuANCyZUuo1WpZjKurq1S4AYCvry+ysrIQGxtbaP5ZWVnIyMiQ/RERERGVp0ot3v766y+sWLEC9erVw++//47hw4cjODgYP/zwAwAgOTkZADQuwLS1tZWmJScnQ09PD+bm5kXG2NjYaKzfxsZGFlNwPebm5tDT05NiCpozZw7UarX09+KDFomIiIjKQ6UWb3l5eWjWrBlmz56Npk2bYtiwYQgMDMSKFStkcQUfYviyBxsWFVOcByEWJ+ZFkydPRnp6uvSXmJhYZE5EREREr6tSi7eqVavCxcVF1ubs7Ixbt24BAOzs7ABAY+QrJSVFGiWzs7NDdnY2UlNTi4y5e/euxvrv3bsniym4ntTUVOTk5Lz01mt9fX2YmZnJ/oiIiIjKU6UWb15eXrh8+bKs7cqVK9IP+zo6OsLOzg6RkZHS9OzsbERFRaFVq1YAAHd3d+jq6spikpKScOHCBSnG09MT6enpOHnypBRz4sQJpKeny2IuXLiApKQkKSYiIgL6+vrSY0uIiIiIKlulPqR37NixaNWqFWbPno2+ffvi5MmT+Pbbb/Htt98CeH4aMyQkBLNnz0a9evVQr149zJ49G0ZGRvD39wcAqNVqDB48GKGhobC0tISFhQXGjx8PNzc3dOzYEcDz0bzOnTsjMDAQq1atAgAMHToUfn5+cHJyAgD4+PjAxcUFAQEBmD9/Ph48eIDx48cjMDCQI2pERET0xqjU4u2dd97B9u3bMXnyZMycOROOjo5YsmQJPvnkEylm4sSJePLkCYKCgqSH9EZERMDU1FSKWbx4MXR0dNC3b1/pIb3r1q2Dtra2FLNp0yYEBwdLd6X26NEDy5cvl6Zra2tj9+7dCAoKgpeXl+whvURERERvikr/hYW3CZ/zVgCf80ZEJfBves7bjRs34OjoiDNnzqBJkyYar98k69atQ0hICNLS0io7ldfyNj3njb9tSkREb7aK/sdrJfzD08HBAUlJSbCysiqT5b0tBRcVrlJvWCAiIqLnl+7Y2dlBR4djKq+SnZ1d2SlUOhZvREREryE8PBzvvvsuqlSpAktLS/j5+eH69euymJMnT6Jp06YwMDBA8+bNcebMGdn0GzduQKVSIS4uDsDzkbMXf+cbAHbs2CF77ujZs2fRrl07mJqawszMDO7u7jh16hQOHTqEgQMHIj09HSqVCiqVCtOnTwfwvPCZOHEiqlWrBmNjY3h4eODQoUOy9axbtw41atSAkZER3nvvPdy/f7/I7c/OzsaoUaNQtWpVGBgYoFatWpgzZ440PS0tDUOHDoWtrS0MDAzg6uqK3377TZq+detWNGzYEPr6+qhVqxYWLlwoW36tWrUwa9YsDBgwAGq1GoGBgQCA6OhotGnTBoaGhnBwcEBwcDAyMzOLzPVtweKNiIjoNWRmZmLcuHGIiYnB/v37oaWlhffeew95eXnS9PynG8TGxmL69OkYP378a6/3k08+QfXq1RETE4PY2Fh89tln0NXVRatWrbBkyRKYmZkhKSkJSUlJ0voGDhyIo0ePIiwsDOfOncMHH3yAzp074+rVqwCeP0Zr0KBBCAoKQlxcHNq1a4dZs2YVmcdXX32FnTt34qeffsLly5exceNG1KpVC8Dzh/F36dIF0dHR2LhxIy5duoQvv/xSuqEwNjYWffv2xUcffYTz589j+vTpmDp1KtatWydbx/z58+Hq6orY2FhMnToV58+fh6+vL3r37o1z585hy5YtOHLkCEaNGvXa/aoEHJ8lIiJ6De+//77s9ffffw8bGxtcunQJrq6u2LRpE3Jzc7FmzRoYGRmhYcOGuH37NkaMGPFa67116xYmTJiABg0aAADq1asnTVOr1VCpVNLD7gHg+vXr+PHHH3H79m3pd7zHjx+P8PBwrF27FrNnz8bSpUvh6+uLzz77DABQv359REdHIzw8vMg86tWrh3fffRcqlUp6VisA7Nu3DydPnkR8fDzq168PAKhdu7Y0fdGiRejQoQOmTp0qre/SpUuYP38+BgwYIMW1b99eVvD269cP/v7+CAkJkbb9q6++gre3N1asWPHW3/DCkTciIqLXcP36dfj7+6N27dowMzODo6MjAEi/FhQfH4/GjRvDyMhImsfT0/O11ztu3DgMGTIEHTt2xJdffqlxqrag06dPQwiB+vXrw8TERPqLioqS5o2Pj9fI7VW5DhgwAHFxcXByckJwcDAiIiKkaXFxcahevbpUuBUUHx8PLy8vWZuXlxeuXr2K3Nxcqa158+aymNjYWKxbt062Hb6+vsjLy0NCQkKR+b4NOPJGRET0Grp37w4HBwesXr0a9vb2yMvLg6urq3RhfWmeyKWlpaUxX05Ojuz19OnT4e/vj927d2Pv3r2YNm0awsLC8N577xW6zLy8PGhrayM2Nlb2HFQAMDExKXWuzZo1Q0JCAvbu3Yt9+/ahb9++6NixI3755RcYGhoWOW9hvx9eWA7GxsYa2zJs2DAEBwdrxNaoUaPE26A0LN6IiIhK6f79+4iPj8eqVavQunVrAMCRI0dkMS4uLtiwYQOePHkiFTPHjx8vcrnW1tZ4+PAhMjMzpcIl/2aGF9WvXx/169fH2LFj8fHHH2Pt2rV47733oKenJxu5AoCmTZsiNzcXKSkpUq4Fubi4aOT2qlwBwMzMDB9++CE+/PBD9OnTB507d8aDBw/QqFEj3L59G1euXCl09M3FxUWjv6Kjo1G/fn2NAvNFzZo1w8WLF1G3bt1X5vY24mlTIiKiUjI3N4elpSW+/fZbXLt2DQcOHMC4ceNkMf7+/tDS0sLgwYNx6dIl7Nmz55W/3uPh4QEjIyP85z//wbVr17B582bZRfxPnjzBqFGjcOjQIdy8eRNHjx5FTEwMnJ2dATy/Q/PRo0fYv38//vnnHzx+/Bj169fHJ598gn79+mHbtm1ISEhATEwM5s6diz179gAAgoODER4ejnnz5uHKlStYvnx5kde7Ac9/5SgsLAx//vknrly5gp9//hl2dnaoUqUKvL290aZNG7z//vuIjIyURujylxkaGor9+/fjiy++wJUrV7B+/XosX778lTd0TJo0CceOHcPIkSMRFxeHq1evYufOnRg9enSR870tWLwRERGVkpaWFsLCwhAbGwtXV1eMHTsW8+fPl8WYmJhg165duHTpEpo2bYopU6Zg7ty5RS7XwsICGzduxJ49e+Dm5oYff/xRetwH8Py5cPfv30e/fv1Qv3599O3bF126dMGMGTMAAK1atcLw4cPx4YcfwtraGvPmzQMArF27Fv369UNoaCicnJzQo0cPnDhxAg4ODgCAli1b4rvvvsOyZcvQpEkTRERE4L///W+RuZqYmGDu3Llo3rw53nnnHdy4cQN79uyBltbzEmPr1q1455138PHHH8PFxQUTJ06URgWbNWuGn376CWFhYXB1dcXnn3+OmTNnym5WKEyjRo0QFRWFq1evonXr1mjatCmmTp2KqlWrFjnf24I/j1WG+PNYBfDnsYioBP5NP49V0OXLl9GgQQNcvXr1X3sqsLy9TT+PxZE3IiKiSvTgwQP88ssvMDMzk0bAiIrCGxaIiIgq0eDBgxEbG4sVK1ZAX1+/stMhBWDxRkREVIm2b99e2SmQwvC0KREREZGCsHgjIqI3Cu+jo/LwNu1XLN6IiOiNoKurCwB4/PhxJWdCb6P8/Sp/P1MyXvNGRERvBG1tbVSpUgUpKSkAACMjI42fTiIqKSEEHj9+jJSUFFSpUqXIX25QChZvRET0xrCzswMAqYAjKitVqlSR9i+lY/FGRERvDJVKhapVq8LGxkbjh9iJSktXV/etGHHLx+KNiIjeONra2m/VwZaoLPGGBSIiIiIFYfFGREREpCAs3oiIiIgUhMUbERERkYKweCMiIiJSEBZvRERERArC4o2IiIhIQVi8ERERESkIizciIiIiBWHxRkRERKQgLN6IiIiIFITFGxEREZGCsHgjIiIiUhAWb0REREQKwuKNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9ERERECsLijYiIiEhBWLwRERERKQiLNyIiIiIFqdTibfr06VCpVLI/Ozs7aboQAtOnT4e9vT0MDQ3Rtm1bXLx4UbaMrKwsjB49GlZWVjA2NkaPHj1w+/ZtWUxqaioCAgKgVquhVqsREBCAtLQ0WcytW7fQvXt3GBsbw8rKCsHBwcjOzi63bSciIiIqjUofeWvYsCGSkpKkv/Pnz0vT5s2bh0WLFmH58uWIiYmBnZ0dOnXqhIcPH0oxISEh2L59O8LCwnDkyBE8evQIfn5+yM3NlWL8/f0RFxeH8PBwhIeHIy4uDgEBAdL03NxcdOvWDZmZmThy5AjCwsKwdetWhIaGVkwnEBERERWTTqUnoKMjG23LJ4TAkiVLMGXKFPTu3RsAsH79etja2mLz5s0YNmwY0tPT8f3332PDhg3o2LEjAGDjxo1wcHDAvn374Ovri/j4eISHh+P48ePw8PAAAKxevRqenp64fPkynJycEBERgUuXLiExMRH29vYAgIULF2LAgAH43//+BzMzswrqDSIiIqKiVfrI29WrV2Fvbw9HR0d89NFH+OuvvwAACQkJSE5Oho+PjxSrr68Pb29vREdHAwBiY2ORk5Mji7G3t4erq6sUc+zYMajVaqlwA4CWLVtCrVbLYlxdXaXCDQB8fX2RlZWF2NjYl+aelZWFjIwM2R8RERFRearU4s3DwwM//PADfv/9d6xevRrJyclo1aoV7t+/j+TkZACAra2tbB5bW1tpWnJyMvT09GBubl5kjI2Njca6bWxsZDEF12Nubg49PT0ppjBz5syRrqNTq9VwcHAoYQ8QERERlUylFm9dunTB+++/Dzc3N3Ts2BG7d+8G8Pz0aD6VSiWbRwih0VZQwZjC4ksTU9DkyZORnp4u/SUmJhaZFxEREdHrqvTTpi8yNjaGm5sbrl69Kl0HV3DkKyUlRRols7OzQ3Z2NlJTU4uMuXv3rsa67t27J4spuJ7U1FTk5ORojMi9SF9fH2ZmZrI/IiIiovL0RhVvWVlZiI+PR9WqVeHo6Ag7OztERkZK07OzsxEVFYVWrVoBANzd3aGrqyuLSUpKwoULF6QYT09PpKen4+TJk1LMiRMnkJ6eLou5cOECkpKSpJiIiAjo6+vD3d29XLeZiIiIqCQq9W7T8ePHo3v37qhRowZSUlIwa9YsZGRkoH///lCpVAgJCcHs2bNRr1491KtXD7Nnz4aRkRH8/f0BAGq1GoMHD0ZoaCgsLS1hYWGB8ePHS6dhAcDZ2RmdO3dGYGAgVq1aBQAYOnQo/Pz84OTkBADw8fGBi4sLAgICMH/+fDx48ADjx49HYGAgR9OIiIjojVKpxdvt27fx8ccf459//oG1tTVatmyJ48ePo2bNmgCAiRMn4smTJwgKCkJqaio8PDwQEREBU1NTaRmLFy+Gjo4O+vbtiydPnqBDhw5Yt24dtLW1pZhNmzYhODhYuiu1R48eWL58uTRdW1sbu3fvRlBQELy8vGBoaAh/f38sWLCggnqCiIiIqHhUQghR2Um8LTIyMqBWq5Genl72I3a7xpTt8ipC96WVnQEREdErlevxuxy8Ude8EREREVHRWLwRERERKQiLNyIiIiIFYfFGREREpCAs3oiIiIgUhMUbERERkYKweCMiIiJSEBZvRERERArC4o2IiIhIQVi8ERERESkIizciIiIiBWHxRkRERKQgLN6IiIiIFITFGxEREZGCsHgjIiIiUhAWb0REREQKwuKNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9ERERECsLijYiIiEhBWLwRERERKQiLNyIiIiIFYfFGREREpCAs3oiIiIgUhMUbERERkYKweCMiIiJSEBZvRERERArC4o2IiIhIQVi8ERERESkIizciIiIiBWHxRkRERKQgLN6IiIiIFITFGxEREZGCsHgjIiIiUhAWb0REREQKwuKNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrwxxducOXOgUqkQEhIitQkhMH36dNjb28PQ0BBt27bFxYsXZfNlZWVh9OjRsLKygrGxMXr06IHbt2/LYlJTUxEQEAC1Wg21Wo2AgACkpaXJYm7duoXu3bvD2NgYVlZWCA4ORnZ2dnltLhEREVGpvBHFW0xMDL799ls0atRI1j5v3jwsWrQIy5cvR0xMDOzs7NCpUyc8fPhQigkJCcH27dsRFhaGI0eO4NGjR/Dz80Nubq4U4+/vj7i4OISHhyM8PBxxcXEICAiQpufm5qJbt27IzMzEkSNHEBYWhq1btyI0NLT8N56IiIioBCq9eHv06BE++eQTrF69Gubm5lK7EAJLlizBlClT0Lt3b7i6umL9+vV4/PgxNm/eDABIT0/H999/j4ULF6Jjx45o2rQpNm7ciPPnz2Pfvn0AgPj4eISHh+O7776Dp6cnPD09sXr1avz222+4fPkyACAiIgKXLl3Cxo0b0bRpU3Ts2BELFy7E6tWrkZGRUfGdQkRERPQSlV68jRw5Et26dUPHjh1l7QkJCUhOToaPj4/Upq+vD29vb0RHRwMAYmNjkZOTI4uxt7eHq6urFHPs2DGo1Wp4eHhIMS1btoRarZbFuLq6wt7eXorx9fVFVlYWYmNjX5p7VlYWMjIyZH9ERERE5UmnMlceFhaG06dPIyYmRmNacnIyAMDW1lbWbmtri5s3b0oxenp6shG7/Jj8+ZOTk2FjY6OxfBsbG1lMwfWYm5tDT09PiinMnDlzMGPGjFdtJhEREVGZqbSRt8TERIwZMwYbN26EgYHBS+NUKpXstRBCo62ggjGFxZcmpqDJkycjPT1d+ktMTCwyLyIiIqLXVWnFW2xsLFJSUuDu7g4dHR3o6OggKioKX331FXR0dKSRsIIjXykpKdI0Ozs7ZGdnIzU1tciYu3fvaqz/3r17spiC60lNTUVOTo7GiNyL9PX1YWZmJvsjIiIiKk+VVrx16NAB58+fR1xcnPTXvHlzfPLJJ4iLi0Pt2rVhZ2eHyMhIaZ7s7GxERUWhVatWAAB3d3fo6urKYpKSknDhwgUpxtPTE+np6Th58qQUc+LECaSnp8tiLly4gKSkJCkmIiIC+vr6cHd3L9d+ICIiIiqJSrvmzdTUFK6urrI2Y2NjWFpaSu0hISGYPXs26tWrh3r16mH27NkwMjKCv78/AECtVmPw4MEIDQ2FpaUlLCwsMH78eLi5uUk3QDg7O6Nz584IDAzEqlWrAABDhw6Fn58fnJycAAA+Pj5wcXFBQEAA5s+fjwcPHmD8+PEIDAzkaBoRERG9USr1hoVXmThxIp48eYKgoCCkpqbCw8MDERERMDU1lWIWL14MHR0d9O3bF0+ePEGHDh2wbt06aGtrSzGbNm1CcHCwdFdqjx49sHz5cmm6trY2du/ejaCgIHh5ecHQ0BD+/v5YsGBBxW0sERERUTGohBCispN4W2RkZECtViM9Pb3sR+x2jSnb5VWE7ksrOwMiIqJXKtfjdzmo9Oe8EREREVHxsXgjIiIiUhAWb0REREQKwuKNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9ERERECsLijYiIiEhBWLwRERERKQiLNyIiIiIFYfFGREREpCAs3oiIiIgUhMUbERERkYKweCMiIiJSEBZvRERERApSquKtffv2SEtL02jPyMhA+/btXzcnIiIiInqJUhVvhw4dQnZ2tkb706dPcfjw4ddOioiIiIgKp1OS4HPnzkn/f+nSJSQnJ0uvc3NzER4ejmrVqpVddkREREQkU6LirUmTJlCpVFCpVIWeHjU0NMSyZcvKLDkiIiIikitR8ZaQkAAhBGrXro2TJ0/C2tpamqanpwcbGxtoa2uXeZJERERE9FyJireaNWsCAPLy8solGSIiIiIqWomKtxdduXIFhw4dQkpKikYx9/nnn792YkRERESkqVTF2+rVqzFixAhYWVnBzs4OKpVKmqZSqVi8EREREZWTUhVvs2bNwv/+9z9MmjSprPMhIiIioiKU6jlvqamp+OCDD8o6FyIiIiJ6hVIVbx988AEiIiLKOhciIiIieoVSnTatW7cupk6diuPHj8PNzQ26urqy6cHBwWWSHBERERHJqYQQoqQzOTo6vnyBKhX++uuv10pKqTIyMqBWq5Geng4zM7OyXfiuMWW7vIrQfWllZ0BERPRK5Xr8LgelGnlLSEgo6zyIiIiIqBhKdc0bEREREVWOUo28DRo0qMjpa9asKVUyRERERFS0UhVvqampstc5OTm4cOEC0tLSCv3BeiIiIiIqG6Uq3rZv367RlpeXh6CgINSuXfu1kyIiIiKiwpXZNW9aWloYO3YsFi9eXFaLJCIiIqICyvSGhevXr+PZs2dluUgiIiIiekGpTpuOGzdO9loIgaSkJOzevRv9+/cvk8SIiIiISFOpirczZ87IXmtpacHa2hoLFy585Z2oRERERFR6pSreDh48WNZ5EBEREVExlKp4y3fv3j1cvnwZKpUK9evXh7W1dVnlRURERESFKNUNC5mZmRg0aBCqVq2KNm3aoHXr1rC3t8fgwYPx+PHjss6RiIiIiP6/UhVv48aNQ1RUFHbt2oW0tDSkpaXh119/RVRUFEJDQ8s6RyIiIiL6/0p12nTr1q345Zdf0LZtW6mta9euMDQ0RN++fbFixYqyyo+IiIiIXlCqkbfHjx/D1tZWo93GxqZEp01XrFiBRo0awczMDGZmZvD09MTevXul6UIITJ8+Hfb29jA0NETbtm1x8eJF2TKysrIwevRoWFlZwdjYGD169MDt27dlMampqQgICIBarYZarUZAQADS0tJkMbdu3UL37t1hbGwMKysrBAcHIzs7u9jbQkRERFQRSlW8eXp6Ytq0aXj69KnU9uTJE8yYMQOenp7FXk716tXx5Zdf4tSpUzh16hTat2+Pnj17SgXavHnzsGjRIixfvhwxMTGws7NDp06d8PDhQ2kZISEh2L59O8LCwnDkyBE8evQIfn5+yM3NlWL8/f0RFxeH8PBwhIeHIy4uDgEBAdL03NxcdOvWDZmZmThy5AjCwsKwdetWngImIiKiN45KCCFKOtP58+fRpUsXPH36FI0bN4ZKpUJcXBz09fURERGBhg0bljohCwsLzJ8/H4MGDYK9vT1CQkIwadIkAM9H2WxtbTF37lwMGzYM6enpsLa2xoYNG/Dhhx8CAO7cuQMHBwfs2bMHvr6+iI+Ph4uLC44fPw4PDw8AwPHjx+Hp6Yk///wTTk5O2Lt3L/z8/JCYmAh7e3sAQFhYGAYMGICUlBSYmZkVK/eMjAyo1Wqkp6cXe55i2zWmbJdXEbovrewMiIiIXqlcj9/loFQjb25ubrh69SrmzJmDJk2aoFGjRvjyyy9x7dq1Uhduubm5CAsLQ2ZmJjw9PZGQkIDk5GT4+PhIMfr6+vD29kZ0dDQAIDY2Fjk5ObIYe3t7uLq6SjHHjh2DWq2WCjcAaNmyJdRqtSzG1dVVKtwAwNfXF1lZWYiNjX1pzllZWcjIyJD9EREREZWnUt2wMGfOHNja2iIwMFDWvmbNGty7d08aKSuO8+fPw9PTE0+fPoWJiQm2b98OFxcXqbAqeG2dra0tbt68CQBITk6Gnp4ezM3NNWKSk5OlGBsbG4312tjYyGIKrsfc3Bx6enpSTGHmzJmDGTNmFHtbiYiIiF5XqUbeVq1ahQYNGmi0N2zYECtXrizRspycnBAXF4fjx49jxIgR6N+/Py5duiRNV6lUsnghhEZbQQVjCosvTUxBkydPRnp6uvSXmJhYZF5EREREr6tUxVtycjKqVq2q0W5tbY2kpKQSLUtPTw9169ZF8+bNMWfOHDRu3BhLly6FnZ2dtK4XpaSkSKNkdnZ2yM7ORmpqapExd+/e1VjvvXv3ZDEF15OamoqcnJxC76rNp6+vL90pm/9HREREVJ5KVbw5ODjg6NGjGu1Hjx6VXTdWGkIIZGVlwdHREXZ2doiMjJSmZWdnIyoqCq1atQIAuLu7Q1dXVxaTlJSECxcuSDGenp5IT0/HyZMnpZgTJ04gPT1dFnPhwgVZ4RkREQF9fX24u7u/1vYQERERlaVSXfM2ZMgQhISEICcnB+3btwcA7N+/HxMnTizR4zX+85//oEuXLnBwcMDDhw8RFhaGQ4cOITw8HCqVCiEhIZg9ezbq1auHevXqYfbs2TAyMoK/vz8AQK1WY/DgwQgNDYWlpSUsLCwwfvx4uLm5oWPHjgAAZ2dndO7cGYGBgVi1ahUAYOjQofDz84OTkxMAwMfHBy4uLggICMD8+fPx4MEDjB8/HoGBgRxNIyIiojdKqYq3iRMn4sGDBwgKCpIeZGtgYIBJkyZh8uTJxV7O3bt3ERAQgKSkJKjVajRq1Ajh4eHo1KmTtJ4nT54gKCgIqamp8PDwQEREBExNTaVlLF68GDo6Oujbty+ePHmCDh06YN26ddDW1pZiNm3ahODgYOmu1B49emD58uXSdG1tbezevRtBQUHw8vKCoaEh/P39sWDBgtJ0DxEREVG5KdVz3vI9evQI8fHxMDQ0RL169aCvr1+WuSkOn/NWAJ/zRkRECqC057yVauQtn4mJCd55552yyoWIiIiIXqFUNywQERERUeVg8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9ERERECvJajwqht8+JhAdltqwd286X2bIKM6e3W7kun4iI6E3EkTciIiIiBWHxRkRERKQgLN6IiIiIFITFGxEREZGCsHgjIiIiUhAWb0REREQKwuKNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9ERERECsLijYiIiEhBWLwRERERKQiLNyIiIiIFYfFGREREpCAs3oiIiIgUhMUbERERkYKweCMiIiJSEBZvRERERArC4o2IiIhIQVi8ERERESkIizciIiIiBWHxRkRERKQgLN6IiIiIFITFGxEREZGCsHgjIiIiUhAWb0REREQKwuKNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9EREREClKpxducOXPwzjvvwNTUFDY2NujVqxcuX74sixFCYPr06bC3t4ehoSHatm2LixcvymKysrIwevRoWFlZwdjYGD169MDt27dlMampqQgICIBarYZarUZAQADS0tJkMbdu3UL37t1hbGwMKysrBAcHIzs7u1y2nYiIiKg0KrV4i4qKwsiRI3H8+HFERkbi2bNn8PHxQWZmphQzb948LFq0CMuXL0dMTAzs7OzQqVMnPHz4UIoJCQnB9u3bERYWhiNHjuDRo0fw8/NDbm6uFOPv74+4uDiEh4cjPDwccXFxCAgIkKbn5uaiW7duyMzMxJEjRxAWFoatW7ciNDS0YjqDiIiIqBhUQghR2Unku3fvHmxsbBAVFYU2bdpACAF7e3uEhIRg0qRJAJ6Pstna2mLu3LkYNmwY0tPTYW1tjQ0bNuDDDz8EANy5cwcODg7Ys2cPfH19ER8fDxcXFxw/fhweHh4AgOPHj8PT0xN//vknnJycsHfvXvj5+SExMRH29vYAgLCwMAwYMAApKSkwMzN7Zf4ZGRlQq9VIT08vVnyJ7BpTtst7iRMJD8psWTuqTyyzZRVmTm+3cl0+ERH9O5Tr8bscvFHXvKWnpwMALCwsAAAJCQlITk6Gj4+PFKOvrw9vb29ER0cDAGJjY5GTkyOLsbe3h6urqxRz7NgxqNVqqXADgJYtW0KtVstiXF1dpcINAHx9fZGVlYXY2NhC883KykJGRobsj4iIiKg8vTHFmxAC48aNw7vvvgtXV1cAQHJyMgDA1tZWFmtraytNS05Ohp6eHszNzYuMsbGx0VinjY2NLKbgeszNzaGnpyfFFDRnzhzpGjq1Wg0HB4eSbjYRERFRibwxxduoUaNw7tw5/PjjjxrTVCqV7LUQQqOtoIIxhcWXJuZFkydPRnp6uvSXmJhYZE5EREREr+uNKN5Gjx6NnTt34uDBg6hevbrUbmdnBwAaI18pKSnSKJmdnR2ys7ORmppaZMzdu3c11nvv3j1ZTMH1pKamIicnR2NELp++vj7MzMxkf0RERETlqVKLNyEERo0ahW3btuHAgQNwdHSUTXd0dISdnR0iIyOltuzsbERFRaFVq1YAAHd3d+jq6spikpKScOHCBSnG09MT6enpOHnypBRz4sQJpKeny2IuXLiApKQkKSYiIgL6+vpwd3cv+40nIiIiKgWdylz5yJEjsXnzZvz6668wNTWVRr7UajUMDQ2hUqkQEhKC2bNno169eqhXrx5mz54NIyMj+Pv7S7GDBw9GaGgoLC0tYWFhgfHjx8PNzQ0dO3YEADg7O6Nz584IDAzEqlWrAABDhw6Fn58fnJycAAA+Pj5wcXFBQEAA5s+fjwcPHmD8+PEIDAzkiBoRERG9MSq1eFuxYgUAoG3btrL2tWvXYsCAAQCAiRMn4smTJwgKCkJqaio8PDwQEREBU1NTKX7x4sXQ0dFB37598eTJE3To0AHr1q2Dtra2FLNp0yYEBwdLd6X26NEDy5cvl6Zra2tj9+7dCAoKgpeXFwwNDeHv748FCxaU09YTERERldwb9Zw3peNz3uT4nDciIlICPueNiIiIiMoNizciIiIiBWHxRkRERKQgLN6IiIiIFKRS7zYl+jeYvO18ZadQbLwJhIjozceRNyIiIiIFYfFGREREpCAs3oiIiIgUhMUbERERkYKweCMiIiJSEBZvRERERArC4o2IiIhIQVi8ERERESkIizciIiIiBWHxRkRERKQgLN6IiIiIFITFGxEREZGCsHgjIiIiUhAWb0REREQKwuKNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9ERERECsLijYiIiEhBdCo7AXp79bo9r3xXsMui7JfZfWnZL5OIiKgMceSNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9ERERECsLijYiIiEhBWLwRERERKQiLNyIiIiIFYfFGREREpCAs3oiIiIgUhMUbERERkYKweCMiIiJSEBZvRERERApSqcXbH3/8ge7du8Pe3h4qlQo7duyQTRdCYPr06bC3t4ehoSHatm2LixcvymKysrIwevRoWFlZwdjYGD169MDt27dlMampqQgICIBarYZarUZAQADS0tJkMbdu3UL37t1hbGwMKysrBAcHIzs7uzw2m4iIiKjUKrV4y8zMROPGjbF8+fJCp8+bNw+LFi3C8uXLERMTAzs7O3Tq1AkPHz6UYkJCQrB9+3aEhYXhyJEjePToEfz8/JCbmyvF+Pv7Iy4uDuHh4QgPD0dcXBwCAgKk6bm5uejWrRsyMzNx5MgRhIWFYevWrQgNDS2/jSciIiIqBZ3KXHmXLl3QpUuXQqcJIbBkyRJMmTIFvXv3BgCsX78etra22Lx5M4YNG4b09HR8//332LBhAzp27AgA2LhxIxwcHLBv3z74+voiPj4e4eHhOH78ODw8PAAAq1evhqenJy5fvgwnJydERETg0qVLSExMhL29PQBg4cKFGDBgAP73v//BzMysAnqDiIiI6NUqtXgrSkJCApKTk+Hj4yO16evrw9vbG9HR0Rg2bBhiY2ORk5Mji7G3t4erqyuio6Ph6+uLY8eOQa1WS4UbALRs2RJqtRrR0dFwcnLCsWPH4OrqKhVuAODr64usrCzExsaiXbt2FbPRVCInEh6U+TJ3bDtf5sskIiIqS29s8ZacnAwAsLW1lbXb2tri5s2bUoyenh7Mzc01YvLnT05Oho2NjcbybWxsZDEF12Nubg49PT0ppjBZWVnIysqSXmdkZBR38+gN1ev2vMpOocR2VJ9Y2SkQEVEFeuPvNlWpVLLXQgiNtoIKxhQWX5qYgubMmSPdBKFWq+Hg4FBkXkRERESv640t3uzs7ABAY+QrJSVFGiWzs7NDdnY2UlNTi4y5e/euxvLv3bsniym4ntTUVOTk5GiMyL1o8uTJSE9Pl/4SExNLuJVEREREJfPGFm+Ojo6ws7NDZGSk1JadnY2oqCi0atUKAODu7g5dXV1ZTFJSEi5cuCDFeHp6Ij09HSdPnpRiTpw4gfT0dFnMhQsXkJSUJMVERERAX18f7u7uL81RX18fZmZmsj8iIiKi8lSp17w9evQI165dk14nJCQgLi4OFhYWqFGjBkJCQjB79mzUq1cP9erVw+zZs2FkZAR/f38AgFqtxuDBgxEaGgpLS0tYWFhg/PjxcHNzk+4+dXZ2RufOnREYGIhVq1YBAIYOHQo/Pz84OTkBAHx8fODi4oKAgADMnz8fDx48wPjx4xEYGMiCjIiIiN4olVq8nTp1SnYn57hx4wAA/fv3x7p16zBx4kQ8efIEQUFBSE1NhYeHByIiImBqairNs3jxYujo6KBv37548uQJOnTogHXr1kFbW1uK2bRpE4KDg6W7Unv06CF7tpy2tjZ2796NoKAgeHl5wdDQEP7+/liwYEF5dwERERFRiaiEEKKyk3hbZGRkQK1WIz09vexH7HaNKdvlvUR5PH6DyldZ3m06p7dbmS2LiEgpyvX4XQ7e2GveiIiIiEjTG/ucNyIiqjiTFfSAao4Q078dR96IiIiIFITFGxEREZGCsHgjIiIiUhAWb0REREQKwuKNiIiISEFYvBEREREpCB8VQkRUjpT0CA4iUgaOvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSENywQkeLwJgAi+jdj8UZEEhZFRERvPp42JSIiIlIQFm9ERERECsLijYiIiEhBWLwRERERKQiLNyIiIiIFYfFGREREpCAs3oiIiIgUhM95IyIi+pdTyjMe5/R2q+wU3ggceSMiIiJSEBZvRERERArC4o2IiIhIQVi8ERERESkIizciIiIiBWHxRkRERKQgLN6IiIiIFITFGxEREZGC8CG9RESkKHygLP3bceSNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkILxhgUjhet2eV9kplMqO6hMrOwUiIkVi8UZERFQOlHJXLCkPT5sSERERKQiLNyIiIiIFYfFGREREpCAs3oiIiIgUhDcsEBER/Qsp8071DZWdwBuBI29ERERECsKRNyKqFMr8Vz8RUeXjyBsRERGRgrB4K+Cbb76Bo6MjDAwM4O7ujsOHD1d2SkREREQSFm8v2LJlC0JCQjBlyhScOXMGrVu3RpcuXXDr1q3KTo2IiIgIAIs3mUWLFmHw4MEYMmQInJ2dsWTJEjg4OGDFihWVnRoRERERABZvkuzsbMTGxsLHx0fW7uPjg+jo6ErKioiIiEiOd5v+f//88w9yc3Nha2sra7e1tUVycnKh82RlZSErK0t6nZ6eDgDIyMgo+wQfZ706pgxkPs2ukPUQERGVVLkcX19YrhCiXJZf1li8FaBSqWSvhRAabfnmzJmDGTNmaLQ7ODiUS25ERET/apN+KtfFP3z4EGq1ulzXURZYvP1/VlZW0NbW1hhlS0lJ0RiNyzd58mSMGzdOep2Xl4cHDx7A0tLypQXfq2RkZMDBwQGJiYkwMzMr1TKoeNjXFYd9XbHY3xWHfV1xyrOvhRB4+PAh7O3ty3S55YXF2/+np6cHd3d3REZG4r333pPaIyMj0bNnz0Ln0dfXh76+vqytSpUqZZKPmZkZvwgqCPu64rCvKxb7u+KwrytOefW1Ekbc8rF4e8G4ceMQEBCA5s2bw9PTE99++y1u3bqF4cOHV3ZqRERERABYvMl8+OGHuH//PmbOnImkpCS4urpiz549qFmzZmWnRkRERASAxZuGoKAgBAUFVdr69fX1MW3aNI3TsVT22NcVh31dsdjfFYd9XXHY1/9HJZRyXywRERER8SG9RERERErC4o2IiIhIQVi8ERERESkIizciIiIiBWHxVgm++eYbODo6wsDAAO7u7jh8+HCR8VFRUXB3d4eBgQFq166NlStXVlCmyleSvt62bRs6deoEa2trmJmZwdPTE7///nsFZqtsJd2v8x09ehQ6Ojpo0qRJ+Sb4FilpX2dlZWHKlCmoWbMm9PX1UadOHaxZs6aCslW+kvb3pk2b0LhxYxgZGaFq1aoYOHAg7t+/X0HZKtcff/yB7t27w97eHiqVCjt27HjlPP/a46OgChUWFiZ0dXXF6tWrxaVLl8SYMWOEsbGxuHnzZqHxf/31lzAyMhJjxowRly5dEqtXrxa6urril19+qeDMlaekfT1mzBgxd+5ccfLkSXHlyhUxefJkoaurK06fPl3BmStPSfs6X1pamqhdu7bw8fERjRs3rphkFa40fd2jRw/h4eEhIiMjRUJCgjhx4oQ4evRoBWatXCXt78OHDwstLS2xdOlS8ddff4nDhw+Lhg0bil69elVw5sqzZ88eMWXKFLF161YBQGzfvr3I+H/z8ZHFWwVr0aKFGD58uKytQYMG4rPPPis0fuLEiaJBgwaytmHDhomWLVuWW45vi5L2dWFcXFzEjBkzyjq1t05p+/rDDz8U//3vf8W0adNYvBVTSft67969Qq1Wi/v371dEem+dkvb3/PnzRe3atWVtX331lahevXq55fg2Kk7x9m8+PvK0aQXKzs5GbGwsfHx8ZO0+Pj6Ijo4udJ5jx45pxPv6+uLUqVPIyckpt1yVrjR9XVBeXh4ePnwICwuL8kjxrVHavl67di2uX7+OadOmlXeKb43S9PXOnTvRvHlzzJs3D9WqVUP9+vUxfvx4PHnypCJSVrTS9HerVq1w+/Zt7NmzB0II3L17F7/88gu6detWESn/q/ybj4/8hYUK9M8//yA3Nxe2traydltbWyQnJxc6T3JycqHxz549wz///IOqVauWW75KVpq+LmjhwoXIzMxE3759yyPFt0Zp+vrq1av47LPPcPjwYejo8GuouErT13/99ReOHDkCAwMDbN++Hf/88w+CgoLw4MEDXvf2CqXp71atWmHTpk348MMP8fTpUzx79gw9evTAsmXLKiLlf5V/8/GRI2+VQKVSyV4LITTaXhVfWDtpKmlf5/vxxx8xffp0bNmyBTY2NuWV3luluH2dm5sLf39/zJgxA/Xr16+o9N4qJdmv8/LyoFKpsGnTJrRo0QJdu3bFokWLsG7dOo6+FVNJ+vvSpUsIDg7G559/jtjYWISHhyMhIQHDhw+viFT/df6tx0f+k7cCWVlZQVtbW+NfbCkpKRr/eshnZ2dXaLyOjg4sLS3LLVelK01f59uyZQsGDx6Mn3/+GR07dizPNN8KJe3rhw8f4tSpUzhz5gxGjRoF4HmBIYSAjo4OIiIi0L59+wrJXWlKs19XrVoV1apVg1qtltqcnZ0hhMDt27dRr169cs1ZyUrT33PmzIGXlxcmTJgAAGjUqBGMjY3RunVrzJo1660eDapo/+bjI0feKpCenh7c3d0RGRkpa4+MjESrVq0KncfT01MjPiIiAs2bN4eurm655ap0pelr4PmI24ABA7B582Zeo1JMJe1rMzMznD9/HnFxcdLf8OHD4eTkhLi4OHh4eFRU6opTmv3ay8sLd+7cwaNHj6S2K1euQEtLC9WrVy/XfJWuNP39+PFjaGnJD63a2toA/m9UiMrGv/r4WEk3Svxr5d92/v3334tLly6JkJAQYWxsLG7cuCGEEOKzzz4TAQEBUnz+rdBjx44Vly5dEt9///2/5lbo11XSvt68ebPQ0dERX3/9tUhKSpL+0tLSKmsTFKOkfV0Q7zYtvpL29cOHD0X16tVFnz59xMWLF0VUVJSoV6+eGDJkSGVtgqKUtL/Xrl0rdHR0xDfffCOuX78ujhw5Ipo3by5atGhRWZugGA8fPhRnzpwRZ86cEQDEokWLxJkzZ6THsvD4+H9YvFWCr7/+WtSsWVPo6emJZs2aiaioKGla//79hbe3tyz+0KFDomnTpkJPT0/UqlVLrFixooIzVq6S9LW3t7cAoPHXv3//ik9cgUq6X7+IxVvJlLSv4+PjRceOHYWhoaGoXr26GDdunHj8+HEFZ61cJe3vr776Sri4uAhDQ0NRtWpV8cknn4jbt29XcNbKc/DgwSK/g3l8/D8qITiOS0RERKQUvOaNiIiISEFYvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9Eb6FatWphyZIlr7WMdevWoUqVKkXGTJ8+HU2aNJFeDxgwAL169ZJet23bFiEhIa+VR2kdPXoUbm5u0NXVleX0uh4/foz3338fZmZmUKlUSEtLK7Nlv60qcz8gehuxeCOiUhs/fjz279//0unbtm3DF198Ib0ui6KyuMaNG4cmTZogISEB69atK7Plrl+/HocPH0Z0dDSSkpJkP/j+Mjdu3IBKpUJcXFyZ5UFE/146lZ0AERVfdnY29PT0KjsNiYmJCUxMTF463cLCogKzkbt+/TqGDx9e5j++fv36dTg7O8PV1bVMl1tcb9o+8KblQ/RvwJE3okrStm1bjBo1CqNGjUKVKlVgaWmJ//73v3jxF+tq1aqFWbNmYcCAAVCr1QgMDAQAbN26FQ0bNoS+vj5q1aqFhQsXaiz/4cOH8Pf3h4mJCezt7bFs2TLZ9EWLFsHNzQ3GxsZwcHBAUFAQHj16pLGcHTt2oH79+jAwMECnTp2QmJgoTSt42rSwbcw/Xda2bVvcvHkTY8eOhUqlgkqlQmZmJszMzPDLL7/I5tu1axeMjY3x8OHDQpeblZWF4OBg2NjYwMDAAO+++y5iYmIA/N8o1/379zFo0CCoVKqXjrxt3LgRzZs3h6mpKezs7ODv74+UlJQit2fhwoX4448/oFKp0LZtWwCASqXCjh07ZLFVqlSR1uvo6AgAaNq0qWy+wk4n9urVCwMGDJBev2wfiI6ORps2bWBoaAgHBwcEBwcjMzPzpbnnv1erVq2Cg4MDjIyM8MEHH8hO+75OPkePHoW3tzeMjIxgbm4OX19fpKamSvPl5eVh4sSJsLCwgJ2dHaZPny5bz6v2x5s3b6J79+4wNzeHsbExGjZsiD179kjTL126hK5du8LExAS2trYICAjAP//889L+IFIyFm9ElWj9+vXQ0dHBiRMn8NVXX2Hx4sX47rvvZDHz58+Hq6srYmNjMXXqVMTGxqJv37746KOPcP78eUyfPh1Tp07VKFDmz5+PRo0a4fTp05g8eTLGjh2LyMhIabqWlha++uorXLhwAevXr8eBAwcwceJE2TIeP36M//3vf1i/fj2OHj2KjIwMfPTRR6Xa1m3btqF69eqYOXMmkpKSkJSUBGNjY3z00UdYu3atLHbt2rXo06cPTE1NC13WxIkTsXXrVqxfvx6nT59G3bp14evriwcPHsDBwQFJSUkwMzPDkiVLkJSUhA8//LDQ5WRnZ+OLL77A2bNnsWPHDiQkJMgKlcK2ITAwEJ6enkhKSsK2bduKte0nT54EAOzbt69E8+UruA+cP38evr6+6N27N86dO4ctW7bgyJEjGDVqVJHLuXbtGn766Sfs2rUL4eHhiIuLw8iRI0uUS2H5xMXFoUOHDmjYsCGOHTuGI0eOoHv37sjNzZXmWb9+PYyNjXHixAnMmzcPM2fOLNH+OHLkSGRlZeGPP/7A+fPnMXfuXGnUNykpCd7e3mjSpAlOnTqF8PBw3L17F3379i3xthEpQjn/8D0RvYS3t7dwdnYWeXl5UtukSZOEs7Oz9LpmzZqiV69esvn8/f1Fp06dZG0TJkwQLi4usvk6d+4si/nwww9Fly5dXprPTz/9JCwtLaXXa9euFQDE8ePHpbb4+HgBQJw4cUIIIcS0adNE48aNpen9+/cXPXv2lG3jmDFjZHktXrxYtt4TJ04IbW1t8ffffwshhLh3757Q1dUVhw4dKjTPR48eCV1dXbFp0yapLTs7W9jb24t58+ZJbWq1Wqxdu/al21uYkydPCgDi4cOHL40ZM2aM8Pb2lrUBENu3b5e1vbj+hIQEAUCcOXNGFlOwf4QQomfPnqJ///7S68L2gYCAADF06FBZ2+HDh4WWlpZ48uRJoXlPmzZNaGtri8TERKlt7969QktLSyQlJb1WPh9//LHw8vIqdL35y3333Xdlbe+8846YNGnSS+cpuD+6ubmJ6dOnFxo7depU4ePjI2tLTEwUAMTly5dfug4ipeLIG1ElatmyJVQqlfTa09MTV69elY1YNG/eXDZPfHw8vLy8ZG1eXl4a83l6espiPD09ER8fL70+ePAgOnXqhGrVqsHU1BT9+vXD/fv3ZafedHR0ZOtv0KABqlSpIlvO62rRogUaNmyIH374AQCwYcMG1KhRA23atCk0/vr168jJyZH1ga6uLlq0aFHivM6cOYOePXuiZs2aMDU1lU5n3rp1q3QbU04K7gOxsbFYt26ddM2hiYkJfH19kZeXh4SEhJcup0aNGrJrAD09PZGXl4fLly+/Vj75I29FadSokex11apVZaeoX7U/BgcHY9asWfDy8sK0adNw7tw5ad7Y2FgcPHhQ1h8NGjQA8Hx/IXrbsHgjesMZGxvLXgshZAVffltx5M938+ZNdO3aFa6urti6dStiY2Px9ddfAwBycnIKnedVba9jyJAh0qnTtWvXYuDAgS9dR/62FtYHJckrMzMTPj4+MDExwcaNGxETE4Pt27cDeH46tSRUKpXGe1CwHwujpaVVrPkK7gN5eXkYNmwY4uLipL+zZ8/i6tWrqFOnTonyfvG/pc3H0NDwlevS1dXVWHdeXh6A4u2PQ4YMwV9//YWAgACcP38ezZs3l67jzMvLQ/fu3WX9ERcXh6tXr770HwFESsbijagSHT9+XON1vXr1oK2t/dJ5XFxccOTIEVlbdHQ06tevL5uvsGXnj0acOnUKz549w8KFC9GyZUvUr18fd+7c0VjXs2fPcOrUKen15cuXkZaWJi2npPT09GSjg/k+/fRT3Lp1C1999RUuXryI/v37v3QZdevWhZ6enqwPcnJycOrUKTg7Oxc7lz///BP//PMPvvzyS7Ru3RoNGjQo8maFolhbWyMpKUl6ffXqVTx+/Fh6nX83ZsFtLzhfbm4uLly48Mr1NWvWDBcvXkTdunU1/oq68/PWrVuy9/nYsWPQ0tJC/fr1XyufRo0aFfnImFcp7v7o4OCA4cOHY9u2bQgNDcXq1asB/F9/1KpVS6M/ChaaRG8DFm9ElSgxMRHjxo3D5cuX8eOPP2LZsmUYM2ZMkfOEhoZi//79+OKLL3DlyhWsX78ey5cvx/jx42VxR48exbx583DlyhV8/fXX+Pnnn6Vl16lTB8+ePcOyZcvw119/YcOGDVi5cqXGunR1dTF69GicOHECp0+fxsCBA9GyZUu0aNGiVNtbq1Yt/PHHH/j7779ldwKam5ujd+/emDBhAnx8fIp8vIexsTFGjBiBCRMmIDw8HJcuXUJgYCAeP36MwYMHFzuXGjVqQE9PT+qDnTt3yp5JVxLt27fH8uXLcfr0aZw6dQrDhw+XjTTZ2NjA0NBQupA+PT1dmm/37t3YvXs3/vzzTwQFBRXrob+TJk3CsWPHMHLkSGmEaefOnRg9enSR8xkYGKB///44e/YsDh8+jODgYPTt2xd2dnavlc/kyZMRExODoKAgnDt3Dn/++SdWrFhR7Ls9i7M/hoSE4Pfff0dCQgJOnz6NAwcOSMX6yJEj8eDBA3z88cc4efIk/vrrL0RERGDQoEGF/mOBSOlYvBFVon79+uHJkydo0aIFRo4cidGjR2Po0KFFztOsWTP89NNPCAsLg6urKz7//HPMnDlT4y7J0NBQxMbGomnTpvjiiy+wcOFC+Pr6AgCaNGmCRYsWYe7cuXB1dcWmTZswZ84cjXUZGRlh0qRJ8Pf3h6enJwwNDREWFlbq7Z05cyZu3LiBOnXqwNraWjZt8ODByM7OxqBBg165nC+//BLvv/8+AgIC0KxZM1y7dg2///47zM3Ni52LtbU11q1bh59//hkuLi748ssvsWDBghJvEwAsXLgQDg4OaNOmDfz9/TF+/HgYGRlJ03V0dPDVV19h1apVsLe3R8+ePQEAgwYNQv/+/dGvXz94e3vD0dER7dq1e+X6GjVqhKioKFy9ehWtW7dG06ZNMXXqVFStWrXI+erWrYvevXuja9eu8PHxgaurK7755htpemnzqV+/PiIiInD27Fm0aNECnp6e+PXXX6GjU7xHiRZnf8zNzcXIkSPh7OyMzp07w8nJScrd3t4eR48eRW5uLnx9feHq6ooxY8ZArVZDS4uHOXr7qERxL5YhojLVtm1bNGnSpMJ+ceBNt2nTJowZMwZ37tzhQ1/LwfTp07Fjxw7+ygPRW4C/sEBElerx48dISEjAnDlzMGzYMBZuRESvwPFkIqpU8+bNQ5MmTWBra4vJkydXdjpERG88njYlIiIiUhCOvBEREREpCIs3IiIiIgVh8UZERESkICzeiIiIiBSExRsRERGRgrB4IyIiIlIQFm9ERERECsLijYiIiEhBWLwRERERKcj/AyRD7hy3DLvhAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot_hist_scores(X_test_segment, score = \"score\", score_adjusted = \"score_adjusted\", type_of_activity = type_of_activity)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "add631d7-0757-45a5-bb5b-f7f4b4baa961",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"projet-bdc2324-team1/Output_expected_CA/sport/\n"
]
}
],
"source": [
"# define path so save graphics\n",
"\n",
"# define type of activity \n",
"type_of_activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_expected_CA/{type_of_activity}/\"\n",
"print(PATH)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "3a5b5bd9-e033-4436-8c56-bf5fb61df87f",
"metadata": {},
"outputs": [],
"source": [
"# export png \n",
"\n",
"# plot adjusted scores and save (to be tested)\n",
"plot_hist_scores(X_test_segment, score = \"score\", score_adjusted = \"score_adjusted\", type_of_activity = type_of_activity)\n",
"\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png')\n",
"image_buffer.seek(0)\n",
"file_name = \"hist_score_adjusted_\"\n",
"FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".png\"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"id": "e6fae260-fab8-4f51-90dc-9b6d7314c77b",
"metadata": {},
"source": [
"## Compute number of tickets and CA by segment with the recalibrated score"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "90c4c2b5-0ede-4001-889f-749cfbd9df04",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>quartile</th>\n",
" <th>score (%)</th>\n",
" <th>score adjusted (%)</th>\n",
" <th>has purchased (%)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>13.25</td>\n",
" <td>2.51</td>\n",
" <td>1.57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>33.89</td>\n",
" <td>8.00</td>\n",
" <td>9.85</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>63.06</td>\n",
" <td>22.58</td>\n",
" <td>21.47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>90.52</td>\n",
" <td>66.20</td>\n",
" <td>65.01</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" quartile score (%) score adjusted (%) has purchased (%)\n",
"0 1 13.25 2.51 1.57\n",
"1 2 33.89 8.00 9.85\n",
"2 3 63.06 22.58 21.47\n",
"3 4 90.52 66.20 65.01"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_table_adjusted_scores = (100 * X_test_segment.groupby(\"quartile\")[[\"score\",\"score_adjusted\", \"has_purchased\"]].mean()).round(2).reset_index()\n",
"X_test_table_adjusted_scores = X_test_table_adjusted_scores.rename(columns = {col : f\"{col.replace('_', ' ')} (%)\" for col in X_test_table_adjusted_scores.columns if col in [\"score\",\"score_adjusted\", \"has_purchased\"]})\n",
"X_test_table_adjusted_scores"
]
},
{
"cell_type": "code",
"execution_count": 162,
"id": "d0b8740c-cf48-4a3e-83cb-23d95059f62f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\\\begin{tabular}{lrrr}\\n\\\\toprule\\nquartile & score (%) & score adjusted (%) & has purchased (%) \\\\\\\\\\n\\\\midrule\\n1 & 13.250000 & 2.510000 & 1.570000 \\\\\\\\\\n2 & 33.890000 & 8.000000 & 9.850000 \\\\\\\\\\n3 & 63.060000 & 22.580000 & 21.470000 \\\\\\\\\\n4 & 90.520000 & 66.200000 & 65.010000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'"
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_table_adjusted_scores.to_latex(index=False)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "d6a04d3e-c454-43e4-ae4c-0746e928575b",
"metadata": {},
"outputs": [],
"source": [
"# comparison between score and adjusted score - export csv associated\n",
"\n",
"file_name = \"table_adjusted_score_\"\n",
"FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".csv\"\n",
"with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
" X_test_table_adjusted_scores.to_csv(file_out, index = False)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "a974589f-7952-4db2-bebf-7b69c6b09372",
"metadata": {},
"outputs": [],
"source": [
"def project_tickets_CA (df, nb_purchases, nb_tickets, total_amount, score_adjusted, duration_ref, duration_projection) :\n",
" \n",
" duration_ratio = duration_ref/duration_projection\n",
"\n",
" df_output = df\n",
"\n",
" df_output.loc[:,\"nb_tickets_projected\"] = df_output.loc[:,nb_tickets] / duration_ratio\n",
" df_output.loc[:,\"total_amount_projected\"] = df_output.loc[:,total_amount] / duration_ratio\n",
" \n",
" df_output.loc[:,\"nb_tickets_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"nb_tickets_projected\"]\n",
" df_output.loc[:,\"total_amount_expected\"] = df_output.loc[:,score_adjusted] * df_output.loc[:,\"total_amount_projected\"]\n",
"\n",
" df_output.loc[:,\"pace_purchase\"] = (duration_ref/df_output.loc[:,nb_purchases]).apply(lambda x : np.nan if x==np.inf else x)\n",
" \n",
" return df_output\n"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "dd8a52e1-d06e-4790-8687-8e58e3e6b84e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>fidelity</th>\n",
" <th>...</th>\n",
" <th>has_purchased</th>\n",
" <th>has_purchased_estim</th>\n",
" <th>score</th>\n",
" <th>quartile</th>\n",
" <th>score_adjusted</th>\n",
" <th>nb_tickets_projected</th>\n",
" <th>total_amount_projected</th>\n",
" <th>nb_tickets_expected</th>\n",
" <th>total_amount_expected</th>\n",
" <th>pace_purchase</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>100.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.177187</td>\n",
" <td>5.177187</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.657671</td>\n",
" <td>3</td>\n",
" <td>0.240397</td>\n",
" <td>2.823529</td>\n",
" <td>70.588235</td>\n",
" <td>0.678768</td>\n",
" <td>16.969205</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>55.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>426.265613</td>\n",
" <td>426.265613</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.266538</td>\n",
" <td>2</td>\n",
" <td>0.056482</td>\n",
" <td>0.705882</td>\n",
" <td>38.823529</td>\n",
" <td>0.039870</td>\n",
" <td>2.192830</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>17.0</td>\n",
" <td>1.0</td>\n",
" <td>80.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>436.033437</td>\n",
" <td>436.033437</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.214668</td>\n",
" <td>1</td>\n",
" <td>0.043089</td>\n",
" <td>12.000000</td>\n",
" <td>56.470588</td>\n",
" <td>0.517065</td>\n",
" <td>2.433249</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>120.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.196412</td>\n",
" <td>5.196412</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.657770</td>\n",
" <td>3</td>\n",
" <td>0.240478</td>\n",
" <td>2.823529</td>\n",
" <td>84.705882</td>\n",
" <td>0.678995</td>\n",
" <td>20.369861</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>34.0</td>\n",
" <td>2.0</td>\n",
" <td>416.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>478.693148</td>\n",
" <td>115.631470</td>\n",
" <td>363.061678</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.894173</td>\n",
" <td>4</td>\n",
" <td>0.581920</td>\n",
" <td>24.000000</td>\n",
" <td>293.647059</td>\n",
" <td>13.966076</td>\n",
" <td>170.879052</td>\n",
" <td>8.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96091</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>67.31</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>278.442257</td>\n",
" <td>278.442257</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.623551</td>\n",
" <td>3</td>\n",
" <td>0.214369</td>\n",
" <td>0.705882</td>\n",
" <td>47.512941</td>\n",
" <td>0.151320</td>\n",
" <td>10.185318</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>61.41</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>189.207373</td>\n",
" <td>189.207373</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.682521</td>\n",
" <td>3</td>\n",
" <td>0.261526</td>\n",
" <td>0.705882</td>\n",
" <td>43.348235</td>\n",
" <td>0.184607</td>\n",
" <td>11.336701</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.117192</td>\n",
" <td>1</td>\n",
" <td>0.021400</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>79.43</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>279.312905</td>\n",
" <td>279.312905</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.625185</td>\n",
" <td>3</td>\n",
" <td>0.215545</td>\n",
" <td>0.705882</td>\n",
" <td>56.068235</td>\n",
" <td>0.152150</td>\n",
" <td>12.085242</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.319585</td>\n",
" <td>2</td>\n",
" <td>0.071817</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>96096 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 4.0 1.0 100.00 1.0 \n",
"1 1.0 1.0 55.00 1.0 \n",
"2 17.0 1.0 80.00 1.0 \n",
"3 4.0 1.0 120.00 1.0 \n",
"4 34.0 2.0 416.00 1.0 \n",
"... ... ... ... ... \n",
"96091 1.0 1.0 67.31 1.0 \n",
"96092 1.0 1.0 61.41 1.0 \n",
"96093 0.0 0.0 0.00 0.0 \n",
"96094 1.0 1.0 79.43 1.0 \n",
"96095 0.0 0.0 0.00 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 5.177187 5.177187 \n",
"1 0.0 426.265613 426.265613 \n",
"2 0.0 436.033437 436.033437 \n",
"3 0.0 5.196412 5.196412 \n",
"4 0.0 478.693148 115.631470 \n",
"... ... ... ... \n",
"96091 1.0 278.442257 278.442257 \n",
"96092 1.0 189.207373 189.207373 \n",
"96093 0.0 550.000000 550.000000 \n",
"96094 1.0 279.312905 279.312905 \n",
"96095 0.0 550.000000 550.000000 \n",
"\n",
" time_between_purchase nb_tickets_internet fidelity ... \\\n",
"0 0.000000 0.0 1 ... \n",
"1 0.000000 0.0 2 ... \n",
"2 0.000000 0.0 2 ... \n",
"3 0.000000 0.0 1 ... \n",
"4 363.061678 0.0 4 ... \n",
"... ... ... ... ... \n",
"96091 0.000000 1.0 2 ... \n",
"96092 0.000000 1.0 1 ... \n",
"96093 -1.000000 0.0 1 ... \n",
"96094 0.000000 1.0 1 ... \n",
"96095 -1.000000 0.0 2 ... \n",
"\n",
" has_purchased has_purchased_estim score quartile score_adjusted \\\n",
"0 0.0 1.0 0.657671 3 0.240397 \n",
"1 1.0 0.0 0.266538 2 0.056482 \n",
"2 0.0 0.0 0.214668 1 0.043089 \n",
"3 0.0 1.0 0.657770 3 0.240478 \n",
"4 1.0 1.0 0.894173 4 0.581920 \n",
"... ... ... ... ... ... \n",
"96091 1.0 1.0 0.623551 3 0.214369 \n",
"96092 0.0 1.0 0.682521 3 0.261526 \n",
"96093 0.0 0.0 0.117192 1 0.021400 \n",
"96094 0.0 1.0 0.625185 3 0.215545 \n",
"96095 0.0 0.0 0.319585 2 0.071817 \n",
"\n",
" nb_tickets_projected total_amount_projected nb_tickets_expected \\\n",
"0 2.823529 70.588235 0.678768 \n",
"1 0.705882 38.823529 0.039870 \n",
"2 12.000000 56.470588 0.517065 \n",
"3 2.823529 84.705882 0.678995 \n",
"4 24.000000 293.647059 13.966076 \n",
"... ... ... ... \n",
"96091 0.705882 47.512941 0.151320 \n",
"96092 0.705882 43.348235 0.184607 \n",
"96093 0.000000 0.000000 0.000000 \n",
"96094 0.705882 56.068235 0.152150 \n",
"96095 0.000000 0.000000 0.000000 \n",
"\n",
" total_amount_expected pace_purchase \n",
"0 16.969205 17.0 \n",
"1 2.192830 17.0 \n",
"2 2.433249 17.0 \n",
"3 20.369861 17.0 \n",
"4 170.879052 8.5 \n",
"... ... ... \n",
"96091 10.185318 17.0 \n",
"96092 11.336701 17.0 \n",
"96093 0.000000 NaN \n",
"96094 12.085242 17.0 \n",
"96095 0.000000 NaN \n",
"\n",
"[96096 rows x 27 columns]"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment = project_tickets_CA (X_test_segment, \"nb_purchases\", \"nb_tickets\", \"total_amount\", \"score_adjusted\", \n",
" duration_ref=17, duration_projection=12)\n",
"X_test_segment"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "cb66a8ea-65f7-460f-b3fc-ba76a3b91faa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"quartile\n",
"1 16.581057\n",
"2 15.840818\n",
"3 14.888091\n",
"4 4.830480\n",
"Name: pace_purchase, dtype: float64"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment.groupby(\"quartile\")[\"pace_purchase\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "f58f9151-2f91-45df-abb7-1ddcf0652adc",
"metadata": {},
"outputs": [],
"source": [
"# generalization with a function\n",
"\n",
"def summary_expected_CA(df, segment, nb_tickets_expected, total_amount_expected, total_amount, pace_purchase,\n",
" duration_ref=1.5, duration_projection=1) :\n",
" \n",
" # compute nb tickets estimated and total amount expected\n",
" df_expected_CA = df.groupby(segment)[[nb_tickets_expected, total_amount_expected]].sum().reset_index()\n",
" \n",
" # number of customers by segment\n",
" df_expected_CA.insert(1, \"size\", df.groupby(segment).size().values)\n",
" \n",
" # size in percent of all customers\n",
" df_expected_CA.insert(2, \"size_perct\", 100 * df_expected_CA[\"size\"]/df_expected_CA[\"size\"].sum())\n",
" \n",
" # compute share of CA recovered\n",
" duration_ratio=duration_ref/duration_projection\n",
" \n",
" df_expected_CA[\"revenue_recovered_perct\"] = 100 * duration_ratio * df_expected_CA[total_amount_expected] / \\\n",
" df.groupby(segment)[total_amount].sum().values\n",
"\n",
" df_drop_null_pace = df.dropna(subset=[pace_purchase])\n",
" df_expected_CA[\"pace_purchase\"] = df_drop_null_pace.groupby(segment)[pace_purchase].mean().values\n",
" \n",
" return df_expected_CA"
]
},
{
"cell_type": "code",
"execution_count": 119,
"id": "c8df6c80-43e8-4f00-9cd3-eb9022744313",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>quartile</th>\n",
" <th>size</th>\n",
" <th>size_perct</th>\n",
" <th>nb_tickets_expected</th>\n",
" <th>total_amount_expected</th>\n",
" <th>revenue_recovered_perct</th>\n",
" <th>pace_purchase</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>37410</td>\n",
" <td>38.93</td>\n",
" <td>89.75</td>\n",
" <td>1977.02</td>\n",
" <td>4.64</td>\n",
" <td>16.58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>29517</td>\n",
" <td>30.72</td>\n",
" <td>3069.83</td>\n",
" <td>78841.08</td>\n",
" <td>10.43</td>\n",
" <td>15.84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>20137</td>\n",
" <td>20.96</td>\n",
" <td>11516.60</td>\n",
" <td>364538.82</td>\n",
" <td>24.19</td>\n",
" <td>14.89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>9032</td>\n",
" <td>9.40</td>\n",
" <td>227853.35</td>\n",
" <td>10481736.51</td>\n",
" <td>95.41</td>\n",
" <td>4.83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" quartile size size_perct nb_tickets_expected total_amount_expected \\\n",
"0 1 37410 38.93 89.75 1977.02 \n",
"1 2 29517 30.72 3069.83 78841.08 \n",
"2 3 20137 20.96 11516.60 364538.82 \n",
"3 4 9032 9.40 227853.35 10481736.51 \n",
"\n",
" revenue_recovered_perct pace_purchase \n",
"0 4.64 16.58 \n",
"1 10.43 15.84 \n",
"2 24.19 14.89 \n",
"3 95.41 4.83 "
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_expected_CA = round(summary_expected_CA(df=X_test_segment, segment=\"quartile\", \n",
" nb_tickets_expected=\"nb_tickets_expected\", total_amount_expected=\"total_amount_expected\", \n",
" total_amount=\"total_amount\", pace_purchase=\"pace_purchase\"),2)\n",
"\n",
"X_test_expected_CA"
]
},
{
"cell_type": "code",
"execution_count": 120,
"id": "ac706ed7-defa-4df1-82e1-06f12fc1b6ad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\\\begin{tabular}{lrrrrrr}\\n\\\\toprule\\nquartile & size & size (%) & nb tickets expected & total amount expected & revenue recovered (%) & pace purchase \\\\\\\\\\n\\\\midrule\\n1 & 37410 & 38.930000 & 89.750000 & 1977.020000 & 4.640000 & 16.580000 \\\\\\\\\\n2 & 29517 & 30.720000 & 3069.830000 & 78841.080000 & 10.430000 & 15.840000 \\\\\\\\\\n3 & 20137 & 20.960000 & 11516.600000 & 364538.820000 & 24.190000 & 14.890000 \\\\\\\\\\n4 & 9032 & 9.400000 & 227853.350000 & 10481736.510000 & 95.410000 & 4.830000 \\\\\\\\\\n\\\\bottomrule\\n\\\\end{tabular}\\n'"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Création du dictionnaire de mapping pour les noms de colonnes\n",
"mapping_dict = {col: col.replace(\"perct\", \"(%)\").replace(\"_\", \" \") for col in X_test_expected_CA.columns}\n",
"\n",
"X_test_expected_CA.rename(columns=mapping_dict).to_latex(index=False)"
]
},
{
"cell_type": "code",
"execution_count": 122,
"id": "771da0cf-c49f-4e7e-b52f-ebcfb0fb2df3",
"metadata": {},
"outputs": [],
"source": [
"# export summary table to the MinIO storage\n",
"\n",
"file_name = \"table_expected_CA_\"\n",
"FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + \".csv\"\n",
"with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:\n",
" X_test_expected_CA.to_csv(file_out, index = False)"
]
},
{
"cell_type": "markdown",
"id": "9c471bdd-25c2-420a-a8a1-3add9f003cbc",
"metadata": {},
"source": [
"## Just to try, same computation with score instead of score adjusted\n",
"\n",
"seems overestimated : if only 14% of customers come back, how can we recover 22% of the revenue from the segment that is least likely to buy ?? ..."
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "53684a24-1809-465f-8e21-b9295e34582a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>quartile</th>\n",
" <th>size</th>\n",
" <th>size_perct</th>\n",
" <th>nb_tickets_expected</th>\n",
" <th>total_amount_expected</th>\n",
" <th>perct_revenue_recovered</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>37410</td>\n",
" <td>38.93</td>\n",
" <td>419.76</td>\n",
" <td>9245.08</td>\n",
" <td>21.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>29517</td>\n",
" <td>30.72</td>\n",
" <td>11549.06</td>\n",
" <td>296522.02</td>\n",
" <td>39.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>20137</td>\n",
" <td>20.96</td>\n",
" <td>29997.85</td>\n",
" <td>954751.91</td>\n",
" <td>63.34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>9032</td>\n",
" <td>9.40</td>\n",
" <td>244655.82</td>\n",
" <td>10736011.95</td>\n",
" <td>97.72</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" quartile size size_perct nb_tickets_expected total_amount_expected \\\n",
"0 1 37410 38.93 419.76 9245.08 \n",
"1 2 29517 30.72 11549.06 296522.02 \n",
"2 3 20137 20.96 29997.85 954751.91 \n",
"3 4 9032 9.40 244655.82 10736011.95 \n",
"\n",
" perct_revenue_recovered \n",
"0 21.71 \n",
"1 39.24 \n",
"2 63.34 \n",
"3 97.72 "
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_bis = project_tickets_CA (X_test_segment, \"nb_tickets\", \"total_amount\", \"score\", duration_ref=1.5, duration_projection=1)\n",
"\n",
"X_test_expected_CA_bis = round(summary_expected_CA(df=X_test_segment_bis, segment=\"quartile\", nb_tickets_expected=\"nb_tickets_expected\", \n",
" total_amount_expected=\"total_amount_expected\", total_amount=\"total_amount\"),2)\n",
"\n",
"X_test_expected_CA_bis"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "7dc66d1e-da03-4513-96e4-d9a43ac0a2c8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"overall share of revenue recovered : 90.26 %\n"
]
}
],
"source": [
"print(\"overall share of revenue recovered : \", round(100 * duration_ratio * X_test_expected_CA_bis[\"total_amount_expected\"].sum() / \\\n",
"X_test_segment_bis[\"total_amount\"].sum(),2), \"%\")"
]
},
{
"cell_type": "markdown",
"id": "673f2969-7b9a-44c1-abf5-5679fca877ce",
"metadata": {},
"source": [
"## Last pieces of analysis"
]
},
{
"cell_type": "code",
"execution_count": 161,
"id": "2365bb13-0f3f-49d5-bf91-52c92abebcee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"overall share of revenue recovered : 77.64%\n"
]
}
],
"source": [
"# global revenue recovered\n",
"global_revenue_recovered = round(100 * duration_ratio * X_test_expected_CA[\"total_amount_expected\"].sum() / \\\n",
"X_test_segment[\"total_amount\"].sum(),2)\n",
"print(f\"overall share of revenue recovered : {global_revenue_recovered}%\")"
]
},
{
"cell_type": "code",
"execution_count": 163,
"id": "16b17f35-57dd-459a-8989-129143dc0952",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.018093\n",
"1 0.721519\n",
"2 3.336101\n",
"3 95.924287\n",
"Name: total_amount_expected, dtype: float64"
]
},
"execution_count": 163,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"100 * X_test_expected_CA[\"total_amount_expected\"]/X_test_expected_CA[\"total_amount_expected\"].sum()"
]
},
{
"cell_type": "code",
"execution_count": 166,
"id": "dee4a200-eefe-4377-8e80-59ad33edd3c0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"quartile\n",
"1 0.320407\n",
"2 5.685020\n",
"3 11.339715\n",
"4 82.654858\n",
"Name: total_amount, dtype: float64"
]
},
"execution_count": 166,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# le segment 4 représente 83% du CA actuel et 96% du CA lié aux anciens clients pour l'année prochaine\n",
"100 * X_test_segment.groupby(\"quartile\")[\"total_amount\"].sum()/X_test_segment[\"total_amount\"].sum()"
]
},
{
"cell_type": "code",
"execution_count": 177,
"id": "c1e6f020-ef18-40b4-bfc1-19f98cb2796e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 96096.000000\n",
"mean 207.475735\n",
"std 4720.046248\n",
"min -48831.800000\n",
"25% 0.000000\n",
"50% 0.000000\n",
"75% 60.000000\n",
"max 624890.000000\n",
"Name: total_amount, dtype: float64"
]
},
"execution_count": 177,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment[\"total_amount\"].describe() # total amount négatif ???\n"
]
},
{
"cell_type": "code",
"execution_count": 184,
"id": "d301a50e-7c68-40f0-9245-a4eea64c387b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 -4.883180e+04\n",
"1 -6.483180e+04\n",
"2 -7.683860e+04\n",
"3 -8.683860e+04\n",
"4 -9.683860e+04\n",
" ... \n",
"96091 1.802247e+07\n",
"96092 1.839238e+07\n",
"96093 1.877219e+07\n",
"96094 1.931270e+07\n",
"96095 1.993759e+07\n",
"Name: total_amount, Length: 96096, dtype: float64"
]
},
"execution_count": 184,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.cumsum(X_test_segment[\"total_amount\"].sort_values()).reset_index()[\"total_amount\"]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}