18752 lines
1.3 MiB
18752 lines
1.3 MiB
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ff8cc602-e733-4a31-bf46-a31087511fe0",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Predict sales - sports companies"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "415e466a-1a71-4150-bff7-2f8904766df4",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Importations"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "b5aaf421-850a-4a86-8e99-2c1f0723bd6c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"import re\n",
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
|
||
"from sklearn.utils import class_weight\n",
|
||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||
"from sklearn.pipeline import Pipeline\n",
|
||
"from sklearn.compose import ColumnTransformer\n",
|
||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||
"from sklearn.impute import SimpleImputer\n",
|
||
"from sklearn.model_selection import GridSearchCV\n",
|
||
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
|
||
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
|
||
"import seaborn as sns\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
|
||
"from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
|
||
"from sklearn.naive_bayes import GaussianNB\n",
|
||
"\n",
|
||
"import pickle\n",
|
||
"import warnings"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c2f44070-451e-4109-9a08-3b80011d610f",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Load data "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "b5f8135f-b6e7-4d6d-b8e1-da185b944aff",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "2668a243-4ff8-40c6-9de2-5c9c07bcf714",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def load_train_test():\n",
|
||
" BUCKET = \"projet-bdc2324-team1/Generalization/sport\"\n",
|
||
" File_path_train = BUCKET + \"/Train_set.csv\"\n",
|
||
" File_path_test = BUCKET + \"/Test_set.csv\"\n",
|
||
" \n",
|
||
" with fs.open( File_path_train, mode=\"rb\") as file_in:\n",
|
||
" dataset_train = pd.read_csv(file_in, sep=\",\")\n",
|
||
" # dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n",
|
||
"\n",
|
||
" with fs.open(File_path_test, mode=\"rb\") as file_in:\n",
|
||
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
|
||
" # dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n",
|
||
" \n",
|
||
" return dataset_train, dataset_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "13eba3e1-3ea5-435b-8b05-6d7d5744cbe2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_1481/2459610029.py:7: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" dataset_train = pd.read_csv(file_in, sep=\",\")\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"customer_id 0\n",
|
||
"nb_tickets 0\n",
|
||
"nb_purchases 0\n",
|
||
"total_amount 0\n",
|
||
"nb_suppliers 0\n",
|
||
"vente_internet_max 0\n",
|
||
"purchase_date_min 0\n",
|
||
"purchase_date_max 0\n",
|
||
"time_between_purchase 0\n",
|
||
"nb_tickets_internet 0\n",
|
||
"street_id 0\n",
|
||
"structure_id 222825\n",
|
||
"mcp_contact_id 70874\n",
|
||
"fidelity 0\n",
|
||
"tenant_id 0\n",
|
||
"is_partner 0\n",
|
||
"deleted_at 224213\n",
|
||
"gender 0\n",
|
||
"is_email_true 0\n",
|
||
"opt_in 0\n",
|
||
"last_buying_date 66139\n",
|
||
"max_price 66139\n",
|
||
"ticket_sum 0\n",
|
||
"average_price 66023\n",
|
||
"average_purchase_delay 66139\n",
|
||
"average_price_basket 66139\n",
|
||
"average_ticket_basket 66139\n",
|
||
"total_price 116\n",
|
||
"purchase_count 0\n",
|
||
"first_buying_date 66139\n",
|
||
"country 23159\n",
|
||
"gender_label 0\n",
|
||
"gender_female 0\n",
|
||
"gender_male 0\n",
|
||
"gender_other 0\n",
|
||
"country_fr 23159\n",
|
||
"nb_campaigns 0\n",
|
||
"nb_campaigns_opened 0\n",
|
||
"time_to_open 123159\n",
|
||
"y_has_purchased 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dataset_train, dataset_test = load_train_test()\n",
|
||
"dataset_train.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "e46622e7-0fc1-43f8-a7e7-34a5e90068b2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def features_target_split(dataset_train, dataset_test):\n",
|
||
" \"\"\"\n",
|
||
" features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
|
||
" 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n",
|
||
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||
" \"\"\"\n",
|
||
"\n",
|
||
" # we suppress fidelity, time between purchase, and gender other (colinearity issue)\n",
|
||
" features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', \n",
|
||
" 'purchase_date_min', 'purchase_date_max', 'nb_tickets_internet', 'is_email_true', \n",
|
||
" 'opt_in', 'gender_female', 'gender_male', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||
" \n",
|
||
" X_train = dataset_train[features_l]\n",
|
||
" y_train = dataset_train[['y_has_purchased']]\n",
|
||
"\n",
|
||
" X_test = dataset_test[features_l]\n",
|
||
" y_test = dataset_test[['y_has_purchased']]\n",
|
||
" return X_train, X_test, y_train, y_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "cec4f386-e643-4bd8-b8cd-8917d2c1b3d0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Shape train : (224213, 14)\n",
|
||
"Shape test : (96096, 14)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)\n",
|
||
"print(\"Shape train : \", X_train.shape)\n",
|
||
"print(\"Shape test : \", X_test.shape)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c9e8edbd-7ff6-42f9-a8eb-10d27ca19c8a",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Logistic"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"id": "639b432a-c39c-4bf8-8ee2-e136d156e0dd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{0.0: 0.5837086520288036, 1.0: 3.486549107420539}"
|
||
]
|
||
},
|
||
"execution_count": 96,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Compute Weights\n",
|
||
"weights = class_weight.compute_class_weight(class_weight = 'balanced', classes = np.unique(y_train['y_has_purchased']),\n",
|
||
" y = y_train['y_has_purchased'])\n",
|
||
"\n",
|
||
"weight_dict = {np.unique(y_train['y_has_purchased'])[i]: weights[i] for i in range(len(np.unique(y_train['y_has_purchased'])))}\n",
|
||
"weight_dict"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"id": "34644a00-85a5-41c9-98df-41178cb3ac69",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>60.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>355.268981</td>\n",
|
||
" <td>355.268981</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>140.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>373.540289</td>\n",
|
||
" <td>219.262269</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>50.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.202442</td>\n",
|
||
" <td>5.202442</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>90.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.178958</td>\n",
|
||
" <td>5.178958</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>78.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.174039</td>\n",
|
||
" <td>5.174039</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224208</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224209</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>20.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>392.501030</td>\n",
|
||
" <td>392.501030</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224210</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224211</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>97.11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>172.334074</td>\n",
|
||
" <td>172.334074</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224212</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>224213 rows × 14 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 2.0 1.0 60.00 1.0 \n",
|
||
"1 8.0 3.0 140.00 1.0 \n",
|
||
"2 2.0 1.0 50.00 1.0 \n",
|
||
"3 3.0 1.0 90.00 1.0 \n",
|
||
"4 2.0 1.0 78.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"224208 0.0 0.0 0.00 0.0 \n",
|
||
"224209 1.0 1.0 20.00 1.0 \n",
|
||
"224210 0.0 0.0 0.00 0.0 \n",
|
||
"224211 1.0 1.0 97.11 1.0 \n",
|
||
"224212 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 355.268981 355.268981 \n",
|
||
"1 0.0 373.540289 219.262269 \n",
|
||
"2 0.0 5.202442 5.202442 \n",
|
||
"3 0.0 5.178958 5.178958 \n",
|
||
"4 0.0 5.174039 5.174039 \n",
|
||
"... ... ... ... \n",
|
||
"224208 0.0 550.000000 550.000000 \n",
|
||
"224209 1.0 392.501030 392.501030 \n",
|
||
"224210 0.0 550.000000 550.000000 \n",
|
||
"224211 1.0 172.334074 172.334074 \n",
|
||
"224212 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"0 0.0 True False 0 \n",
|
||
"1 0.0 True False 0 \n",
|
||
"2 0.0 True False 0 \n",
|
||
"3 0.0 True False 0 \n",
|
||
"4 0.0 True False 1 \n",
|
||
"... ... ... ... ... \n",
|
||
"224208 0.0 True False 0 \n",
|
||
"224209 1.0 True False 0 \n",
|
||
"224210 0.0 True True 0 \n",
|
||
"224211 1.0 True False 0 \n",
|
||
"224212 0.0 True False 0 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened \n",
|
||
"0 1 0.0 0.0 \n",
|
||
"1 1 0.0 0.0 \n",
|
||
"2 1 0.0 0.0 \n",
|
||
"3 1 0.0 0.0 \n",
|
||
"4 0 0.0 0.0 \n",
|
||
"... ... ... ... \n",
|
||
"224208 1 34.0 3.0 \n",
|
||
"224209 1 23.0 6.0 \n",
|
||
"224210 1 8.0 4.0 \n",
|
||
"224211 1 13.0 5.0 \n",
|
||
"224212 1 4.0 4.0 \n",
|
||
"\n",
|
||
"[224213 rows x 14 columns]"
|
||
]
|
||
},
|
||
"execution_count": 97,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"id": "295676df-36ac-43d8-8b31-49ff08efd6e7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# preprocess data \n",
|
||
"# numeric features - standardize\n",
|
||
"# categorical features - encode\n",
|
||
"# encoded features - do nothing\n",
|
||
"\n",
|
||
"numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', \n",
|
||
" 'purchase_date_min', 'purchase_date_max', 'nb_tickets_internet', 'nb_campaigns', \n",
|
||
" 'nb_campaigns_opened' # , 'gender_male', 'gender_female'\n",
|
||
" ]\n",
|
||
"\n",
|
||
"numeric_transformer = Pipeline(steps=[\n",
|
||
" #(\"imputer\", SimpleImputer(strategy=\"mean\")), \n",
|
||
" (\"scaler\", StandardScaler()) \n",
|
||
"])\n",
|
||
"\n",
|
||
"categorical_features = ['opt_in', 'is_email_true'] \n",
|
||
"\n",
|
||
"# Transformer for the categorical features\n",
|
||
"categorical_transformer = Pipeline(steps=[\n",
|
||
" #(\"imputer\", SimpleImputer(strategy=\"most_frequent\")), # Impute missing values with the most frequent\n",
|
||
" (\"onehot\", OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n",
|
||
"])\n",
|
||
"\n",
|
||
"preproc = ColumnTransformer(\n",
|
||
" transformers=[\n",
|
||
" (\"num\", numeric_transformer, numeric_features),\n",
|
||
" (\"cat\", categorical_transformer, categorical_features)\n",
|
||
" ]\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"id": "f46fb56e-c908-40b4-868f-9684d1ae01c2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"nb_tickets 0\n",
|
||
"nb_purchases 0\n",
|
||
"total_amount 0\n",
|
||
"nb_suppliers 0\n",
|
||
"vente_internet_max 0\n",
|
||
"purchase_date_min 0\n",
|
||
"purchase_date_max 0\n",
|
||
"nb_tickets_internet 0\n",
|
||
"nb_campaigns 0\n",
|
||
"nb_campaigns_opened 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 99,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train[numeric_features].isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 100,
|
||
"id": "e729781b-4d65-42c5-bdc5-82b4d653aaf0",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Set loss\n",
|
||
"balanced_scorer = make_scorer(balanced_accuracy_score)\n",
|
||
"recall_scorer = make_scorer(recall_score)\n",
|
||
"f1_scorer = make_scorer(f1_score)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 101,
|
||
"id": "a7ebbe6f-70ba-4276-be18-f10e7bfd7423",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def draw_confusion_matrix(y_test, y_pred):\n",
|
||
" conf_matrix = confusion_matrix(y_test, y_pred)\n",
|
||
" sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Class 0', 'Class 1'], yticklabels=['Class 0', 'Class 1'])\n",
|
||
" plt.xlabel('Predicted')\n",
|
||
" plt.ylabel('Actual')\n",
|
||
" plt.title('Confusion Matrix')\n",
|
||
" plt.show()\n",
|
||
"\n",
|
||
"\n",
|
||
"def draw_roc_curve(X_test, y_test):\n",
|
||
" y_pred_prob = pipeline.predict_proba(X_test)[:, 1]\n",
|
||
"\n",
|
||
" # Calcul des taux de faux positifs (FPR) et de vrais positifs (TPR)\n",
|
||
" fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob, pos_label=1)\n",
|
||
" \n",
|
||
" # Calcul de l'aire sous la courbe ROC (AUC)\n",
|
||
" roc_auc = auc(fpr, tpr)\n",
|
||
" \n",
|
||
" plt.figure(figsize = (14, 8))\n",
|
||
" plt.plot(fpr, tpr, label=\"ROC curve(area = %0.3f)\" % roc_auc)\n",
|
||
" plt.plot([0, 1], [0, 1], color=\"red\",label=\"Random Baseline\", linestyle=\"--\")\n",
|
||
" plt.grid(color='gray', linestyle='--', linewidth=0.5)\n",
|
||
" plt.xlabel('Taux de faux positifs (FPR)')\n",
|
||
" plt.ylabel('Taux de vrais positifs (TPR)')\n",
|
||
" plt.title('Courbe ROC : modèle logistique')\n",
|
||
" plt.legend(loc=\"lower right\")\n",
|
||
" plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 102,
|
||
"id": "2334eb51-e6ea-4fd0-89ce-f54cd474d332",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def draw_features_importance(pipeline, model):\n",
|
||
" coefficients = pipeline.named_steps['logreg'].coef_[0]\n",
|
||
" feature_names = pipeline.named_steps['logreg'].feature_names_in_\n",
|
||
" \n",
|
||
" # Tracer l'importance des caractéristiques\n",
|
||
" plt.figure(figsize=(10, 6))\n",
|
||
" plt.barh(feature_names, coefficients, color='skyblue')\n",
|
||
" plt.xlabel('Importance des caractéristiques')\n",
|
||
" plt.ylabel('Caractéristiques')\n",
|
||
" plt.title('Importance des caractéristiques dans le modèle de régression logistique')\n",
|
||
" plt.grid(True)\n",
|
||
" plt.show()\n",
|
||
"\n",
|
||
"def draw_prob_distribution(X_test):\n",
|
||
" y_pred_prob = pipeline.predict_proba(X_test)[:, 1]\n",
|
||
" plt.figure(figsize=(8, 6))\n",
|
||
" plt.hist(y_pred_prob, bins=10, range=(0, 1), color='blue', alpha=0.7)\n",
|
||
" \n",
|
||
" plt.xlim(0, 1)\n",
|
||
" plt.ylim(0, None)\n",
|
||
" \n",
|
||
" plt.title('Histogramme des probabilités pour la classe 1')\n",
|
||
" plt.xlabel('Probabilité')\n",
|
||
" plt.ylabel('Fréquence')\n",
|
||
" plt.grid(True)\n",
|
||
" plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 103,
|
||
"id": "83917b97-4d9b-4e3c-ba27-1e546ce885d3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Hyperparameter\n",
|
||
"\n",
|
||
"param_c = np.logspace(-10, 4, 15, base=2)\n",
|
||
"# param_penalty_type = ['l1', 'l2', 'elasticnet']\n",
|
||
"param_penalty_type = ['l1']\n",
|
||
"param_grid = {'logreg__C': param_c,\n",
|
||
" 'logreg__penalty': param_penalty_type} "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"id": "3ae25049-920c-4a6d-a59d-c26e3b45dec6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"1024"
|
||
]
|
||
},
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"2 ** 10"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 104,
|
||
"id": "ba4cde9f-a614-4a43-81b9-e16e78aa6c4c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-5 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-5 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-5 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-5 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-5 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-5 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-5 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-5 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> Pipeline<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
" 'nb_suppliers', 'vente_internet_max',\n",
|
||
" 'purchase_date_min', 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet', 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">num</label><div class=\"sk-toggleable__content \"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'nb_tickets_internet', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> StandardScaler<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content \"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in', 'is_email_true']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" 
id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> LogisticRegression<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content \"><pre>LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga'))])"
|
||
]
|
||
},
|
||
"execution_count": 104,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Logistic-regression pipeline: the shared preprocessing step followed by a\n",
|
||
"# class-weighted classifier (weights come from weight_dict).\n",
|
||
"pipeline = Pipeline(\n",
|
||
"    steps=[\n",
|
||
"        ('preprocessor', preproc),\n",
|
||
"        (\n",
|
||
"            'logreg',\n",
|
||
"            LogisticRegression(\n",
|
||
"                class_weight=weight_dict,\n",
|
||
"                max_iter=5000,\n",
|
||
"                solver='saga',\n",
|
||
"            ),\n",
|
||
"        ),\n",
|
||
"    ]\n",
|
||
")\n",
|
||
"\n",
|
||
"# Transformers emit pandas DataFrames; the call returns the pipeline, which is rendered as the cell output.\n",
|
||
"pipeline.set_output(transform=\"pandas\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"id": "1e4c1be5-176d-4222-9b3c-fe27225afe36",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>39626</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>158560</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>20.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>170411</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>62.11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>350.010093</td>\n",
|
||
" <td>350.010093</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>220692</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>84.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.158787</td>\n",
|
||
" <td>5.158787</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>182741</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>19.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>194275</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>38.0</td>\n",
|
||
" <td>19.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>142915</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>26.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>95021</th>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>250.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>382.280455</td>\n",
|
||
" <td>382.279877</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>197603</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>21.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>88679</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>10000 rows × 14 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"39626 0.0 0.0 0.00 0.0 \n",
|
||
"158560 0.0 0.0 0.00 0.0 \n",
|
||
"170411 1.0 1.0 62.11 1.0 \n",
|
||
"220692 1.0 1.0 84.00 1.0 \n",
|
||
"182741 0.0 0.0 0.00 0.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"194275 0.0 0.0 0.00 0.0 \n",
|
||
"142915 0.0 0.0 0.00 0.0 \n",
|
||
"95021 7.0 2.0 250.00 1.0 \n",
|
||
"197603 0.0 0.0 0.00 0.0 \n",
|
||
"88679 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"39626 0.0 550.000000 550.000000 \n",
|
||
"158560 0.0 550.000000 550.000000 \n",
|
||
"170411 1.0 350.010093 350.010093 \n",
|
||
"220692 0.0 5.158787 5.158787 \n",
|
||
"182741 0.0 550.000000 550.000000 \n",
|
||
"... ... ... ... \n",
|
||
"194275 0.0 550.000000 550.000000 \n",
|
||
"142915 0.0 550.000000 550.000000 \n",
|
||
"95021 0.0 382.280455 382.279877 \n",
|
||
"197603 0.0 550.000000 550.000000 \n",
|
||
"88679 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"39626 0.0 True True 0 \n",
|
||
"158560 0.0 True True 0 \n",
|
||
"170411 1.0 True False 0 \n",
|
||
"220692 0.0 True False 0 \n",
|
||
"182741 0.0 True True 0 \n",
|
||
"... ... ... ... ... \n",
|
||
"194275 0.0 True False 1 \n",
|
||
"142915 0.0 True True 0 \n",
|
||
"95021 0.0 True True 0 \n",
|
||
"197603 0.0 True True 0 \n",
|
||
"88679 0.0 True False 0 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened \n",
|
||
"39626 0 9.0 0.0 \n",
|
||
"158560 0 20.0 5.0 \n",
|
||
"170411 1 40.0 23.0 \n",
|
||
"220692 1 0.0 0.0 \n",
|
||
"182741 1 19.0 1.0 \n",
|
||
"... ... ... ... \n",
|
||
"194275 0 38.0 19.0 \n",
|
||
"142915 1 26.0 8.0 \n",
|
||
"95021 0 0.0 0.0 \n",
|
||
"197603 1 21.0 0.0 \n",
|
||
"88679 1 5.0 0.0 \n",
|
||
"\n",
|
||
"[10000 rows x 14 columns]"
|
||
]
|
||
},
|
||
"execution_count": 50,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Subsample the training set to shorten training time.\n",
|
||
"# The sample size is capped at len(X_train): DataFrame.sample raises a ValueError\n",
|
||
"# when asked for more rows than exist (sampling is without replacement by default).\n",
|
||
"X_train_subsample = X_train.sample(n=min(10000, len(X_train)), random_state=43)\n",
|
||
"# Pick the targets by index label so X and y stay aligned row for row.\n",
|
||
"y_train_subsample = y_train.loc[X_train_subsample.index]\n",
|
||
"X_train_subsample"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 108,
|
||
"id": "2b09c2cd-fd5c-49b3-be66-cec6c5ec1351",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>43000</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>183923</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>97373</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>66956</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>116487</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>140473</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>153768</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>110886</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>115390</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>24919</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1000 rows × 1 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" y_has_purchased\n",
|
||
"43000 0.0\n",
|
||
"183923 0.0\n",
|
||
"97373 0.0\n",
|
||
"66956 1.0\n",
|
||
"116487 0.0\n",
|
||
"... ...\n",
|
||
"140473 0.0\n",
|
||
"153768 0.0\n",
|
||
"110886 1.0\n",
|
||
"115390 0.0\n",
|
||
"24919 0.0\n",
|
||
"\n",
|
||
"[1000 rows x 1 columns]"
|
||
]
|
||
},
|
||
"execution_count": 108,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_train_subsample"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 109,
|
||
"id": "6c33fcd8-17d8-4390-b836-faec9ada9acd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-6 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-6 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-6 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-6 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-6 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-6 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-6 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-6 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-6 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-6 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-6 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-6\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> Pipeline<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
" 'nb_suppliers', 'vente_internet_max',\n",
|
||
" 'purchase_date_min', 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet', 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" ><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">num</label><div class=\"sk-toggleable__content \"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'nb_tickets_internet', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" ><label for=\"sk-estimator-id-15\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> StandardScaler<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content \"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" ><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in', 'is_email_true']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control 
sk-hidden--visually\" id=\"sk-estimator-id-17\" type=\"checkbox\" ><label for=\"sk-estimator-id-17\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-18\" type=\"checkbox\" ><label for=\"sk-estimator-id-18\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> LogisticRegression<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content \"><pre>LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga'))])"
|
||
]
|
||
},
|
||
"execution_count": 109,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pipeline"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 110,
|
||
"id": "710ccccc-50c9-4aba-8cf1-11483dbbdd1c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'logreg__C': array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01]),\n",
|
||
" 'logreg__penalty': ['l1']}"
|
||
]
|
||
},
|
||
"execution_count": 110,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"param_grid"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"id": "ab078cf8-0d4c-4b23-9f33-2483cf605b06",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"make_scorer(f1_score, response_method='predict')"
|
||
]
|
||
},
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"f1_scorer"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"id": "8062169e-8305-42b0-aeff-8f714117da40",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>39626</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>158560</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>20.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>170411</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>62.11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>350.010093</td>\n",
|
||
" <td>350.010093</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>220692</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>84.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.158787</td>\n",
|
||
" <td>5.158787</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>182741</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>19.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>194275</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>38.0</td>\n",
|
||
" <td>19.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>142915</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>26.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>95021</th>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>250.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>382.280455</td>\n",
|
||
" <td>382.279877</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>197603</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>21.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>88679</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>10000 rows × 14 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"39626 0.0 0.0 0.00 0.0 \n",
|
||
"158560 0.0 0.0 0.00 0.0 \n",
|
||
"170411 1.0 1.0 62.11 1.0 \n",
|
||
"220692 1.0 1.0 84.00 1.0 \n",
|
||
"182741 0.0 0.0 0.00 0.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"194275 0.0 0.0 0.00 0.0 \n",
|
||
"142915 0.0 0.0 0.00 0.0 \n",
|
||
"95021 7.0 2.0 250.00 1.0 \n",
|
||
"197603 0.0 0.0 0.00 0.0 \n",
|
||
"88679 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"39626 0.0 550.000000 550.000000 \n",
|
||
"158560 0.0 550.000000 550.000000 \n",
|
||
"170411 1.0 350.010093 350.010093 \n",
|
||
"220692 0.0 5.158787 5.158787 \n",
|
||
"182741 0.0 550.000000 550.000000 \n",
|
||
"... ... ... ... \n",
|
||
"194275 0.0 550.000000 550.000000 \n",
|
||
"142915 0.0 550.000000 550.000000 \n",
|
||
"95021 0.0 382.280455 382.279877 \n",
|
||
"197603 0.0 550.000000 550.000000 \n",
|
||
"88679 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"39626 0.0 True True 0 \n",
|
||
"158560 0.0 True True 0 \n",
|
||
"170411 1.0 True False 0 \n",
|
||
"220692 0.0 True False 0 \n",
|
||
"182741 0.0 True True 0 \n",
|
||
"... ... ... ... ... \n",
|
||
"194275 0.0 True False 1 \n",
|
||
"142915 0.0 True True 0 \n",
|
||
"95021 0.0 True True 0 \n",
|
||
"197603 0.0 True True 0 \n",
|
||
"88679 0.0 True False 0 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened \n",
|
||
"39626 0 9.0 0.0 \n",
|
||
"158560 0 20.0 5.0 \n",
|
||
"170411 1 40.0 23.0 \n",
|
||
"220692 1 0.0 0.0 \n",
|
||
"182741 1 19.0 1.0 \n",
|
||
"... ... ... ... \n",
|
||
"194275 0 38.0 19.0 \n",
|
||
"142915 1 26.0 8.0 \n",
|
||
"95021 0 0.0 0.0 \n",
|
||
"197603 1 21.0 0.0 \n",
|
||
"88679 1 5.0 0.0 \n",
|
||
"\n",
|
||
"[10000 rows x 14 columns]"
|
||
]
|
||
},
|
||
"execution_count": 51,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train_subsample"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"id": "0270013a-6523-4cf8-8de0-569c0d1c5db5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"warnings.filterwarnings('ignore')\n",
|
||
"warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
|
||
"warnings.filterwarnings(\"ignore\", category=DataConversionWarning)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"id": "7a49d78a-5a9b-44a9-95cf-3fca1b3febfa",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Returned hyperparameter: {'logreg__C': 0.0625, 'logreg__penalty': 'l1'}\n",
|
||
"Best classification F1 score in train is: 0.462769170101807\n",
|
||
"Classification F1 score on test is: 0.46474681703251214\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# run the pipeline on the subsample\n",
|
||
"\n",
|
||
"logit_grid = GridSearchCV(pipeline, param_grid, cv=3, scoring = f1_scorer #, error_score=\"raise\"\n",
|
||
" )\n",
|
||
"logit_grid.fit(X_train_subsample, y_train_subsample)\n",
|
||
"\n",
|
||
"# print results\n",
|
||
"print('Returned hyperparameter: {}'.format(logit_grid.best_params_))\n",
|
||
"print('Best classification F1 score in train is: {}'.format(logit_grid.best_score_))\n",
|
||
"print('Classification F1 score on test is: {}'.format(logit_grid.score(X_test, y_test)))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 114,
|
||
"id": "b1d5e71d-1078-4370-86e8-52b1ae378898",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01])"
|
||
]
|
||
},
|
||
"execution_count": 114,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"param_c"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 96,
|
||
"id": "cfe04739-fe9c-4802-9d34-885a8cfce0dc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-12 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-12 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-12 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-12 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-12 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-12 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-12 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-12 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-12 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-12 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-12 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-12\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=3,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets',\n",
|
||
" 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[(...\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000,\n",
|
||
" solver='saga'))]),\n",
|
||
" param_grid={'logreg__C': array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01]),\n",
|
||
" 'logreg__penalty': ['l1']},\n",
|
||
" scoring=make_scorer(f1_score, response_method='predict'))</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-75\" type=\"checkbox\" ><label for=\"sk-estimator-id-75\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> GridSearchCV<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>GridSearchCV(cv=3,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets',\n",
|
||
" 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[(...\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000,\n",
|
||
" solver='saga'))]),\n",
|
||
" param_grid={'logreg__C': array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01]),\n",
|
||
" 'logreg__penalty': ['l1']},\n",
|
||
" scoring=make_scorer(f1_score, response_method='predict'))</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-76\" type=\"checkbox\" ><label for=\"sk-estimator-id-76\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">estimator: Pipeline</label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-77\" type=\"checkbox\" ><label for=\"sk-estimator-id-77\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
" 'nb_suppliers', 'vente_internet_max',\n",
|
||
" 'purchase_date_min', 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet', 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-78\" type=\"checkbox\" ><label for=\"sk-estimator-id-78\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">num</label><div class=\"sk-toggleable__content \"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'nb_tickets_internet', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-79\" type=\"checkbox\" ><label for=\"sk-estimator-id-79\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> StandardScaler<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content \"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-80\" type=\"checkbox\" ><label for=\"sk-estimator-id-80\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in', 'is_email_true']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control 
sk-hidden--visually\" id=\"sk-estimator-id-81\" type=\"checkbox\" ><label for=\"sk-estimator-id-81\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-82\" type=\"checkbox\" ><label for=\"sk-estimator-id-82\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> LogisticRegression<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content \"><pre>LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div></div></div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"GridSearchCV(cv=3,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets',\n",
|
||
" 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[(...\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000,\n",
|
||
" solver='saga'))]),\n",
|
||
" param_grid={'logreg__C': array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01]),\n",
|
||
" 'logreg__penalty': ['l1']},\n",
|
||
" scoring=make_scorer(f1_score, response_method='predict'))"
|
||
]
|
||
},
|
||
"execution_count": 96,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"logit_grid = GridSearchCV(pipeline, param_grid, cv=3, scoring = f1_scorer #, error_score=\"raise\"\n",
|
||
" )\n",
|
||
"logit_grid"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 97,
|
||
"id": "6debc66c-a56d-41fa-8ef8-ba388e0e14fe",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'logreg__C': array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01]),\n",
|
||
" 'logreg__penalty': ['l1']}"
|
||
]
|
||
},
|
||
"execution_count": 97,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"param_grid"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"id": "e394cc04-5d0b-4a64-9aa0-415dc8a3cbbc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Run the grid search on the full training sample.\n",
|
||
"# A fresh GridSearchCV is built here so re-running this cell always starts\n",
|
||
"# from an unfitted search: 3-fold CV, candidates ranked with f1_scorer.\n",
|
||
"# While debugging failing fits, also pass error_score=\"raise\".\n",
|
||
"logit_grid = GridSearchCV(\n",
|
||
"    pipeline,\n",
|
||
"    param_grid,\n",
|
||
"    cv=3,\n",
|
||
"    scoring=f1_scorer,\n",
|
||
")\n",
|
||
"logit_grid.fit(X_train, y_train)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"id": "8e6cf558-a4f4-4159-9835-364ee3bb1ed2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Returned hyperparameter: {'logreg__C': 0.03125, 'logreg__penalty': 'l1'}\n",
|
||
"Best cross-validated F1 score is: 0.42160313383818665\n",
|
||
"Classification F1 score on test is: 0.47078982841737305\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Report the selected hyperparameters and the F1 scores.\n",
|
||
"# best_score_ is the mean cross-validated F1 of the best candidate, so it is\n",
|
||
"# labelled as a cross-validation score rather than as a train-set score.\n",
|
||
"print(f'Returned hyperparameter: {logit_grid.best_params_}')\n",
|
||
"print(f'Best cross-validated F1 score is: {logit_grid.best_score_}')\n",
|
||
"print(f'Classification F1 score on test is: {logit_grid.score(X_test, y_test)}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 100,
|
||
"id": "e2ff26cb-f137-4a23-9add-bdb61bebdf9c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-13 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-13 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-13 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-13 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-13 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-13 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-13 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-13 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-13 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-13 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-13 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-13\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=3,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets',\n",
|
||
" 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[(...\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000,\n",
|
||
" solver='saga'))]),\n",
|
||
" param_grid={'logreg__C': array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01]),\n",
|
||
" 'logreg__penalty': ['l1']},\n",
|
||
" scoring=make_scorer(f1_score, response_method='predict'))</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-83\" type=\"checkbox\" ><label for=\"sk-estimator-id-83\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> GridSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>GridSearchCV(cv=3,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets',\n",
|
||
" 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[(...\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000,\n",
|
||
" solver='saga'))]),\n",
|
||
" param_grid={'logreg__C': array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01]),\n",
|
||
" 'logreg__penalty': ['l1']},\n",
|
||
" scoring=make_scorer(f1_score, response_method='predict'))</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-84\" type=\"checkbox\" ><label for=\"sk-estimator-id-84\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">estimator: Pipeline</label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-85\" type=\"checkbox\" ><label for=\"sk-estimator-id-85\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
" 'nb_suppliers', 'vente_internet_max',\n",
|
||
" 'purchase_date_min', 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet', 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-86\" type=\"checkbox\" ><label for=\"sk-estimator-id-86\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">num</label><div class=\"sk-toggleable__content fitted\"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'nb_tickets_internet', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-87\" type=\"checkbox\" ><label for=\"sk-estimator-id-87\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> StandardScaler<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-88\" type=\"checkbox\" ><label for=\"sk-estimator-id-88\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">cat</label><div class=\"sk-toggleable__content fitted\"><pre>['opt_in', 'is_email_true']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div 
class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-89\" type=\"checkbox\" ><label for=\"sk-estimator-id-89\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> OneHotEncoder<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-90\" type=\"checkbox\" ><label for=\"sk-estimator-id-90\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div></div></div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"GridSearchCV(cv=3,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets',\n",
|
||
" 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[(...\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000,\n",
|
||
" solver='saga'))]),\n",
|
||
" param_grid={'logreg__C': array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01]),\n",
|
||
" 'logreg__penalty': ['l1']},\n",
|
||
" scoring=make_scorer(f1_score, response_method='predict'))"
|
||
]
|
||
},
|
||
"execution_count": 100,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"logit_grid"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 105,
|
||
"id": "5d553da2-5c2a-491a-b4d2-f31c30c201a6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'scoring': make_scorer(f1_score, response_method='predict'),\n",
|
||
" 'estimator': Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, solver='saga'))]),\n",
|
||
" 'n_jobs': None,\n",
|
||
" 'refit': True,\n",
|
||
" 'cv': 3,\n",
|
||
" 'verbose': 0,\n",
|
||
" 'pre_dispatch': '2*n_jobs',\n",
|
||
" 'error_score': nan,\n",
|
||
" 'return_train_score': False,\n",
|
||
" 'param_grid': {'logreg__C': array([9.765625e-04, 1.953125e-03, 3.906250e-03, 7.812500e-03,\n",
|
||
" 1.562500e-02, 3.125000e-02, 6.250000e-02, 1.250000e-01,\n",
|
||
" 2.500000e-01, 5.000000e-01, 1.000000e+00, 2.000000e+00,\n",
|
||
" 4.000000e+00, 8.000000e+00, 1.600000e+01]),\n",
|
||
" 'logreg__penalty': ['l1']},\n",
|
||
" 'multimetric_': False,\n",
|
||
" 'best_index_': 5,\n",
|
||
" 'best_score_': 0.42160313383818665,\n",
|
||
" 'best_params_': {'logreg__C': 0.03125, 'logreg__penalty': 'l1'},\n",
|
||
" 'best_estimator_': Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler',\n",
|
||
" StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases',\n",
|
||
" 'total_amount',\n",
|
||
" 'nb_suppliers',\n",
|
||
" 'vente_internet_max',\n",
|
||
" 'purchase_date_min',\n",
|
||
" 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet',\n",
|
||
" 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('logreg',\n",
|
||
" LogisticRegression(C=0.03125,\n",
|
||
" class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, penalty='l1',\n",
|
||
" solver='saga'))]),\n",
|
||
" 'refit_time_': 305.1356477737427,\n",
|
||
" 'feature_names_in_': array(['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers',\n",
|
||
" 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet', 'is_email_true', 'opt_in', 'gender_female',\n",
|
||
" 'gender_male', 'nb_campaigns', 'nb_campaigns_opened'], dtype=object),\n",
|
||
" 'scorer_': make_scorer(f1_score, response_method='predict'),\n",
|
||
" 'cv_results_': {'mean_fit_time': array([ 11.07076669, 13.15744201, 27.35094929, 40.0343461 ,\n",
|
||
" 94.58210254, 140.45846391, 159.83818332, 162.80178094,\n",
|
||
" 163.94260454, 171.08749111, 169.26621262, 166.36741408,\n",
|
||
" 167.91208776, 173.06720233, 170.93666704]),\n",
|
||
" 'std_fit_time': array([ 0.09462032, 1.51362591, 6.70859141, 22.68643753, 28.72690872,\n",
|
||
" 70.8434823 , 85.23159321, 79.71538593, 82.70486235, 84.79706797,\n",
|
||
" 86.79005212, 84.67956107, 83.94889047, 89.68716252, 89.41361431]),\n",
|
||
" 'mean_score_time': array([0.11632609, 0.10857773, 0.18140252, 0.1291213 , 0.11651532,\n",
|
||
" 0.07535577, 0.12481014, 0.16039928, 0.15685773, 0.07996233,\n",
|
||
" 0.12988146, 0.10067987, 0.1194102 , 0.09737802, 0.09390028]),\n",
|
||
" 'std_score_time': array([0.02131792, 0.03620144, 0.05853886, 0.06555575, 0.03228018,\n",
|
||
" 0.01433186, 0.03501336, 0.05466042, 0.06882891, 0.01002881,\n",
|
||
" 0.00495894, 0.00905774, 0.04075337, 0.03269379, 0.01990173]),\n",
|
||
" 'param_logreg__C': masked_array(data=[0.0009765625, 0.001953125, 0.00390625, 0.0078125,\n",
|
||
" 0.015625, 0.03125, 0.0625, 0.125, 0.25, 0.5, 1.0, 2.0,\n",
|
||
" 4.0, 8.0, 16.0],\n",
|
||
" mask=[False, False, False, False, False, False, False, False,\n",
|
||
" False, False, False, False, False, False, False],\n",
|
||
" fill_value='?',\n",
|
||
" dtype=object),\n",
|
||
" 'param_logreg__penalty': masked_array(data=['l1', 'l1', 'l1', 'l1', 'l1', 'l1', 'l1', 'l1', 'l1',\n",
|
||
" 'l1', 'l1', 'l1', 'l1', 'l1', 'l1'],\n",
|
||
" mask=[False, False, False, False, False, False, False, False,\n",
|
||
" False, False, False, False, False, False, False],\n",
|
||
" fill_value='?',\n",
|
||
" dtype=object),\n",
|
||
" 'params': [{'logreg__C': 0.0009765625, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 0.001953125, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 0.00390625, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 0.0078125, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 0.015625, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 0.03125, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 0.0625, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 0.125, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 0.25, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 0.5, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 1.0, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 2.0, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 4.0, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 8.0, 'logreg__penalty': 'l1'},\n",
|
||
" {'logreg__C': 16.0, 'logreg__penalty': 'l1'}],\n",
|
||
" 'split0_test_score': array([0.27289073, 0.2738913 , 0.27382853, 0.27409759, 0.27454764,\n",
|
||
" 0.27661894, 0.2766145 , 0.27584723, 0.27571682, 0.27576295,\n",
|
||
" 0.27580092, 0.27577943, 0.27581248, 0.27581909, 0.27581909]),\n",
|
||
" 'split1_test_score': array([0.4714244 , 0.47196015, 0.48362373, 0.48891733, 0.49066854,\n",
|
||
" 0.49091122, 0.49086284, 0.49065871, 0.49062783, 0.49049541,\n",
|
||
" 0.49048106, 0.49045238, 0.49043804, 0.49043804, 0.4904237 ]),\n",
|
||
" 'split2_test_score': array([0.50689906, 0.50092334, 0.4981377 , 0.49759178, 0.49725836,\n",
|
||
" 0.49727924, 0.49708801, 0.49738305, 0.49751781, 0.49738248,\n",
|
||
" 0.49738248, 0.49738248, 0.49738248, 0.49738248, 0.49738248]),\n",
|
||
" 'mean_test_score': array([0.4170714 , 0.4155916 , 0.41852999, 0.42020223, 0.42082484,\n",
|
||
" 0.42160313, 0.42152178, 0.42129633, 0.42128749, 0.42121361,\n",
|
||
" 0.42122149, 0.42120476, 0.421211 , 0.4212132 , 0.42120842]),\n",
|
||
" 'std_test_score': array([0.10297463, 0.1008925 , 0.10249081, 0.10337226, 0.10346859,\n",
|
||
" 0.10255226, 0.10249644, 0.10288467, 0.10297243, 0.10288758,\n",
|
||
" 0.10286646, 0.10287015, 0.10285136, 0.10284824, 0.10284503]),\n",
|
||
" 'rank_test_score': array([14, 15, 13, 12, 11, 1, 2, 3, 4, 6, 5, 10, 8, 7, 9],\n",
|
||
" dtype=int32)},\n",
|
||
" 'n_splits_': 3}"
|
||
]
|
||
},
|
||
"execution_count": 105,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"vars(logit_grid)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"id": "3573f34e-25d5-4afb-82cc-52323e2f63c6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([[ 0.67553011, 0. , 0.14254288, 0.41574295, 0.03458744,\n",
|
||
" 0.64769185, -1.20510095, 0. , 0.01018587, 0.13959519,\n",
|
||
" 0.24222266, -0.68253886, 0. , 0. ]])"
|
||
]
|
||
},
|
||
"execution_count": 56,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Coefficients of the best model selected by the grid search (refit best_estimator_)\n",
|
||
"logit_grid.best_estimator_[\"logreg\"].coef_"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"id": "0332a814-61fb-4b71-836a-e8ace70b1a44",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'preprocessor': ColumnTransformer(transformers=[('num',\n",
|
||
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
|
||
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
" 'nb_suppliers', 'vente_internet_max',\n",
|
||
" 'purchase_date_min', 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet', 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened']),\n",
|
||
" ('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])]),\n",
|
||
" 'logreg': LogisticRegression(C=0.0625,\n",
|
||
" class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539},\n",
|
||
" max_iter=5000, penalty='l1', solver='saga')}"
|
||
]
|
||
},
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"logit_grid.best_estimator_.named_steps"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"id": "287615b9-e062-4b84-be61-26b9364b2cf4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([-0.44041477])"
|
||
]
|
||
},
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"logit_grid.best_estimator_[\"logreg\"].intercept_"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 115,
|
||
"id": "4d50899d-cc0b-4a71-9406-f8b0a277c4a6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>60.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>355.268981</td>\n",
|
||
" <td>355.268981</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>140.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>373.540289</td>\n",
|
||
" <td>219.262269</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>50.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.202442</td>\n",
|
||
" <td>5.202442</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>90.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.178958</td>\n",
|
||
" <td>5.178958</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>78.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.174039</td>\n",
|
||
" <td>5.174039</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224208</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224209</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>20.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>392.501030</td>\n",
|
||
" <td>392.501030</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224210</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224211</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>97.11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>172.334074</td>\n",
|
||
" <td>172.334074</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224212</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>224213 rows × 14 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 2.0 1.0 60.00 1.0 \n",
|
||
"1 8.0 3.0 140.00 1.0 \n",
|
||
"2 2.0 1.0 50.00 1.0 \n",
|
||
"3 3.0 1.0 90.00 1.0 \n",
|
||
"4 2.0 1.0 78.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"224208 0.0 0.0 0.00 0.0 \n",
|
||
"224209 1.0 1.0 20.00 1.0 \n",
|
||
"224210 0.0 0.0 0.00 0.0 \n",
|
||
"224211 1.0 1.0 97.11 1.0 \n",
|
||
"224212 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 355.268981 355.268981 \n",
|
||
"1 0.0 373.540289 219.262269 \n",
|
||
"2 0.0 5.202442 5.202442 \n",
|
||
"3 0.0 5.178958 5.178958 \n",
|
||
"4 0.0 5.174039 5.174039 \n",
|
||
"... ... ... ... \n",
|
||
"224208 0.0 550.000000 550.000000 \n",
|
||
"224209 1.0 392.501030 392.501030 \n",
|
||
"224210 0.0 550.000000 550.000000 \n",
|
||
"224211 1.0 172.334074 172.334074 \n",
|
||
"224212 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"0 0.0 True False 0 \n",
|
||
"1 0.0 True False 0 \n",
|
||
"2 0.0 True False 0 \n",
|
||
"3 0.0 True False 0 \n",
|
||
"4 0.0 True False 1 \n",
|
||
"... ... ... ... ... \n",
|
||
"224208 0.0 True False 0 \n",
|
||
"224209 1.0 True False 0 \n",
|
||
"224210 0.0 True True 0 \n",
|
||
"224211 1.0 True False 0 \n",
|
||
"224212 0.0 True False 0 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened \n",
|
||
"0 1 0.0 0.0 \n",
|
||
"1 1 0.0 0.0 \n",
|
||
"2 1 0.0 0.0 \n",
|
||
"3 1 0.0 0.0 \n",
|
||
"4 0 0.0 0.0 \n",
|
||
"... ... ... ... \n",
|
||
"224208 1 34.0 3.0 \n",
|
||
"224209 1 23.0 6.0 \n",
|
||
"224210 1 8.0 4.0 \n",
|
||
"224211 1 13.0 5.0 \n",
|
||
"224212 1 4.0 4.0 \n",
|
||
"\n",
|
||
"[224213 rows x 14 columns]"
|
||
]
|
||
},
|
||
"execution_count": 115,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# c'est la 2ème variable nb_purchases qui a été supprimée par le LASSO\n",
|
||
"X_train"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"id": "e53b1f79-762d-4f1f-8505-91de1088af42",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"16.0"
|
||
]
|
||
},
|
||
"execution_count": 59,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# best param: regularization strength alpha = 1/C (16 for the run recorded below; alpha = 1/4 on the small subsample)\n",
|
||
"1/logit_grid.best_params_[\"logreg__C\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"id": "41bcaaf6-ab58-4004-a3c5-586d77e872d1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Accuracy Score: 0.7510718448218449\n",
|
||
"F1 Score: 0.46474681703251214\n",
|
||
"Recall Score: 0.7585829072315559\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# print results for the best model\n",
|
||
"\n",
|
||
"y_pred = logit_grid.predict(X_test)\n",
|
||
"\n",
|
||
"# Compute the accuracy, F1 and recall scores on the test set\n",
|
||
"acc = accuracy_score(y_test, y_pred)\n",
|
||
"print(f\"Accuracy Score: {acc}\")\n",
|
||
"\n",
|
||
"f1 = f1_score(y_test, y_pred)\n",
|
||
"print(f\"F1 Score: {f1}\")\n",
|
||
"\n",
|
||
"recall = recall_score(y_test, y_pred)\n",
|
||
"print(f\"Recall Score: {recall}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"id": "a454bb57-76eb-4a22-9950-0733d39e449f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 2 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# confusion matrix \n",
|
||
"\n",
|
||
"draw_confusion_matrix(y_test, y_pred)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 61,
|
||
"id": "25ec1701-ade5-4419-8b46-8a1bb109cf84",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1400x800 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# ROC curve\n",
|
||
"\n",
|
||
"# Calcul des taux de faux positifs (FPR) et de vrais positifs (TPR)\n",
|
||
"y_pred_prob = logit_grid.predict_proba(X_test)[:, 1]\n",
|
||
"\n",
|
||
"fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob, pos_label=1)\n",
|
||
"\n",
|
||
"# Calcul de l'aire sous la courbe ROC (AUC)\n",
|
||
"roc_auc = auc(fpr, tpr)\n",
|
||
"\n",
|
||
"plt.figure(figsize = (14, 8))\n",
|
||
"plt.plot(fpr, tpr, label=\"ROC curve(area = %0.3f)\" % roc_auc)\n",
|
||
"plt.plot([0, 1], [0, 1], color=\"red\",label=\"Random Baseline\", linestyle=\"--\")\n",
|
||
"plt.grid(color='gray', linestyle='--', linewidth=0.5)\n",
|
||
"plt.xlabel('Taux de faux positifs (FPR)')\n",
|
||
"plt.ylabel('Taux de vrais positifs (TPR)')\n",
|
||
"plt.title('Courbe ROC : modèle logistique')\n",
|
||
"plt.legend(loc=\"lower right\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"id": "3b5c9485-511b-4f6b-b667-154f4f519682",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# utilisation d'une métrique plus adaptée aux modèles de marketing : courbe de lift\n",
|
||
"\n",
|
||
"# Tri des prédictions de probabilités et des vraies valeurs\n",
|
||
"sorted_indices = np.argsort(y_pred_prob)[::-1]\n",
|
||
"y_pred_prob_sorted = y_pred_prob[sorted_indices]\n",
|
||
"y_test_sorted = y_test.iloc[sorted_indices]\n",
|
||
"\n",
|
||
"# Calcul du gain cumulatif\n",
|
||
"cumulative_gain = np.cumsum(y_test_sorted) / np.sum(y_test_sorted)\n",
|
||
"\n",
|
||
"# Plot the cumulative gain curve (share of positives captured vs. share of customers targeted), labelled 'Courbe de lift'\n",
|
||
"plt.plot(np.linspace(0, 1, len(cumulative_gain)), cumulative_gain, label='Courbe de lift')\n",
|
||
"plt.xlabel('Part de clients identifiés sans modèle ')\n",
|
||
"plt.ylabel('Part de clients identifiés avec modèle')\n",
|
||
"plt.title('Courbe de Lift')\n",
|
||
"plt.legend()\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"id": "6e7cfb6c-8049-4bd1-8d82-61a2e97b257d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGdCAYAAAAbudkLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAmeElEQVR4nO3df1BVd37/8dcNv0QGTkECl7uyxnSU1cVNW9zww93VJAa0InWTjrZ07mjHxWSMEio01bXTmM5GsjFqunVjrWNj1mBwusZsOrgsZJIQWUUNldkQretutMIExB94AWMvhJzvHzueb64Y4yVckQ/Px8yZ4Zzzvue+D5/B+/Jzz7nXZdu2LQAAAAPdNdwNAAAAhApBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgrPDhbmA4ffbZZ/r4448VGxsrl8s13O0AAIBbYNu2uru75fF4dNddN5+zGdVB5+OPP1ZqaupwtwEAAAahpaVF48ePv2nNqA46sbGxkv7wi4qLixvmbgAAwK3o6upSamqq8zp+M6M66Fx7uyouLo6gAwDACHMrl51wMTIAADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAscKHuwGT3bO6KiTHPfPcvJAcFwAA0zCjAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwVlBBp7y8XN/+9rcVGxurpKQkLViwQCdPngyoWbJkiVwuV8CSlZUVUOP3+7Vy5UolJiYqJiZGBQUFam1tDajp7OyU1+uVZVmyLEter1eXL18OqDl79qzmz5+vmJgYJSYmqri4WL29vcGcEgAAMFhQQaeurk5PPPGEGhoaVFtbq08//VS5ubm6cuVKQN2cOXPU1tbmLPv37w/YX1JSon379qmyslL19fXq6elRfn6++vv7nZrCwkI1NTWpurpa1dXVampqktfrdfb39/dr3rx5unLliurr61VZWam9e/eqtLR0ML8HAABgoKA+Gbm6ujpg/eWXX1ZSUpIaGxv1ve99z9keFRUlt9t9w2P4fD7t2LFDu3bt0uzZsyVJr776qlJTU/XWW28pLy9PJ06cUHV1tRoaGpSZmSlJ2r59u7Kzs3Xy5EmlpaWppqZGx48fV0tLizwejyRp48aNWrJkiZ599lnFxcUFc2oAAMBAX+kaHZ/PJ0lKSEgI2P7uu+8qKSlJkydPVlFRkTo6Opx9jY2N6uvrU25urrPN4/EoPT1dBw8elCQdOnRIlmU5IUeSsrKyZFlWQE16eroTciQpLy9Pfr9fjY2NN+zX7/erq6srYAEAAOYadNCxbVurVq3Sd77zHaWnpzvb586dq4qKCr399tvauHGjjh49qgcffFB+v1+S1N7ersjISMXHxwccLzk5We3t7U5NUlLSgOdMSkoKqElOTg7YHx8fr8jISKfmeuXl5c41P5ZlKTU1dbCnDwAARoBBf6nnihUr9Jvf/Eb19fUB2xctWuT8nJ6erunTp2vChAmqqqrSI4888oXHs21bLpfLWf/8z1+l5vPWrFmjVatWOetdXV2EHQAADDaoGZ2VK1fqzTff1DvvvKPx48fftDYlJUUTJkzQqVOnJElut1u9vb3q7OwMqOvo6HBmaNxut86dOzfgWOfPnw+ouX7mprOzU319fQNmeq6JiopSXFxcwAIAAMwVVNCxbVsrVqzQ66+/rrffflsTJ0780sdcvHhRLS0tSklJkSRlZGQoIiJCtbW1Tk1bW5uam5uVk5MjScrOzpbP59ORI0ecmsOHD8vn8wXUNDc3q62tzampqalRVFSUMjIygjktAABgqKDeunriiSe0e/du/eIXv1BsbKwzo2JZlqKjo9XT06N169bp0UcfVUpKis6cOaMf/vCHSkxM1Pe//32ndu
nSpSotLdW4ceOUkJCgsrIyTZs2zbkLa8qUKZozZ46Kioq0bds2SdKyZcuUn5+vtLQ0SVJubq6mTp0qr9erDRs26NKlSyorK1NRUREzNQAAQFKQMzpbt26Vz+fTrFmzlJKS4ix79uyRJIWFhemDDz7QX/zFX2jy5MlavHixJk+erEOHDik2NtY5zubNm7VgwQItXLhQM2bM0NixY/Vf//VfCgsLc2oqKio0bdo05ebmKjc3V9/61re0a9cuZ39YWJiqqqo0ZswYzZgxQwsXLtSCBQv0wgsvfNXfCQAAMITLtm17uJsYLl1dXbIsSz6fLySzQPesrhryY0rSmefmheS4AACMBMG8fvNdVwAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWEEFnfLycn37299WbGyskpKStGDBAp08eTKgxrZtrVu3Th6PR9HR0Zo1a5Y+/PDDgBq/36+VK1cqMTFRMTExKigoUGtra0BNZ2envF6vLMuSZVnyer26fPlyQM3Zs2c1f/58xcTEKDExUcXFxert7Q3mlAAAgMGCCjp1dXV64okn1NDQoNraWn366afKzc3VlStXnJrnn39emzZt0pYtW3T06FG53W49/PDD6u7udmpKSkq0b98+VVZWqr6+Xj09PcrPz1d/f79TU1hYqKamJlVXV6u6ulpNTU3yer3O/v7+fs2bN09XrlxRfX29KisrtXfvXpWWln6V3wcAADCIy7Zte7APPn/+vJKSklRXV6fvfe97sm1bHo9HJSUl+od/+AdJf5i9SU5O1o9//GM99thj8vl8uvvuu7Vr1y4tWrRIkvTxxx8rNTVV+/fvV15enk6cOKGpU6eqoaFBmZmZkqSGhgZlZ2frf/7nf5SWlqZf/vKXys/PV0tLizwejySpsrJSS5YsUUdHh+Li4r60/66uLlmWJZ/Pd0v1wbpnddWQH1OSzjw3LyTHBQBgJAjm9fsrXaPj8/kkSQkJCZKk06dPq729Xbm5uU5NVFSUZs6cqYMHD0qSGhsb1dfXF1Dj8XiUnp7u1Bw6dEiWZTkhR5KysrJkWVZATXp6uhNyJCkvL09+v1+NjY037Nfv96urqytgAQAA5hp00LFtW6tWrdJ3vvMdpaenS5La29slScnJyQG1ycnJzr729nZFRkYqPj7+pjVJSUkDnjMpKSmg5vrniY+PV2RkpFNzvfLycueaH8uylJqaGuxpAwCAEWTQQWfFihX6zW9+o9dee23APpfLFbBu2/aAbde7vuZG9YOp+bw1a9bI5/M5S0tLy017AgAAI9uggs7KlSv15ptv6p133tH48eOd7W63W5IGzKh0dHQ4sy9ut1u9vb3q7Oy8ac25c+cGPO/58+cDaq5/ns7OTvX19Q2Y6bkmKipKcXFxAQsAADBXUEHHtm2tWLFCr7/+ut5++21NnDgxYP/EiRPldrtVW1vrbOvt7VVdXZ1ycnIkSRkZGYqIiAioaWtrU3Nzs1OTnZ0tn8+nI0eOODWHDx+Wz+cLqGlublZbW5tTU1NTo6ioKGVkZARzWgAAwFDhwRQ/8cQT2r17t37xi18oNjbWmVGxLEvR0dFyuVwqKSnR+v
XrNWnSJE2aNEnr16/X2LFjVVhY6NQuXbpUpaWlGjdunBISElRWVqZp06Zp9uzZkqQpU6Zozpw5Kioq0rZt2yRJy5YtU35+vtLS0iRJubm5mjp1qrxerzZs2KBLly6prKxMRUVFzNQAAABJQQadrVu3SpJmzZoVsP3ll1/WkiVLJElPPfWUrl69quXLl6uzs1OZmZmqqalRbGysU79582aFh4dr4cKFunr1qh566CHt3LlTYWFhTk1FRYWKi4udu7MKCgq0ZcsWZ39YWJiqqqq0fPlyzZgxQ9HR0SosLNQLL7wQ1C8AAACY6yt9js5Ix+foAAAw8ty2z9EBAAC4kxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWOHD3QDuLPesrgrJcc88Ny8kxwUA4GaY0QEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYKygg857772n+fPny+PxyOVy6Y033gjYv2TJErlcroAlKysroMbv92vlypVKTExUTEyMCgoK1NraGlDT2dkpr9cry7JkWZa8Xq8uX74cUHP27FnNnz9fMTExSkxMVHFxsXp7e4M9JQAAYKigg86VK1d03333acuWLV9YM2fOHLW1tTnL/v37A/aXlJRo3759qqysVH19vXp6epSfn6/+/n6nprCwUE1NTaqurlZ1dbWamprk9Xqd/f39/Zo3b56uXLmi+vp6VVZWau/evSotLQ32lAAAgKHCg33A3LlzNXfu3JvWREVFye1233Cfz+fTjh07tGvXLs2ePVuS9Oqrryo1NVVvvfWW8vLydOLECVVXV6uhoUGZmZmSpO3btys7O1snT55UWlqaampqdPz4cbW0tMjj8UiSNm7cqCVLlujZZ59VXFxcsKcGAAAME5JrdN59910lJSVp8uTJKioqUkdHh7OvsbFRfX19ys3NdbZ5PB6lp6fr4MGDkqRDhw7Jsiwn5EhSVlaWLMsKqElPT3dCjiTl5eXJ7/ersbHxhn35/X51dXUFLAAAwFxDHnTmzp2riooKvf3229q4caOOHj2qBx98UH6/X5LU3t6uyMhIxcfHBzwuOTlZ7e3tTk1SUtKAYyclJQXUJCcnB+yPj49XZGSkU3O98vJy55ofy7KUmpr6lc8XAADcuYJ+6+rLLFq0yPk5PT1d06dP14QJE1RVVaVHHnnkCx9n27ZcLpez/vmfv0rN561Zs0arVq1y1ru6ugg7AAAYLOS3l6ekpGjChAk6deqUJMntdqu3t1ednZ0BdR0dHc4Mjdvt1rlz5wYc6/z58wE118/cdHZ2qq+vb8BMzzVRUVGKi4sLWAAAgLlCHnQuXryolpYWpaSkSJIyMjIUERGh2tpap6atrU3Nzc3KycmRJGVnZ8vn8+nIkSNOzeHDh+Xz+QJqmpub1dbW5tTU1NQoKipKGRkZoT4tAAAwAgT91lVPT49+97vfOeunT59WU1OTEhISlJCQoHXr1unRRx9VSkqKzpw5ox/+8IdKTEzU97//fUmSZVlaunSpSktLNW7cOCUkJKisrEzTpk1z7sKaMmWK5syZo6KiIm3btk2StGzZMuXn5ystLU2SlJubq6lTp8rr9WrDhg26dOmSysrKVFRUxEwNAACQNIig8/777+uBBx5w1q9d87J48W
Jt3bpVH3zwgX72s5/p8uXLSklJ0QMPPKA9e/YoNjbWeczmzZsVHh6uhQsX6urVq3rooYe0c+dOhYWFOTUVFRUqLi527s4qKCgI+OyesLAwVVVVafny5ZoxY4aio6NVWFioF154IfjfAgAAMJLLtm17uJsYLl1dXbIsSz6fLySzQPesrhryY0rSmefmheS40sjsGQAwugTz+s13XQEAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYwUddN577z3Nnz9fHo9HLpdLb7zxRsB+27a1bt06eTweRUdHa9asWfrwww8Davx+v1auXKnExETFxMSooKBAra2tATWdnZ3yer2yLEuWZcnr9ery5csBNWfPntX8+fMVExOjxMREFRcXq7e3N9hTAgAAhgo66Fy5ckX33XeftmzZcsP9zz//vDZt2qQtW7bo6NGjcrvdevjhh9Xd3e3UlJSUaN++faqsrFR9fb16enqUn5+v/v5+p6awsFBNTU2qrq5WdXW1mpqa5PV6nf39/f2aN2+erly5ovr6elVWVmrv3r0qLS0N9pQAAIChwoN9wNy5czV37twb7rNtWy+++KLWrl2rRx55RJL0yiuvKDk5Wbt379Zjjz0mn8+nHTt2aNeuXZo9e7Yk6dVXX1Vqaqreeust5eXl6cSJE6qurlZDQ4MyMzMlSdu3b1d2drZOnjyptLQ01dTU6Pjx42ppaZHH45Ekbdy4UUuWLNGzzz6ruLi4Qf1CAACAOYb0Gp3Tp0+rvb1dubm5zraoqCjNnDlTBw8elCQ1Njaqr68voMbj8Sg9Pd2pOXTokCzLckKOJGVlZcmyrICa9PR0J+RIUl5envx+vxobG2/Yn9/vV1dXV8ACAADMNaRBp729XZKUnJwcsD05OdnZ197ersjISMXHx9+0JikpacDxk5KSAmquf574+HhFRkY6NdcrLy93rvmxLEupqamDOEsAADBShOSuK5fLFbBu2/aAbde7vuZG9YOp+bw1a9bI5/M5S0tLy017AgAAI9uQBh232y1JA2ZUOjo6nNkXt9ut3t5edXZ23rTm3LlzA45//vz5gJrrn6ezs1N9fX0DZnquiYqKUlxcXMACAADMNaRBZ+LEiXK73aqtrXW29fb2qq6uTjk5OZKkjIwMRUREBNS0tbWpubnZqcnOzpbP59ORI0ecmsOHD8vn8wXUNDc3q62tzampqalRVFSUMjIyhvK0AADACBX0XVc9PT363e9+56yfPn1aTU1NSkhI0Ne//nWVlJRo/fr1mjRpkiZNmqT169dr7NixKiwslCRZlqWlS5eqtLRU48aNU0JCgsrKyjRt2jTnLqwpU6Zozpw5Kioq0rZt2yRJy5YtU35+vtLS0iRJubm5mjp1qrxerzZs2KBLly6prKxMRUVFzNQAAABJgwg677//vh544AFnfdWqVZKkxYsXa+fOnXrqqad09epVLV++XJ2dncrMzFRNTY1iY2Odx2zevFnh4eFauHChrl69qoceekg7d+5UWFiYU1NRUaHi4mLn7qyCgoKAz+4JCwtTVVWVli9frhkzZig6OlqFhYV64YUXgv8tAAAAI7ls27aHu4nh0t
XVJcuy5PP5QjILdM/qqiE/piSdeW5eSI4rjcyeAQCjSzCv33zXFQAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMFfSXegKDEarv0JL4Hi0AwBdjRgcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMYi6AAAAGMRdAAAgLEIOgAAwFgEHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACMRdABAADGCh/uBgAAt8c9q6tCctwzz80LyXGBocCMDgAAMNaQB51169bJ5XIFLG6329lv27bWrVsnj8ej6OhozZo1Sx9++GHAMfx+v1auXKnExETFxMSooKBAra2tATWdnZ3yer2yLEuWZcnr9ery5ctDfToAAGAEC8mMzje/+U21tbU5ywcffODse/7557Vp0yZt2bJFR48eldvt1sMPP6zu7m6npqSkRPv27VNlZaXq6+vV09Oj/Px89ff3OzWFhYVqampSdXW1qqur1dTUJK/XG4rTAQAAI1RIrtEJDw8PmMW5xrZtvfjii1q7dq0eeeQRSdIrr7yi5ORk7d69W4899ph8Pp927NihXbt2afbs2ZKkV199VampqXrrrbeUl5enEydOqLq6Wg0NDcrMzJQkbd++XdnZ2Tp58qTS0tJCcVoAAGCECcmMzqlTp+TxeDRx4kT91V/9lT766CNJ0unTp9Xe3q7c3FynNioqSjNnztTBgwclSY2Njerr6wuo8Xg8Sk9Pd2oOHToky7KckCNJWVlZsizLqbkRv9+vrq6ugAUAAJhryINOZmamfvazn+lXv/qVtm/frvb2duXk5OjixYtqb2+XJCUnJwc8Jjk52dnX3t6uyMhIxcfH37QmKSlpwHMnJSU5NTdSXl7uXNNjWZZSU1O/0rkCAIA725AHnblz5+rRRx/VtGnTNHv2bFVV/eF2xldeecWpcblcAY+xbXvAtutdX3Oj+i87zpo1a+Tz+ZylpaXlls4JAACMTCG/vTwmJkbTpk3TqVOnnOt2rp916ejocGZ53G63ent71dnZedOac+fODXiu8+fPD5gt+ryoqCjFxcUFLAAAwFwhDzp+v18nTpxQSkqKJk6cKLfbrdraWmd/b2+v6urqlJOTI0nKyMhQREREQE1bW5uam5udmuzsbPl8Ph05csSpOXz4sHw+n1MDAAAw5HddlZWVaf78+fr617+ujo4O/ehHP1JXV5cWL14sl8ulkpISrV+/XpMmTdKkSZO0fv16jR07VoWFhZIky7K0dOlSlZaWaty4cUpISFBZWZnzVpgkTZkyRXPmzFFRUZG2bdsmSVq2bJny8/O54woAADiGPOi0trbqr//6r3XhwgXdfffdysrKUkNDgyZMmCBJeuqpp3T16lUtX75cnZ2dyszMVE1NjWJjY51jbN68WeHh4Vq4cKGuXr2qhx56SDt37lRYWJhTU1FRoeLiYufurIKCAm3ZsmWoTwcAAIxgLtu27eFuYrh0dXXJsiz5fL6QXK8zEr9XJlQ9hxLfswPcmpH4bxJwI8G8fvNdVwAAwFh8ezkADAKzI8DIwIwOAAAwFjM6AIw1Eq85AzC0mNEBAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAs7roCgDsId4oBQ4sZHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsfgcHcAgofwMljPPzQvZsfnsGAChwowOAAAwFkEHAAAYi7euAN
wS3l4CMBIxowMAAIxF0AEAAMYi6AAAAGMRdAAAgLG4GBkjXqgukg3l58YAAG4Pgg4AAKPcSP2w0VvBW1cAAMBYBB0AAGAsgg4AADAWQQcAABiLoAMAAIxF0AEAAMbi9nLgC5h8uyUAjBbM6AAAAGMxowMMg1DOFgEA/j+CDgAAIwD/QRoc3roCAADGIugAAABjEXQAAICxCDoAAMBYBB0AAGAs7roCAGAIcXfUnYUZHQAAYCyCDgAAMBZBBwAAGItrdAAAX8lI/AJcrqMZPZjRAQAAxiLoAAAAY/HWFQDgjsVbTPiqmNEBAADGIugAAABjEXQAAICxuEZnBOI9awAAbg0zOgAAwFgEHQAAYCyCDgAAMNaIDzovvfSSJk6cqDFjxigjI0MHDhwY7pYAAMAdYkQHnT179qikpERr167VsWPH9N3vfldz587V2bNnh7s1AABwBxjRQWfTpk1aunSpfvCDH2jKlCl68cUXlZqaqq1btw53awAA4A4wYm8v7+3tVWNjo1avXh2wPTc3VwcPHrzhY/x+v/x+v7Pu8/kkSV1dXSHp8TP/JyE5LgAAI0UoXmOvHdO27S+tHbFB58KFC+rv71dycnLA9uTkZLW3t9/wMeXl5XrmmWcGbE9NTQ1JjwAAjHbWi6E7dnd3tyzLumnNiA0617hcroB127YHbLtmzZo1WrVqlbP+2Wef6dKlSxo3btwXPma06erqUmpqqlpaWhQXFzfc7UCMyZ2IMbnzMCZ3plCNi23b6u7ulsfj+dLaERt0EhMTFRYWNmD2pqOjY8AszzVRUVGKiooK2PZHf/RHoWpxRIuLi+MfizsMY3LnYUzuPIzJnSkU4/JlMznXjNiLkSMjI5WRkaHa2tqA7bW1tcrJyRmmrgAAwJ1kxM7oSNKqVavk9Xo1ffp0ZWdn69///d919uxZPf7448PdGgAAuAOM6KCzaNEiXbx4Uf/8z/+strY2paena//+/ZowYcJwtzZiRUVF6emnnx7wFh+GD2Ny52FM7jyMyZ3pThgXl30r92YBAACMQCP2Gh0AAIAvQ9ABAADGIugAAABjEXQAAICxCDqj0EsvvaSJEydqzJgxysjI0IEDB76w9vXXX9fDDz+su+++W3FxccrOztavfvWr29jt6BDMmHzer3/9a4WHh+tP/uRPQtvgKBTsmPj9fq1du1YTJkxQVFSU/viP/1j/8R//cZu6HR2CHZOKigrdd999Gjt2rFJSUvS3f/u3unjx4m3q1nzvvfee5s+fL4/HI5fLpTfeeONLH1NXV6eMjAyNGTNG9957r/7t3/4t9I3aGFUqKyvtiIgIe/v27fbx48ftJ5980o6JibH/93//94b1Tz75pP3jH//YPnLkiP3b3/7WXrNmjR0REWH/93//923u3FzBjsk1ly9ftu+99147NzfXvu+++25Ps6PEYMakoKDAzszMtGtra+3Tp0/bhw8ftn/961/fxq7NFuyYHDhwwL7rrrvsf/mXf7E/+ugj+8CBA/Y3v/lNe8GCBbe5c3Pt37/fXrt2rb13715bkr1v376b1n/00Uf22LFj7SeffNI+fvy4vX37djsiIsL++c9/HtI+CTqjzP33328//vjjAdu+8Y1v2KtXr77lY0ydOtV+5plnhrq1UWuwY7Jo0SL7H//xH+2nn36aoDPEgh2TX/7yl7ZlWfbFixdvR3ujUrBjsmHDBvvee+8N2PaTn/zEHj9+fMh6HM1uJeg89dRT9je+8Y2AbY899pidlZUVws5sm7euRpHe3l41NjYqNzc3YHtubq4OHjx4S8f47LPP1N3drYSEhFC0OOoMdkxefvll/f73v9fTTz8d6hZHncGMyZtvvqnp06fr+eef19e+9jVNnjxZZWVlunr16u1o2XiDGZOcnBy1trZq//79sm1b586d089//nPNmzfvdrSMGzh06NCAMczLy9P777+vvr6+kD3viP5kZATnwoUL6u/vH/Clp8nJyQO+HPWLbNy4UV
euXNHChQtD0eKoM5gxOXXqlFavXq0DBw4oPJw/4aE2mDH56KOPVF9frzFjxmjfvn26cOGCli9frkuXLnGdzhAYzJjk5OSooqJCixYt0v/93//p008/VUFBgf71X//1drSMG2hvb7/hGH766ae6cOGCUlJSQvK8zOiMQi6XK2Ddtu0B227ktdde07p167Rnzx4lJSWFqr1R6VbHpL+/X4WFhXrmmWc0efLk29XeqBTM38lnn30ml8uliooK3X///frzP/9zbdq0STt37mRWZwgFMybHjx9XcXGx/umf/kmNjY2qrq7W6dOn+S7EYXajMbzR9qHEfwdHkcTERIWFhQ34H1BHR8eAlH29PXv2aOnSpfrP//xPzZ49O5RtjirBjkl3d7fef/99HTt2TCtWrJD0hxdZ27YVHh6umpoaPfjgg7eld1MN5u8kJSVFX/va12RZlrNtypQpsm1bra2tmjRpUkh7Nt1gxqS8vFwzZszQ3//930uSvvWtbykmJkbf/e539aMf/Shkswf4Ym63+4ZjGB4ernHjxoXseZnRGUUiIyOVkZGh2tragO21tbXKycn5wse99tprWrJkiXbv3s3720Ms2DGJi4vTBx98oKamJmd5/PHHlZaWpqamJmVmZt6u1o01mL+TGTNm6OOPP1ZPT4+z7be//a3uuusujR8/PqT9jgaDGZNPPvlEd90V+BIXFhYm6f/PIuD2ys7OHjCGNTU1mj59uiIiIkL3xCG91Bl3nGu3aO7YscM+fvy4XVJSYsfExNhnzpyxbdu2V69ebXu9Xqd+9+7ddnh4uP3Tn/7Ubmtrc5bLly8P1ykYJ9gxuR53XQ29YMeku7vbHj9+vP2Xf/mX9ocffmjX1dXZkyZNsn/wgx8M1ykYJ9gxefnll+3w8HD7pZdesn//+9/b9fX19vTp0+37779/uE7BON3d3faxY8fsY8eO2ZLsTZs22ceOHXNu+b9+TK7dXv53f/d39vHjx+0dO3ZwezlC46c//ak9YcIEOzIy0v6zP/szu66uztm3ePFie+bMmc76zJkzbUkDlsWLF9/+xg0WzJhcj6ATGsGOyYkTJ+zZs2fb0dHR9vjx4+1Vq1bZn3zyyW3u2mzBjslPfvITe+rUqXZ0dLSdkpJi/83f/I3d2tp6m7s21zvvvHPT14cbjcm7775r/+mf/qkdGRlp33PPPfbWrVtD3qfLtpnDAwAAZuIaHQAAYCyCDgAAMBZBBwAAGIugAwAAjEXQAQAAxiLoAAAAYxF0AACAsQg6AADAWAQdAABgLIIOAAAwFkEHAAAYi6ADAACM9f8AnEqfEmt/etkAAAAASUVORK5CYII=",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# look at the distribution of the score \n",
|
||
"\n",
|
||
"plt.hist(y_pred_prob, bins=20)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 64,
|
||
"id": "99f7f70e-c3bb-445e-8889-e7547f6ebd1e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# number of observations\n",
|
||
"N = len(y_pred_prob)\n",
|
||
"\n",
|
||
"# sort the data in ascending order \n",
|
||
"y_pred_prob_sorted = np.sort(y_pred_prob) \n",
|
||
"\n",
|
||
"# get the cdf values of y \n",
|
||
"steps = np.arange(N) / N\n",
|
||
" \n",
|
||
"# plotting \n",
|
||
"plt.xlabel('X') \n",
|
||
"plt.ylabel('P(score<=X)') \n",
|
||
" \n",
|
||
"plt.title('CDF curve of the predicted probability of purchase (score) for sports companies') \n",
|
||
" \n",
|
||
"plt.plot(y_pred_prob_sorted, steps) \n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "bcb94066-9387-4a5f-af3a-ab86d534c885",
|
||
"metadata": {},
|
||
"source": [
|
||
"### K-means clustering"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"id": "dd7a4a9c-d7e3-4747-ae59-b2a5a0b77260",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-4 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-4 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-4 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-4 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-4 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-4 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-4 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KMeans(n_clusters=3, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-23\" type=\"checkbox\" checked><label for=\"sk-estimator-id-23\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> KMeans<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.cluster.KMeans.html\">?<span>Documentation for KMeans</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>KMeans(n_clusters=3, random_state=0)</pre></div> </div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"KMeans(n_clusters=3, random_state=0)"
|
||
]
|
||
},
|
||
"execution_count": 66,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# K-means clustering on the 1-D model output `y_pred_prob` (3 groups of customers)\n",
|
||
"\n",
|
||
"from sklearn.cluster import KMeans\n",
|
||
"\n",
|
||
"# a fixed seed makes the centroid initialisation, and hence the clusters, reproducible\n",
|
||
"kmeans = KMeans(n_clusters=3, random_state=0)\n",
|
||
"\n",
|
||
"# reshape(-1, 1) lays the values out as a single-column 2-D array (one row per sample)\n",
|
||
"kmeans.fit(y_pred_prob.reshape(-1,1))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"id": "10b6ece7-adcf-41c0-884b-a4aef42af378",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([1, 0, 0, ..., 0, 1, 0], dtype=int32)"
|
||
]
|
||
},
|
||
"execution_count": 67,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_clusters = kmeans.predict(y_pred_prob.reshape(-1,1))\n",
|
||
"y_clusters"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 75,
|
||
"id": "e4b3b16e-03b8-4883-9788-cb7296fe56cd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"seuil cluster 0 : 0.38635624748849917 (60.14%)\n",
|
||
"seuil cluster 1 : 0.7395110401019087 (30.69%)\n",
|
||
"seuil cluster 2 : 1.0 (9.16%)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# seuils des clusters et part de clients dans chacun d'eux\n",
|
||
"\n",
|
||
"print(f\"seuil cluster 0 : {y_pred_prob[y_clusters==0].max()} ({round(100 * (y_clusters==0).mean(), 2)}%)\")\n",
|
||
"print(f\"seuil cluster 1 : {y_pred_prob[y_clusters==1].max()} ({round(100 * (y_clusters==1).mean(), 2)}%)\")\n",
|
||
"print(f\"seuil cluster 2 : {y_pred_prob[y_clusters==2].max()} ({round(100* (y_clusters==2).mean(), 2)}%)\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"id": "3e404a5e-6734-4d98-8853-48b09c96e7e0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>cluster</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers vente_internet_max \\\n",
|
||
"0 4.0 1.0 100.0 1.0 0.0 \n",
|
||
"1 1.0 1.0 55.0 1.0 0.0 \n",
|
||
"2 17.0 1.0 80.0 1.0 0.0 \n",
|
||
"3 4.0 1.0 120.0 1.0 0.0 \n",
|
||
"4 34.0 2.0 416.0 1.0 0.0 \n",
|
||
"\n",
|
||
" purchase_date_min purchase_date_max nb_tickets_internet is_email_true \\\n",
|
||
"0 5.177187 5.177187 0.0 True \n",
|
||
"1 426.265613 426.265613 0.0 True \n",
|
||
"2 436.033437 436.033437 0.0 True \n",
|
||
"3 5.196412 5.196412 0.0 True \n",
|
||
"4 478.693148 115.631470 0.0 True \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male nb_campaigns nb_campaigns_opened \\\n",
|
||
"0 False 1 0 0.0 0.0 \n",
|
||
"1 True 0 1 0.0 0.0 \n",
|
||
"2 True 1 0 0.0 0.0 \n",
|
||
"3 False 1 0 0.0 0.0 \n",
|
||
"4 False 1 0 0.0 0.0 \n",
|
||
"\n",
|
||
" cluster \n",
|
||
"0 1 \n",
|
||
"1 0 \n",
|
||
"2 0 \n",
|
||
"3 1 \n",
|
||
"4 2 "
|
||
]
|
||
},
|
||
"execution_count": 76,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Are the customers inside a cluster alike? Starting point for defining marketing personas.\n",
|
||
"\n",
|
||
"# assign() returns a new frame, so X_test itself is left without the `cluster` column\n",
|
||
"X_test_clustered = X_test.assign(cluster = y_clusters)\n",
|
||
"X_test_clustered.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"id": "b6f4638d-23c4-427a-88a4-b09528b3f91b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>cluster</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>60.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>222.437500</td>\n",
|
||
" <td>214.639152</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>209.26</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>418.270723</td>\n",
|
||
" <td>56.167392</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>18.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"cluster \n",
|
||
"0 0.0 0.0 0.00 0.0 \n",
|
||
"1 2.0 1.0 60.00 1.0 \n",
|
||
"2 13.0 4.0 209.26 1.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"cluster \n",
|
||
"0 0.0 550.000000 550.000000 \n",
|
||
"1 1.0 222.437500 214.639152 \n",
|
||
"2 1.0 418.270723 56.167392 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"cluster \n",
|
||
"0 0.0 1.0 1.0 0.0 \n",
|
||
"1 1.0 1.0 0.0 0.0 \n",
|
||
"2 3.0 1.0 0.0 0.0 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened \n",
|
||
"cluster \n",
|
||
"0 0.0 7.0 0.0 \n",
|
||
"1 1.0 3.0 0.0 \n",
|
||
"2 1.0 18.0 1.0 "
|
||
]
|
||
},
|
||
"execution_count": 79,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test_clustered.groupby(\"cluster\").median().iloc[[0,1,2], :]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"id": "f80474be-c897-47f9-8fdd-f2fb8d724ee2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>cluster</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0.311325</td>\n",
|
||
" <td>0.114404</td>\n",
|
||
" <td>6.707697</td>\n",
|
||
" <td>0.102898</td>\n",
|
||
" <td>0.048741</td>\n",
|
||
" <td>527.762945</td>\n",
|
||
" <td>527.621410</td>\n",
|
||
" <td>0.137313</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.561640</td>\n",
|
||
" <td>0.239934</td>\n",
|
||
" <td>0.450610</td>\n",
|
||
" <td>12.881201</td>\n",
|
||
" <td>2.163647</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2.926055</td>\n",
|
||
" <td>1.395389</td>\n",
|
||
" <td>82.976104</td>\n",
|
||
" <td>1.000136</td>\n",
|
||
" <td>0.681539</td>\n",
|
||
" <td>228.303268</td>\n",
|
||
" <td>217.641649</td>\n",
|
||
" <td>1.736769</td>\n",
|
||
" <td>0.990202</td>\n",
|
||
" <td>0.145618</td>\n",
|
||
" <td>0.260553</td>\n",
|
||
" <td>0.536871</td>\n",
|
||
" <td>9.821800</td>\n",
|
||
" <td>2.811663</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>44.841472</td>\n",
|
||
" <td>11.576993</td>\n",
|
||
" <td>1942.145881</td>\n",
|
||
" <td>1.493641</td>\n",
|
||
" <td>0.742562</td>\n",
|
||
" <td>382.346041</td>\n",
|
||
" <td>87.811798</td>\n",
|
||
" <td>12.613786</td>\n",
|
||
" <td>0.971724</td>\n",
|
||
" <td>0.132637</td>\n",
|
||
" <td>0.199182</td>\n",
|
||
" <td>0.621735</td>\n",
|
||
" <td>20.781399</td>\n",
|
||
" <td>8.329548</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"cluster \n",
|
||
"0 0.311325 0.114404 6.707697 0.102898 \n",
|
||
"1 2.926055 1.395389 82.976104 1.000136 \n",
|
||
"2 44.841472 11.576993 1942.145881 1.493641 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"cluster \n",
|
||
"0 0.048741 527.762945 527.621410 \n",
|
||
"1 0.681539 228.303268 217.641649 \n",
|
||
"2 0.742562 382.346041 87.811798 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"cluster \n",
|
||
"0 0.137313 1.000000 0.561640 0.239934 \n",
|
||
"1 1.736769 0.990202 0.145618 0.260553 \n",
|
||
"2 12.613786 0.971724 0.132637 0.199182 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened \n",
|
||
"cluster \n",
|
||
"0 0.450610 12.881201 2.163647 \n",
|
||
"1 0.536871 9.821800 2.811663 \n",
|
||
"2 0.621735 20.781399 8.329548 "
|
||
]
|
||
},
|
||
"execution_count": 80,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test_clustered.groupby(\"cluster\").mean().iloc[[0,1,2], :]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "d0af77f8-ae66-43a5-bf04-b26667f911f6",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Quartile clustering (fixed score bands split at 0.25, 0.5 and 0.75)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 82,
|
||
"id": "2396ec51-4411-4fe3-9d41-449c4ffa75a0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.695913</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.244205</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.279592</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.696135</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.911844</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers vente_internet_max \\\n",
|
||
"0 4.0 1.0 100.0 1.0 0.0 \n",
|
||
"1 1.0 1.0 55.0 1.0 0.0 \n",
|
||
"2 17.0 1.0 80.0 1.0 0.0 \n",
|
||
"3 4.0 1.0 120.0 1.0 0.0 \n",
|
||
"4 34.0 2.0 416.0 1.0 0.0 \n",
|
||
"\n",
|
||
" purchase_date_min purchase_date_max nb_tickets_internet is_email_true \\\n",
|
||
"0 5.177187 5.177187 0.0 True \n",
|
||
"1 426.265613 426.265613 0.0 True \n",
|
||
"2 436.033437 436.033437 0.0 True \n",
|
||
"3 5.196412 5.196412 0.0 True \n",
|
||
"4 478.693148 115.631470 0.0 True \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male nb_campaigns nb_campaigns_opened \\\n",
|
||
"0 False 1 0 0.0 0.0 \n",
|
||
"1 True 0 1 0.0 0.0 \n",
|
||
"2 True 1 0 0.0 0.0 \n",
|
||
"3 False 1 0 0.0 0.0 \n",
|
||
"4 False 1 0 0.0 0.0 \n",
|
||
"\n",
|
||
" score \n",
|
||
"0 0.695913 \n",
|
||
"1 0.244205 \n",
|
||
"2 0.279592 \n",
|
||
"3 0.696135 \n",
|
||
"4 0.911844 "
|
||
]
|
||
},
|
||
"execution_count": 82,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# quartile clustering: store the model output next to the features as a `score` column\n",
|
||
"\n",
|
||
"# NOTE(review): this writes into X_test in place, so re-running earlier cells that start\n",
|
||
"# from X_test (e.g. the one building X_test_clustered) will now pick up `score` as well.\n",
|
||
"X_test[\"score\"] = y_pred_prob\n",
|
||
"X_test.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "bccddbd1-9d63-4d22-a3b3-daa6d83e90de",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# NOTE(review): this looks like an unexecuted template for the nested np.where pattern\n",
|
||
"# used in the next cell; `df`, `col2` and `new_column` appear to be placeholders.\n",
|
||
"# Confirm, then delete this cell so that Restart & Run All does not trip over it.\n",
|
||
"df['new_column'] = np.where(df['col2']<9, 'value1',\n",
|
||
" np.where(df['col2']<12, 'value2',\n",
|
||
" np.where(df['col2']<15, 'value3', 'value4')))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 83,
|
||
"id": "f6334f99-725e-4e94-af86-60f161dd93a8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>quartile</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.695913</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.244205</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.279592</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.696135</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.911844</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.140069</td>\n",
|
||
" <td>5.140069</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690015</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>105.053773</td>\n",
|
||
" <td>105.053773</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.663391</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>63.206030</td>\n",
|
||
" <td>63.206030</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.441604</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>44.698090</td>\n",
|
||
" <td>44.698090</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.441933</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>165.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>266.012106</td>\n",
|
||
" <td>258.012106</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.581348</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers vente_internet_max \\\n",
|
||
"0 4.0 1.0 100.0 1.0 0.0 \n",
|
||
"1 1.0 1.0 55.0 1.0 0.0 \n",
|
||
"2 17.0 1.0 80.0 1.0 0.0 \n",
|
||
"3 4.0 1.0 120.0 1.0 0.0 \n",
|
||
"4 34.0 2.0 416.0 1.0 0.0 \n",
|
||
"5 2.0 1.0 60.0 1.0 0.0 \n",
|
||
"6 5.0 1.0 61.0 1.0 1.0 \n",
|
||
"7 4.0 1.0 80.0 1.0 0.0 \n",
|
||
"8 1.0 1.0 10.0 1.0 0.0 \n",
|
||
"9 3.0 3.0 165.0 1.0 1.0 \n",
|
||
"\n",
|
||
" purchase_date_min purchase_date_max nb_tickets_internet is_email_true \\\n",
|
||
"0 5.177187 5.177187 0.0 True \n",
|
||
"1 426.265613 426.265613 0.0 True \n",
|
||
"2 436.033437 436.033437 0.0 True \n",
|
||
"3 5.196412 5.196412 0.0 True \n",
|
||
"4 478.693148 115.631470 0.0 True \n",
|
||
"5 5.140069 5.140069 0.0 True \n",
|
||
"6 105.053773 105.053773 5.0 True \n",
|
||
"7 63.206030 63.206030 0.0 True \n",
|
||
"8 44.698090 44.698090 0.0 True \n",
|
||
"9 266.012106 258.012106 3.0 True \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male nb_campaigns nb_campaigns_opened \\\n",
|
||
"0 False 1 0 0.0 0.0 \n",
|
||
"1 True 0 1 0.0 0.0 \n",
|
||
"2 True 1 0 0.0 0.0 \n",
|
||
"3 False 1 0 0.0 0.0 \n",
|
||
"4 False 1 0 0.0 0.0 \n",
|
||
"5 False 0 1 0.0 0.0 \n",
|
||
"6 False 0 0 0.0 0.0 \n",
|
||
"7 True 0 1 0.0 0.0 \n",
|
||
"8 True 0 0 0.0 0.0 \n",
|
||
"9 False 0 0 0.0 0.0 \n",
|
||
"\n",
|
||
" score quartile \n",
|
||
"0 0.695913 3 \n",
|
||
"1 0.244205 1 \n",
|
||
"2 0.279592 2 \n",
|
||
"3 0.696135 3 \n",
|
||
"4 0.911844 4 \n",
|
||
"5 0.690015 3 \n",
|
||
"6 0.663391 3 \n",
|
||
"7 0.441604 2 \n",
|
||
"8 0.441933 2 \n",
|
||
"9 0.581348 3 "
|
||
]
|
||
},
|
||
"execution_count": 83,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[\"quartile\"] = np.where(X_test['score']<0.25, '1',\n",
|
||
" np.where(X_test['score']<0.5, '2',\n",
|
||
" np.where(X_test['score']<0.75, '3', '4')))\n",
|
||
"X_test.head(10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"id": "0abec0ed-098b-4ecc-b6c3-6b25110c1493",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"quartile\n",
|
||
"1 47871\n",
|
||
"2 17224\n",
|
||
"3 22481\n",
|
||
"4 8520\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 84,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# size of each segment\n",
|
||
"\n",
|
||
"X_test.groupby(\"quartile\").size()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 87,
|
||
"id": "008a0040-8a27-4fd8-8dfa-46d39d6b88d9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0.017380</td>\n",
|
||
" <td>0.008586</td>\n",
|
||
" <td>0.475141</td>\n",
|
||
" <td>0.008439</td>\n",
|
||
" <td>0.001358</td>\n",
|
||
" <td>549.044552</td>\n",
|
||
" <td>549.044465</td>\n",
|
||
" <td>0.003071</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.562157</td>\n",
|
||
" <td>0.232416</td>\n",
|
||
" <td>0.416536</td>\n",
|
||
" <td>11.860521</td>\n",
|
||
" <td>1.648430</td>\n",
|
||
" <td>0.169233</td>\n",
|
||
" <td>0.026780</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2.085810</td>\n",
|
||
" <td>0.880283</td>\n",
|
||
" <td>49.701732</td>\n",
|
||
" <td>0.742336</td>\n",
|
||
" <td>0.420866</td>\n",
|
||
" <td>381.428495</td>\n",
|
||
" <td>379.188470</td>\n",
|
||
" <td>1.044473</td>\n",
|
||
" <td>0.998374</td>\n",
|
||
" <td>0.507083</td>\n",
|
||
" <td>0.264515</td>\n",
|
||
" <td>0.596435</td>\n",
|
||
" <td>14.593184</td>\n",
|
||
" <td>3.725732</td>\n",
|
||
" <td>0.360811</td>\n",
|
||
" <td>0.117452</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3.118100</td>\n",
|
||
" <td>1.478893</td>\n",
|
||
" <td>88.811284</td>\n",
|
||
" <td>1.003292</td>\n",
|
||
" <td>0.703349</td>\n",
|
||
" <td>198.284116</td>\n",
|
||
" <td>184.197970</td>\n",
|
||
" <td>1.879098</td>\n",
|
||
" <td>0.988123</td>\n",
|
||
" <td>0.051777</td>\n",
|
||
" <td>0.264001</td>\n",
|
||
" <td>0.526534</td>\n",
|
||
" <td>9.773898</td>\n",
|
||
" <td>2.978115</td>\n",
|
||
" <td>0.626785</td>\n",
|
||
" <td>0.209332</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>46.046362</td>\n",
|
||
" <td>11.842254</td>\n",
|
||
" <td>2002.607230</td>\n",
|
||
" <td>1.508685</td>\n",
|
||
" <td>0.743192</td>\n",
|
||
" <td>386.401662</td>\n",
|
||
" <td>85.808238</td>\n",
|
||
" <td>12.894131</td>\n",
|
||
" <td>0.971479</td>\n",
|
||
" <td>0.130751</td>\n",
|
||
" <td>0.198239</td>\n",
|
||
" <td>0.622418</td>\n",
|
||
" <td>20.928286</td>\n",
|
||
" <td>8.367723</td>\n",
|
||
" <td>0.902055</td>\n",
|
||
" <td>0.666549</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"quartile \n",
|
||
"1 0.017380 0.008586 0.475141 0.008439 \n",
|
||
"2 2.085810 0.880283 49.701732 0.742336 \n",
|
||
"3 3.118100 1.478893 88.811284 1.003292 \n",
|
||
"4 46.046362 11.842254 2002.607230 1.508685 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"quartile \n",
|
||
"1 0.001358 549.044552 549.044465 \n",
|
||
"2 0.420866 381.428495 379.188470 \n",
|
||
"3 0.703349 198.284116 184.197970 \n",
|
||
"4 0.743192 386.401662 85.808238 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"quartile \n",
|
||
"1 0.003071 1.000000 0.562157 0.232416 \n",
|
||
"2 1.044473 0.998374 0.507083 0.264515 \n",
|
||
"3 1.879098 0.988123 0.051777 0.264001 \n",
|
||
"4 12.894131 0.971479 0.130751 0.198239 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened score \\\n",
|
||
"quartile \n",
|
||
"1 0.416536 11.860521 1.648430 0.169233 \n",
|
||
"2 0.596435 14.593184 3.725732 0.360811 \n",
|
||
"3 0.526534 9.773898 2.978115 0.626785 \n",
|
||
"4 0.622418 20.928286 8.367723 0.902055 \n",
|
||
"\n",
|
||
" has_purchased \n",
|
||
"quartile \n",
|
||
"1 0.026780 \n",
|
||
"2 0.117452 \n",
|
||
"3 0.209332 \n",
|
||
"4 0.666549 "
|
||
]
|
||
},
|
||
"execution_count": 87,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# check consistency of quartiles (we have an upward bias, which is explained by the fact that we want a decent recall)\n",
|
||
"\n",
|
||
"X_test[\"has_purchased\"] = y_test\n",
|
||
"X_test.groupby(\"quartile\").mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "e6bcaff3-0f47-46da-8873-a321d3382e63",
|
||
"metadata": {},
|
||
"source": [
|
||
"Méthode \\\n",
|
||
"On étudie le rythme d'achat des clients et on suppose qu'il sera le même dans le futur"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 106,
|
||
"id": "04218519-bffa-4340-87dc-e11332977067",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# purchasing pace by segment\n",
|
||
"\n",
|
||
"X_test[\"consumption_lifetime\"] = X_test[\"purchase_date_min\"] - X_test[\"purchase_date_max\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 113,
|
||
"id": "4ac3610d-8a22-4135-a127-328812c5198c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 96096.000000\n",
|
||
"mean 30.347912\n",
|
||
"std 95.435372\n",
|
||
"min 0.000000\n",
|
||
"25% 0.000000\n",
|
||
"50% 0.000000\n",
|
||
"75% 0.000000\n",
|
||
"max 547.122986\n",
|
||
"Name: consumption_lifetime, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 113,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[\"consumption_lifetime\"].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 149,
|
||
"id": "ee86cfb4-e2c4-4485-b27a-ecaec159a0b9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_test[\"avg_purchase_delay\"] = (X_test[\"consumption_lifetime\"]/X_test[\"nb_purchases\"]).replace([np.inf, -np.inf], 0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "raw",
|
||
"id": "a2de6e96-4c92-42b2-8569-1c0f920e7a8c",
|
||
"metadata": {},
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 150,
|
||
"id": "256a684d-0117-4daa-ba38-ff48ac946798",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.911844</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>363.061678</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>165.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>266.012106</td>\n",
|
||
" <td>258.012106</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.581348</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>8.000000</td>\n",
|
||
" <td>2.666667</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>600.00</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>453.423519</td>\n",
|
||
" <td>15.225949</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.970052</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>438.197569</td>\n",
|
||
" <td>31.299826</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>21</th>\n",
|
||
" <td>21.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1075.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>431.817072</td>\n",
|
||
" <td>230.432350</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.599176</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>201.384722</td>\n",
|
||
" <td>67.128241</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>22</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>140.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>277.254745</td>\n",
|
||
" <td>12.438877</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.707939</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>264.815868</td>\n",
|
||
" <td>132.407934</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>95943</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>545.673137</td>\n",
|
||
" <td>362.284745</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>46.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>0.707163</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>183.388391</td>\n",
|
||
" <td>91.694196</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>95989</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>77.31</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>271.676632</td>\n",
|
||
" <td>6.289577</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>21.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>0.937049</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>265.387055</td>\n",
|
||
" <td>132.693527</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>95996</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>273.76</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>202.691389</td>\n",
|
||
" <td>7.142274</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>25.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.916446</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>195.549115</td>\n",
|
||
" <td>65.183038</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96043</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>136.42</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>363.119815</td>\n",
|
||
" <td>173.225752</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>35.0</td>\n",
|
||
" <td>29.0</td>\n",
|
||
" <td>0.866743</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.894062</td>\n",
|
||
" <td>94.947031</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96079</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>355.131933</td>\n",
|
||
" <td>355.128542</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>42.0</td>\n",
|
||
" <td>0.720674</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.003391</td>\n",
|
||
" <td>0.001696</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>14734 rows × 19 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"9 3.0 3.0 165.00 1.0 \n",
|
||
"16 23.0 14.0 600.00 2.0 \n",
|
||
"21 21.0 3.0 1075.00 1.0 \n",
|
||
"22 4.0 2.0 140.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"95943 2.0 2.0 0.00 1.0 \n",
|
||
"95989 2.0 2.0 77.31 2.0 \n",
|
||
"95996 3.0 3.0 273.76 2.0 \n",
|
||
"96043 2.0 2.0 136.42 1.0 \n",
|
||
"96079 2.0 2.0 0.00 1.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"9 1.0 266.012106 258.012106 \n",
|
||
"16 1.0 453.423519 15.225949 \n",
|
||
"21 0.0 431.817072 230.432350 \n",
|
||
"22 0.0 277.254745 12.438877 \n",
|
||
"... ... ... ... \n",
|
||
"95943 0.0 545.673137 362.284745 \n",
|
||
"95989 1.0 271.676632 6.289577 \n",
|
||
"95996 1.0 202.691389 7.142274 \n",
|
||
"96043 1.0 363.119815 173.225752 \n",
|
||
"96079 0.0 355.131933 355.128542 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female gender_male \\\n",
|
||
"4 0.0 True False 1 0 \n",
|
||
"9 3.0 True False 0 0 \n",
|
||
"16 1.0 True False 0 1 \n",
|
||
"21 0.0 True True 0 1 \n",
|
||
"22 0.0 True True 0 1 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"95943 0.0 True False 1 0 \n",
|
||
"95989 1.0 True False 1 0 \n",
|
||
"95996 2.0 True False 0 1 \n",
|
||
"96043 2.0 True False 0 1 \n",
|
||
"96079 0.0 True False 1 0 \n",
|
||
"\n",
|
||
" nb_campaigns nb_campaigns_opened score quartile has_purchased \\\n",
|
||
"4 0.0 0.0 0.911844 4 1.0 \n",
|
||
"9 0.0 0.0 0.581348 3 0.0 \n",
|
||
"16 0.0 0.0 0.970052 4 1.0 \n",
|
||
"21 0.0 0.0 0.599176 3 0.0 \n",
|
||
"22 0.0 0.0 0.707939 3 1.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"95943 46.0 9.0 0.707163 3 0.0 \n",
|
||
"95989 21.0 6.0 0.937049 4 0.0 \n",
|
||
"95996 25.0 3.0 0.916446 4 1.0 \n",
|
||
"96043 35.0 29.0 0.866743 4 1.0 \n",
|
||
"96079 50.0 42.0 0.720674 3 0.0 \n",
|
||
"\n",
|
||
" consumption_lifetime avg_purchase_delay \n",
|
||
"4 363.061678 181.530839 \n",
|
||
"9 8.000000 2.666667 \n",
|
||
"16 438.197569 31.299826 \n",
|
||
"21 201.384722 67.128241 \n",
|
||
"22 264.815868 132.407934 \n",
|
||
"... ... ... \n",
|
||
"95943 183.388391 91.694196 \n",
|
||
"95989 265.387055 132.693527 \n",
|
||
"95996 195.549115 65.183038 \n",
|
||
"96043 189.894062 94.947031 \n",
|
||
"96079 0.003391 0.001696 \n",
|
||
"\n",
|
||
"[14734 rows x 19 columns]"
|
||
]
|
||
},
|
||
"execution_count": 150,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[X_test[\"avg_purchase_delay\"]>0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 157,
|
||
"id": "55db2f02-37af-4809-a048-2528b7163f31",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2.000000</td>\n",
|
||
" <td>0.597093</td>\n",
|
||
" <td>0.298547</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2.592668</td>\n",
|
||
" <td>26.192927</td>\n",
|
||
" <td>11.435486</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3.203764</td>\n",
|
||
" <td>64.785322</td>\n",
|
||
" <td>25.490483</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12.041836</td>\n",
|
||
" <td>306.126700</td>\n",
|
||
" <td>68.659817</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_purchases consumption_lifetime avg_purchase_delay\n",
|
||
"quartile \n",
|
||
"1 2.000000 0.597093 0.298547\n",
|
||
"2 2.592668 26.192927 11.435486\n",
|
||
"3 3.203764 64.785322 25.490483\n",
|
||
"4 12.041836 306.126700 68.659817"
|
||
]
|
||
},
|
||
"execution_count": 157,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[X_test[\"avg_purchase_delay\"]>0].groupby(\"quartile\")[[\"nb_purchases\", \"consumption_lifetime\", \"avg_purchase_delay\"]].mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 156,
|
||
"id": "36c1d35d-3b51-4ddc-bcb7-a3ee2896167c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"nb_tickets 0\n",
|
||
"nb_purchases 0\n",
|
||
"total_amount 0\n",
|
||
"nb_suppliers 0\n",
|
||
"vente_internet_max 0\n",
|
||
"purchase_date_min 0\n",
|
||
"purchase_date_max 0\n",
|
||
"nb_tickets_internet 0\n",
|
||
"is_email_true 0\n",
|
||
"opt_in 0\n",
|
||
"gender_female 0\n",
|
||
"gender_male 0\n",
|
||
"nb_campaigns 0\n",
|
||
"nb_campaigns_opened 0\n",
|
||
"score 0\n",
|
||
"quartile 0\n",
|
||
"has_purchased 0\n",
|
||
"consumption_lifetime 0\n",
|
||
"avg_purchase_delay 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 156,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[X_test[\"avg_purchase_delay\"]>0].isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "1336c25b-1cf2-4041-b741-7c8c841fe1d2",
|
||
"metadata": {},
|
||
"source": [
|
||
"Étude du biais de surestimation"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 250,
|
||
"id": "9242f53b-1786-4a94-9d93-cb46d70d5fa6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 96096.000000\n",
|
||
"mean 3.362075\n",
|
||
"std 2.425080\n",
|
||
"min 1.000000\n",
|
||
"25% 1.000000\n",
|
||
"50% 2.000000\n",
|
||
"75% 5.000000\n",
|
||
"max 10.000000\n",
|
||
"Name: score, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 250,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"((10 * X_test[\"score\"]).astype(int)).describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 255,
|
||
"id": "22c2b1f6-0506-429e-af8c-3b1b5e05ff80",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"quartile\n",
|
||
"1 6.319295\n",
|
||
"2 3.071979\n",
|
||
"3 2.994212\n",
|
||
"4 1.353321\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 255,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# le biais de surestimation décroit avec le score \n",
|
||
"X_test.groupby(\"quartile\")[\"score\"].mean() / X_test.groupby(\"quartile\")[\"has_purchased\"].mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 275,
|
||
"id": "ba363bf9-3169-4c89-a383-c2703436ff49",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" <th>decile</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.695913</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.294297</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.244205</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.279592</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.649026</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.696135</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.299103</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.911844</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>363.061678</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" <td>239.346574</td>\n",
|
||
" <td>10.678285</td>\n",
|
||
" <td>14.079210</td>\n",
|
||
" <td>9</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.584680</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>0.654520</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.116503</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.579827</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.254002</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 23 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... nb_campaigns_opened \\\n",
|
||
"0 0.0 True False ... 0.0 \n",
|
||
"1 0.0 True True ... 0.0 \n",
|
||
"2 0.0 True True ... 0.0 \n",
|
||
"3 0.0 True False ... 0.0 \n",
|
||
"4 0.0 True False ... 0.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 5.0 \n",
|
||
"96092 1.0 True False ... 9.0 \n",
|
||
"96093 0.0 True True ... 3.0 \n",
|
||
"96094 1.0 True False ... 4.0 \n",
|
||
"96095 0.0 True False ... 4.0 \n",
|
||
"\n",
|
||
" score quartile has_purchased consumption_lifetime \\\n",
|
||
"0 0.695913 3 0.0 0.000000 \n",
|
||
"1 0.244205 1 1.0 0.000000 \n",
|
||
"2 0.279592 2 0.0 0.000000 \n",
|
||
"3 0.696135 3 0.0 0.000000 \n",
|
||
"4 0.911844 4 1.0 363.061678 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 0.584680 3 1.0 0.000000 \n",
|
||
"96092 0.654520 3 0.0 0.000000 \n",
|
||
"96093 0.116503 1 0.0 0.000000 \n",
|
||
"96094 0.579827 3 0.0 0.000000 \n",
|
||
"96095 0.254002 2 0.0 0.000000 \n",
|
||
"\n",
|
||
" avg_purchase_delay avg_purchase_delay_all avg_tickets_delay \\\n",
|
||
"0 0.000000 5.177187 0.000000 \n",
|
||
"1 0.000000 426.265613 0.000000 \n",
|
||
"2 0.000000 436.033437 0.000000 \n",
|
||
"3 0.000000 5.196412 0.000000 \n",
|
||
"4 181.530839 239.346574 10.678285 \n",
|
||
"... ... ... ... \n",
|
||
"96091 0.000000 278.442257 0.000000 \n",
|
||
"96092 0.000000 189.207373 0.000000 \n",
|
||
"96093 NaN 0.000000 NaN \n",
|
||
"96094 0.000000 279.312905 0.000000 \n",
|
||
"96095 NaN 0.000000 NaN \n",
|
||
"\n",
|
||
" avg_tickets_delay_all decile \n",
|
||
"0 1.294297 6 \n",
|
||
"1 426.265613 2 \n",
|
||
"2 25.649026 2 \n",
|
||
"3 1.299103 6 \n",
|
||
"4 14.079210 9 \n",
|
||
"... ... ... \n",
|
||
"96091 278.442257 5 \n",
|
||
"96092 189.207373 6 \n",
|
||
"96093 0.000000 1 \n",
|
||
"96094 279.312905 5 \n",
|
||
"96095 0.000000 2 \n",
|
||
"\n",
|
||
"[96096 rows x 23 columns]"
|
||
]
|
||
},
|
||
"execution_count": 275,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Use a finer split: deciles (score bins of width 0.1)\n",
|
||
"\n",
|
||
"# A score of exactly 1.0 falls in bin 10, so cap it into the top bin (9)\n",
|
||
"X_test[\"decile\"] = (10 * X_test[\"score\"]).astype(int).clip(upper=9)\n",
|
||
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 276,
|
||
"id": "b8db5044-74b1-423b-b12f-798606674bfe",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"decile\n",
|
||
"1 17.863019\n",
|
||
"2 3.826401\n",
|
||
"3 3.179880\n",
|
||
"4 3.392496\n",
|
||
"5 3.260982\n",
|
||
"6 3.294104\n",
|
||
"7 1.850487\n",
|
||
"8 1.489675\n",
|
||
"9 1.268598\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 276,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Ratio of mean predicted score to observed purchase rate, per decile (> 1 means overestimation)\n",
|
||
"X_test.groupby(\"decile\")[[\"score\", \"has_purchased\"]].mean().pipe(lambda means: means[\"score\"] / means[\"has_purchased\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 277,
|
||
"id": "48a5b42e-fabf-44ae-ac88-fcb5a04d5d4f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0.006422122322541649"
|
||
]
|
||
},
|
||
"execution_count": 277,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# For scores between 0.1 and 0.2 the observed purchase rate is about 0.6%: the score largely overestimates it\n",
|
||
"X_test.loc[X_test[\"decile\"] == 1, \"has_purchased\"].mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 284,
|
||
"id": "1091028b-0d07-4cfd-9081-696e289c29de",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" <th>decile</th>\n",
|
||
" <th>overshoot_coeff</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.695913</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.294297</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.244205</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.279592</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.649026</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.696135</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.299103</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.911844</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>363.061678</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" <td>239.346574</td>\n",
|
||
" <td>10.678285</td>\n",
|
||
" <td>14.079210</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1.268598</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.584680</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.654520</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.116503</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17.863019</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.579827</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.254002</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 24 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... score quartile \\\n",
|
||
"0 0.0 True False ... 0.695913 3 \n",
|
||
"1 0.0 True True ... 0.244205 1 \n",
|
||
"2 0.0 True True ... 0.279592 2 \n",
|
||
"3 0.0 True False ... 0.696135 3 \n",
|
||
"4 0.0 True False ... 0.911844 4 \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 0.584680 3 \n",
|
||
"96092 1.0 True False ... 0.654520 3 \n",
|
||
"96093 0.0 True True ... 0.116503 1 \n",
|
||
"96094 1.0 True False ... 0.579827 3 \n",
|
||
"96095 0.0 True False ... 0.254002 2 \n",
|
||
"\n",
|
||
" has_purchased consumption_lifetime avg_purchase_delay \\\n",
|
||
"0 0.0 0.000000 0.000000 \n",
|
||
"1 1.0 0.000000 0.000000 \n",
|
||
"2 0.0 0.000000 0.000000 \n",
|
||
"3 0.0 0.000000 0.000000 \n",
|
||
"4 1.0 363.061678 181.530839 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 0.000000 0.000000 \n",
|
||
"96092 0.0 0.000000 0.000000 \n",
|
||
"96093 0.0 0.000000 NaN \n",
|
||
"96094 0.0 0.000000 0.000000 \n",
|
||
"96095 0.0 0.000000 NaN \n",
|
||
"\n",
|
||
" avg_purchase_delay_all avg_tickets_delay avg_tickets_delay_all \\\n",
|
||
"0 5.177187 0.000000 1.294297 \n",
|
||
"1 426.265613 0.000000 426.265613 \n",
|
||
"2 436.033437 0.000000 25.649026 \n",
|
||
"3 5.196412 0.000000 1.299103 \n",
|
||
"4 239.346574 10.678285 14.079210 \n",
|
||
"... ... ... ... \n",
|
||
"96091 278.442257 0.000000 278.442257 \n",
|
||
"96092 189.207373 0.000000 189.207373 \n",
|
||
"96093 0.000000 NaN 0.000000 \n",
|
||
"96094 279.312905 0.000000 279.312905 \n",
|
||
"96095 0.000000 NaN 0.000000 \n",
|
||
"\n",
|
||
" decile overshoot_coeff \n",
|
||
"0 6 3.294104 \n",
|
||
"1 2 3.826401 \n",
|
||
"2 2 3.826401 \n",
|
||
"3 6 3.294104 \n",
|
||
"4 9 1.268598 \n",
|
||
"... ... ... \n",
|
||
"96091 5 3.260982 \n",
|
||
"96092 6 3.294104 \n",
|
||
"96093 1 17.863019 \n",
|
||
"96094 5 3.260982 \n",
|
||
"96095 2 3.826401 \n",
|
||
"\n",
|
||
"[96096 rows x 24 columns]"
|
||
]
|
||
},
|
||
"execution_count": 284,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Attach to every row the average overestimation factor of its score decile\n",
|
||
"\n",
|
||
"# dictionary: decile -> mean predicted score / observed purchase rate\n",
|
||
"mapping_score_overshoot = dict(X_test.groupby(\"decile\")[[\"score\", \"has_purchased\"]].mean().pipe(lambda means: means[\"score\"] / means[\"has_purchased\"]))\n",
|
||
"X_test[\"overshoot_coeff\"] = X_test[\"decile\"].map(mapping_score_overshoot)\n",
|
||
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 285,
|
||
"id": "4892d585-c80e-472c-b2bc-dc441255a36d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" <th>decile</th>\n",
|
||
" <th>overshoot_coeff</th>\n",
|
||
" <th>ajusted_score</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.294297</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211260</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.063821</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.649026</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.073069</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.299103</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211328</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>363.061678</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" <td>239.346574</td>\n",
|
||
" <td>10.678285</td>\n",
|
||
" <td>14.079210</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1.268598</td>\n",
|
||
" <td>0.718781</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.179296</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.198694</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17.863019</td>\n",
|
||
" <td>0.006522</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.177808</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.066382</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 25 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... quartile \\\n",
|
||
"0 0.0 True False ... 3 \n",
|
||
"1 0.0 True True ... 1 \n",
|
||
"2 0.0 True True ... 2 \n",
|
||
"3 0.0 True False ... 3 \n",
|
||
"4 0.0 True False ... 4 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 3 \n",
|
||
"96092 1.0 True False ... 3 \n",
|
||
"96093 0.0 True True ... 1 \n",
|
||
"96094 1.0 True False ... 3 \n",
|
||
"96095 0.0 True False ... 2 \n",
|
||
"\n",
|
||
" has_purchased consumption_lifetime avg_purchase_delay \\\n",
|
||
"0 0.0 0.000000 0.000000 \n",
|
||
"1 1.0 0.000000 0.000000 \n",
|
||
"2 0.0 0.000000 0.000000 \n",
|
||
"3 0.0 0.000000 0.000000 \n",
|
||
"4 1.0 363.061678 181.530839 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 0.000000 0.000000 \n",
|
||
"96092 0.0 0.000000 0.000000 \n",
|
||
"96093 0.0 0.000000 NaN \n",
|
||
"96094 0.0 0.000000 0.000000 \n",
|
||
"96095 0.0 0.000000 NaN \n",
|
||
"\n",
|
||
" avg_purchase_delay_all avg_tickets_delay avg_tickets_delay_all \\\n",
|
||
"0 5.177187 0.000000 1.294297 \n",
|
||
"1 426.265613 0.000000 426.265613 \n",
|
||
"2 436.033437 0.000000 25.649026 \n",
|
||
"3 5.196412 0.000000 1.299103 \n",
|
||
"4 239.346574 10.678285 14.079210 \n",
|
||
"... ... ... ... \n",
|
||
"96091 278.442257 0.000000 278.442257 \n",
|
||
"96092 189.207373 0.000000 189.207373 \n",
|
||
"96093 0.000000 NaN 0.000000 \n",
|
||
"96094 279.312905 0.000000 279.312905 \n",
|
||
"96095 0.000000 NaN 0.000000 \n",
|
||
"\n",
|
||
" decile overshoot_coeff ajusted_score \n",
|
||
"0 6 3.294104 0.211260 \n",
|
||
"1 2 3.826401 0.063821 \n",
|
||
"2 2 3.826401 0.073069 \n",
|
||
"3 6 3.294104 0.211328 \n",
|
||
"4 9 1.268598 0.718781 \n",
|
||
"... ... ... ... \n",
|
||
"96091 5 3.260982 0.179296 \n",
|
||
"96092 6 3.294104 0.198694 \n",
|
||
"96093 1 17.863019 0.006522 \n",
|
||
"96094 5 3.260982 0.177808 \n",
|
||
"96095 2 3.826401 0.066382 \n",
|
||
"\n",
|
||
"[96096 rows x 25 columns]"
|
||
]
|
||
},
|
||
"execution_count": 285,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Deflate each raw score by the overshoot coefficient of its decile\n",
|
||
"X_test[\"ajusted_score\"] = X_test[\"score\"].div(X_test[\"overshoot_coeff\"])\n",
|
||
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 788,
|
||
"id": "8332e5c3-32ee-4492-91ee-0e49a15f94a1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MSE for score : 0.15637498623391197\n",
|
||
"MSE for adjusted score : 0.08877832832116543\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Mean squared error between the estimated purchase probability and the observed outcome\n",
|
||
"# NOTE(review): this cell used to read 'score_adjusted', which the preceding cells do not appear to create;\n",
|
||
"# it now reads 'ajusted_score' built in the previous cell - re-run to refresh the stored figures\n",
|
||
"\n",
|
||
"MSE_score = ((X_test[\"score\"] - X_test[\"has_purchased\"]) ** 2).mean()\n",
|
||
"MSE_ajusted_score = ((X_test[\"ajusted_score\"] - X_test[\"has_purchased\"]) ** 2).mean()\n",
|
||
"print(f\"MSE for score : {MSE_score}\")\n",
|
||
"print(f\"MSE for adjusted score : {MSE_ajusted_score}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 787,
|
||
"id": "89b41b80-c12a-46be-a7d1-59f4f63482e3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MAE for score : 0.32574831037767815\n",
|
||
"MAE for adjusted score : 0.17556035724742763\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Mean absolute error between the estimated purchase probability and the observed outcome\n",
|
||
"# NOTE(review): this cell used to read 'score_adjusted', which the preceding cells do not appear to create;\n",
|
||
"# it now reads 'ajusted_score' built by the decile adjustment - re-run to refresh the stored figures\n",
|
||
"\n",
|
||
"MAE_score = (X_test[\"score\"] - X_test[\"has_purchased\"]).abs().mean()\n",
|
||
"MAE_ajusted_score = (X_test[\"ajusted_score\"] - X_test[\"has_purchased\"]).abs().mean()\n",
|
||
"print(f\"MAE for score : {MAE_score}\")\n",
|
||
"print(f\"MAE for adjusted score : {MAE_ajusted_score}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "15f49d36-da8c-4c08-977e-8de4e438ed61",
|
||
"metadata": {},
|
||
"source": [
|
||
"New method to adjust - best way to fit the logit model"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 317,
|
||
"id": "9e2e1f4c-d9dc-495a-9604-4009f1e4c53f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"somme des scores : 36092.22480054577\n",
|
||
"nombre d'achats : y_has_purchased 13690.0\n",
|
||
"dtype: float64\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Overall, the purchase probability is largely overestimated and needs to be corrected\n",
|
||
"score_sum = X_test[\"score\"].sum()\n",
|
||
"purchase_count = y_test.sum()\n",
|
||
"print(\"somme des scores :\", score_sum)\n",
|
||
"print(\"nombre d'achats : \", purchase_count)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 311,
|
||
"id": "1573b9fd-c1be-4f9e-94a5-471ad6cb0726",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"36092.22480054577"
|
||
]
|
||
},
|
||
"execution_count": 311,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 1. compute the bias\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 375,
|
||
"id": "5d6d5101-95ce-4137-8349-0e3c6321bc84",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" <th>decile</th>\n",
|
||
" <th>overshoot_coeff</th>\n",
|
||
" <th>ajusted_score</th>\n",
|
||
" <th>odd_ratio</th>\n",
|
||
" <th>test_adjusted_score_2</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.294297</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211260</td>\n",
|
||
" <td>2.288530</td>\n",
|
||
" <td>0.533640</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.063821</td>\n",
|
||
" <td>0.323109</td>\n",
|
||
" <td>0.139085</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.649026</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.073069</td>\n",
|
||
" <td>0.388102</td>\n",
|
||
" <td>0.162515</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.299103</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211328</td>\n",
|
||
" <td>2.290940</td>\n",
|
||
" <td>0.533902</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>363.061678</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" <td>239.346574</td>\n",
|
||
" <td>10.678285</td>\n",
|
||
" <td>14.079210</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1.268598</td>\n",
|
||
" <td>0.718781</td>\n",
|
||
" <td>10.343538</td>\n",
|
||
" <td>0.837972</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.179296</td>\n",
|
||
" <td>1.407779</td>\n",
|
||
" <td>0.413108</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.198694</td>\n",
|
||
" <td>1.894523</td>\n",
|
||
" <td>0.486458</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17.863019</td>\n",
|
||
" <td>0.006522</td>\n",
|
||
" <td>0.131865</td>\n",
|
||
" <td>0.061854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.177808</td>\n",
|
||
" <td>1.379973</td>\n",
|
||
" <td>0.408279</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.066382</td>\n",
|
||
" <td>0.340487</td>\n",
|
||
" <td>0.145477</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 27 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... consumption_lifetime \\\n",
|
||
"0 0.0 True False ... 0.000000 \n",
|
||
"1 0.0 True True ... 0.000000 \n",
|
||
"2 0.0 True True ... 0.000000 \n",
|
||
"3 0.0 True False ... 0.000000 \n",
|
||
"4 0.0 True False ... 363.061678 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 0.000000 \n",
|
||
"96092 1.0 True False ... 0.000000 \n",
|
||
"96093 0.0 True True ... 0.000000 \n",
|
||
"96094 1.0 True False ... 0.000000 \n",
|
||
"96095 0.0 True False ... 0.000000 \n",
|
||
"\n",
|
||
" avg_purchase_delay avg_purchase_delay_all avg_tickets_delay \\\n",
|
||
"0 0.000000 5.177187 0.000000 \n",
|
||
"1 0.000000 426.265613 0.000000 \n",
|
||
"2 0.000000 436.033437 0.000000 \n",
|
||
"3 0.000000 5.196412 0.000000 \n",
|
||
"4 181.530839 239.346574 10.678285 \n",
|
||
"... ... ... ... \n",
|
||
"96091 0.000000 278.442257 0.000000 \n",
|
||
"96092 0.000000 189.207373 0.000000 \n",
|
||
"96093 NaN 0.000000 NaN \n",
|
||
"96094 0.000000 279.312905 0.000000 \n",
|
||
"96095 NaN 0.000000 NaN \n",
|
||
"\n",
|
||
" avg_tickets_delay_all decile overshoot_coeff ajusted_score \\\n",
|
||
"0 1.294297 6 3.294104 0.211260 \n",
|
||
"1 426.265613 2 3.826401 0.063821 \n",
|
||
"2 25.649026 2 3.826401 0.073069 \n",
|
||
"3 1.299103 6 3.294104 0.211328 \n",
|
||
"4 14.079210 9 1.268598 0.718781 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 278.442257 5 3.260982 0.179296 \n",
|
||
"96092 189.207373 6 3.294104 0.198694 \n",
|
||
"96093 0.000000 1 17.863019 0.006522 \n",
|
||
"96094 279.312905 5 3.260982 0.177808 \n",
|
||
"96095 0.000000 2 3.826401 0.066382 \n",
|
||
"\n",
|
||
" odd_ratio test_adjusted_score_2 \n",
|
||
"0 2.288530 0.533640 \n",
|
||
"1 0.323109 0.139085 \n",
|
||
"2 0.388102 0.162515 \n",
|
||
"3 2.290940 0.533902 \n",
|
||
"4 10.343538 0.837972 \n",
|
||
"... ... ... \n",
|
||
"96091 1.407779 0.413108 \n",
|
||
"96092 1.894523 0.486458 \n",
|
||
"96093 0.131865 0.061854 \n",
|
||
"96094 1.379973 0.408279 \n",
|
||
"96095 0.340487 0.145477 \n",
|
||
"\n",
|
||
"[96096 rows x 27 columns]"
|
||
]
|
||
},
|
||
"execution_count": 375,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# First issue: some scores are exactly 1, so the odds p / (1 - p) cannot be computed\n",
"# (division by zero). Cap those scores just below 1, at 0.9999999999999996, so the\n",
"# odds stay finite. Every other score is left untouched.\n",
"\n",
"X_test[\"score\"] = X_test[\"score\"].mask(X_test[\"score\"] == 1, 0.9999999999999996)\n",
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 377,
|
||
"id": "8a29f835-8e4f-45e9-9c91-e019f56fee5e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" <th>decile</th>\n",
|
||
" <th>overshoot_coeff</th>\n",
|
||
" <th>ajusted_score</th>\n",
|
||
" <th>odd_ratio</th>\n",
|
||
" <th>test_adjusted_score_2</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.294297</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211260</td>\n",
|
||
" <td>2.288530</td>\n",
|
||
" <td>0.533640</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.063821</td>\n",
|
||
" <td>0.323109</td>\n",
|
||
" <td>0.139085</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.649026</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.073069</td>\n",
|
||
" <td>0.388102</td>\n",
|
||
" <td>0.162515</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.299103</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211328</td>\n",
|
||
" <td>2.290940</td>\n",
|
||
" <td>0.533902</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>363.061678</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" <td>239.346574</td>\n",
|
||
" <td>10.678285</td>\n",
|
||
" <td>14.079210</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1.268598</td>\n",
|
||
" <td>0.718781</td>\n",
|
||
" <td>10.343538</td>\n",
|
||
" <td>0.837972</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.179296</td>\n",
|
||
" <td>1.407779</td>\n",
|
||
" <td>0.413108</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.198694</td>\n",
|
||
" <td>1.894523</td>\n",
|
||
" <td>0.486458</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17.863019</td>\n",
|
||
" <td>0.006522</td>\n",
|
||
" <td>0.131865</td>\n",
|
||
" <td>0.061854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.177808</td>\n",
|
||
" <td>1.379973</td>\n",
|
||
" <td>0.408279</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.066382</td>\n",
|
||
" <td>0.340487</td>\n",
|
||
" <td>0.145477</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 27 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... consumption_lifetime \\\n",
|
||
"0 0.0 True False ... 0.000000 \n",
|
||
"1 0.0 True True ... 0.000000 \n",
|
||
"2 0.0 True True ... 0.000000 \n",
|
||
"3 0.0 True False ... 0.000000 \n",
|
||
"4 0.0 True False ... 363.061678 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 0.000000 \n",
|
||
"96092 1.0 True False ... 0.000000 \n",
|
||
"96093 0.0 True True ... 0.000000 \n",
|
||
"96094 1.0 True False ... 0.000000 \n",
|
||
"96095 0.0 True False ... 0.000000 \n",
|
||
"\n",
|
||
" avg_purchase_delay avg_purchase_delay_all avg_tickets_delay \\\n",
|
||
"0 0.000000 5.177187 0.000000 \n",
|
||
"1 0.000000 426.265613 0.000000 \n",
|
||
"2 0.000000 436.033437 0.000000 \n",
|
||
"3 0.000000 5.196412 0.000000 \n",
|
||
"4 181.530839 239.346574 10.678285 \n",
|
||
"... ... ... ... \n",
|
||
"96091 0.000000 278.442257 0.000000 \n",
|
||
"96092 0.000000 189.207373 0.000000 \n",
|
||
"96093 NaN 0.000000 NaN \n",
|
||
"96094 0.000000 279.312905 0.000000 \n",
|
||
"96095 NaN 0.000000 NaN \n",
|
||
"\n",
|
||
" avg_tickets_delay_all decile overshoot_coeff ajusted_score \\\n",
|
||
"0 1.294297 6 3.294104 0.211260 \n",
|
||
"1 426.265613 2 3.826401 0.063821 \n",
|
||
"2 25.649026 2 3.826401 0.073069 \n",
|
||
"3 1.299103 6 3.294104 0.211328 \n",
|
||
"4 14.079210 9 1.268598 0.718781 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 278.442257 5 3.260982 0.179296 \n",
|
||
"96092 189.207373 6 3.294104 0.198694 \n",
|
||
"96093 0.000000 1 17.863019 0.006522 \n",
|
||
"96094 279.312905 5 3.260982 0.177808 \n",
|
||
"96095 0.000000 2 3.826401 0.066382 \n",
|
||
"\n",
|
||
" odd_ratio test_adjusted_score_2 \n",
|
||
"0 2.288530 0.533640 \n",
|
||
"1 0.323109 0.139085 \n",
|
||
"2 0.388102 0.162515 \n",
|
||
"3 2.290940 0.533902 \n",
|
||
"4 10.343538 0.837972 \n",
|
||
"... ... ... \n",
|
||
"96091 1.407779 0.413108 \n",
|
||
"96092 1.894523 0.486458 \n",
|
||
"96093 0.131865 0.061854 \n",
|
||
"96094 1.379973 0.408279 \n",
|
||
"96095 0.340487 0.145477 \n",
|
||
"\n",
|
||
"[96096 rows x 27 columns]"
|
||
]
|
||
},
|
||
"execution_count": 377,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Odds p / (1 - p) of each predicted score. The column is called `odd_ratio`, but it\n",
"# holds plain odds (not a ratio of two odds).\n",
"X_test[\"odd_ratio\"] = X_test[\"score\"]/(1-X_test[\"score\"])\n",
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 378,
|
||
"id": "b5971afb-a6ef-4433-9cee-13ea978b22c8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 9.609600e+04\n",
|
||
"mean 2.117164e+11\n",
|
||
"std 2.179173e+13\n",
|
||
"min 1.207494e-01\n",
|
||
"25% 1.476621e-01\n",
|
||
"50% 3.337214e-01\n",
|
||
"75% 1.430245e+00\n",
|
||
"max 2.251800e+15\n",
|
||
"Name: odd_ratio, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 378,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[\"odd_ratio\"].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 381,
|
||
"id": "e878a711-5d7d-455f-9e0f-da50961568d9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def adjusted_score(odd_ratio, bias) :\n",
|
||
" adjusted_score = odd_ratio/(bias+odd_ratio)\n",
|
||
" return adjusted_score"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 424,
|
||
"id": "bff25885-1191-432a-976c-4b466dbc0ac7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def obj_function(bias):\n",
"    \"\"\"Return the sum of the bias-adjusted scores over every row of `X_test`.\n",
"\n",
"    Applies `adjusted_score` to the `odd_ratio` column of the global `X_test`\n",
"    and adds up the results.\n",
"\n",
"    Args:\n",
"        bias: a scalar, or a length-1 array such as the one passed by\n",
"            scipy.optimize.minimize.\n",
"\n",
"    Returns:\n",
"        The total of the adjusted scores, as a Python float.\n",
"    \"\"\"\n",
"    # One vectorised NumPy expression instead of a Python loop over every row:\n",
"    # this function is evaluated many times by the optimiser.\n",
"    odds = X_test[\"odd_ratio\"].to_numpy()\n",
"    return float(adjusted_score(odds, bias).sum())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 380,
|
||
"id": "a9df55fc-e1c6-4462-9fa5-248d47f4957f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"13690.0"
|
||
]
|
||
},
|
||
"execution_count": 380,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_test.sum()[\"y_has_purchased\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 396,
|
||
"id": "ecae3be2-ddf4-4a76-940d-403a176fa8f5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"13749.42306555955"
|
||
]
|
||
},
|
||
"execution_count": 396,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# le biais optimal se trouve aux alentours de 6\n",
|
||
"sum([adjusted_score(element, 6) for element in X_test[\"odd_ratio\"]])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 411,
|
||
"id": "5698b75b-759a-4cc5-8466-c513d2ae2aa2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"36092.2248005385"
|
||
]
|
||
},
|
||
"execution_count": 411,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sum([adjusted_score(element, 1) for element in X_test[\"odd_ratio\"]])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 412,
|
||
"id": "42840b8b-0314-4b15-afb9-09a9e550a729",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"13690.0"
|
||
]
|
||
},
|
||
"execution_count": 412,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_test.sum()[\"y_has_purchased\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 425,
|
||
"id": "8a61a53c-c98b-4c76-bcfe-a4bb0f3db42a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"36092.2248005385"
|
||
]
|
||
},
|
||
"execution_count": 425,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"obj_function(1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 423,
|
||
"id": "d29623ca-c9f7-4ef7-b5ea-45b2d2f65096",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"3.9020966429798136"
|
||
]
|
||
},
|
||
"execution_count": 423,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# le biais optimal devrait se situer autour de 6.04 : l'écart ci-dessous doit donc être proche de 0\n",
|
||
"sum([adjusted_score(element, 6.04) for element in X_test[\"odd_ratio\"]]) - y_test.sum()[\"y_has_purchased\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 426,
|
||
"id": "6417f2a2-9e22-40c7-8297-2ed0b72e9b1d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# minimization\n",
"# Find the bias for which the sum of adjusted scores (obj_function) matches the\n",
"# total of y_has_purchased, by minimising the squared gap between the two.\n",
"\n",
"# NOTE(review): import placed mid-notebook; consider moving it to the imports cell.\n",
"from scipy.optimize import minimize\n",
"\n",
"\n",
"# target: total of the y_has_purchased column of y_test\n",
"y_sum = y_test.sum()[\"y_has_purchased\"]\n",
"# starting point handed to the optimiser\n",
"initial_guess = 6\n",
"# BFGS on the squared gap; the fitted bias is read afterwards from estimated_biais.x[0]\n",
"estimated_biais = minimize(lambda bias : (obj_function(bias)-y_sum)**2 ,\n",
"initial_guess , method = \"BFGS\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 430,
|
||
"id": "937606df-1730-43b6-9a95-7c626aa7a3c5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bias estimated : 6.042826489667565\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(f\"bias estimated : {estimated_biais.x[0]}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 435,
|
||
"id": "ad6ebcee-f1f6-46fc-8d9a-008762acae28",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" <th>decile</th>\n",
|
||
" <th>overshoot_coeff</th>\n",
|
||
" <th>ajusted_score</th>\n",
|
||
" <th>odd_ratio</th>\n",
|
||
" <th>test_adjusted_score_2</th>\n",
|
||
" <th>score_adjusted</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.294297</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211260</td>\n",
|
||
" <td>2.288530</td>\n",
|
||
" <td>0.533640</td>\n",
|
||
" <td>0.274689</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.063821</td>\n",
|
||
" <td>0.323109</td>\n",
|
||
" <td>0.139085</td>\n",
|
||
" <td>0.050756</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.649026</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.073069</td>\n",
|
||
" <td>0.388102</td>\n",
|
||
" <td>0.162515</td>\n",
|
||
" <td>0.060349</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.299103</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211328</td>\n",
|
||
" <td>2.290940</td>\n",
|
||
" <td>0.533902</td>\n",
|
||
" <td>0.274899</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" <td>239.346574</td>\n",
|
||
" <td>10.678285</td>\n",
|
||
" <td>14.079210</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1.268598</td>\n",
|
||
" <td>0.718781</td>\n",
|
||
" <td>10.343538</td>\n",
|
||
" <td>0.837972</td>\n",
|
||
" <td>0.631228</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.179296</td>\n",
|
||
" <td>1.407779</td>\n",
|
||
" <td>0.413108</td>\n",
|
||
" <td>0.188948</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.198694</td>\n",
|
||
" <td>1.894523</td>\n",
|
||
" <td>0.486458</td>\n",
|
||
" <td>0.238685</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17.863019</td>\n",
|
||
" <td>0.006522</td>\n",
|
||
" <td>0.131865</td>\n",
|
||
" <td>0.061854</td>\n",
|
||
" <td>0.021356</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.177808</td>\n",
|
||
" <td>1.379973</td>\n",
|
||
" <td>0.408279</td>\n",
|
||
" <td>0.185910</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.066382</td>\n",
|
||
" <td>0.340487</td>\n",
|
||
" <td>0.145477</td>\n",
|
||
" <td>0.053340</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 28 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... avg_purchase_delay \\\n",
|
||
"0 0.0 True False ... 0.000000 \n",
|
||
"1 0.0 True True ... 0.000000 \n",
|
||
"2 0.0 True True ... 0.000000 \n",
|
||
"3 0.0 True False ... 0.000000 \n",
|
||
"4 0.0 True False ... 181.530839 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 0.000000 \n",
|
||
"96092 1.0 True False ... 0.000000 \n",
|
||
"96093 0.0 True True ... NaN \n",
|
||
"96094 1.0 True False ... 0.000000 \n",
|
||
"96095 0.0 True False ... NaN \n",
|
||
"\n",
|
||
" avg_purchase_delay_all avg_tickets_delay avg_tickets_delay_all \\\n",
|
||
"0 5.177187 0.000000 1.294297 \n",
|
||
"1 426.265613 0.000000 426.265613 \n",
|
||
"2 436.033437 0.000000 25.649026 \n",
|
||
"3 5.196412 0.000000 1.299103 \n",
|
||
"4 239.346574 10.678285 14.079210 \n",
|
||
"... ... ... ... \n",
|
||
"96091 278.442257 0.000000 278.442257 \n",
|
||
"96092 189.207373 0.000000 189.207373 \n",
|
||
"96093 0.000000 NaN 0.000000 \n",
|
||
"96094 279.312905 0.000000 279.312905 \n",
|
||
"96095 0.000000 NaN 0.000000 \n",
|
||
"\n",
|
||
" decile overshoot_coeff ajusted_score odd_ratio \\\n",
|
||
"0 6 3.294104 0.211260 2.288530 \n",
|
||
"1 2 3.826401 0.063821 0.323109 \n",
|
||
"2 2 3.826401 0.073069 0.388102 \n",
|
||
"3 6 3.294104 0.211328 2.290940 \n",
|
||
"4 9 1.268598 0.718781 10.343538 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 5 3.260982 0.179296 1.407779 \n",
|
||
"96092 6 3.294104 0.198694 1.894523 \n",
|
||
"96093 1 17.863019 0.006522 0.131865 \n",
|
||
"96094 5 3.260982 0.177808 1.379973 \n",
|
||
"96095 2 3.826401 0.066382 0.340487 \n",
|
||
"\n",
|
||
" test_adjusted_score_2 score_adjusted \n",
|
||
"0 0.533640 0.274689 \n",
|
||
"1 0.139085 0.050756 \n",
|
||
"2 0.162515 0.060349 \n",
|
||
"3 0.533902 0.274899 \n",
|
||
"4 0.837972 0.631228 \n",
|
||
"... ... ... \n",
|
||
"96091 0.413108 0.188948 \n",
|
||
"96092 0.486458 0.238685 \n",
|
||
"96093 0.061854 0.021356 \n",
|
||
"96094 0.408279 0.185910 \n",
|
||
"96095 0.145477 0.053340 \n",
|
||
"\n",
|
||
"[96096 rows x 28 columns]"
|
||
]
|
||
},
|
||
"execution_count": 435,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# on corrige les scores trouvés en fonction du biais estimé\n",
|
||
"\n",
|
||
"X_test[\"score_adjusted\"] = adjusted_score(X_test[\"odd_ratio\"], bias=estimated_biais.x[0])\n",
|
||
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 436,
|
||
"id": "2934bfff-23ac-4c4e-8fe6-2087afac1e0f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" <th>decile</th>\n",
|
||
" <th>overshoot_coeff</th>\n",
|
||
" <th>ajusted_score</th>\n",
|
||
" <th>odd_ratio</th>\n",
|
||
" <th>test_adjusted_score_2</th>\n",
|
||
" <th>score_adjusted</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.294297</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211260</td>\n",
|
||
" <td>2.288530</td>\n",
|
||
" <td>0.533640</td>\n",
|
||
" <td>0.274689</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.063821</td>\n",
|
||
" <td>0.323109</td>\n",
|
||
" <td>0.139085</td>\n",
|
||
" <td>0.050756</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.649026</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.073069</td>\n",
|
||
" <td>0.388102</td>\n",
|
||
" <td>0.162515</td>\n",
|
||
" <td>0.060349</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.299103</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211328</td>\n",
|
||
" <td>2.290940</td>\n",
|
||
" <td>0.533902</td>\n",
|
||
" <td>0.274899</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" <td>239.346574</td>\n",
|
||
" <td>10.678285</td>\n",
|
||
" <td>14.079210</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1.268598</td>\n",
|
||
" <td>0.718781</td>\n",
|
||
" <td>10.343538</td>\n",
|
||
" <td>0.837972</td>\n",
|
||
" <td>0.631228</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.179296</td>\n",
|
||
" <td>1.407779</td>\n",
|
||
" <td>0.413108</td>\n",
|
||
" <td>0.188948</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.198694</td>\n",
|
||
" <td>1.894523</td>\n",
|
||
" <td>0.486458</td>\n",
|
||
" <td>0.238685</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17.863019</td>\n",
|
||
" <td>0.006522</td>\n",
|
||
" <td>0.131865</td>\n",
|
||
" <td>0.061854</td>\n",
|
||
" <td>0.021356</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.177808</td>\n",
|
||
" <td>1.379973</td>\n",
|
||
" <td>0.408279</td>\n",
|
||
" <td>0.185910</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.066382</td>\n",
|
||
" <td>0.340487</td>\n",
|
||
" <td>0.145477</td>\n",
|
||
" <td>0.053340</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 28 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... avg_purchase_delay \\\n",
|
||
"0 0.0 True False ... 0.000000 \n",
|
||
"1 0.0 True True ... 0.000000 \n",
|
||
"2 0.0 True True ... 0.000000 \n",
|
||
"3 0.0 True False ... 0.000000 \n",
|
||
"4 0.0 True False ... 181.530839 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 0.000000 \n",
|
||
"96092 1.0 True False ... 0.000000 \n",
|
||
"96093 0.0 True True ... NaN \n",
|
||
"96094 1.0 True False ... 0.000000 \n",
|
||
"96095 0.0 True False ... NaN \n",
|
||
"\n",
|
||
" avg_purchase_delay_all avg_tickets_delay avg_tickets_delay_all \\\n",
|
||
"0 5.177187 0.000000 1.294297 \n",
|
||
"1 426.265613 0.000000 426.265613 \n",
|
||
"2 436.033437 0.000000 25.649026 \n",
|
||
"3 5.196412 0.000000 1.299103 \n",
|
||
"4 239.346574 10.678285 14.079210 \n",
|
||
"... ... ... ... \n",
|
||
"96091 278.442257 0.000000 278.442257 \n",
|
||
"96092 189.207373 0.000000 189.207373 \n",
|
||
"96093 0.000000 NaN 0.000000 \n",
|
||
"96094 279.312905 0.000000 279.312905 \n",
|
||
"96095 0.000000 NaN 0.000000 \n",
|
||
"\n",
|
||
" decile overshoot_coeff ajusted_score odd_ratio \\\n",
|
||
"0 6 3.294104 0.211260 2.288530 \n",
|
||
"1 2 3.826401 0.063821 0.323109 \n",
|
||
"2 2 3.826401 0.073069 0.388102 \n",
|
||
"3 6 3.294104 0.211328 2.290940 \n",
|
||
"4 9 1.268598 0.718781 10.343538 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 5 3.260982 0.179296 1.407779 \n",
|
||
"96092 6 3.294104 0.198694 1.894523 \n",
|
||
"96093 1 17.863019 0.006522 0.131865 \n",
|
||
"96094 5 3.260982 0.177808 1.379973 \n",
|
||
"96095 2 3.826401 0.066382 0.340487 \n",
|
||
"\n",
|
||
" test_adjusted_score_2 score_adjusted \n",
|
||
"0 0.533640 0.274689 \n",
|
||
"1 0.139085 0.050756 \n",
|
||
"2 0.162515 0.060349 \n",
|
||
"3 0.533902 0.274899 \n",
|
||
"4 0.837972 0.631228 \n",
|
||
"... ... ... \n",
|
||
"96091 0.413108 0.188948 \n",
|
||
"96092 0.486458 0.238685 \n",
|
||
"96093 0.061854 0.021356 \n",
|
||
"96094 0.408279 0.185910 \n",
|
||
"96095 0.145477 0.053340 \n",
|
||
"\n",
|
||
"[96096 rows x 28 columns]"
|
||
]
|
||
},
|
||
"execution_count": 436,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 549,
|
||
"id": "0dadc6f7-9c49-4188-9ae4-8b9c84770cf6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# histogramme des probas et des probas ajustées\n",
|
||
"\n",
|
||
"plt.hist(X_test[\"score\"], label = \"score\", alpha=0.5)\n",
|
||
"plt.hist(X_test[\"score_adjusted\"], label=\"adjusted score\", alpha=0.5)\n",
|
||
"plt.legend()\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 557,
|
||
"id": "646a8e9b-99dc-4e06-ab5a-42b21de6917b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"0.32260447885447885\n",
|
||
"0.06268731268731269\n",
|
||
"0.14246170496170496\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# après ajustement, la part des scores supérieurs à 0,5 passe de 32 % à 6 % (taux d'achat observé : 14 %)\n",
|
||
"\n",
|
||
"print((X_test[\"score\"]>0.5).mean())\n",
|
||
"print((X_test[\"score_adjusted\"]>0.5).mean())\n",
|
||
"print(y_test.mean()[\"y_has_purchased\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 437,
|
||
"id": "3a60fa17-c960-4702-baa1-a7dc6cd227b0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"nombre de clients ayant acheté : 13690.0\n",
|
||
"somme des scores ajustés : 13690.000010280266\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# on vérifie que la somme des scores ajustés est bien égale au nombre de clients ayant acheté\n",
|
||
"\n",
|
||
"print(\"nombre de clients ayant acheté :\",y_sum)\n",
|
||
"print(\"somme des scores ajustés :\", X_test[\"score_adjusted\"].sum())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 440,
|
||
"id": "3a7479a5-b6a3-47a2-8f78-4259746498f1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MSE for score : 0.15637498623391197\n",
|
||
"MSE for ajusted score : 0.08877832832116543\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# cet ajustement permet en outre de réduire nettement le MSE (de 0,156 à 0,089)\n",
|
||
"\n",
|
||
"MSE_score = ((X_test[\"score\"]-X_test[\"has_purchased\"])**2).mean()\n",
|
||
"MSE_ajusted_score = ((X_test[\"score_adjusted\"]-X_test[\"has_purchased\"])**2).mean()\n",
|
||
"print(f\"MSE for score : {MSE_score}\")\n",
|
||
"print(f\"MSE for ajusted score : {MSE_ajusted_score}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 518,
|
||
"id": "fd963072-26f7-4805-84db-5612a40dcafd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>score_adjusted</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0.169233</td>\n",
|
||
" <td>0.033442</td>\n",
|
||
" <td>0.026780</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>0.360811</td>\n",
|
||
" <td>0.088246</td>\n",
|
||
" <td>0.117452</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>0.626785</td>\n",
|
||
" <td>0.222962</td>\n",
|
||
" <td>0.209332</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>0.902055</td>\n",
|
||
" <td>0.652198</td>\n",
|
||
" <td>0.666549</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" score score_adjusted has_purchased\n",
|
||
"quartile \n",
|
||
"1 0.169233 0.033442 0.026780\n",
|
||
"2 0.360811 0.088246 0.117452\n",
|
||
"3 0.626785 0.222962 0.209332\n",
|
||
"4 0.902055 0.652198 0.666549"
|
||
]
|
||
},
|
||
"execution_count": 518,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# par quartile, le score ajusté est beaucoup plus proche du taux d'achat moyen observé que le score brut\n",
|
||
"X_test.groupby(\"quartile\")[[\"score\",\"score_adjusted\", \"has_purchased\"]].mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "0552d1c9-7edd-44ed-9954-0bc7810ec2f3",
|
||
"metadata": {},
|
||
"source": [
|
||
"Étape suivante : on peut donc calculer le potentiel de chiffre d'affaires (CA) de chaque segment."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 473,
|
||
"id": "86f0740a-80b5-435b-a1ee-ae59d9143666",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>decile</th>\n",
|
||
" <th>overshoot_coeff</th>\n",
|
||
" <th>ajusted_score</th>\n",
|
||
" <th>odd_ratio</th>\n",
|
||
" <th>test_adjusted_score_2</th>\n",
|
||
" <th>score_adjusted</th>\n",
|
||
" <th>nb_tickets_projected</th>\n",
|
||
" <th>total_amount_projected</th>\n",
|
||
" <th>nb_tickets_expected</th>\n",
|
||
" <th>total_amount_expected</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211260</td>\n",
|
||
" <td>2.288530</td>\n",
|
||
" <td>0.533640</td>\n",
|
||
" <td>0.274689</td>\n",
|
||
" <td>2.666667</td>\n",
|
||
" <td>66.666667</td>\n",
|
||
" <td>0.732503</td>\n",
|
||
" <td>18.312587</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.063821</td>\n",
|
||
" <td>0.323109</td>\n",
|
||
" <td>0.139085</td>\n",
|
||
" <td>0.050756</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>36.666667</td>\n",
|
||
" <td>0.033837</td>\n",
|
||
" <td>1.861053</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.073069</td>\n",
|
||
" <td>0.388102</td>\n",
|
||
" <td>0.162515</td>\n",
|
||
" <td>0.060349</td>\n",
|
||
" <td>11.333333</td>\n",
|
||
" <td>53.333333</td>\n",
|
||
" <td>0.683958</td>\n",
|
||
" <td>3.218627</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211328</td>\n",
|
||
" <td>2.290940</td>\n",
|
||
" <td>0.533902</td>\n",
|
||
" <td>0.274899</td>\n",
|
||
" <td>2.666667</td>\n",
|
||
" <td>80.000000</td>\n",
|
||
" <td>0.733063</td>\n",
|
||
" <td>21.991884</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1.268598</td>\n",
|
||
" <td>0.718781</td>\n",
|
||
" <td>10.343538</td>\n",
|
||
" <td>0.837972</td>\n",
|
||
" <td>0.631228</td>\n",
|
||
" <td>22.666667</td>\n",
|
||
" <td>277.333333</td>\n",
|
||
" <td>14.307843</td>\n",
|
||
" <td>175.060667</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.179296</td>\n",
|
||
" <td>1.407779</td>\n",
|
||
" <td>0.413108</td>\n",
|
||
" <td>0.188948</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>44.873333</td>\n",
|
||
" <td>0.125966</td>\n",
|
||
" <td>8.478740</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.198694</td>\n",
|
||
" <td>1.894523</td>\n",
|
||
" <td>0.486458</td>\n",
|
||
" <td>0.238685</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>40.940000</td>\n",
|
||
" <td>0.159123</td>\n",
|
||
" <td>9.771748</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17.863019</td>\n",
|
||
" <td>0.006522</td>\n",
|
||
" <td>0.131865</td>\n",
|
||
" <td>0.061854</td>\n",
|
||
" <td>0.021356</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.177808</td>\n",
|
||
" <td>1.379973</td>\n",
|
||
" <td>0.408279</td>\n",
|
||
" <td>0.185910</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>52.953333</td>\n",
|
||
" <td>0.123940</td>\n",
|
||
" <td>9.844555</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.066382</td>\n",
|
||
" <td>0.340487</td>\n",
|
||
" <td>0.145477</td>\n",
|
||
" <td>0.053340</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 32 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... decile \\\n",
|
||
"0 0.0 True False ... 6 \n",
|
||
"1 0.0 True True ... 2 \n",
|
||
"2 0.0 True True ... 2 \n",
|
||
"3 0.0 True False ... 6 \n",
|
||
"4 0.0 True False ... 9 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 5 \n",
|
||
"96092 1.0 True False ... 6 \n",
|
||
"96093 0.0 True True ... 1 \n",
|
||
"96094 1.0 True False ... 5 \n",
|
||
"96095 0.0 True False ... 2 \n",
|
||
"\n",
|
||
" overshoot_coeff ajusted_score odd_ratio test_adjusted_score_2 \\\n",
|
||
"0 3.294104 0.211260 2.288530 0.533640 \n",
|
||
"1 3.826401 0.063821 0.323109 0.139085 \n",
|
||
"2 3.826401 0.073069 0.388102 0.162515 \n",
|
||
"3 3.294104 0.211328 2.290940 0.533902 \n",
|
||
"4 1.268598 0.718781 10.343538 0.837972 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 3.260982 0.179296 1.407779 0.413108 \n",
|
||
"96092 3.294104 0.198694 1.894523 0.486458 \n",
|
||
"96093 17.863019 0.006522 0.131865 0.061854 \n",
|
||
"96094 3.260982 0.177808 1.379973 0.408279 \n",
|
||
"96095 3.826401 0.066382 0.340487 0.145477 \n",
|
||
"\n",
|
||
" score_adjusted nb_tickets_projected total_amount_projected \\\n",
|
||
"0 0.274689 2.666667 66.666667 \n",
|
||
"1 0.050756 0.666667 36.666667 \n",
|
||
"2 0.060349 11.333333 53.333333 \n",
|
||
"3 0.274899 2.666667 80.000000 \n",
|
||
"4 0.631228 22.666667 277.333333 \n",
|
||
"... ... ... ... \n",
|
||
"96091 0.188948 0.666667 44.873333 \n",
|
||
"96092 0.238685 0.666667 40.940000 \n",
|
||
"96093 0.021356 0.000000 0.000000 \n",
|
||
"96094 0.185910 0.666667 52.953333 \n",
|
||
"96095 0.053340 0.000000 0.000000 \n",
|
||
"\n",
|
||
" nb_tickets_expected total_amount_expected \n",
|
||
"0 0.732503 18.312587 \n",
|
||
"1 0.033837 1.861053 \n",
|
||
"2 0.683958 3.218627 \n",
|
||
"3 0.733063 21.991884 \n",
|
||
"4 14.307843 175.060667 \n",
|
||
"... ... ... \n",
|
||
"96091 0.125966 8.478740 \n",
|
||
"96092 0.159123 9.771748 \n",
|
||
"96093 0.000000 0.000000 \n",
|
||
"96094 0.123940 9.844555 \n",
|
||
"96095 0.000000 0.000000 \n",
|
||
"\n",
|
||
"[96096 rows x 32 columns]"
|
||
]
|
||
},
|
||
"execution_count": 473,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# on calcule d'abord pour chaque client le nombre de tickets achetés / montant total si achat\n",
|
||
"# comme la période d'étude est d'un an et demi, sur l'année à venir on espère vendre 1.5 fois le nbre de tickets vendu\n",
|
||
"\n",
|
||
"# ensuite, on multiplie par la proba d'achat pour avoir le nombre de tickets potentiellement acheté\n",
|
||
"# et le montant total associé\n",
|
||
"\n",
|
||
"\n",
|
||
"X_test[\"nb_tickets_projected\"] = X_test[\"nb_tickets\"] / 1.5\n",
|
||
"X_test[\"total_amount_projected\"] = X_test[\"total_amount\"] / 1.5\n",
|
||
"\n",
|
||
"X_test[\"nb_tickets_expected\"] = X_test[\"score_adjusted\"] * X_test[\"nb_tickets_projected\"]\n",
|
||
"X_test[\"total_amount_expected\"] = X_test[\"score_adjusted\"] * X_test[\"total_amount_projected\"]\n",
|
||
"\n",
|
||
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 474,
|
||
"id": "c8c8eec5-27d9-41cc-b62f-66246a24f1a4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_tickets_expected</th>\n",
|
||
" <th>total_amount_expected</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0.017380</td>\n",
|
||
" <td>0.475141</td>\n",
|
||
" <td>0.000590</td>\n",
|
||
" <td>0.016112</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2.085810</td>\n",
|
||
" <td>49.701732</td>\n",
|
||
" <td>0.134566</td>\n",
|
||
" <td>3.298096</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3.118100</td>\n",
|
||
" <td>88.811284</td>\n",
|
||
" <td>0.478898</td>\n",
|
||
" <td>13.258736</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>46.046362</td>\n",
|
||
" <td>2002.607230</td>\n",
|
||
" <td>26.753314</td>\n",
|
||
" <td>1246.363503</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets total_amount nb_tickets_expected total_amount_expected\n",
|
||
"quartile \n",
|
||
"1 0.017380 0.475141 0.000590 0.016112\n",
|
||
"2 2.085810 49.701732 0.134566 3.298096\n",
|
||
"3 3.118100 88.811284 0.478898 13.258736\n",
|
||
"4 46.046362 2002.607230 26.753314 1246.363503"
|
||
]
|
||
},
|
||
"execution_count": 474,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# potentiel de CA par segment, et comparaison avec le CA passé/1.5\n",
|
||
"\n",
|
||
"X_test.groupby(\"quartile\")[[\"nb_tickets\",\"total_amount\",\"nb_tickets_expected\",\"total_amount_expected\"]].mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 519,
|
||
"id": "f7052cc7-054b-4b9d-935e-81611b1f6a61",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>554.666667</td>\n",
|
||
" <td>1.516365e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>23950.666667</td>\n",
|
||
" <td>5.707084e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>46732.000000</td>\n",
|
||
" <td>1.331044e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>261543.333333</td>\n",
|
||
" <td>1.137481e+07</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" quartile nb_tickets total_amount\n",
|
||
"0 1 554.666667 1.516365e+04\n",
|
||
"1 2 23950.666667 5.707084e+05\n",
|
||
"2 3 46732.000000 1.331044e+06\n",
|
||
"3 4 261543.333333 1.137481e+07"
|
||
]
|
||
},
|
||
"execution_count": 519,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_expected_CA = (X_test.groupby(\"quartile\")[[\"nb_tickets\",\"total_amount\"]].sum()/1.5).reset_index()\n",
|
||
"df_expected_CA"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 520,
|
||
"id": "655c499e-29d2-4811-bba2-e4184bc123e5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df_expected_CA[[\"nb_tickets_expected\",\"total_amount_expected\"]] = (X_test.groupby(\"quartile\")[[\"nb_tickets_expected\",\"total_amount_expected\"]].sum()).reset_index()[[\"nb_tickets_expected\", \"total_amount_expected\"]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 521,
|
||
"id": "917891a5-8906-4c19-96ff-5160fb437a86",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_tickets_expected</th>\n",
|
||
" <th>total_amount_expected</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>554.666667</td>\n",
|
||
" <td>1.516365e+04</td>\n",
|
||
" <td>28.262185</td>\n",
|
||
" <td>7.713112e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>23950.666667</td>\n",
|
||
" <td>5.707084e+05</td>\n",
|
||
" <td>2317.763439</td>\n",
|
||
" <td>5.680641e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>46732.000000</td>\n",
|
||
" <td>1.331044e+06</td>\n",
|
||
" <td>10766.103277</td>\n",
|
||
" <td>2.980696e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>261543.333333</td>\n",
|
||
" <td>1.137481e+07</td>\n",
|
||
" <td>227938.234982</td>\n",
|
||
" <td>1.061902e+07</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" quartile nb_tickets total_amount nb_tickets_expected \\\n",
|
||
"0 1 554.666667 1.516365e+04 28.262185 \n",
|
||
"1 2 23950.666667 5.707084e+05 2317.763439 \n",
|
||
"2 3 46732.000000 1.331044e+06 10766.103277 \n",
|
||
"3 4 261543.333333 1.137481e+07 227938.234982 \n",
|
||
"\n",
|
||
" total_amount_expected \n",
|
||
"0 7.713112e+02 \n",
|
||
"1 5.680641e+04 \n",
|
||
"2 2.980696e+05 \n",
|
||
"3 1.061902e+07 "
|
||
]
|
||
},
|
||
"execution_count": 521,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_expected_CA"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 532,
|
||
"id": "6b90ea7d-37be-49e4-b0c2-b38a37058e24",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# add number of customers of each segment\n",
|
||
"df_expected_CA.insert(1, \"size\", X_test.groupby(\"quartile\").size().values)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 535,
|
||
"id": "7efab307-0a98-4049-afe6-b292fa3c4036",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>size</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_tickets_expected</th>\n",
|
||
" <th>total_amount_expected</th>\n",
|
||
" <th>total_amount_recovered</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>47871</td>\n",
|
||
" <td>554.666667</td>\n",
|
||
" <td>1.516365e+04</td>\n",
|
||
" <td>28.262185</td>\n",
|
||
" <td>7.713112e+02</td>\n",
|
||
" <td>0.050866</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>17224</td>\n",
|
||
" <td>23950.666667</td>\n",
|
||
" <td>5.707084e+05</td>\n",
|
||
" <td>2317.763439</td>\n",
|
||
" <td>5.680641e+04</td>\n",
|
||
" <td>0.099537</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>22481</td>\n",
|
||
" <td>46732.000000</td>\n",
|
||
" <td>1.331044e+06</td>\n",
|
||
" <td>10766.103277</td>\n",
|
||
" <td>2.980696e+05</td>\n",
|
||
" <td>0.223937</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>8520</td>\n",
|
||
" <td>261543.333333</td>\n",
|
||
" <td>1.137481e+07</td>\n",
|
||
" <td>227938.234982</td>\n",
|
||
" <td>1.061902e+07</td>\n",
|
||
" <td>0.933556</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" quartile size nb_tickets total_amount nb_tickets_expected \\\n",
|
||
"0 1 47871 554.666667 1.516365e+04 28.262185 \n",
|
||
"1 2 17224 23950.666667 5.707084e+05 2317.763439 \n",
|
||
"2 3 22481 46732.000000 1.331044e+06 10766.103277 \n",
|
||
"3 4 8520 261543.333333 1.137481e+07 227938.234982 \n",
|
||
"\n",
|
||
" total_amount_expected total_amount_recovered \n",
|
||
"0 7.713112e+02 0.050866 \n",
|
||
"1 5.680641e+04 0.099537 \n",
|
||
"2 2.980696e+05 0.223937 \n",
|
||
"3 1.061902e+07 0.933556 "
|
||
]
|
||
},
|
||
"execution_count": 535,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_expected_CA[\"total_amount_recovered\"] = df_expected_CA[\"total_amount_expected\"]/df_expected_CA[\"total_amount\"]\n",
|
||
"df_expected_CA"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 539,
|
||
"id": "00cc2db8-d20b-4a0b-846c-c6199c58a834",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>count</th>\n",
|
||
" <th>mean</th>\n",
|
||
" <th>std</th>\n",
|
||
" <th>min</th>\n",
|
||
" <th>25%</th>\n",
|
||
" <th>50%</th>\n",
|
||
" <th>75%</th>\n",
|
||
" <th>max</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>47871.0</td>\n",
|
||
" <td>0.033442</td>\n",
|
||
" <td>0.013951</td>\n",
|
||
" <td>0.019591</td>\n",
|
||
" <td>0.019867</td>\n",
|
||
" <td>0.023766</td>\n",
|
||
" <td>0.048136</td>\n",
|
||
" <td>0.052262</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17224.0</td>\n",
|
||
" <td>0.088246</td>\n",
|
||
" <td>0.028737</td>\n",
|
||
" <td>0.052283</td>\n",
|
||
" <td>0.060481</td>\n",
|
||
" <td>0.082054</td>\n",
|
||
" <td>0.115089</td>\n",
|
||
" <td>0.141983</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>22481.0</td>\n",
|
||
" <td>0.222962</td>\n",
|
||
" <td>0.048039</td>\n",
|
||
" <td>0.141993</td>\n",
|
||
" <td>0.183323</td>\n",
|
||
" <td>0.219550</td>\n",
|
||
" <td>0.268865</td>\n",
|
||
" <td>0.331754</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>8520.0</td>\n",
|
||
" <td>0.652198</td>\n",
|
||
" <td>0.201486</td>\n",
|
||
" <td>0.332049</td>\n",
|
||
" <td>0.473052</td>\n",
|
||
" <td>0.640295</td>\n",
|
||
" <td>0.827644</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" count mean std min 25% 50% 75% \\\n",
|
||
"quartile \n",
|
||
"1 47871.0 0.033442 0.013951 0.019591 0.019867 0.023766 0.048136 \n",
|
||
"2 17224.0 0.088246 0.028737 0.052283 0.060481 0.082054 0.115089 \n",
|
||
"3 22481.0 0.222962 0.048039 0.141993 0.183323 0.219550 0.268865 \n",
|
||
"4 8520.0 0.652198 0.201486 0.332049 0.473052 0.640295 0.827644 \n",
|
||
"\n",
|
||
" max \n",
|
||
"quartile \n",
|
||
"1 0.052262 \n",
|
||
"2 0.141983 \n",
|
||
"3 0.331754 \n",
|
||
"4 1.000000 "
|
||
]
|
||
},
|
||
"execution_count": 539,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# la part de CA recouvrée est tjs supérieure à la part de clients qui reviennent\n",
|
||
"# ça semble logique : ceux qui reviennent sont aussi ceux qui consomment le plus \n",
|
||
"# se voit srtt sur dernier quartile : on récupère 65% des clients (avec probas ajustées) mais 93% du CA \n",
|
||
"X_test.groupby(\"quartile\")[\"score_adjusted\"].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "59a0850a-c40d-472a-9361-e96840e2b046",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Study potential of each segment"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 180,
|
||
"id": "1773bac2-ab5e-4bca-bda5-aa13e36991e5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1000x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# is pace of purchase a good measure ? \n",
|
||
"# we ll compare the avg purchase delay and the purchase date max\n",
|
||
"\n",
|
||
"plt.figure(figsize = [10,6])\n",
|
||
"\n",
|
||
"plt.hist(X_test[X_test[\"avg_purchase_delay\"]>0][\"avg_purchase_delay\"], alpha = 0.5, label = \"average purchase delay\")\n",
|
||
"plt.hist(X_test[X_test[\"avg_purchase_delay\"]>0][\"purchase_date_max\"], alpha=0.5, label = \"recency of the last purchase\")\n",
|
||
"plt.legend()\n",
|
||
"plt.xlabel(\"durée (jours)\")\n",
|
||
"plt.ylabel(\"fréquence\")\n",
|
||
"plt.title(\"Distribution des délais moyen entre deux achats et de l'ancienneté du dernier achat\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 199,
|
||
"id": "3ef409fe-dcf7-4c07-9be3-28b3e8ca5546",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1000x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"plt.figure(figsize = [10,6])\n",
|
||
"\n",
|
||
"plt.hist(X_test[X_test[\"avg_purchase_delay\"]>0][\"avg_purchase_delay\"], alpha = 0.5, label = \"average purchase delay on the purchasing period\")\n",
|
||
"plt.hist(X_test[X_test[\"avg_purchase_delay\"]>0][\"purchase_date_min\"]/X_test[X_test[\"avg_purchase_delay\"]>0][\"nb_purchases\"], alpha=0.5, label = \"average purchase delay on the full period\")\n",
|
||
"plt.legend()\n",
|
||
"plt.xlabel(\"durée (jours)\")\n",
|
||
"plt.ylabel(\"fréquence\")\n",
|
||
"plt.title(\"Comparaison entre le délai-type d'achat sur la période d'achat et sur l'ensemble de la période\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "2a46a811-9169-43e2-a759-461562f4f250",
|
||
"metadata": {},
|
||
"source": [
|
||
"Il vaut mieux prendre le rythme en considérant purchase date min au dénominateur plutôt que le délai entre le \n",
|
||
"1er et le dernier achat"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 192,
|
||
"id": "fad27180-e1f2-4876-b0b8-2254c342fc36",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 1.473400e+04\n",
|
||
"mean 9.011960e+07\n",
|
||
"std 8.222514e+08\n",
|
||
"min 0.000000e+00\n",
|
||
"25% 7.194159e-01\n",
|
||
"50% 3.564579e+00\n",
|
||
"75% 2.645439e+01\n",
|
||
"max 1.996151e+10\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 192,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"(X_test[X_test[\"avg_purchase_delay\"]>0][\"purchase_date_max\"]/X_test[X_test[\"avg_purchase_delay\"]>0][\"avg_purchase_delay\"]).describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 196,
|
||
"id": "c232ced3-c9b2-4e35-b89b-c18f7c99dc7a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"plt.boxplot(X_test[X_test[\"avg_purchase_delay\"]>0][\"purchase_date_max\"]/X_test[X_test[\"avg_purchase_delay\"]>0][\"avg_purchase_delay\"], showfliers=False)\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 188,
|
||
"id": "cdc917b9-eb2e-443f-8376-9a4ec4d24074",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 14734.000000\n",
|
||
"mean 145.979256\n",
|
||
"std 123.403697\n",
|
||
"min 0.000000\n",
|
||
"25% 38.053773\n",
|
||
"50% 111.560918\n",
|
||
"75% 225.056992\n",
|
||
"max 546.378919\n",
|
||
"Name: purchase_date_max, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 188,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[X_test[\"avg_purchase_delay\"]>0][\"purchase_date_max\"].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "d386e36f-deba-43c9-8a51-eba868b39f0e",
|
||
"metadata": {},
|
||
"source": [
|
||
"Il est plus pertinent de considérer l'ensemble de la période que de couper à la date du dernier achat \\\n",
|
||
"On définit donc avg purchase delay all comme le délai moyen entre deux achats depuis que le client est \n",
|
||
"connu et jusqu'a aujourd'hui"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 202,
|
||
"id": "71b6ff7e-c48c-45b7-bc1a-70dafd11fbf1",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_test[\"avg_purchase_delay_all\"] = (X_test[\"purchase_date_min\"]/X_test[\"nb_purchases\"]).replace([np.inf, -np.inf], 0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "20c757fe-4f3a-406c-b3b9-dd12b57a474c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e65af9b9-9266-4ec5-950f-2fc2ed14140c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f0652202-f5bc-4141-a384-07afd96f146b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "7b3b3398-3ddc-41ee-b669-aea86e7f6d4e",
|
||
"metadata": {},
|
||
"source": [
|
||
"Il faut aussi étudier le nombre de tickets acheté, pas seulement le nombre d'achats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 203,
|
||
"id": "3b01367d-4fb0-46bb-90e8-307e6152e8bb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# on def avg tickets delay de façon similaire à avg purchase delay mais en utilisant plutôt nb tickets\n",
|
||
"\n",
|
||
"X_test[\"avg_tickets_delay\"] = (X_test[\"consumption_lifetime\"]/X_test[\"nb_tickets\"]).replace([np.inf, -np.inf], 0)\n",
|
||
"X_test[\"avg_tickets_delay_all\"] = (X_test[\"purchase_date_min\"]/X_test[\"nb_tickets\"]).replace([np.inf, -np.inf], 0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 204,
|
||
"id": "0eb59297-0ec2-4181-b743-0264f95a7bee",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.695913</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.294297</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.244205</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.279592</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.649026</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.696135</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.299103</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.911844</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>363.061678</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" <td>239.346574</td>\n",
|
||
" <td>10.678285</td>\n",
|
||
" <td>14.079210</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.584680</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>0.654520</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>29.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.116503</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>20.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.579827</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>31.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.254002</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 22 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... nb_campaigns \\\n",
|
||
"0 0.0 True False ... 0.0 \n",
|
||
"1 0.0 True True ... 0.0 \n",
|
||
"2 0.0 True True ... 0.0 \n",
|
||
"3 0.0 True False ... 0.0 \n",
|
||
"4 0.0 True False ... 0.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 15.0 \n",
|
||
"96092 1.0 True False ... 12.0 \n",
|
||
"96093 0.0 True True ... 29.0 \n",
|
||
"96094 1.0 True False ... 20.0 \n",
|
||
"96095 0.0 True False ... 31.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened score quartile has_purchased \\\n",
|
||
"0 0.0 0.695913 3 0.0 \n",
|
||
"1 0.0 0.244205 1 1.0 \n",
|
||
"2 0.0 0.279592 2 0.0 \n",
|
||
"3 0.0 0.696135 3 0.0 \n",
|
||
"4 0.0 0.911844 4 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 5.0 0.584680 3 1.0 \n",
|
||
"96092 9.0 0.654520 3 0.0 \n",
|
||
"96093 3.0 0.116503 1 0.0 \n",
|
||
"96094 4.0 0.579827 3 0.0 \n",
|
||
"96095 4.0 0.254002 2 0.0 \n",
|
||
"\n",
|
||
" consumption_lifetime avg_purchase_delay avg_purchase_delay_all \\\n",
|
||
"0 0.000000 0.000000 5.177187 \n",
|
||
"1 0.000000 0.000000 426.265613 \n",
|
||
"2 0.000000 0.000000 436.033437 \n",
|
||
"3 0.000000 0.000000 5.196412 \n",
|
||
"4 363.061678 181.530839 239.346574 \n",
|
||
"... ... ... ... \n",
|
||
"96091 0.000000 0.000000 278.442257 \n",
|
||
"96092 0.000000 0.000000 189.207373 \n",
|
||
"96093 0.000000 NaN 0.000000 \n",
|
||
"96094 0.000000 0.000000 279.312905 \n",
|
||
"96095 0.000000 NaN 0.000000 \n",
|
||
"\n",
|
||
" avg_tickets_delay avg_tickets_delay_all \n",
|
||
"0 0.000000 1.294297 \n",
|
||
"1 0.000000 426.265613 \n",
|
||
"2 0.000000 25.649026 \n",
|
||
"3 0.000000 1.299103 \n",
|
||
"4 10.678285 14.079210 \n",
|
||
"... ... ... \n",
|
||
"96091 0.000000 278.442257 \n",
|
||
"96092 0.000000 189.207373 \n",
|
||
"96093 NaN 0.000000 \n",
|
||
"96094 0.000000 279.312905 \n",
|
||
"96095 NaN 0.000000 \n",
|
||
"\n",
|
||
"[96096 rows x 22 columns]"
|
||
]
|
||
},
|
||
"execution_count": 204,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 206,
|
||
"id": "d6ef721a-dac6-49e0-8e1c-518a3cf79cbc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2.000000</td>\n",
|
||
" <td>2.714286</td>\n",
|
||
" <td>0.597093</td>\n",
|
||
" <td>450.171815</td>\n",
|
||
" <td>0.298547</td>\n",
|
||
" <td>225.085907</td>\n",
|
||
" <td>0.198968</td>\n",
|
||
" <td>174.041855</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2.592668</td>\n",
|
||
" <td>5.446707</td>\n",
|
||
" <td>26.192927</td>\n",
|
||
" <td>329.247848</td>\n",
|
||
" <td>11.435486</td>\n",
|
||
" <td>147.533946</td>\n",
|
||
" <td>5.992807</td>\n",
|
||
" <td>88.757091</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3.203764</td>\n",
|
||
" <td>6.791530</td>\n",
|
||
" <td>64.785322</td>\n",
|
||
" <td>266.488673</td>\n",
|
||
" <td>25.490483</td>\n",
|
||
" <td>107.753468</td>\n",
|
||
" <td>14.307458</td>\n",
|
||
" <td>65.942338</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>12.041836</td>\n",
|
||
" <td>46.274086</td>\n",
|
||
" <td>306.126700</td>\n",
|
||
" <td>391.637751</td>\n",
|
||
" <td>68.659817</td>\n",
|
||
" <td>92.058104</td>\n",
|
||
" <td>38.736644</td>\n",
|
||
" <td>53.575899</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_purchases nb_tickets consumption_lifetime purchase_date_min \\\n",
|
||
"quartile \n",
|
||
"1 2.000000 2.714286 0.597093 450.171815 \n",
|
||
"2 2.592668 5.446707 26.192927 329.247848 \n",
|
||
"3 3.203764 6.791530 64.785322 266.488673 \n",
|
||
"4 12.041836 46.274086 306.126700 391.637751 \n",
|
||
"\n",
|
||
" avg_purchase_delay avg_purchase_delay_all avg_tickets_delay \\\n",
|
||
"quartile \n",
|
||
"1 0.298547 225.085907 0.198968 \n",
|
||
"2 11.435486 147.533946 5.992807 \n",
|
||
"3 25.490483 107.753468 14.307458 \n",
|
||
"4 68.659817 92.058104 38.736644 \n",
|
||
"\n",
|
||
" avg_tickets_delay_all \n",
|
||
"quartile \n",
|
||
"1 174.041855 \n",
|
||
"2 88.757091 \n",
|
||
"3 65.942338 \n",
|
||
"4 53.575899 "
|
||
]
|
||
},
|
||
"execution_count": 206,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Per-quartile means of purchase/ticket metrics, keeping only rows whose avg_purchase_delay is strictly positive.\n",
"(\n",
"    X_test.loc[X_test[\"avg_purchase_delay\"].gt(0)]\n",
"    .groupby(\"quartile\")[\n",
"        [\n",
"            \"nb_purchases\",\n",
"            \"nb_tickets\",\n",
"            \"consumption_lifetime\",\n",
"            \"purchase_date_min\",\n",
"            \"avg_purchase_delay\",\n",
"            \"avg_purchase_delay_all\",\n",
"            \"avg_tickets_delay\",\n",
"            \"avg_tickets_delay_all\",\n",
"        ]\n",
"    ]\n",
"    .mean()\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "2ec816bf-852d-4fa7-a110-77d3e1b6f6a3",
|
||
"metadata": {},
|
||
"source": [
|
||
"Le délai moyen entre deux achats sur l'ensemble de la période"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 210,
|
||
"id": "8b57c418-31dc-4d0e-af80-304f4118a9e4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>30</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>35.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>219.530451</td>\n",
|
||
" <td>193.553044</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.387177</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>25.977407</td>\n",
|
||
" <td>8.659136</td>\n",
|
||
" <td>73.176817</td>\n",
|
||
" <td>8.659136</td>\n",
|
||
" <td>73.176817</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>37</th>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>105.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>422.518935</td>\n",
|
||
" <td>422.474444</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.258480</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.044491</td>\n",
|
||
" <td>0.022245</td>\n",
|
||
" <td>211.259468</td>\n",
|
||
" <td>0.007415</td>\n",
|
||
" <td>70.419823</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>38</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>145.50</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>469.053773</td>\n",
|
||
" <td>337.012106</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.424641</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>132.041667</td>\n",
|
||
" <td>66.020833</td>\n",
|
||
" <td>234.526887</td>\n",
|
||
" <td>33.010417</td>\n",
|
||
" <td>117.263443</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>51</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>276.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>317.012106</td>\n",
|
||
" <td>294.012106</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.353000</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>23.000000</td>\n",
|
||
" <td>5.750000</td>\n",
|
||
" <td>79.253027</td>\n",
|
||
" <td>5.750000</td>\n",
|
||
" <td>79.253027</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>67</th>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>210.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>373.627303</td>\n",
|
||
" <td>255.476065</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.463581</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>118.151238</td>\n",
|
||
" <td>59.075619</td>\n",
|
||
" <td>186.813652</td>\n",
|
||
" <td>10.741022</td>\n",
|
||
" <td>33.966118</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71408</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>62.51</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>490.113715</td>\n",
|
||
" <td>489.507940</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>0.469953</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.605775</td>\n",
|
||
" <td>0.302888</td>\n",
|
||
" <td>245.056858</td>\n",
|
||
" <td>0.302888</td>\n",
|
||
" <td>245.056858</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>71439</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>28.54</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>505.334005</td>\n",
|
||
" <td>505.324873</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>0.499401</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.009132</td>\n",
|
||
" <td>0.004566</td>\n",
|
||
" <td>252.667002</td>\n",
|
||
" <td>0.004566</td>\n",
|
||
" <td>252.667002</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>74420</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>115.90</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>522.320521</td>\n",
|
||
" <td>522.318229</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>35.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>0.453181</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.002292</td>\n",
|
||
" <td>0.001146</td>\n",
|
||
" <td>261.160260</td>\n",
|
||
" <td>0.001146</td>\n",
|
||
" <td>261.160260</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>79490</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>73.06</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>541.175509</td>\n",
|
||
" <td>521.153692</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>36.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.463122</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>20.021817</td>\n",
|
||
" <td>10.010909</td>\n",
|
||
" <td>270.587755</td>\n",
|
||
" <td>10.010909</td>\n",
|
||
" <td>270.587755</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>89618</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>134.66</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>527.497685</td>\n",
|
||
" <td>506.694931</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.449862</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>20.802755</td>\n",
|
||
" <td>10.401377</td>\n",
|
||
" <td>263.748843</td>\n",
|
||
" <td>10.401377</td>\n",
|
||
" <td>263.748843</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1473 rows × 22 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"30 3.0 3.0 35.00 1.0 \n",
|
||
"37 6.0 2.0 105.00 1.0 \n",
|
||
"38 4.0 2.0 145.50 1.0 \n",
|
||
"51 4.0 4.0 276.00 1.0 \n",
|
||
"67 11.0 2.0 210.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"71408 2.0 2.0 62.51 1.0 \n",
|
||
"71439 2.0 2.0 28.54 1.0 \n",
|
||
"74420 2.0 2.0 115.90 1.0 \n",
|
||
"79490 2.0 2.0 73.06 1.0 \n",
|
||
"89618 2.0 2.0 134.66 1.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"30 0.0 219.530451 193.553044 \n",
|
||
"37 0.0 422.518935 422.474444 \n",
|
||
"38 1.0 469.053773 337.012106 \n",
|
||
"51 1.0 317.012106 294.012106 \n",
|
||
"67 0.0 373.627303 255.476065 \n",
|
||
"... ... ... ... \n",
|
||
"71408 1.0 490.113715 489.507940 \n",
|
||
"71439 1.0 505.334005 505.324873 \n",
|
||
"74420 0.0 522.320521 522.318229 \n",
|
||
"79490 1.0 541.175509 521.153692 \n",
|
||
"89618 1.0 527.497685 506.694931 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... nb_campaigns \\\n",
|
||
"30 0.0 True True ... 0.0 \n",
|
||
"37 0.0 True True ... 0.0 \n",
|
||
"38 4.0 True True ... 0.0 \n",
|
||
"51 4.0 True True ... 0.0 \n",
|
||
"67 0.0 True True ... 0.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"71408 2.0 True False ... 12.0 \n",
|
||
"71439 2.0 True False ... 16.0 \n",
|
||
"74420 0.0 True False ... 35.0 \n",
|
||
"79490 2.0 True False ... 36.0 \n",
|
||
"89618 2.0 True False ... 30.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened score quartile has_purchased \\\n",
|
||
"30 0.0 0.387177 2 1.0 \n",
|
||
"37 0.0 0.258480 2 0.0 \n",
|
||
"38 0.0 0.424641 2 1.0 \n",
|
||
"51 0.0 0.353000 2 0.0 \n",
|
||
"67 0.0 0.463581 2 0.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"71408 6.0 0.469953 2 0.0 \n",
|
||
"71439 13.0 0.499401 2 0.0 \n",
|
||
"74420 9.0 0.453181 2 1.0 \n",
|
||
"79490 4.0 0.463122 2 0.0 \n",
|
||
"89618 0.0 0.449862 2 0.0 \n",
|
||
"\n",
|
||
" consumption_lifetime avg_purchase_delay avg_purchase_delay_all \\\n",
|
||
"30 25.977407 8.659136 73.176817 \n",
|
||
"37 0.044491 0.022245 211.259468 \n",
|
||
"38 132.041667 66.020833 234.526887 \n",
|
||
"51 23.000000 5.750000 79.253027 \n",
|
||
"67 118.151238 59.075619 186.813652 \n",
|
||
"... ... ... ... \n",
|
||
"71408 0.605775 0.302888 245.056858 \n",
|
||
"71439 0.009132 0.004566 252.667002 \n",
|
||
"74420 0.002292 0.001146 261.160260 \n",
|
||
"79490 20.021817 10.010909 270.587755 \n",
|
||
"89618 20.802755 10.401377 263.748843 \n",
|
||
"\n",
|
||
" avg_tickets_delay avg_tickets_delay_all \n",
|
||
"30 8.659136 73.176817 \n",
|
||
"37 0.007415 70.419823 \n",
|
||
"38 33.010417 117.263443 \n",
|
||
"51 5.750000 79.253027 \n",
|
||
"67 10.741022 33.966118 \n",
|
||
"... ... ... \n",
|
||
"71408 0.302888 245.056858 \n",
|
||
"71439 0.004566 252.667002 \n",
|
||
"74420 0.001146 261.160260 \n",
|
||
"79490 10.010909 270.587755 \n",
|
||
"89618 10.401377 263.748843 \n",
|
||
"\n",
|
||
"[1473 rows x 22 columns]"
|
||
]
|
||
},
|
||
"execution_count": 210,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Rows labelled quartile \"2\" (stored as a string) with a strictly positive avg_purchase_delay.\n",
"X_test.loc[X_test[\"avg_purchase_delay\"].gt(0) & X_test[\"quartile\"].eq(\"2\")]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 214,
|
||
"id": "5119ba18-9a89-4819-b98b-d0ae8e31291e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>136</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.179282</td>\n",
|
||
" <td>4.441181</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690843</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.738102</td>\n",
|
||
" <td>0.369051</td>\n",
|
||
" <td>2.589641</td>\n",
|
||
" <td>0.369051</td>\n",
|
||
" <td>2.589641</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>187</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>117.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.190961</td>\n",
|
||
" <td>4.422014</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.694387</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.768947</td>\n",
|
||
" <td>0.384473</td>\n",
|
||
" <td>2.595480</td>\n",
|
||
" <td>0.256316</td>\n",
|
||
" <td>1.730320</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>229</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>196.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.144676</td>\n",
|
||
" <td>5.123021</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.697071</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.021655</td>\n",
|
||
" <td>0.010828</td>\n",
|
||
" <td>2.572338</td>\n",
|
||
" <td>0.005414</td>\n",
|
||
" <td>1.286169</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>312</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>200.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.180069</td>\n",
|
||
" <td>5.061979</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.697224</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.118090</td>\n",
|
||
" <td>0.059045</td>\n",
|
||
" <td>2.590035</td>\n",
|
||
" <td>0.029523</td>\n",
|
||
" <td>1.295017</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>439</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>156.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.167558</td>\n",
|
||
" <td>5.112234</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.696639</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.055324</td>\n",
|
||
" <td>0.027662</td>\n",
|
||
" <td>2.583779</td>\n",
|
||
" <td>0.013831</td>\n",
|
||
" <td>1.291889</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>613</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>156.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.285567</td>\n",
|
||
" <td>2.801887</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.478423</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.483681</td>\n",
|
||
" <td>0.241840</td>\n",
|
||
" <td>1.642784</td>\n",
|
||
" <td>0.120920</td>\n",
|
||
" <td>0.821392</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>713</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.183241</td>\n",
|
||
" <td>5.060972</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.696068</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.122269</td>\n",
|
||
" <td>0.061134</td>\n",
|
||
" <td>2.591620</td>\n",
|
||
" <td>0.030567</td>\n",
|
||
" <td>1.295810</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>967</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.251076</td>\n",
|
||
" <td>3.127894</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.691127</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.123183</td>\n",
|
||
" <td>0.061591</td>\n",
|
||
" <td>1.625538</td>\n",
|
||
" <td>0.061591</td>\n",
|
||
" <td>1.625538</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1042</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>106.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.140903</td>\n",
|
||
" <td>5.133646</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690563</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.007257</td>\n",
|
||
" <td>0.003628</td>\n",
|
||
" <td>2.570451</td>\n",
|
||
" <td>0.003628</td>\n",
|
||
" <td>2.570451</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1096</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>110.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.190764</td>\n",
|
||
" <td>4.646551</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.696727</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.544213</td>\n",
|
||
" <td>0.272106</td>\n",
|
||
" <td>2.595382</td>\n",
|
||
" <td>0.136053</td>\n",
|
||
" <td>1.297691</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1124</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.192303</td>\n",
|
||
" <td>5.144618</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.689933</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.047685</td>\n",
|
||
" <td>0.023843</td>\n",
|
||
" <td>2.596152</td>\n",
|
||
" <td>0.023843</td>\n",
|
||
" <td>2.596152</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1451</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.126782</td>\n",
|
||
" <td>5.118449</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690032</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.008333</td>\n",
|
||
" <td>0.004167</td>\n",
|
||
" <td>2.563391</td>\n",
|
||
" <td>0.004167</td>\n",
|
||
" <td>2.563391</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1728</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.137326</td>\n",
|
||
" <td>4.958299</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.696165</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.179028</td>\n",
|
||
" <td>0.089514</td>\n",
|
||
" <td>2.568663</td>\n",
|
||
" <td>0.044757</td>\n",
|
||
" <td>1.284332</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1740</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.183495</td>\n",
|
||
" <td>5.176933</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690001</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.006563</td>\n",
|
||
" <td>0.003281</td>\n",
|
||
" <td>2.591748</td>\n",
|
||
" <td>0.003281</td>\n",
|
||
" <td>2.591748</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1843</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>102.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.184803</td>\n",
|
||
" <td>5.180162</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690491</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.004641</td>\n",
|
||
" <td>0.002321</td>\n",
|
||
" <td>2.592402</td>\n",
|
||
" <td>0.002321</td>\n",
|
||
" <td>2.592402</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1862</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>106.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.197801</td>\n",
|
||
" <td>5.191470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690534</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.006331</td>\n",
|
||
" <td>0.003166</td>\n",
|
||
" <td>2.598900</td>\n",
|
||
" <td>0.003166</td>\n",
|
||
" <td>2.598900</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1984</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>88.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.189468</td>\n",
|
||
" <td>5.182257</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690328</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.007211</td>\n",
|
||
" <td>0.003605</td>\n",
|
||
" <td>2.594734</td>\n",
|
||
" <td>0.003605</td>\n",
|
||
" <td>2.594734</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2041</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>147.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>4.597095</td>\n",
|
||
" <td>4.373079</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.694326</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.224016</td>\n",
|
||
" <td>0.112008</td>\n",
|
||
" <td>2.298547</td>\n",
|
||
" <td>0.074672</td>\n",
|
||
" <td>1.532365</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2115</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>75.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.182986</td>\n",
|
||
" <td>5.129433</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.692971</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.053553</td>\n",
|
||
" <td>0.026777</td>\n",
|
||
" <td>2.591493</td>\n",
|
||
" <td>0.017851</td>\n",
|
||
" <td>1.727662</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2384</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>196.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.171771</td>\n",
|
||
" <td>4.604873</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.697762</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.566898</td>\n",
|
||
" <td>0.283449</td>\n",
|
||
" <td>2.585885</td>\n",
|
||
" <td>0.141725</td>\n",
|
||
" <td>1.292943</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>20 rows × 22 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"136 2.0 2.0 50.0 1.0 \n",
|
||
"187 3.0 2.0 117.0 1.0 \n",
|
||
"229 4.0 2.0 196.0 1.0 \n",
|
||
"312 4.0 2.0 200.0 1.0 \n",
|
||
"439 4.0 2.0 156.0 1.0 \n",
|
||
"613 4.0 2.0 156.0 1.0 \n",
|
||
"713 4.0 2.0 100.0 1.0 \n",
|
||
"967 2.0 2.0 60.0 1.0 \n",
|
||
"1042 2.0 2.0 106.0 1.0 \n",
|
||
"1096 4.0 2.0 110.0 1.0 \n",
|
||
"1124 2.0 2.0 50.0 1.0 \n",
|
||
"1451 2.0 2.0 60.0 1.0 \n",
|
||
"1728 4.0 2.0 100.0 1.0 \n",
|
||
"1740 2.0 2.0 60.0 1.0 \n",
|
||
"1843 2.0 2.0 102.0 1.0 \n",
|
||
"1862 2.0 2.0 106.0 1.0 \n",
|
||
"1984 2.0 2.0 88.0 1.0 \n",
|
||
"2041 3.0 2.0 147.0 1.0 \n",
|
||
"2115 3.0 2.0 75.0 1.0 \n",
|
||
"2384 4.0 2.0 196.0 1.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"136 0.0 5.179282 4.441181 \n",
|
||
"187 0.0 5.190961 4.422014 \n",
|
||
"229 0.0 5.144676 5.123021 \n",
|
||
"312 0.0 5.180069 5.061979 \n",
|
||
"439 0.0 5.167558 5.112234 \n",
|
||
"613 0.0 3.285567 2.801887 \n",
|
||
"713 0.0 5.183241 5.060972 \n",
|
||
"967 0.0 3.251076 3.127894 \n",
|
||
"1042 0.0 5.140903 5.133646 \n",
|
||
"1096 0.0 5.190764 4.646551 \n",
|
||
"1124 0.0 5.192303 5.144618 \n",
|
||
"1451 0.0 5.126782 5.118449 \n",
|
||
"1728 0.0 5.137326 4.958299 \n",
|
||
"1740 0.0 5.183495 5.176933 \n",
|
||
"1843 0.0 5.184803 5.180162 \n",
|
||
"1862 0.0 5.197801 5.191470 \n",
|
||
"1984 0.0 5.189468 5.182257 \n",
|
||
"2041 0.0 4.597095 4.373079 \n",
|
||
"2115 0.0 5.182986 5.129433 \n",
|
||
"2384 0.0 5.171771 4.604873 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... nb_campaigns \\\n",
|
||
"136 0.0 True False ... 0.0 \n",
|
||
"187 0.0 True False ... 0.0 \n",
|
||
"229 0.0 True False ... 0.0 \n",
|
||
"312 0.0 True False ... 0.0 \n",
|
||
"439 0.0 True False ... 0.0 \n",
|
||
"613 0.0 True True ... 0.0 \n",
|
||
"713 0.0 True False ... 0.0 \n",
|
||
"967 0.0 True False ... 0.0 \n",
|
||
"1042 0.0 True False ... 0.0 \n",
|
||
"1096 0.0 True False ... 0.0 \n",
|
||
"1124 0.0 True False ... 0.0 \n",
|
||
"1451 0.0 True False ... 0.0 \n",
|
||
"1728 0.0 True False ... 0.0 \n",
|
||
"1740 0.0 True False ... 0.0 \n",
|
||
"1843 0.0 True False ... 0.0 \n",
|
||
"1862 0.0 True False ... 0.0 \n",
|
||
"1984 0.0 True False ... 0.0 \n",
|
||
"2041 0.0 True False ... 0.0 \n",
|
||
"2115 0.0 True False ... 0.0 \n",
|
||
"2384 0.0 True False ... 0.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened score quartile has_purchased \\\n",
|
||
"136 0.0 0.690843 3 0.0 \n",
|
||
"187 0.0 0.694387 3 0.0 \n",
|
||
"229 0.0 0.697071 3 0.0 \n",
|
||
"312 0.0 0.697224 3 0.0 \n",
|
||
"439 0.0 0.696639 3 0.0 \n",
|
||
"613 0.0 0.478423 2 0.0 \n",
|
||
"713 0.0 0.696068 3 0.0 \n",
|
||
"967 0.0 0.691127 3 0.0 \n",
|
||
"1042 0.0 0.690563 3 0.0 \n",
|
||
"1096 0.0 0.696727 3 0.0 \n",
|
||
"1124 0.0 0.689933 3 1.0 \n",
|
||
"1451 0.0 0.690032 3 0.0 \n",
|
||
"1728 0.0 0.696165 3 0.0 \n",
|
||
"1740 0.0 0.690001 3 0.0 \n",
|
||
"1843 0.0 0.690491 3 0.0 \n",
|
||
"1862 0.0 0.690534 3 0.0 \n",
|
||
"1984 0.0 0.690328 3 0.0 \n",
|
||
"2041 0.0 0.694326 3 0.0 \n",
|
||
"2115 0.0 0.692971 3 0.0 \n",
|
||
"2384 0.0 0.697762 3 0.0 \n",
|
||
"\n",
|
||
" consumption_lifetime avg_purchase_delay avg_purchase_delay_all \\\n",
|
||
"136 0.738102 0.369051 2.589641 \n",
|
||
"187 0.768947 0.384473 2.595480 \n",
|
||
"229 0.021655 0.010828 2.572338 \n",
|
||
"312 0.118090 0.059045 2.590035 \n",
|
||
"439 0.055324 0.027662 2.583779 \n",
|
||
"613 0.483681 0.241840 1.642784 \n",
|
||
"713 0.122269 0.061134 2.591620 \n",
|
||
"967 0.123183 0.061591 1.625538 \n",
|
||
"1042 0.007257 0.003628 2.570451 \n",
|
||
"1096 0.544213 0.272106 2.595382 \n",
|
||
"1124 0.047685 0.023843 2.596152 \n",
|
||
"1451 0.008333 0.004167 2.563391 \n",
|
||
"1728 0.179028 0.089514 2.568663 \n",
|
||
"1740 0.006563 0.003281 2.591748 \n",
|
||
"1843 0.004641 0.002321 2.592402 \n",
|
||
"1862 0.006331 0.003166 2.598900 \n",
|
||
"1984 0.007211 0.003605 2.594734 \n",
|
||
"2041 0.224016 0.112008 2.298547 \n",
|
||
"2115 0.053553 0.026777 2.591493 \n",
|
||
"2384 0.566898 0.283449 2.585885 \n",
|
||
"\n",
|
||
" avg_tickets_delay avg_tickets_delay_all \n",
|
||
"136 0.369051 2.589641 \n",
|
||
"187 0.256316 1.730320 \n",
|
||
"229 0.005414 1.286169 \n",
|
||
"312 0.029523 1.295017 \n",
|
||
"439 0.013831 1.291889 \n",
|
||
"613 0.120920 0.821392 \n",
|
||
"713 0.030567 1.295810 \n",
|
||
"967 0.061591 1.625538 \n",
|
||
"1042 0.003628 2.570451 \n",
|
||
"1096 0.136053 1.297691 \n",
|
||
"1124 0.023843 2.596152 \n",
|
||
"1451 0.004167 2.563391 \n",
|
||
"1728 0.044757 1.284332 \n",
|
||
"1740 0.003281 2.591748 \n",
|
||
"1843 0.002321 2.592402 \n",
|
||
"1862 0.003166 2.598900 \n",
|
||
"1984 0.003605 2.594734 \n",
|
||
"2041 0.074672 1.532365 \n",
|
||
"2115 0.017851 1.727662 \n",
|
||
"2384 0.141725 1.292943 \n",
|
||
"\n",
|
||
"[20 rows x 22 columns]"
|
||
]
|
||
},
|
||
"execution_count": 214,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[(X_test[\"avg_purchase_delay\"]>0) & (X_test[\"purchase_date_min\"]<10)].head(20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 217,
|
||
"id": "91ec6a21-89dd-40cd-91fc-8dfab132a9e8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"y_has_purchased 13690.0\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 217,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_test.sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 218,
|
||
"id": "3223968c-409e-4110-8dcc-fe319d34d44f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"36092.22480054577"
|
||
]
|
||
},
|
||
"execution_count": 218,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[\"score\"].sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 241,
|
||
"id": "0233ab78-81d7-41a2-b948-4bc24f51c9e9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0.20933232507450736"
|
||
]
|
||
},
|
||
"execution_count": 241,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test[X_test[\"quartile\"]==\"3\"][\"has_purchased\"].mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c3bf1a55-7d46-42c7-9436-b68ce8c7ef24",
|
||
"metadata": {},
|
||
"source": [
|
||
"Autre méthode \\\n",
|
||
"On considère la durée totale sur laquelle les features ont été observées (1 an et demi) sans se soucier de la \n",
|
||
"date du 1er achat. \n",
|
||
"Et on extrapole le rythme d'achat en considérant que le client devrait acheter nb_tickets/1.5 tickets durant l'année à venir. "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 240,
|
||
"id": "d594a3ee-22cb-45b5-a6fa-4439c0aad01c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"period_duration_years = 1.5\n",
|
||
"\n",
|
||
"expected_tickets_purchased = X_test[\"nb_tickets\"]/period_duration_years\n",
|
||
"expected_amount = X_test[\"total_amount\"]/period_duration_years"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 297,
|
||
"id": "807f9810-a691-4e51-af51-cdb7f0b4bd40",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>quartile</th>\n",
|
||
" <th>has_purchased</th>\n",
|
||
" <th>consumption_lifetime</th>\n",
|
||
" <th>avg_purchase_delay</th>\n",
|
||
" <th>avg_purchase_delay_all</th>\n",
|
||
" <th>avg_tickets_delay</th>\n",
|
||
" <th>avg_tickets_delay_all</th>\n",
|
||
" <th>decile</th>\n",
|
||
" <th>overshoot_coeff</th>\n",
|
||
" <th>ajusted_score</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.294297</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211260</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.063821</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.649026</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.073069</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.299103</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211328</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>363.061678</td>\n",
|
||
" <td>181.530839</td>\n",
|
||
" <td>239.346574</td>\n",
|
||
" <td>10.678285</td>\n",
|
||
" <td>14.079210</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1.268598</td>\n",
|
||
" <td>0.718781</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.179296</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.198694</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17.863019</td>\n",
|
||
" <td>0.006522</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.177808</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.066382</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 25 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... quartile \\\n",
|
||
"0 0.0 True False ... 3 \n",
|
||
"1 0.0 True True ... 1 \n",
|
||
"2 0.0 True True ... 2 \n",
|
||
"3 0.0 True False ... 3 \n",
|
||
"4 0.0 True False ... 4 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 3 \n",
|
||
"96092 1.0 True False ... 3 \n",
|
||
"96093 0.0 True True ... 1 \n",
|
||
"96094 1.0 True False ... 3 \n",
|
||
"96095 0.0 True False ... 2 \n",
|
||
"\n",
|
||
" has_purchased consumption_lifetime avg_purchase_delay \\\n",
|
||
"0 0.0 0.000000 0.000000 \n",
|
||
"1 1.0 0.000000 0.000000 \n",
|
||
"2 0.0 0.000000 0.000000 \n",
|
||
"3 0.0 0.000000 0.000000 \n",
|
||
"4 1.0 363.061678 181.530839 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 0.000000 0.000000 \n",
|
||
"96092 0.0 0.000000 0.000000 \n",
|
||
"96093 0.0 0.000000 NaN \n",
|
||
"96094 0.0 0.000000 0.000000 \n",
|
||
"96095 0.0 0.000000 NaN \n",
|
||
"\n",
|
||
" avg_purchase_delay_all avg_tickets_delay avg_tickets_delay_all \\\n",
|
||
"0 5.177187 0.000000 1.294297 \n",
|
||
"1 426.265613 0.000000 426.265613 \n",
|
||
"2 436.033437 0.000000 25.649026 \n",
|
||
"3 5.196412 0.000000 1.299103 \n",
|
||
"4 239.346574 10.678285 14.079210 \n",
|
||
"... ... ... ... \n",
|
||
"96091 278.442257 0.000000 278.442257 \n",
|
||
"96092 189.207373 0.000000 189.207373 \n",
|
||
"96093 0.000000 NaN 0.000000 \n",
|
||
"96094 279.312905 0.000000 279.312905 \n",
|
||
"96095 0.000000 NaN 0.000000 \n",
|
||
"\n",
|
||
" decile overshoot_coeff ajusted_score \n",
|
||
"0 6 3.294104 0.211260 \n",
|
||
"1 2 3.826401 0.063821 \n",
|
||
"2 2 3.826401 0.073069 \n",
|
||
"3 6 3.294104 0.211328 \n",
|
||
"4 9 1.268598 0.718781 \n",
|
||
"... ... ... ... \n",
|
||
"96091 5 3.260982 0.179296 \n",
|
||
"96092 6 3.294104 0.198694 \n",
|
||
"96093 1 17.863019 0.006522 \n",
|
||
"96094 5 3.260982 0.177808 \n",
|
||
"96095 2 3.826401 0.066382 \n",
|
||
"\n",
|
||
"[96096 rows x 25 columns]"
|
||
]
|
||
},
|
||
"execution_count": 297,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ab7489e3-58e8-4be8-b870-60c869ba7953",
|
||
"metadata": {},
|
||
"source": [
|
||
"Estimation de l'overshoot : méthode plus rigoureuse \n",
|
||
"\n",
|
||
"on étudie le rapport entre le score et has purchased\n",
|
||
"plus exactement entre score/(1-score) et has_purchased/(1-has_purchased) - permet de coller à structure du logit"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 301,
|
||
"id": "3587dd1d-73a7-4810-9330-4b29caeb1e9f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"quartile\n",
|
||
"1 0.203706\n",
|
||
"2 0.564483\n",
|
||
"3 1.679424\n",
|
||
"4 9.209851\n",
|
||
"Name: score, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 301,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"score_odd_ratio_quartile = X_test.groupby(\"quartile\")[\"score\"].mean()/(1-X_test.groupby(\"quartile\")[\"score\"].mean())\n",
|
||
"score_odd_ratio_quartile"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 302,
|
||
"id": "1a7dcc8c-33c5-4abf-828f-ba17dceb3287",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"quartile\n",
|
||
"1 0.027517\n",
|
||
"2 0.133083\n",
|
||
"3 0.264754\n",
|
||
"4 1.998944\n",
|
||
"Name: has_purchased, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 302,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_odd_ratio_quartile = X_test.groupby(\"quartile\")[\"has_purchased\"].mean()/(1-X_test.groupby(\"quartile\")[\"has_purchased\"].mean())\n",
|
||
"y_odd_ratio_quartile"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "6307f5f8-3597-422b-86ef-cdcac3648862",
|
||
"metadata": {},
|
||
"source": [
|
||
"### PB : a-t-on le même résultat de calcul du biais sur X_train et y_train ?"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 478,
|
||
"id": "c857531d-3002-4047-b206-a31cc11c451c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>decile</th>\n",
|
||
" <th>overshoot_coeff</th>\n",
|
||
" <th>ajusted_score</th>\n",
|
||
" <th>odd_ratio</th>\n",
|
||
" <th>test_adjusted_score_2</th>\n",
|
||
" <th>score_adjusted</th>\n",
|
||
" <th>nb_tickets_projected</th>\n",
|
||
" <th>total_amount_projected</th>\n",
|
||
" <th>nb_tickets_expected</th>\n",
|
||
" <th>total_amount_expected</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>100.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>5.177187</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211260</td>\n",
|
||
" <td>2.288530</td>\n",
|
||
" <td>0.533640</td>\n",
|
||
" <td>0.274689</td>\n",
|
||
" <td>2.666667</td>\n",
|
||
" <td>66.666667</td>\n",
|
||
" <td>0.732503</td>\n",
|
||
" <td>18.312587</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>426.265613</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.063821</td>\n",
|
||
" <td>0.323109</td>\n",
|
||
" <td>0.139085</td>\n",
|
||
" <td>0.050756</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>36.666667</td>\n",
|
||
" <td>0.033837</td>\n",
|
||
" <td>1.861053</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>80.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>436.033437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.073069</td>\n",
|
||
" <td>0.388102</td>\n",
|
||
" <td>0.162515</td>\n",
|
||
" <td>0.060349</td>\n",
|
||
" <td>11.333333</td>\n",
|
||
" <td>53.333333</td>\n",
|
||
" <td>0.683958</td>\n",
|
||
" <td>3.218627</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>120.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>5.196412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.211328</td>\n",
|
||
" <td>2.290940</td>\n",
|
||
" <td>0.533902</td>\n",
|
||
" <td>0.274899</td>\n",
|
||
" <td>2.666667</td>\n",
|
||
" <td>80.000000</td>\n",
|
||
" <td>0.733063</td>\n",
|
||
" <td>21.991884</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>416.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>478.693148</td>\n",
|
||
" <td>115.631470</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1.268598</td>\n",
|
||
" <td>0.718781</td>\n",
|
||
" <td>10.343538</td>\n",
|
||
" <td>0.837972</td>\n",
|
||
" <td>0.631228</td>\n",
|
||
" <td>22.666667</td>\n",
|
||
" <td>277.333333</td>\n",
|
||
" <td>14.307843</td>\n",
|
||
" <td>175.060667</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96091</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>67.31</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>278.442257</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.179296</td>\n",
|
||
" <td>1.407779</td>\n",
|
||
" <td>0.413108</td>\n",
|
||
" <td>0.188948</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>44.873333</td>\n",
|
||
" <td>0.125966</td>\n",
|
||
" <td>8.478740</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96092</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>61.41</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>189.207373</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3.294104</td>\n",
|
||
" <td>0.198694</td>\n",
|
||
" <td>1.894523</td>\n",
|
||
" <td>0.486458</td>\n",
|
||
" <td>0.238685</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>40.940000</td>\n",
|
||
" <td>0.159123</td>\n",
|
||
" <td>9.771748</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96093</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17.863019</td>\n",
|
||
" <td>0.006522</td>\n",
|
||
" <td>0.131865</td>\n",
|
||
" <td>0.061854</td>\n",
|
||
" <td>0.021356</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96094</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>79.43</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>279.312905</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3.260982</td>\n",
|
||
" <td>0.177808</td>\n",
|
||
" <td>1.379973</td>\n",
|
||
" <td>0.408279</td>\n",
|
||
" <td>0.185910</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>52.953333</td>\n",
|
||
" <td>0.123940</td>\n",
|
||
" <td>9.844555</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96095</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3.826401</td>\n",
|
||
" <td>0.066382</td>\n",
|
||
" <td>0.340487</td>\n",
|
||
" <td>0.145477</td>\n",
|
||
" <td>0.053340</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>96096 rows × 32 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 4.0 1.0 100.00 1.0 \n",
|
||
"1 1.0 1.0 55.00 1.0 \n",
|
||
"2 17.0 1.0 80.00 1.0 \n",
|
||
"3 4.0 1.0 120.00 1.0 \n",
|
||
"4 34.0 2.0 416.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 1.0 1.0 67.31 1.0 \n",
|
||
"96092 1.0 1.0 61.41 1.0 \n",
|
||
"96093 0.0 0.0 0.00 0.0 \n",
|
||
"96094 1.0 1.0 79.43 1.0 \n",
|
||
"96095 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 5.177187 5.177187 \n",
|
||
"1 0.0 426.265613 426.265613 \n",
|
||
"2 0.0 436.033437 436.033437 \n",
|
||
"3 0.0 5.196412 5.196412 \n",
|
||
"4 0.0 478.693148 115.631470 \n",
|
||
"... ... ... ... \n",
|
||
"96091 1.0 278.442257 278.442257 \n",
|
||
"96092 1.0 189.207373 189.207373 \n",
|
||
"96093 0.0 550.000000 550.000000 \n",
|
||
"96094 1.0 279.312905 279.312905 \n",
|
||
"96095 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in ... decile \\\n",
|
||
"0 0.0 True False ... 6 \n",
|
||
"1 0.0 True True ... 2 \n",
|
||
"2 0.0 True True ... 2 \n",
|
||
"3 0.0 True False ... 6 \n",
|
||
"4 0.0 True False ... 9 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"96091 1.0 True False ... 5 \n",
|
||
"96092 1.0 True False ... 6 \n",
|
||
"96093 0.0 True True ... 1 \n",
|
||
"96094 1.0 True False ... 5 \n",
|
||
"96095 0.0 True False ... 2 \n",
|
||
"\n",
|
||
" overshoot_coeff ajusted_score odd_ratio test_adjusted_score_2 \\\n",
|
||
"0 3.294104 0.211260 2.288530 0.533640 \n",
|
||
"1 3.826401 0.063821 0.323109 0.139085 \n",
|
||
"2 3.826401 0.073069 0.388102 0.162515 \n",
|
||
"3 3.294104 0.211328 2.290940 0.533902 \n",
|
||
"4 1.268598 0.718781 10.343538 0.837972 \n",
|
||
"... ... ... ... ... \n",
|
||
"96091 3.260982 0.179296 1.407779 0.413108 \n",
|
||
"96092 3.294104 0.198694 1.894523 0.486458 \n",
|
||
"96093 17.863019 0.006522 0.131865 0.061854 \n",
|
||
"96094 3.260982 0.177808 1.379973 0.408279 \n",
|
||
"96095 3.826401 0.066382 0.340487 0.145477 \n",
|
||
"\n",
|
||
" score_adjusted nb_tickets_projected total_amount_projected \\\n",
|
||
"0 0.274689 2.666667 66.666667 \n",
|
||
"1 0.050756 0.666667 36.666667 \n",
|
||
"2 0.060349 11.333333 53.333333 \n",
|
||
"3 0.274899 2.666667 80.000000 \n",
|
||
"4 0.631228 22.666667 277.333333 \n",
|
||
"... ... ... ... \n",
|
||
"96091 0.188948 0.666667 44.873333 \n",
|
||
"96092 0.238685 0.666667 40.940000 \n",
|
||
"96093 0.021356 0.000000 0.000000 \n",
|
||
"96094 0.185910 0.666667 52.953333 \n",
|
||
"96095 0.053340 0.000000 0.000000 \n",
|
||
"\n",
|
||
" nb_tickets_expected total_amount_expected \n",
|
||
"0 0.732503 18.312587 \n",
|
||
"1 0.033837 1.861053 \n",
|
||
"2 0.683958 3.218627 \n",
|
||
"3 0.733063 21.991884 \n",
|
||
"4 14.307843 175.060667 \n",
|
||
"... ... ... \n",
|
||
"96091 0.125966 8.478740 \n",
|
||
"96092 0.159123 9.771748 \n",
|
||
"96093 0.000000 0.000000 \n",
|
||
"96094 0.123940 9.844555 \n",
|
||
"96095 0.000000 0.000000 \n",
|
||
"\n",
|
||
"[96096 rows x 32 columns]"
|
||
]
|
||
},
|
||
"execution_count": 478,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 479,
|
||
"id": "af371c21-a121-41ce-92a2-e01bdac8ad81",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_pred_prob_train = logit_grid.predict_proba(X_train)[:, 1]\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 484,
|
||
"id": "1e1ddbe4-037a-4866-ae35-161e6ba14ffd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"somme des scores calculés sur X train : 84127.81461345348\n",
|
||
"somme des y train : 32154.0\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# globalement, on a toujours une somme de scores 3 fois supérieure (même si le biais semble atténué)\n",
|
||
"print(\"somme des scores calculés sur X train : \",y_pred_prob_train.sum())\n",
|
||
"print(\"somme des y train : \", y_train.sum()[\"y_has_purchased\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 493,
|
||
"id": "ff61821b-b643-4002-88d8-8a0ec1268e73",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>odd_ratio</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>60.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>355.268981</td>\n",
|
||
" <td>355.268981</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.493834</td>\n",
|
||
" <td>0.975638</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>140.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>373.540289</td>\n",
|
||
" <td>219.262269</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.722704</td>\n",
|
||
" <td>2.606253</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>50.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.202442</td>\n",
|
||
" <td>5.202442</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.689866</td>\n",
|
||
" <td>2.224409</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>90.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.178958</td>\n",
|
||
" <td>5.178958</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.693078</td>\n",
|
||
" <td>2.258158</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>78.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.174039</td>\n",
|
||
" <td>5.174039</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690209</td>\n",
|
||
" <td>2.227980</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224208</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.250218</td>\n",
|
||
" <td>0.333721</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224209</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>20.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>392.501030</td>\n",
|
||
" <td>392.501030</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>0.524745</td>\n",
|
||
" <td>1.104135</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224210</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.117175</td>\n",
|
||
" <td>0.132728</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224211</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>97.11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>172.334074</td>\n",
|
||
" <td>172.334074</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.643851</td>\n",
|
||
" <td>1.807814</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224212</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.250170</td>\n",
|
||
" <td>0.333636</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>224213 rows × 16 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 2.0 1.0 60.00 1.0 \n",
|
||
"1 8.0 3.0 140.00 1.0 \n",
|
||
"2 2.0 1.0 50.00 1.0 \n",
|
||
"3 3.0 1.0 90.00 1.0 \n",
|
||
"4 2.0 1.0 78.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"224208 0.0 0.0 0.00 0.0 \n",
|
||
"224209 1.0 1.0 20.00 1.0 \n",
|
||
"224210 0.0 0.0 0.00 0.0 \n",
|
||
"224211 1.0 1.0 97.11 1.0 \n",
|
||
"224212 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 355.268981 355.268981 \n",
|
||
"1 0.0 373.540289 219.262269 \n",
|
||
"2 0.0 5.202442 5.202442 \n",
|
||
"3 0.0 5.178958 5.178958 \n",
|
||
"4 0.0 5.174039 5.174039 \n",
|
||
"... ... ... ... \n",
|
||
"224208 0.0 550.000000 550.000000 \n",
|
||
"224209 1.0 392.501030 392.501030 \n",
|
||
"224210 0.0 550.000000 550.000000 \n",
|
||
"224211 1.0 172.334074 172.334074 \n",
|
||
"224212 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"0 0.0 True False 0 \n",
|
||
"1 0.0 True False 0 \n",
|
||
"2 0.0 True False 0 \n",
|
||
"3 0.0 True False 0 \n",
|
||
"4 0.0 True False 1 \n",
|
||
"... ... ... ... ... \n",
|
||
"224208 0.0 True False 0 \n",
|
||
"224209 1.0 True False 0 \n",
|
||
"224210 0.0 True True 0 \n",
|
||
"224211 1.0 True False 0 \n",
|
||
"224212 0.0 True False 0 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened score odd_ratio \n",
|
||
"0 1 0.0 0.0 0.493834 0.975638 \n",
|
||
"1 1 0.0 0.0 0.722704 2.606253 \n",
|
||
"2 1 0.0 0.0 0.689866 2.224409 \n",
|
||
"3 1 0.0 0.0 0.693078 2.258158 \n",
|
||
"4 0 0.0 0.0 0.690209 2.227980 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"224208 1 34.0 3.0 0.250218 0.333721 \n",
|
||
"224209 1 23.0 6.0 0.524745 1.104135 \n",
|
||
"224210 1 8.0 4.0 0.117175 0.132728 \n",
|
||
"224211 1 13.0 5.0 0.643851 1.807814 \n",
|
||
"224212 1 4.0 4.0 0.250170 0.333636 \n",
|
||
"\n",
|
||
"[224213 rows x 16 columns]"
|
||
]
|
||
},
|
||
"execution_count": 493,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train[\"score\"] = y_pred_prob_train\n",
|
||
"# X_train[\"odd_ratio\"] = X_train[\"score\"]/(1-X_train[\"score\"])\n",
|
||
"X_train"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 491,
|
||
"id": "240afa08-692d-4c2d-93c7-c8c8a46afdb3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 2.241790e+05\n",
|
||
"mean 5.824134e+10\n",
|
||
"std 1.462083e+13\n",
|
||
"min 1.207494e-01\n",
|
||
"25% 1.476621e-01\n",
|
||
"50% 3.338869e-01\n",
|
||
"75% 1.427047e+00\n",
|
||
"max 4.503600e+15\n",
|
||
"Name: odd_ratio, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 491,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train[\"odd_ratio\"][X_train[\"odd_ratio\"]<np.inf].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 494,
|
||
"id": "863ff04a-c4de-44cd-af9d-1e5032624592",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>score</th>\n",
|
||
" <th>odd_ratio</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>60.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>355.268981</td>\n",
|
||
" <td>355.268981</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.493834</td>\n",
|
||
" <td>0.975638</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>140.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>373.540289</td>\n",
|
||
" <td>219.262269</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.722704</td>\n",
|
||
" <td>2.606253</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>50.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.202442</td>\n",
|
||
" <td>5.202442</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.689866</td>\n",
|
||
" <td>2.224409</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>90.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.178958</td>\n",
|
||
" <td>5.178958</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.693078</td>\n",
|
||
" <td>2.258158</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>78.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5.174039</td>\n",
|
||
" <td>5.174039</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.690209</td>\n",
|
||
" <td>2.227980</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224208</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>34.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.250218</td>\n",
|
||
" <td>0.333721</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224209</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>20.00</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>392.501030</td>\n",
|
||
" <td>392.501030</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>0.524745</td>\n",
|
||
" <td>1.104135</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224210</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.117175</td>\n",
|
||
" <td>0.132728</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224211</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>97.11</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>172.334074</td>\n",
|
||
" <td>172.334074</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.643851</td>\n",
|
||
" <td>1.807814</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224212</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.250170</td>\n",
|
||
" <td>0.333636</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>224213 rows × 16 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 2.0 1.0 60.00 1.0 \n",
|
||
"1 8.0 3.0 140.00 1.0 \n",
|
||
"2 2.0 1.0 50.00 1.0 \n",
|
||
"3 3.0 1.0 90.00 1.0 \n",
|
||
"4 2.0 1.0 78.00 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"224208 0.0 0.0 0.00 0.0 \n",
|
||
"224209 1.0 1.0 20.00 1.0 \n",
|
||
"224210 0.0 0.0 0.00 0.0 \n",
|
||
"224211 1.0 1.0 97.11 1.0 \n",
|
||
"224212 0.0 0.0 0.00 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 355.268981 355.268981 \n",
|
||
"1 0.0 373.540289 219.262269 \n",
|
||
"2 0.0 5.202442 5.202442 \n",
|
||
"3 0.0 5.178958 5.178958 \n",
|
||
"4 0.0 5.174039 5.174039 \n",
|
||
"... ... ... ... \n",
|
||
"224208 0.0 550.000000 550.000000 \n",
|
||
"224209 1.0 392.501030 392.501030 \n",
|
||
"224210 0.0 550.000000 550.000000 \n",
|
||
"224211 1.0 172.334074 172.334074 \n",
|
||
"224212 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"0 0.0 True False 0 \n",
|
||
"1 0.0 True False 0 \n",
|
||
"2 0.0 True False 0 \n",
|
||
"3 0.0 True False 0 \n",
|
||
"4 0.0 True False 1 \n",
|
||
"... ... ... ... ... \n",
|
||
"224208 0.0 True False 0 \n",
|
||
"224209 1.0 True False 0 \n",
|
||
"224210 0.0 True True 0 \n",
|
||
"224211 1.0 True False 0 \n",
|
||
"224212 0.0 True False 0 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened score odd_ratio \n",
|
||
"0 1 0.0 0.0 0.493834 0.975638 \n",
|
||
"1 1 0.0 0.0 0.722704 2.606253 \n",
|
||
"2 1 0.0 0.0 0.689866 2.224409 \n",
|
||
"3 1 0.0 0.0 0.693078 2.258158 \n",
|
||
"4 0 0.0 0.0 0.690209 2.227980 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"224208 1 34.0 3.0 0.250218 0.333721 \n",
|
||
"224209 1 23.0 6.0 0.524745 1.104135 \n",
|
||
"224210 1 8.0 4.0 0.117175 0.132728 \n",
|
||
"224211 1 13.0 5.0 0.643851 1.807814 \n",
|
||
"224212 1 4.0 4.0 0.250170 0.333636 \n",
|
||
"\n",
|
||
"[224213 rows x 16 columns]"
|
||
]
|
||
},
|
||
"execution_count": 494,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# on utilise le second score comme valeur de remplacement quand score = 1\n",
|
||
"X_train_second_score = X_train[\"score\"][X_train[\"score\"]<1].max()\n",
|
||
"\n",
|
||
"X_train[\"score\"] = X_train[\"score\"].apply(lambda x : X_train_second_score if x==1 else x)\n",
|
||
"X_train"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 498,
|
||
"id": "b2690332-9f2e-4597-ab13-cef073de367f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0.9999999999999998"
|
||
]
|
||
},
|
||
"execution_count": 498,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train[\"score\"].max()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 499,
|
||
"id": "e749e3b5-f5f9-4ab5-a0c1-ee99c5e88a26",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 2.242130e+05\n",
|
||
"mean 7.411652e+11\n",
|
||
"std 5.734858e+13\n",
|
||
"min 1.207494e-01\n",
|
||
"25% 1.476621e-01\n",
|
||
"50% 3.338869e-01\n",
|
||
"75% 1.427525e+00\n",
|
||
"max 4.503600e+15\n",
|
||
"Name: odd_ratio, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 499,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train[\"odd_ratio\"] = X_train[\"score\"]/(1-X_train[\"score\"])\n",
|
||
"X_train[\"odd_ratio\"].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 500,
|
||
"id": "84fea40a-896f-4e74-8d3c-18ecbe9f4c5f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def obj_function_X_train(bias) :\n",
|
||
"    \"\"\"Sum the bias-adjusted scores over every value of X_train[\"odd_ratio\"].\n",
|
||
"\n",
|
||
"    Reads the notebook-level X_train and applies adjusted_score to each odds ratio.\n",
|
||
"    \"\"\"\n",
|
||
"    total = 0\n",
|
||
"    for ratio in X_train[\"odd_ratio\"]:\n",
|
||
"        total = total + adjusted_score(ratio, bias)\n",
|
||
"    return total"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 501,
|
||
"id": "9886995b-59d7-4fdf-acb0-981338a4e083",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# minimization\n",
|
||
"\n",
|
||
"from scipy.optimize import minimize\n",
|
||
"\n",
|
||
"\n",
|
||
"y_train_sum = y_train.sum()[\"y_has_purchased\"]\n",
|
||
"initial_guess = 6\n",
|
||
"estimated_biais_train = minimize(lambda bias : (obj_function_X_train(bias)-y_train_sum)**2 ,\n",
|
||
"initial_guess , method = \"BFGS\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 502,
|
||
"id": "80cb872f-2aac-4c77-b935-2d05e0199837",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"bias estimated on train set: 5.947447991192572\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# biais de 5.95 contre 6.04 pour le test set, OK\n",
|
||
"print(f\"bias estimated on train set: {estimated_biais_train.x[0]}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "25d8c4e0-ca60-4aeb-8aa9-9cfa8efdf52a",
|
||
"metadata": {},
|
||
"source": [
|
||
"### construction d'une fonction de généralisation de la méthode de calcul du biais\n",
|
||
"\n",
|
||
"Le biais est calculé de la façon suivante. \n",
|
||
"En notant $\\hat{p(x_i)}$ le score calculé et $p(x_i)$ le vrai score (sans biais), et $\\beta$ le logarithme du biais, on a : \\\n",
|
||
"$\\ln{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}} = \\beta + \\ln{\\frac{p(x_i)}{1-p(x_i)}}$ \\\n",
|
||
"$ \\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}} = \\exp(\\beta) \\cdot \\frac{p(x_i)}{1-p(x_i)} $ \\\n",
|
||
"Ce qu'on appelle biais et qu'on estime dans le code par la suite est : $B=\\exp(\\beta) $. Les probabilités ne sont donc pas biaisées si $B=1$. Il y a surestimation si $B>1$. \n",
|
||
"\n",
|
||
"On cherche le $B$ qui permette d'ajuster les probabilités de telle sorte que la somme des scores ajustés soit égale à la somme des `y_has_purchased`. Cela revient à résoudre : \n",
|
||
"\n",
|
||
"\\begin{equation}\n",
|
||
"\\sum_{i}{\\frac{\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}{B+\\frac{\\hat{p(x_i)}}{1-\\hat{p(x_i)}}}} = \\sum_{i}{Y_i}\n",
|
||
"\\end{equation}\n",
|
||
"\n",
|
||
"C'est ce que fait la fonction `find_bias`."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 733,
|
||
"id": "41f588ad-b093-47f9-a2c9-52428c61d8d8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def adjusted_score(odd_ratio, bias) :\n",
|
||
"    \"\"\"Turn an odds ratio into a score corrected for the given bias.\n",
|
||
"\n",
|
||
"    Computes odd_ratio / (bias + odd_ratio); with bias == 1 this is the\n",
|
||
"    probability whose odds are odd_ratio.\n",
|
||
"    \"\"\"\n",
|
||
"    return odd_ratio / (bias + odd_ratio)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 734,
|
||
"id": "208900ab-0211-4e0a-a235-e4ea3a6957ce",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def adjust_score_1(score) :\n",
|
||
"    \"\"\"Replace every score equal to 1 by the largest score different from 1.\n",
|
||
"\n",
|
||
"    A score of exactly 1 gives an infinite odds ratio score / (1 - score);\n",
|
||
"    substituting the runner-up value keeps the ratio finite.\n",
|
||
"\n",
|
||
"    Parameters\n",
|
||
"    ----------\n",
|
||
"    score : array-like of numbers (for instance a pandas Series or a NumPy array).\n",
|
||
"\n",
|
||
"    Returns\n",
|
||
"    -------\n",
|
||
"    numpy.ndarray\n",
|
||
"        Same length as score; an empty input gives an empty array.\n",
|
||
"\n",
|
||
"    Raises\n",
|
||
"    ------\n",
|
||
"    ValueError\n",
|
||
"        If score is non-empty and every element equals 1 (no replacement value exists).\n",
|
||
"    \"\"\"\n",
|
||
"    scores = np.asarray(score)\n",
|
||
"    if scores.size == 0:\n",
|
||
"        return scores.copy()\n",
|
||
"\n",
|
||
"    not_one = scores != 1\n",
|
||
"    if not not_one.any():\n",
|
||
"        raise ValueError(\"adjust_score_1: every score equals 1, no replacement value available\")\n",
|
||
"\n",
|
||
"    # vectorised: a single pass instead of two Python-level loops over the scores\n",
|
||
"    second_best_score = scores[not_one].max()\n",
|
||
"    return np.where(not_one, scores, second_best_score)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 735,
|
||
"id": "942c3952-577e-4e18-87a8-e15ed3040241",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def odd_ratio(score) :\n",
|
||
"    \"\"\"Return the odds score / (1 - score) of a score.\n",
|
||
"\n",
|
||
"    A score of exactly 1 makes the denominator zero.\n",
|
||
"    \"\"\"\n",
|
||
"    complement = 1 - score\n",
|
||
"    return score / complement"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 768,
|
||
"id": "f34e16f6-1596-492e-8ff2-0703173e815e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# automatic estimation of the bias B from a vector of odds ratios\n",
|
||
"\n",
|
||
"def find_bias(odd_ratios, y_objective, initial_guess=6) :\n",
|
||
"    \"\"\"Estimate the bias B for which the adjusted scores sum to y_objective.\n",
|
||
"\n",
|
||
"    Solves sum_i adjusted_score(odd_ratios[i], B) = y_objective for B with\n",
|
||
"    scipy.optimize.fsolve, starting the search at initial_guess.\n",
|
||
"\n",
|
||
"    Parameters\n",
|
||
"    ----------\n",
|
||
"    odd_ratios : array-like of odds ratios, as produced by odd_ratio().\n",
|
||
"    y_objective : number, target value for the sum of the adjusted scores\n",
|
||
"        (the calls in this notebook pass the sum of y_has_purchased).\n",
|
||
"    initial_guess : starting point handed to the solver (default 6).\n",
|
||
"\n",
|
||
"    Returns\n",
|
||
"    -------\n",
|
||
"    float\n",
|
||
"        The estimated bias.\n",
|
||
"    \"\"\"\n",
|
||
"    # local import: the function no longer depends on a later cell having imported fsolve\n",
|
||
"    from scipy.optimize import fsolve\n",
|
||
"\n",
|
||
"    ratios = np.asarray(odd_ratios, dtype=float)\n",
|
||
"\n",
|
||
"    def residual(bias):\n",
|
||
"        # adjusted_score broadcasts over the array, so no Python-level loop is needed\n",
|
||
"        return np.atleast_1d(np.sum(adjusted_score(ratios, bias)) - y_objective)\n",
|
||
"\n",
|
||
"    # start from the caller's guess (the value 6 used to be hard-coded here)\n",
|
||
"    bias_estimated = fsolve(residual, x0=initial_guess)\n",
|
||
"\n",
|
||
"    return bias_estimated[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 761,
|
||
"id": "8cc3a658-5ab5-482b-ba26-b12a3bf9c81b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([6.0428265])"
|
||
]
|
||
},
|
||
"execution_count": 761,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# autre méthode : avec fsolve\n",
|
||
"\n",
|
||
"from scipy.optimize import fsolve\n",
|
||
"\n",
|
||
"bias_estimated = fsolve(lambda bias : sum([adjusted_score(element, bias) for element in list(odd_ratios)]) - y_objective, x0=6)\n",
|
||
"bias_estimated"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 760,
|
||
"id": "92be0759-2583-411d-a0b0-f09fd53ff367",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import time"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 763,
|
||
"id": "58eb3320-fd4a-4b21-9cfe-6b9f7533a730",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"résultat : [6.0428265]\n",
|
||
"tps de calcul 2.112041473388672\n",
|
||
"résultat : 6.042826489667565\n",
|
||
"tps de calcul 3.9603891372680664\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# comparaison du temps pris par les deux opérations\n",
|
||
"\n",
|
||
"temps_debut = time.time()\n",
|
||
"bias_estimated_1 = fsolve(lambda bias : sum([adjusted_score(element, bias) for element in list(odd_ratios)]) - y_objective, x0=6)\n",
|
||
"temps_fin = time.time()\n",
|
||
"\n",
|
||
"temps_ecoule = temps_fin - temps_debut\n",
|
||
"print(\"résultat : \",bias_estimated_1)\n",
|
||
"print(\"tps de calcul\", temps_ecoule)\n",
|
||
"\n",
|
||
"temps_debut = time.time()\n",
|
||
"bias_estimated_2 = minimize(lambda bias : (sum([adjusted_score(element, bias) for element in list(odd_ratios)]) - y_objective)**2 ,\n",
|
||
" x0=6 , method = \"BFGS\").x[0]\n",
|
||
"temps_fin = time.time()\n",
|
||
"\n",
|
||
"temps_ecoule = temps_fin - temps_debut\n",
|
||
"print(\"résultat : \",bias_estimated_2)\n",
|
||
"print(\"tps de calcul\", temps_ecoule)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 755,
|
||
"id": "5e6c5b4a-4a13-43ed-af96-e5892563057a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([2.28853049, 0.3231094 , 0.38810178, ..., 0.13186529, 1.37997272,\n",
|
||
" 0.34048672])"
|
||
]
|
||
},
|
||
"execution_count": 755,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"odd_ratios"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 749,
|
||
"id": "6ef9088a-3ae7-419a-b009-cb5aae4ab4c7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"36092.2248005385"
|
||
]
|
||
},
|
||
"execution_count": 749,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sum([adjusted_score(element, 1) for element in list(odd_ratios)]) # - y_objective"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 704,
|
||
"id": "5fcd2467-9119-4bba-af38-f7833173c2d7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[0, 1]"
|
||
]
|
||
},
|
||
"execution_count": 704,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"[element for element in np.array([0,1])]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 544,
|
||
"id": "e20820a3-30a4-4e24-8c65-6178c4d7e9c1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"5.947447991192572"
|
||
]
|
||
},
|
||
"execution_count": 544,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# the function works well !!\n",
|
||
"\n",
|
||
"bias_train_set = find_bias(odd_ratios = X_train[\"odd_ratio\"], y_objective = y_train_sum, initial_guess = 6)\n",
|
||
"bias_train_set"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 716,
|
||
"id": "c17e4a3c-a3de-425b-a3da-1e15e33cb403",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([2.28853049, 0.3231094 , 0.38810178, ..., 0.13186529, 1.37997272,\n",
|
||
" 0.34048672])"
|
||
]
|
||
},
|
||
"execution_count": 716,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# stored under its own name: assigning the result to odd_ratio would overwrite the odd_ratio() function\n",
|
||
"odd_ratios_test = odd_ratio(adjust_score_1(X_test[\"score\"]))\n",
|
||
"odd_ratios_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 751,
|
||
"id": "0aad15bd-e820-4eda-b229-64bd1f90f7f5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# definition of the values for the pb\n",
|
||
"\n",
|
||
"new_score = adjust_score_1(X_test[\"score\"])\n",
|
||
"\n",
|
||
"odd_ratios = odd_ratio(np.array(new_score))\n",
|
||
"\n",
|
||
"y_objective = y_test[\"y_has_purchased\"].sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 752,
|
||
"id": "498560c3-e446-4dcc-bb19-47f2910d5fbb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(array([0.69591281, 0.2442046 , 0.27959173, ..., 0.11650264, 0.57982712,\n",
|
||
" 0.25400231]),\n",
|
||
" array([2.28853049, 0.3231094 , 0.38810178, ..., 0.13186529, 1.37997272,\n",
|
||
" 0.34048672]),\n",
|
||
" 13690.0)"
|
||
]
|
||
},
|
||
"execution_count": 752,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"new_score, odd_ratios, y_objective"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 769,
|
||
"id": "03f4a8f1-f568-4a7d-9501-8a7467a9a864",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"6.042826497117542"
|
||
]
|
||
},
|
||
"execution_count": 769,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# computation with the function defined\n",
|
||
"\n",
|
||
"bias_test_set = find_bias(odd_ratios = odd_ratios, \n",
|
||
" y_objective = y_objective,\n",
|
||
" initial_guess=6)\n",
|
||
"bias_test_set"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 770,
|
||
"id": "d0ea666d-33e8-46e8-9a4d-f17091dbfa93",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"5.947447998640124"
|
||
]
|
||
},
|
||
"execution_count": 770,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"biais_train_set = find_bias(odd_ratios = odd_ratio(adjust_score_1(X_train[\"score\"])), \n",
|
||
" y_objective = y_train[\"y_has_purchased\"].sum(),\n",
|
||
" initial_guess=6)\n",
|
||
"biais_train_set"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 772,
|
||
"id": "1c1bdbc6-4fa7-45fb-ba27-b4c02ff1ff9c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"5.947447991192572"
|
||
]
|
||
},
|
||
"execution_count": 772,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"bias_train_set"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 776,
|
||
"id": "eced1d08-5230-4449-8024-105111fe5873",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"betâ test - betâ train = 0.015909647078591174\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# différence des beta (log du biais)\n",
|
||
"print(\"betâ test - betâ train = \",np.log(bias_test_set/bias_train_set))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "d2d5aca0-7e8b-4039-9bb2-ff5011c436a6",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Random forest"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"id": "da8873e5-c4e7-4580-8567-70e411c029ab",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>43000</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>183923</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>19.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>97373</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>66956</th>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>254.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>378.343062</td>\n",
|
||
" <td>370.453947</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>116487</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>83146</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>35.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>37.474040</td>\n",
|
||
" <td>37.474040</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>223586</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>56489</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>141236</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6999</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>20.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>171.446921</td>\n",
|
||
" <td>171.446921</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>10000 rows × 14 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"43000 0.0 0.0 0.0 0.0 \n",
|
||
"183923 0.0 0.0 0.0 0.0 \n",
|
||
"97373 0.0 0.0 0.0 0.0 \n",
|
||
"66956 7.0 2.0 254.0 1.0 \n",
|
||
"116487 0.0 0.0 0.0 0.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"83146 1.0 1.0 35.0 1.0 \n",
|
||
"223586 0.0 0.0 0.0 0.0 \n",
|
||
"56489 0.0 0.0 0.0 0.0 \n",
|
||
"141236 0.0 0.0 0.0 0.0 \n",
|
||
"6999 2.0 1.0 20.0 1.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"43000 0.0 550.000000 550.000000 \n",
|
||
"183923 0.0 550.000000 550.000000 \n",
|
||
"97373 0.0 550.000000 550.000000 \n",
|
||
"66956 1.0 378.343062 370.453947 \n",
|
||
"116487 0.0 550.000000 550.000000 \n",
|
||
"... ... ... ... \n",
|
||
"83146 1.0 37.474040 37.474040 \n",
|
||
"223586 0.0 550.000000 550.000000 \n",
|
||
"56489 0.0 550.000000 550.000000 \n",
|
||
"141236 0.0 550.000000 550.000000 \n",
|
||
"6999 0.0 171.446921 171.446921 \n",
|
||
"\n",
|
||
" nb_tickets_internet is_email_true opt_in gender_female \\\n",
|
||
"43000 0.0 True True 0 \n",
|
||
"183923 0.0 True True 0 \n",
|
||
"97373 0.0 True False 0 \n",
|
||
"66956 7.0 True False 0 \n",
|
||
"116487 0.0 True False 1 \n",
|
||
"... ... ... ... ... \n",
|
||
"83146 1.0 True False 0 \n",
|
||
"223586 0.0 True True 0 \n",
|
||
"56489 0.0 True True 0 \n",
|
||
"141236 0.0 True False 0 \n",
|
||
"6999 0.0 True True 1 \n",
|
||
"\n",
|
||
" gender_male nb_campaigns nb_campaigns_opened \n",
|
||
"43000 1 14.0 12.0 \n",
|
||
"183923 1 19.0 11.0 \n",
|
||
"97373 0 7.0 2.0 \n",
|
||
"66956 1 0.0 0.0 \n",
|
||
"116487 0 5.0 0.0 \n",
|
||
"... ... ... ... \n",
|
||
"83146 1 9.0 3.0 \n",
|
||
"223586 1 23.0 1.0 \n",
|
||
"56489 1 4.0 0.0 \n",
|
||
"141236 1 6.0 0.0 \n",
|
||
"6999 0 0.0 0.0 \n",
|
||
"\n",
|
||
"[10000 rows x 14 columns]"
|
||
]
|
||
},
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train_subsample"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "fcbb8bea-e9d3-4fd4-8b47-7e796c788a1f",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Preprocessing"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "55e0c6d8-9e98-47be-9d5d-41e06505ceba",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# no need to standardize variables in a random forest\n",
|
||
"# we just encode categorical variables\n",
|
||
"\n",
|
||
"categorical_features = ['opt_in', 'is_email_true'] \n",
|
||
"\n",
|
||
"# Transformer for the categorical features\n",
|
||
"categorical_transformer = Pipeline(steps=[\n",
|
||
" #(\"imputer\", SimpleImputer(strategy=\"most_frequent\")), # Impute missing values with the most frequent\n",
|
||
" (\"onehot\", OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n",
|
||
"])\n",
|
||
"\n",
|
||
"preproc = ColumnTransformer(\n",
|
||
" transformers=[\n",
|
||
" (\"cat\", categorical_transformer, categorical_features)\n",
|
||
" ]\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "27af28da-d2bb-4eff-b842-18cec9740c84",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-2 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-2 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-2 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-2 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-2 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-2 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-2 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for ColumnTransformer</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in', 'is_email_true']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])])"
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"preproc"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "0cb46acb-647f-469d-b5e1-510bf1283196",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "1ce9acf4-3514-4056-a71a-c7654e25b9de",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "dfdd4601-4866-4102-b620-4f10648e7981",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Pipeline"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "eeefae73-afe7-4441-a04c-bd6a04beedd2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Define models and parameters for GridSearch\n",
|
||
"model = {\n",
|
||
" 'model': RandomForestClassifier(),\n",
|
||
" 'params': {\n",
|
||
" 'randforest__n_estimators': [100, 150, 200, 250, 300],\n",
|
||
" 'randforest__max_depth': [None, 15, 20, 25, 30, 35, 40],\n",
|
||
" }\n",
|
||
" }\n",
|
||
"\n",
|
||
"# Test each model using GridSearchCV\n",
|
||
"pipe = Pipeline(steps=[('preprocessor', preproc), ('randforest', model['model'])])\n",
|
||
"clf = GridSearchCV(pipe, model['params'], cv=3)\n",
|
||
"clf.fit(X_train, y_train)\n",
|
||
"\n",
|
||
"print(f\"Model: {model['model']}\")\n",
|
||
"print(f\"Best parameters: {clf.best_params_}\")\n",
|
||
"print('Best classification accuracy in train is: {}'.format(clf.best_score_))\n",
|
||
"print('Classification accuracy on test is: {}'.format(clf.score(X_test, y_test)))\n",
|
||
"print(\"------\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"id": "2a88f13b-05bc-4a70-b08b-8b07c118cedc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-7 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-7 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-7 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-7 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-7 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-7 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-7 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-7 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-7 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-7 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-7 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-7\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('random_forest',\n",
|
||
" RandomForestClassifier(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539}))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-27\" type=\"checkbox\" ><label for=\"sk-estimator-id-27\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> Pipeline<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('random_forest',\n",
|
||
" RandomForestClassifier(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539}))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-28\" type=\"checkbox\" ><label for=\"sk-estimator-id-28\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-29\" type=\"checkbox\" ><label for=\"sk-estimator-id-29\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in', 'is_email_true']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-30\" type=\"checkbox\" ><label for=\"sk-estimator-id-30\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-31\" type=\"checkbox\" ><label for=\"sk-estimator-id-31\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> RandomForestClassifier<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.ensemble.RandomForestClassifier.html\">?<span>Documentation for RandomForestClassifier</span></a></label><div class=\"sk-toggleable__content \"><pre>RandomForestClassifier(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539})</pre></div> </div></div></div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('random_forest',\n",
|
||
" RandomForestClassifier(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539}))])"
|
||
]
|
||
},
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Pipeline - on joue sur : max_depth\n",
|
||
"\n",
|
||
"param_grid = {\"random_forest__max_depth\" : [None, 10, 20, 40, 50, 60]}\n",
|
||
"\n",
|
||
"pipeline = Pipeline(steps=[\n",
|
||
" ('preprocessor', preproc),\n",
|
||
" ('random_forest', RandomForestClassifier(bootstrap = False, class_weight = weight_dict,\n",
|
||
" )) \n",
|
||
"])\n",
|
||
"\n",
|
||
"pipeline.set_output(transform=\"pandas\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "494dca83-4d60-4e49-8689-7d7ac612bb83",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'estimator': DecisionTreeClassifier(),\n",
|
||
" 'n_estimators': 100,\n",
|
||
" 'estimator_params': ('criterion',\n",
|
||
" 'max_depth',\n",
|
||
" 'min_samples_split',\n",
|
||
" 'min_samples_leaf',\n",
|
||
" 'min_weight_fraction_leaf',\n",
|
||
" 'max_features',\n",
|
||
" 'max_leaf_nodes',\n",
|
||
" 'min_impurity_decrease',\n",
|
||
" 'random_state',\n",
|
||
" 'ccp_alpha',\n",
|
||
" 'monotonic_cst'),\n",
|
||
" 'bootstrap': True,\n",
|
||
" 'oob_score': False,\n",
|
||
" 'n_jobs': None,\n",
|
||
" 'random_state': None,\n",
|
||
" 'verbose': 0,\n",
|
||
" 'warm_start': False,\n",
|
||
" 'class_weight': None,\n",
|
||
" 'max_samples': None,\n",
|
||
" 'criterion': 'gini',\n",
|
||
" 'max_depth': None,\n",
|
||
" 'min_samples_split': 2,\n",
|
||
" 'min_samples_leaf': 1,\n",
|
||
" 'min_weight_fraction_leaf': 0.0,\n",
|
||
" 'max_features': 'sqrt',\n",
|
||
" 'max_leaf_nodes': None,\n",
|
||
" 'min_impurity_decrease': 0.0,\n",
|
||
" 'monotonic_cst': None,\n",
|
||
" 'ccp_alpha': 0.0}"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"RandomForestClassifier().__dict__"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"id": "ee7cbc1c-7c31-4111-82a3-995141e2f13f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-8 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-8 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-8 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-8 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-8 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-8 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-8 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-8 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-8 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-8 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-8 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-8\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=3,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('random_forest',\n",
|
||
" RandomForestClassifier(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539}))]),\n",
|
||
" param_grid={'random_forest__max_depth': [None, 10, 20, 40, 50,\n",
|
||
" 60]},\n",
|
||
" scoring=make_scorer(f1_score, response_method='predict'))</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-32\" type=\"checkbox\" ><label for=\"sk-estimator-id-32\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> GridSearchCV<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>GridSearchCV(cv=3,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('random_forest',\n",
|
||
" RandomForestClassifier(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539}))]),\n",
|
||
" param_grid={'random_forest__max_depth': [None, 10, 20, 40, 50,\n",
|
||
" 60]},\n",
|
||
" scoring=make_scorer(f1_score, response_method='predict'))</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-33\" type=\"checkbox\" ><label for=\"sk-estimator-id-33\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">estimator: Pipeline</label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('random_forest',\n",
|
||
" RandomForestClassifier(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539}))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-34\" type=\"checkbox\" ><label for=\"sk-estimator-id-34\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in', 'is_email_true'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-35\" type=\"checkbox\" ><label for=\"sk-estimator-id-35\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in', 'is_email_true']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-36\" type=\"checkbox\" ><label for=\"sk-estimator-id-36\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-37\" type=\"checkbox\" ><label for=\"sk-estimator-id-37\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> RandomForestClassifier<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.ensemble.RandomForestClassifier.html\">?<span>Documentation for RandomForestClassifier</span></a></label><div class=\"sk-toggleable__content \"><pre>RandomForestClassifier(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539})</pre></div> </div></div></div></div></div></div></div></div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"GridSearchCV(cv=3,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('random_forest',\n",
|
||
" RandomForestClassifier(class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539}))]),\n",
|
||
" param_grid={'random_forest__max_depth': [None, 10, 20, 40, 50,\n",
|
||
" 60]},\n",
|
||
" scoring=make_scorer(f1_score, response_method='predict'))"
|
||
]
|
||
},
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# pipeline on the subsample\n",
|
||
"\n",
|
||
"random_forest_grid = GridSearchCV(pipeline, param_grid, cv=3, scoring = f1_scorer #, error_score=\"raise\"\n",
|
||
" )\n",
|
||
"\n",
|
||
"random_forest_grid"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"id": "3f149137-6313-4b4e-99d6-b3af7f296ad7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/sklearn/base.py:1351: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
|
||
" return fit_method(estimator, *args, **kwargs)\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Returned hyperparameter: {'random_forest__max_depth': None}\n",
|
||
"Best classification F1 score in train is: 0.33107422141513826\n",
|
||
"Classification F1 score on test is: 0.31752789604029275\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# run the pipeline on the full sample\n",
|
||
"\n",
|
||
"random_forest_grid.fit(X_train, y_train)\n",
|
||
"\n",
|
||
"# print results\n",
|
||
"print('Returned hyperparameter: {}'.format(random_forest_grid.best_params_))\n",
|
||
"print('Best classification F1 score in train is: {}'.format(random_forest_grid.best_score_))\n",
|
||
"print('Classification F1 score on test is: {}'.format(random_forest_grid.score(X_test, y_test)))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"id": "cd79f942-abd0-48c9-aa0d-0d22673abeec",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'scoring': make_scorer(f1_score, response_method='predict'),\n",
|
||
" 'estimator': Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('random_forest',\n",
|
||
" RandomForestClassifier(bootstrap=False,\n",
|
||
" class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539}))]),\n",
|
||
" 'n_jobs': None,\n",
|
||
" 'refit': True,\n",
|
||
" 'cv': 3,\n",
|
||
" 'verbose': 0,\n",
|
||
" 'pre_dispatch': '2*n_jobs',\n",
|
||
" 'error_score': nan,\n",
|
||
" 'return_train_score': False,\n",
|
||
" 'param_grid': {'random_forest__max_depth': [None, 10, 20, 40, 50, 60]},\n",
|
||
" 'multimetric_': False,\n",
|
||
" 'best_index_': 0,\n",
|
||
" 'best_score_': 0.33107422141513826,\n",
|
||
" 'best_params_': {'random_forest__max_depth': None},\n",
|
||
" 'best_estimator_': Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" Pipeline(steps=[('onehot',\n",
|
||
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
" sparse_output=False))]),\n",
|
||
" ['opt_in',\n",
|
||
" 'is_email_true'])])),\n",
|
||
" ('random_forest',\n",
|
||
" RandomForestClassifier(bootstrap=False,\n",
|
||
" class_weight={0.0: 0.5837086520288036,\n",
|
||
" 1.0: 3.486549107420539}))]),\n",
|
||
" 'refit_time_': 2.2247676849365234,\n",
|
||
" 'feature_names_in_': array(['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers',\n",
|
||
" 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',\n",
|
||
" 'nb_tickets_internet', 'is_email_true', 'opt_in', 'gender_female',\n",
|
||
" 'gender_male', 'nb_campaigns', 'nb_campaigns_opened'], dtype=object),\n",
|
||
" 'scorer_': make_scorer(f1_score, response_method='predict'),\n",
|
||
" 'cv_results_': {'mean_fit_time': array([1.64734515, 1.4220806 , 1.43256299, 1.68632547, 1.4271005 ,\n",
|
||
" 1.42404906]),\n",
|
||
" 'std_fit_time': array([0.32811727, 0.01915 , 0.02151065, 0.2729267 , 0.02447776,\n",
|
||
" 0.02384922]),\n",
|
||
" 'mean_score_time': array([0.14065607, 0.13571024, 0.13531415, 0.17512798, 0.13398822,\n",
|
||
" 0.13499872]),\n",
|
||
" 'std_score_time': array([0.00759402, 0.00653712, 0.00743453, 0.04901062, 0.00848726,\n",
|
||
" 0.00789539]),\n",
|
||
" 'param_random_forest__max_depth': masked_array(data=[None, 10, 20, 40, 50, 60],\n",
|
||
" mask=[False, False, False, False, False, False],\n",
|
||
" fill_value='?',\n",
|
||
" dtype=object),\n",
|
||
" 'params': [{'random_forest__max_depth': None},\n",
|
||
" {'random_forest__max_depth': 10},\n",
|
||
" {'random_forest__max_depth': 20},\n",
|
||
" {'random_forest__max_depth': 40},\n",
|
||
" {'random_forest__max_depth': 50},\n",
|
||
" {'random_forest__max_depth': 60}],\n",
|
||
" 'split0_test_score': array([0.19168873, 0.19168873, 0.19168873, 0.19168873, 0.19168873,\n",
|
||
" 0.19168873]),\n",
|
||
" 'split1_test_score': array([0.34428494, 0.34428494, 0.34428494, 0.34428494, 0.34428494,\n",
|
||
" 0.34428494]),\n",
|
||
" 'split2_test_score': array([0.45724899, 0.45724899, 0.45724899, 0.45724899, 0.45724899,\n",
|
||
" 0.45724899]),\n",
|
||
" 'mean_test_score': array([0.33107422, 0.33107422, 0.33107422, 0.33107422, 0.33107422,\n",
|
||
" 0.33107422]),\n",
|
||
" 'std_test_score': array([0.10881622, 0.10881622, 0.10881622, 0.10881622, 0.10881622,\n",
|
||
" 0.10881622]),\n",
|
||
" 'rank_test_score': array([1, 1, 1, 1, 1, 1], dtype=int32)},\n",
|
||
" 'n_splits_': 3}"
|
||
]
|
||
},
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"random_forest_grid.__dict__"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"id": "1806fe6d-cf98-459d-b05a-eb95972281dc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Accuracy Score: 0.48955211455211456\n",
|
||
"F1 Score: 0.31752789604029275\n",
|
||
"Recall Score: 0.8335281227173119\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# print results for the best model\n",
|
||
"\n",
|
||
"y_pred = random_forest_grid.predict(X_test)\n",
|
||
"\n",
|
||
"# Calculate the F1 score\n",
|
||
"acc = accuracy_score(y_test, y_pred)\n",
|
||
"print(f\"Accuracy Score: {acc}\")\n",
|
||
"\n",
|
||
"f1 = f1_score(y_test, y_pred)\n",
|
||
"print(f\"F1 Score: {f1}\")\n",
|
||
"\n",
|
||
"recall = recall_score(y_test, y_pred)\n",
|
||
"print(f\"Recall Score: {recall}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"id": "1a6a8e07-bd93-496b-986e-d219c03b82c5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 2 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# confusion matrix \n",
|
||
"\n",
|
||
"draw_confusion_matrix(y_test, y_pred)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"id": "1e1b3e42-1075-4a4a-bf44-3dadde3dbed1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3QUhdfG8W96LwQIJSQ0qVJCEQWUKihFQQERFdGf2LC8YsfeC6LYsXdFRAEVEMGCSFEpAQSkl4QSIIT0np33j4HgsAETSDK72edzDsfsZDa5QZ5suMzc62UYhoGIiIiIiIiIiHgMb7sLEBERERERERGRqqWGkIiIiIiIiIiIh1FDSERERERERETEw6ghJCIiIiIiIiLiYdQQEhERERERERHxMGoIiYiIiIiIiIh4GDWEREREREREREQ8jBpCIiIiIiIiIiIeRg0hEREREREREREPo4aQiIiIiJt7/PHHqVmzJrt27bK7FBEREXETagiJiIhUorVr13LttdfSuHFjAgMDCQ0NpWPHjkycOJHU1FRbavroo4/w8vJixYoVlfp5du7ciZeXV8kvb29vatSoQd++fZk/f/4Jnzdv3jwGDRpE7dq1CQgIIDY2ljFjxrBhw4YTPuf333/nsssuIyYmBn9/fyIiIujWrRtTpkwhOzu7Mr48Wxz9f7dz507L8UcffZRLLrmEESNGUFBQUOpzH3vsMby8vCqsloULF+Ll5cXChQsr7GOWplGjRlxzzTXles7SpUt57LHHSEtLc3pfr1696NWrV4XUJiIi4s7UEBIREakk7777Lp06dWL58uXcc889zJs3j5kzZzJixAjeeustrrvuOrtLrBK33XYby5Yt4/fff2fSpEls2bKFgQMHsmjRIqdz7733XgYMGIDD4eDNN99kwYIFPProoyxfvpyOHTsyY8YMp+c8+uij9OjRgz179vDkk0+yYMECvvzyS/r27ctjjz3GQw89VBVfpu3eeustateuzfjx4+0upULNnDmThx9+uFzPWbp0KY8//nipDaE333yTN998s4KqExERcV++dhcgIiJSHS1btoybb76Zfv36MWvWLAICAkre169fP+666y7mzZtXpTUVFhZW6BUiZRUXF8c555wDQPfu3WnWrBk9e/bk/fffp0ePHiXnTZ06lRdeeIGbb77Z8hf2Hj16MGrUKHr27Mno0aOJj4+nSZMmAEyfPp0nnniC6667jnfffdfy9Q0YMIB7772XZcuWVdFXai9fX1/mzJljdxkVrkOHDhX68Vq3bl2hH09ERMRd6QohERGRSvDMM8/g5eXFO++8Y2kGHeXv78/FF19c8tjhcDBx4kRatmxJQEAA0dHRXH311ezevdvyvBPdPnP8bTBHb+f59NNPueuuu4iJiSEgIICtW7eWnHP48GGuvfZaoqKiCAkJ4aKLLmL79u1OH/unn36ib9++hIeHExwcTPfu3fn5559P4XfF1LlzZwD2799vOf70009To0YNJk2a5PSckJAQXnvtNXJycpg8eXLJ8SeeeIIaNWrw6quvltrsCgsLo3///qdc6/F69epFmzZtWLZsGd26dSMoKIhGjRrx4YcfAjBnzhw6duxIcHAwbdu2LbXpt3jxYvr27UtYWBjBwcF069at1EbOH3/8Qffu3QkMDKR+/fpMmDCBwsLCUuuaNm0aXbt2JSQkhNDQUPr378/KlSvL9DUd/9wLLriAhISEcvyuWH333Xd07dqV4OBgwsLC6NevX6lNuW+//ZZ27doREBBAkyZNeOWVV0q9re34P/MOh4OnnnqKFi1aEBQURGRkJO3ateOVV14BzFvj7rnnHgAaN25ccsvi0VvbSrtlbO/evVx22WWEhYURERHByJEj+eOPP/Dy8uKjjz4qOe9Et5tdc801NGrUyHKsoKCAp556qiTTtWvX5tprr+XgwYNl+40UERGpZGoIiYiIVLDi4mJ++eUXOnXqRGxsbJmec/
PNN3PffffRr18/vvvuO5588knmzZtHt27dSElJOeVaJkyYQGJiIm+99Rbff/890dHRJe+77rrr8Pb25osvvuDll1/mr7/+olevXpbbbD777DP69+9PeHg4H3/8MV999RVRUVFccMEFp9wU2rFjBwDNmzcvObZv3z7Wr19P//79CQ4OLvV5Xbt2JTo6mgULFpQ8Z926dSd9TlkcbZ499thjZTo/OTmZa6+9lrFjx/Ltt9/Stm1b/ve///HEE08wYcIE7r33Xr755htCQ0MZOnQoe/fuLXnub7/9Rp8+fUhPT+f9999n6tSphIWFcdFFFzFt2rSS8zZs2EDfvn1JS0vjo48+4q233iIhIYGnnnrKqZ5nnnmGUaNG0bp1a7766is++eQTMjIyOO+881i3bt1Jv5bjn/vpp5+SmZnJeeedd9KZTSfyxRdfMGTIEMLDw5k6dSrvv/8+hw8fplevXixevLjkvHnz5nHppZdSs2ZNpk2bxsSJE5k6dSoff/zxf36OiRMn8thjjzFq1CjmzJnDtGnTuO6660r+3I4dO5bbbrsNgBkzZrBs2TKWLVtGx44dS/14ubm5nH/++cyfP59nn32W6dOnU7duXUaOHFnur/8oh8PBkCFDeO6557jiiiuYM2cOzz33HAsWLKBXr17k5uae8scWERGpMIaIiIhUqOTkZAMwLr/88jKd/88//xiAMW7cOMvxP//80wCMBx54oORYw4YNjTFjxjh9jJ49exo9e/Ysefzrr78agNGjRw+ncz/88EMDMC655BLL8SVLlhiA8dRTTxmGYRjZ2dlGVFSUcdFFF1nOKy4uNtq3b2906dLlpF/Xjh07DMB4/vnnjcLCQiMvL89YvXq10bVrV6NevXrGjh07Ss79448/DMC4//77T/oxzz77bCMoKKhcz/kvCxcuNHx8fIzHH3/8P8/t2bOnARgrVqwoOXbo0CHDx8fHCAoKMvbs2VNyfPXq1QZgvPrqqyXHzjnnHCM6OtrIzMwsOVZUVGS0adPGaNCggeFwOAzDMIyRI0caQUFBRnJysuW8li1bGkDJ711iYqLh6+tr3HLLLZY6MzIyjOjoaGP48OElxx599FHj3z/6HX3ubbfdZnluZmamUbduXeOyyy476e/F0T9jv/76q2EY5p+L+vXrG23btjWKi4stHy86Otro1q1bybGzzjrLiI2NNfLz8y3n1axZ0zj+x9Pj/8wPHjzYiI+PP2ltL7zwguX36d+Oz8qUKVMMwPj2228t511//fUGYHz44YcnfO5RY8aMMRo2bFjyeOrUqQZgfPPNN5bzli9fbgDGm2++edL6RUREqoKuEBIREbHZr7/+CuB0K1iXLl1o1arVad2eNWzYsBO+78orr7Q87tatGw0bNiypZ+nSpaSmpjJmzBiKiopKfjkcDi688EKWL19epg1e9913H35+fgQGBhIfH8+6dev4/vvvnW6xKQvDMCp8DlLPnj0pKirikUceKdP59erVo1OnTiWPo6KiiI6OJj4+nvr165ccb9WqFUDJKvjs7Gz+/PNPhg8fTmhoaMl5Pj4+jB49mt27d7Np0ybA/DPRt29f6tSpYznv+KtWfvzxR4qKivjf//5nOR4WFkbv3r357bffTvh1HH3u1Vdfbfn/GxgYSM+ePcu9PWzTpk3s3buX0aNH4+197EfM0NBQhg0bxh9//EFOTg7Z2dmsWLGCoUOH4u/vbznvoosu+s/P06VLF9asWcO4ceP48ccfycjIKFedx/v1118JCwuz3MIJcMUVV5zyx5w9ezaRkZFcdNFFlt/b+Ph46tatW+mb2URERMpCQ6VFREQqWK1atQgODi65Neq/HDp0CDAbDcerX79+SUPhVJT2MY+qW7duqceO1nN0xs/w4cNP+DFSU1MJCQk5aQ3/93//x1VXXUV+fj5//PEHDz30EEOGDGHNmjXUrFkTMAdPA//5e7Zr166S2/DK+pyKFhUV5XTM39/f6fjRZkdeXh5gzm
wyDOOE/5/h2J+FQ4cOnfD/z78d/X/UrVs3p3OPNu9O5OhzzzrrrFLf/++mTln8159jh8NR8ntgGIal2XVUaceON2HCBEJCQvjss89466238PHxoUePHjz//PMl86nKW3dpn7e03/+y2r9/P2lpaZaG17+dzm2gIiIiFUUNIRERkQrm4+ND3759+eGHH9i9ezcNGjQ46flHmyL79u1zOnfv3r3UqlWr5HFgYCD5+flOHyMlJcVy3lEnu5omOTm51GNnnHEGQMnHe+2110q2hB2vLH+Bb9CgQclf1Lt3707dunW56qqrePTRR3n99dcBs4lw5plnMn/+fHJyckqdCbRs2TL279/PiBEjSp7Ttm3bkz7HldSoUQNvb2/27dvn9L6jc4aO/p7XrFnzhP9//u3o+TNnzqRx48blqufoc7/++msaNmxYrueW5t9/jo+3d+9evL29qVGjRslVXscPFYfS/0wez9fXlzvvvJM777yTtLQ0fvrpJx544AEuuOACkpKSyv3noGbNmvz1119lqiUwMJD09HSn48c3eGrVqkXNmjVPuEkwLCysXDWKiIhUBt0yJiIiUgkmTJiAYRhcf/31FBQUOL2/sLCQ77//HoA+ffoA5gDnf1u+fDn//PMPffv2LTnWqFEj1q5dazlv8+bNJbcalcfnn39uebx06VJ27dpVskWpe/fuREZGsmHDBjp37lzqrxNdAXEyV155Jb169eLdd9+1XP304IMPcvjwYe6++26n52RnZ3P77bcTHBzM+PHjS44//PDDHD58mNtvvx3DMJyel5WVxfz588tdY2UICQnh7LPPZsaMGZahwg6Hg88++4wGDRqUDNru3bs3P//8s6VpUlxcbBk8DXDBBRfg6+tLQkICLVu2LPXXiRx97rZt2074/7c8WrRoQUxMDF988YXl/0V2djbffPNNyeaxkJAQOnfuzKxZsyzZyMrKYvbs2eX6nJGRkQwfPpxbbrmF1NRUdu7cCVCy2a8sw5t79+5NZmYm3333neX4F1984XRuo0aN2Lx5s6Upe+jQIZYuXWo5b/DgwRw6dIji4uJSf19btGhRrq9TRESkMugKIRERkUrQtWtXpkyZwrhx4+jUqRM333wzZ555JoWFhSQkJPDOO+/Qpk0bLrroIlq0aMENN9zAa6+9hre3NwMGDGDnzp08/PDDxMbGWhogo0eP5qqrrmLcuHEMGzaMXbt2MXHiRGrXrl3uGlesWMHYsWMZMWIESUlJPPjgg8TExDBu3DjAnOny2muvMWbMGFJTUxk+fDjR0dEcPHiQNWvWcPDgQaZMmXJKvz/PP/88Z599Nk8++STvvfceAKNGjWLVqlVMmjSJnTt38r///Y86deqwadMmJk+ezLZt2/jiiy9o0qRJyccZMWIEDz/8ME8++SQbN27kuuuuo2nTpuTk5PDnn3/y9ttvM3LkyJOunv/tt9/o27cvjzzySJnnCJ2qZ599ln79+tG7d2/uvvtu/P39efPNN1m3bh1Tp04tuaLroYce4rvvvqNPnz488sgjBAcH88YbbzjNbGrUqBFPPPEEDz/8MDt27GDgwIFERUWxf/9+/vzzT0JCQnjiiSdKreXocx988EG2b9/OhRdeSI0aNdi/fz9//fUXISEhPP7442X+2ry9vZk4cSJXXnklgwcP5sYbbyQ/P58XXniBtLQ0nnvuuZJzn3jiCQYNGsQFF1zA//3f/1FcXMwLL7xAaGgoqampJ/08F110EW3atKFz587Url2bXbt28fLLL9OwYUOaNWsGQNu2bQF45ZVXGDNmDH5+frRo0aLUK3OuvvpqJk+ezNVXX83TTz9Ns2bNmDt3Lj/++KPTuaNHj+btt9/mqquu4vrrr+fQoUNMnDiR8PBwy3mXX345n3/+OQMHDuT//u//6NKlC35+fuzevZtff/2VIUOGcMkll5T591ZERKRS2DfPWkREpPpbvXq1MWbMGCMuLs7w9/c3QkJCjA4dOhiPPPKIce
DAgZLziouLjeeff95o3ry54efnZ9SqVcu46qqrjKSkJMvHczgcxsSJE40mTZoYgYGBRufOnY1ffvnlhFvGpk+f7lTT0S1j8+fPN0aPHm1ERkYaQUFBxsCBA40tW7Y4nf/bb78ZgwYNMqKiogw/Pz8jJibGGDRoUKkf+9+Obhl74YUXSn3/iBEjDF9fX2Pr1q2W43PnzjUGDhxo1KxZs+TzjR492li/fv0JP9dvv/1mDB8+3KhXr57h5+dnhIeHG127djVeeOEFIyMj46R1Hv29evTRR096nmGYW6bOPPNMp+MNGzY0Bg0a5HQccNoA9vvvvxt9+vQxQkJCjKCgIOOcc84xvv/+e6fnLlmyxDjnnHOMgIAAo27dusY999xjvPPOO6Vuz5o1a5bRu3dvIzw83AgICDAaNmxoDB8+3Pjpp59Kzjl+y1h5nlua47eM/fvjnX322UZgYKAREhJi9O3b11iyZInT82fOnGm0bdvW8Pf3N+Li4oznnnvOuP32240aNWpYzjt+y9iLL75odOvWzahVq1bJc6+77jpj586dludNmDDBqF+/vuHt7W2ps7RNYbt37zaGDRtmhIaGGmFhYcawYcOMpUuXOm0ZMwzD+Pjjj41WrVoZgYGBRuvWrY1p06Y5bRkzDMMoLCw0Jk2aZLRv394IDAw0QkNDjZYtWxo33nhjqTkTERGpal6GUcr11SIiIiIiVaiwsJD4+HhiYmJc4ja/nTt30rhxYz788EOnDYAiIiLVgW4ZExEREZEqd91119GvXz/q1atHcnIyb731Fv/88w+vvPKK3aWJiIh4BDWERERERKTKZWZmcvfdd3Pw4EH8/Pzo2LEjc+fO5fzzz7e7NBEREY+gW8ZERERERERERDyM1s6LiIiIiIiIiHgYNYRERERERERERDyMGkIiIiIiIiIiIh7G44ZKOxwO9u7dS1hYGF5eXnaXIyIiIiIiIiJSIQzDIDMzk/r16+PtffJrgDyuIbR3715iY2PtLkNEREREREREpFIkJSXRoEGDk57jcQ2hsLAwwPzNCQ8Pt7ma0zNv3jwuvPBCu8sQcRnKhIiVMiHiTLkQsVImRKzcPRMZGRnExsaW9D5OxuPWzmdkZBAREUF6errbN4SKiorw9fW4np7ICSkTIlbKhIgz5ULESpkQsXL3TJSn56Gh0m5s+vTpdpcg4lKUCRErZULEmXIhYqVMiFh5UibUEBIRERERERER8TBqCLmx1q1b212CiEtRJkSslAkRZ8qFiJUyIWLlSZlQQ8iNRURE2F2CiEtRJkSslAkRZ8qFiJUyIWLlSZlQQ8iNLVu2zO4SRFyKMiFipUyIOFMuRKyUCRErT8qEGkIiIiIiIiIiIh5Ga+fd2KFDh6hZs6bdZYi4DGVCxEqZEHGmXIhYKRMiVu6eCa2d9xAbNmywuwQRl6JMiFgpEyLOlAsRK2VCxMqTMqGGkBvbvXu33SWIuBRlQsRKmRBxplyIWCkTIlaelAk1hNxYcHCw3SWIuBRlQsRKmRBxplyIWCkTIlaelAnNEBIRERERERERqQY0Q8hDTJ061e4SRFyKMiFipUyIOFMuRKyUCRErT8qEGkIiIiIiIiIiIh5GDSE31rx5c7tLEHEpyoSIlTIh4ky5ELFSJkSsPCkTagi5sejoaLtLEHEpyoSIlTIh4ky5ELFSJkSsPCkTagi5scWLF9tdgohLUSZErJQJEWfKhYiVMiFi5UmZUENIRERERERERMTDaO28G9u/fz916tSxuwwRl6FMiFgpEyLOlAsRK2VCxMrdM6G18x5i27Ztdpcg4lKUCRErZULEmXIhYqVMiFh5UibUEHJju3btsrsEEZeiTIhYKRMizpQLEStlQsTKkzKhhpAb8/f3t7sEEZeiTIhYKRMizpQLEStlQsTKkzJh6wyhRYsW8cILL7By5Ur27dvHzJkzGTp06Emf89tvv3HnnXeyfv166tevz7333stNN91U5s9ZnWYIiYiIiIiIiI
gc5TYzhLKzs2nfvj2vv/56mc7fsWMHAwcO5LzzziMhIYEHHniA22+/nW+++aaSK3VN06dPt7sEEZeiTIhYKRMizpQLEStlQsTKkzLha+cnHzBgAAMGDCjz+W+99RZxcXG8/PLLALRq1YoVK1YwadIkhg0bVklVuq6ioiK7SxBxKcqEiJUyIeJMuRCxUiZETPlFxazfk86S/V6MsLuYKmJrQ6i8li1bRv/+/S3HLrjgAt5//30KCwvx8/Nzek5+fj75+fkljzMyMiq9zqrSpEkTu0sQcSnKhIiVMiHiTLkQsVImxBMZhsHuw7msSjxMQmIaW//ZyYVfTcGvsIBZA/+P+9PzqBsRaHeZlc6tGkLJycnUqVPHcqxOnToUFRWRkpJCvXr1nJ7z7LPP8vjjjzsdnz59OsHBwVx66aX8/PPPpKenEx0dTZcuXZg9ezYAHTt2xOFwsHr1agCGDBnC4sWLOXToEFFRUfTo0YNZs2YB0K5dO/z8/Fi5ciUAgwYNYsWKFezfv5/w8HD69+/P119/DcCZZ55JaGgof/75J2A2tdatW8eePXsICQlh8ODBTJs2DYAWLVpQq1YtlixZAsD555/P5s2bSUxMxOFwcPbZZzNt2jQcDgdNmzYlJiaGRYsWAdCrVy8SExPZvn07vr6+jBgxgm+++YaCggIaNmxI06ZN+eWXXwA499xzOXDgAJs3bwZg1KhRfPvtt+Tk5NCgQQNat27N/PnzAejatSvp6els2LABgBEjRjBv3jwyMzOpW7cuHTt2ZO7cuQCcddZZ5OXl8ffffwNwySWXsHDhQg4fPkytWrXo2rUr33//PQAdOnQAICEhAYCLLrqIZcuWkZKSQo0aNejVqxczZ84EoG3btgQGBrJ8+XIABg4cyKpVq0hOTiYsLIwLL7yw5FK/1q1bExERwbJlywDo378/GzZsYPfu3QQHBzNkyBCmTp0KQPPmzYmOjmbx4sUA9OnTh23btrFr1y78/f0ZNmwY06dPp6ioiCZNmhAXF8fChQsB6NGjB3v27GHbtm14e3szcuRIZsyYQX5+PnFxcTRv3pyffvoJgO7du5OSksKmTZsAGDlyJLNnzyY7O5uYmBjatGnDjz/+CMDZZ59NVlYW69evB2D48OHMnz+fjIwM6tSpQ+fOnZkzZw4AnTp1orCwkLVr1wIwdOhQFi1aRGpqKjVr1uTcc8/l22+/BSA+Ph5vb29WrVoFwODBg/nrr784cOAAERER9O3blxkzZgDQpk0bgoOD+euvvwDz6r41a9awd+9eQkNDGThwIF999RUALVu2JCoqiqVLlwLQr18/Nm7cSFJSEkFBQQwdOpQvv/wSwzBo1qwZdevW5ffffwegd+/e7Ny5kx07duDn58fw4cP5+uuvKSwspHHjxjRq1Ihff/0VgPPOO4/k5GS2bNmCl5cXl19+Odu2bWP79u3ExsbSsmVLFixYAEC3bt1ITU1l48aNAFx22WXMnTuXrKws6tevT/v27fnhhx8A6NKlCzk5Oaxbtw7Arb9HBAQEcOmll+p7BJ77PSIkJISMjAx9jzjyPWLWrFnk5ubqe4SHf4/Iy8sjJCRE3yP0c4S+R2B+j/D39y/5M6zvEfo5orp+j8gvhtotOvP7P7tZszudpBxfsoq88Csu5OqVs3lz6ZeE52fjwIsf+gxhe+JuduenueX3iJycHMrK1qHS/+bl5fWfQ6WbN2/Otddey4QJE0qOLVmyhHPPPZd9+/ZRt25dp+eUdoVQbGxstRgqPXXqVEaNGmV3GSIuQ5kQsVImRJwpFyJWyoRUNw6HwfaULFYlppGQmEZC4mE278/E8e/Oh2Fw4fY/eeS3j6h/cDcABW3b4ffyZL7cv9+tM1GeodJudYVQ3bp1SU5Othw7cOAAvr6+1KxZs9TnBAQEEBAQUBXliYiIiIiIiEgVOpxdwOrdx5o/q5PSyMxzno0VExlEfFwk5/rnMGjyg4QvNa92o04dePpp/K+5Bnx84M
gVXZ7ArRpC/77k76j58+fTuXPnUucHVXc9evSwuwQRl6JMiFgpEyLOlAsRK2VC3ElRsYONyZkkJB1p/iSmsT0l2+m8QD9v2jWIpENcJB1ia9AhLpI64UdmAmVkwLh/ICAA7roL7r8fwsJKnutJmbC1IZSVlcXWrVtLHu/YsYPVq1cTFRVFXFwcEyZMYM+ePXzyyScA3HTTTbz++uvceeedXH/99Sxbtoz333+/5J5MT7Nnzx5iYmLsLkPEZSgTIlbKhIgz5ULESpkQV7Y/I4+EI4OfExLTWLsnjbxCh9N5TWqFEB8XSYe4GnSIjaRF3TD8fLzNd+blwZdfwpgx4OUF4eHwxRfQrBk0bOj0sTwpE7Y2hFasWEHv3r1LHt95550AjBkzho8++oh9+/aRmJhY8v7GjRszd+5cxo8fzxtvvEH9+vV59dVXPXLlPMC2bdvo0qWL3WWIuAxlQsRKmRBxplyIWCkT4iryCotZvze9pPmTkHiYvel5TueFBfrSIa4G8bHmFUDxDSKpEeLv/AENA6ZPh/vug507ITgYLrvMfN/555+wDk/KhK0NoV69enGymdYfffSR07GePXuWTCr3dN7e3naXIOJSlAkRK2VCxJlyIWKlTIgdDMMgKTWXhKTDJc2fDfsyKCy29ge8vaBF3fAjt36ZVwA1qRWCt7fXyT/B8uUwfjwc2bJJTAz4l9I0KoUnZcJltoxVlfJM3BYRERERERGR05OVX8TapLSS2T8JiWkcyi5wOq9WqL9529eR2T/tGkQQElCO61h274YHHoBPPzUfBwfDvffC3XdDSEgFfTWurdpuGROrGTNmcOmll9pdhojLUCZErJQJEWfKhYiVMiEVzeEw2HYwy7zy58gVQJv2Z3L8pSh+Pl6cWT/CbP4cmf3ToEYQXl7/cfXPyVx2GSxbZr49ejQ88ww0aFCuD+FJmVBDyI3l5+fbXYKIS1EmRKyUCRFnyoWIlTIhpys1u4DVSccGP69JSiMzv/S17yXNn7hIWtcLJ9DP5/Q+ucMBxcVwdOv4U0/BI4/A5Mlw1lmn9CE9KRNqCLmxuLg4u0sQcSnKhIiVMiHiTLkQsVImpDwKix1s3Jdpmf2z81CO03lBfj60j404Nvw5NpLoo2vfK8rSpXDHHTBkCDz4oHmsTx/o3dvcJnaKPCkTagi5sebNm9tdgohLUSZErJQJEWfKhYiVMiEnk5x+ZO37kdk/a3enk1/kvPa9ae0Qy+yf5nVC8fWppOHMu3aZm8OmTTMf795tzggKCDAfn84tZ3hWJtQQcmM//fQTo0aNsrsMEZehTIhYKRMizpQLEStlQo7KKyxm3Z50y+yffaWsfQ8/svb96O1f8Q0iiQj2q/wCMzPh2WfhpZcgP99s/Fx3HTz55LFmUAXwpEyoISQiIiIiIiLiQQzDIDE1p+S2r4SkNDbszaDI4bz2veXRte9HmkCNa5Zh7XtF++knuOoq2L/ffNy7t9kYio+v2jqqGTWE3Fj37t3tLkHEpSgTIlbKhIgz5ULESpnwDJl5haxJSi9p/qxOSiO1lLXvtcMC6BB7rPnTNqaca98rS6NGkJoKZ5wBkybBxRef9q1hJ+JJmXCB/7NyqlJSUjxq4JXIf1EmRKyUCRFnyoWIlTJR/RQ7DLYeyDKbP0du/9pyIMtp7bu/jzdtYsKPDX6OiyQm8jTXvleUrVth/nwYN858fMYZsGABdO0K/v6V+qk9KRNqCLmxTZs20bFjR7vLEHEZyoSIlTIh4ky5ELFSJtzfoax8ViellTR/1iSlk1XK2vfYqCA6xB6b/dOqXhgBvqe59r2ipaWZq+NffRWKiuCcc+Don8+ePaukBE/KhBpCIiIiIiIiIm6goMjBxuQMy+yfXaWsfQ/296F9g8hjg59jI6kdVnGDlytcURG88w48+iikpJjHLrwQQkPtraua8zKM4y8cq94yMjKIiIggPT2d8PBwu8s5LQ6HA2/vSlrlJ+KGlAkRK2VCxJlyIW
KlTLi2fem5x5o/iWn8vaf0te9nRIdaZv80rxOGT1UPfj5VP/4Id94JGzaYj1u1ghdfhAEDbCnH3TNRnp6HrhByY7Nnz+biiy+2uwwRl6FMiFgpEyLOlAsRK2XCdeQWFPP3nnTL7J/9GflO50UE+ZlX/hy5/at9bCQRQVWw9r0yZGfD6NFw8CDUrAmPPw433AB+9n09npQJNYTcWHZ2tt0liLgUZULESpkQcaZciFgpE/YwDIOdh3IszZ9/9mVSfNzadx9vL1rVC6ND7LHBz41rhbjG4OdTlZYGERHmlrCQEHj2WVi/Hh5+GGrUsLs6j8qEGkJuLCYmxu4SRFyKMiFipUyIOFMuRKyUiaqRkVfImqODnxMPszopjcM5hU7nRYcF0DHu2ODntjERBPm72ODnU1VQAG++CU88AW+/DSNGmMevu87euo7jSZlQQ8iNtWnTxu4SRFyKMiFipUyIOFMuRKyUiYpX7DDYciDTMvtn68FS1r77etM2JsIy+6deRKB7X/1TGsOA2bPhrrtgyxbz2GefHWsIuRhPyoQaQm7sxx9/ZNSoUXaXIeIylAkRK2VCxJlyIWKlTJy+lKx8Vh+57SshMY01SWlkFxQ7nRcXFXxk9s/Rte/h+Pu67/DiMlm71hwY/fPP5uPoaHj6abj2WnvrOglPyoQaQiIiIiIiIiJlUFDkYMO+DMvsn6TUXKfzQvx9aH9k5k+H2BrEx0VSK9SF175XhueegwcfBIcD/P3NxtCECeDm276rEzWE3NjZZ59tdwkiLkWZELFSJkScKRciVsrEiRmGwd70vGPNn8TDrNubQcFxa9+9vKBZdGhJ46dDXCTNot1o7Xtl6dTJbAaNGAHPPw+NG9tdUZl4UibUEHJjWVlZdpcg4lKUCRErZULEmXIhYqVMHJNTUMTfu9NJSDo2++dApvPa9xrBfubMnyO3frWLjSA80E3XvlcUw4Cvv4b0dBg71jzWrx/8/Te42UweT8qEGkJubP369bRr187uMkRchjIhYqVMiDhTLkSsPDUThmGwIyW75LavhMQ0NiY7r3339faiVb3wI1u/zNu/GtYMrn6Dn0/HihUwfjwsXgyhoTB4MNSta77PzZpB4FmZUENIREREREREqrX03H+tfT/SAErPdV77Xifcuva9Tf1qtPa9ou3ZAw88AJ98Yj4OCjLnBIWF2VuXlJmXYRy//K56y8jIICIigvT0dMLdfJhVYWEhfn4efmmiyL8oEyJWyoSIM+VCxKo6ZqKo2MHm/VkljZ+ExMNsO5jtdF7A0bXvcf9e+x5kQ8VuJicHJk0y5wLl5JjHrroKnnkGYmPtra0CuHsmytPz0BVCbmz+/PkMGjTI7jJEXIYyIWKlTIg4Uy5ErKpDJg5m5pszf47M/lm7O52cUta+N6oZTIe4GsQf2f7Vsq4HrH2vDLt3w5NPQlERdO0KL78MXbrYXVWFqQ6ZKCs1hNxYRkaG3SWIuBRlQsRKmRBxplyIWLlbJvKLitmwN+PIrV9mA2j3Yee176EBviWNnw5xkbRvEElNT1v7XpF27Di2Jax5c3jqKWjUCC67zFyzVo24WyZOhxpCbqxOnTp2lyDiUpQJEStlQsSZciFi5cqZMAyDPWm5R277Mmf/rN+TQUGx89r35tFhxwY/x9Wgae1QrX2vCLt2wf33w1dfmcOjO3Qwj993n711VSJXzkRFU0PIjXXu3NnuEkRcijIhYqVMiDhTLkSsXCkTOQVFrN2dXjL3JyEpjYOlrH2PCvE/svL9yNr3BhGEefra94qWlQXPPQcvvgh5eWbX7ddfjzWEqjFXykRlU0PIjc2ZM4dRo0bZXYaIy1AmRKyUCRFnyoWIlV2ZcDgMtqdk/2v2TxqbkjM4bus7vt5etK4ffqQBZA5+jovS2vdK43DAxx+b28OSk81jPXvC5Mke0QwCz3qdUENIREREREREKlVaTgGrS9a+p7E68TAZeUVO59WPCLQMfm4TE0Ggn9a+V5kBA2
D+fPPtpk3hhRdg6NBqNydITGoIubFOnTrZXYKIS1EmRKyUCRFnyoWIVWVkoqjYwab9mZbZP9tLWfse6OdNu5hjg5/jY2tQNyKwwuuRchgyBP74Ax5+GG67DQI8bxC3J71OqCHkxgoLC+0uQcSlKBMiVsqEiDPlQsSqIjJxIDPvWPPnyNr33ELnte+Na4VYZv+0qBuGn4/WvtsmPd3cFta9u3kVEMANN8CIEVC7tq2l2cmTXifUEHJja9eu5cwzz7S7DBGXoUyIWCkTIs6UCxGr8mYiv6iY9UfXviceJiExjT1pzmvfwwJ8iY+LLJn90z42kqgQ/4osXU5VURG89x488ggcPAgzZsDAgeDvD76+Ht0MAs96nVBDSERERERERJwYhsHuw7msOtL4SUhKY8PedAqLrZOfvbygRZ0ja99jzcHPTWuH4q21765nwQIYPx7Wrzcft2xpbhLz05Y2T+RlGIbx36dVHxkZGURERJCenk54eLjd5ZyW3NxcgoKC7C5DxGUoEyJWyoSIM+VCxOrfmcjOL2LN7rSS279WJx0mJavA6Tk1Q/xLNn51iI2kXWwkoQG61sClbdkCd94Js2ebj6Oi4PHH4cYb1Qw6jru/TpSn56HUurFFixZxwQUX2F2GiMtQJkSslAkRZ8qFiMlc+57FJ3MXUxjegITEw2zen+m09t3Px4vW9SNKZv90jKtBgxpBWvvubnbsMJtBvr5w663m7WI1athdlUvypNcJNYTcWGpqqt0liLgUZULESpkQcaZciKdKyykgIenY4OfVSWlklqx9Tyw5LyYyyDL758z64Vr77o4KC2HtWji6Mat/f3OA9IgR0Ly5vbW5OE96nVBDyI3VrFnT7hJEXIoyIWKlTIg4Uy7EExQVO9iYnHmkAXSY1YlpbE8pfe17bIhBn/aNS2b/1AnX2ne3ZhgwZw7cfTfs3WveKlanjvm+Bx+0tzY34UmvE5oh5MZycnIIDg62uwwRl6FMiFgpEyLOlAupjvZn5JVs/EpITGPtnjTyCh1O5zWpFWJe/RNXgw6xkbSsG0ZBfp4yUV2sW2fOCVqwwHxcuzZ8/TX06GFvXW7G3V8nNEPIQ3z77beMGjXK7jJEXIYyIWKlTIg4Uy7E3eUVFrN+b3pJ8ych8TB70/OczgsL9KVDXA3ij8z+iW8QSY1S1r5PVybc34ED5kygd98Fh8NcH3/HHfDAAxARYXd1bseTXifUEBIREREREXFBhmGQlJpLQtLhkubPhn0ZTmvfvb2gRd3wkq1fHeJq0KRWiNa+e4KsLGjdGg4dMh8PHw7PPw9Nmthbl7gFNYTcWHx8vN0liLgUZULESpkQcaZciCvLyi9ibVJayeyfhMQ0DmU7r32vFfrvte81aNcggpBTXPuuTLi50FC48kpYvBgmT9btYRXAkzKhhpAb8/b2trsEEZeiTIhYKRMizpQLcRUOh8G2g1nmlT9HrgDatD+T4ye8+vl4cWb9CLP5c2T2T0WufVcm3MyqVebA6JdegqONi+eeg4AA0P/LCuFJmVBDyI2tWrWKFi1a2F2GiMtQJkSslAkRZ8qF2CU1u4DVSccGP69JSiMzv8jpvJjIoGPNn7hIWter3LXvyoSb2LfP3BL20UfmJrEHHoC5c833BQXZWlp140mZUENIRERERESkAhUWO9i4L9My+2fnoRyn84L8fGgfG1Fy5U98XCTRYVr7Lv+Sm2teDfTss5CdbR678krzschp0tp5N5aZmUlYWJjdZYi4DGVCxEqZEHGmXEhlSE4/svb9yOyftbvTyS9yXvvetHaIZfZP8zqh+PrYe3uKMuHCZs40t4UlJpqPzzkHXn4Zzj7bzqqqPXfPhNbOe4i//vqLvn372l2GiMtQJkSslAkRZ8qFnK68wmLW7Um3zP7ZV8ra9/Aja9+P3v4V3yCSiGA/Gyo+OWXChe3ZYzaDYmPNzWGXXw4VNDtKTsyTMqGGkBs7cOCA3SWIuBRlQsRKmRBxplxIeR
iGQWJqTsltXwlJaWzYm0GRw3nte8uja9+PNIEa13SPte/KhAtJSoK9e49dAXTjjea8oLFjNSeoCnlSJtQQcmMRERF2lyDiUpQJEStlQsSZciEnk5lXyJqk9JLmz+qkNFJLWfteOyyADrHHmj9tY0597bvdlAkXkJUFEyfCCy9ATAysX29uDfPzg9tus7s6j+NJmdAMITeWn59PQECA3WWIuAxlQsRKmRBxplzIUcUOg60Hsszmz5Hbv7YcyHJa++7v402bmPCS5k98bCQxkRW39t1uyoSNHA745BNzY9i+feaxHj1g6lSoX9/e2jyYu2dCM4Q8xIwZMxg1apTdZYi4DGVCxEqZEHGmXHiuQ1n5rE5KK2n+rElKJ6uUte+xUUF0iD02+6dVvTACfCtv7bvdlAmb/P47jB8PK1eaj5s0Ma8QuuQSzQmymSdlQg0hERERERGpVgqKHGxMzrDM/tlVytr3YH8f2jeIPDb4OTaS2mHue2WAuImEBPNKIIDwcHjoIbj9dvM2MZEqpIaQG2vTpo3dJYi4FGVCxEqZEHGmXFRP+9JzjzV/EtP4e0/pa9/PiA61zP5pXicMHzcY/FyZlIkq4nCAt7f5docOMHgwNGgAjz8O0dH21iYWnpQJNYTcWHBwsN0liLgUZULESpkQcaZcuL/cgmL+3pNumf2zPyPf6byIID/zyp8jt3+1j40kIsj11r7bTZmoZMXF8P77MGkSLF58rPkzaxb4VN9bEd2ZJ2VCDSE39tdff9G0aVO7yxBxGcqEiJUyIeJMuXAvhmGw81COpfnzz75Mio9b++7j7UWremElzZ/42Ega1wqpNoOfK5MyUYl+/tmcE/T33+bj116DJ58031YzyGV5UibUEBIREREREZeQkVfImqODnxMPszopjcM5hU7nRYcF0DHu2ODntjERBPnrL9jiIjZvhrvvhu+/Nx/XqAGPPgrjxtlbl8hxtHbejaWlpREZGWl3GSIuQ5kQsVImRJwpF66j2GGw5UCmZfbP1oOlrH339aZtTIRl9k+9iEBd/VNBlIkKZBhw330weTIUFYGvr9kEevRRiIqyuzopI3fPhNbOe4g1a9bQs2dPu8sQcRnKhIiVMiHiTLmwT0pWPquP3PaVkJjGmqQ0sguKnc6Liwo+Mvvn6Nr3cPx9vW2o2DMoExXIywvy8sxm0KBB5tygli3trkrKyZMyoYaQG9u7d6/dJYi4FGVCxEqZEHGmXFSNgiIHG/ZllNz2lZCYRmKq89r3EH8f4o/M/OkQW4P4uEhqhWr1dlVSJk6DYcDcudCoEZx5pnns0UfNDWL9+9tampw6T8qEGkJuLDQ01O4SRFyKMiFipUyIOFMuKp5hGOxNzzs2+DnxMOv2ZlBw3Np3Ly9oFh1aMvi5Q1wNzogO9fi173ZTJk7RunVw110wfz707QsLFph/yGvWVDPIzXlSJjRDyI0VFxfjo+n0IiWUCRErZULEmXJx+nIKivh7dzoJScdm/xzIdF77XiPYz5z5c+TWr3axEYQHau27q1EmyungQfMqoLffBocD/Pzg//4PnnnGfFvcnrtnQjOEPMRXX33FqFGj7C5DxGUoEyJWyoSIM+WifAzDYEdKdsnK94TENDYmO6999/X2olW98CNX/pi3fzWsGazBz25AmSij/HxzbfxTT0F6unns0kth4kTwkBXlnsKTMqGGkIiIiIiIAJCe+6+170nm/J+0Uta+1wm3rn1vU19r36Wa+/RTuOce8+0OHcxNYh4yeFiqLzWE3FhLTawXsVAmRKyUCRFnysUxRcUONu/PMhs/iWkkJKWx9UCW03kBvt60axBhDn4uWfseZEPFUhmUiZPIzYWgI3/Wx4yBL76A0aPh6qvBjW8pkpPzpEyoIeTGoqKi7C5BxKUoEyJWyoSIM0/OxcHMfHPmz5HZP2t3p5NTytr3RjWDSxo/HWJr0LJeGH4+WvteXXlyJk5o3z546CFYsgTWrgV/f3M+0C+/2F2ZVA
FPyoQaQm5s6dKlNGzY0O4yRFyGMiFipUyIOPOUXOQXFbNhb8aRW7/MBtDuw7lO54UG+B658sf81b5BJDW19t2jeEomyiQ317wV7JlnIDvbPDZ/vrlGXjyGJ2VCDSERERERETdmGAZ70nKPrHw3Z/+s35NBQbHz2vfm0WHHBj/H1aBpba19F8EwYNo0uO8+SEw0j519ttkc6trV3tpEKpHWzruxlJQUatWqZXcZIi5DmRCxUiZEnFWHXOQUFLF2d/qRBpB5C9jBUta+R4X4H1n5fmTte4MIwrT2XY5THTJxWjIyYMAAWLrUfNygATz3HIwaBd66VdITuXsmtHbeQ2zcuJFzzz3X7jJEXIYyIWKlTIg4c7dcOBwG21OySUg0N36Za98zOG7rO77eXpxZP9wy+DkuSmvf5b+5WyYqXFiY+Ss4GO6/H+66y3xbPJYnZUINITeWlJRkdwkiLkWZELFSJkScuXou0nIKSho/CUlprE48TEZekdN59SMCjw1+jovkzPoRBPpp65GUn6tnosJlZ8NLL8FNN0Ht2ua9lFOmmIOjY2Lsrk5cgCdlQg0hNxYUpHWfIv+mTIhYKRMizlwpF0XFDjbtz7TM/tl+MNvpvEA/b9rFHBv8HB9bg7oRgTZULNWRK2WiUjkc8Omn8MADsHevuUnszTfN9zVubG9t4lI8JhNohpDd5YiIiIiIhziQmXes+XNk7XtuofPa98a1Qiyzf1rU1dp3kdOyeDGMHw8rVpiPGzeGSZPg0kvtrUukEmiGkIf48ssvufzyy+0uQ8RlKBMiVsqEiLOqykV+UTHrj659TzxMQmIae9Kc176HBfgSHxd5pAFUg/axkUSF+Fd6fSJHVevXih07zM1h06ebj8PC4MEH4f/+DwJ1lZ2Urlpn4jhqCLkxD7u4S+Q/KRMiVsqEiLPKyIVhGOw+nMuqfw1+3rDXee27txc0rxNWMvunY1wkTWqF4q2172Kjav1aMXmy2Qzy9obrroMnn4Q6deyuSlxctc7EcdQQcmPNmjWzuwQRl6JMiFgpEyLOKiIX2flFrNmdVnL71+qkw6RkFTidVzPE3zL4uV2DSEID9OO3uJZq9VpRXAyHD8PRleGPPAJJSfDYY9C+va2lifuoVpn4D3pFcmN169a1uwQRl6JMiFgpEyLOypsLc+17Fqv+Nftn8/5Mp7Xvfj5etK4fUTL7p2NcDRrUCNLad3F51ea14pdfzDlBderAjz+a28Nq1YKZM+2uTNxMtclEGagh5MZ+//13Ro0aZXcZIi5DmRCxUiZEnP1XLtJyCkhIOtb8WZ2URmYpa99jIoMss3/OrB+ute/iltz+tWLLFrj7bvjuO/NxZKR5VVBcnK1lifty+0yUgxpCIiIiIuKRioodbEzOPNIAOszqxDS2p5xg7XuDI1u/Ys1bwOqEayCtiK0OHzZnAr3+OhQWgo8PjBsHjz4KNWvaXZ2IW9DaeTeWnJzsUZezifwXZULESpkQsdqfkccva3ewMxMSEtP4+wRr35vUDilp/HSIi6RFnTB8tfZdqim3fK1Yswb69oVDh8zHAweaa+RbtbK3LqkW3DIT/6K18x5i586dbv0HVaSiKRMiVsqEeLK8wmLW700vGfyckHiYvel5TueFB/oSH1ejZPZPfGwkkcFa+y6ewy1fK1q1gqgoc17QSy/BBRfYXZFUI26ZiVOkhpAb27FjB+ecc47dZYi4DGVCxEqZEE9hGAZJqbkkJB0uaf5s2JdBYbH1QnhvL6gTUEzv9o1LZv80qRWite/i0dzitWLDBnjlFXjtNfD3N3/9+CPExoKv/korFcstMlFBlB435ufnZ3cJIi5FmRCxUiakusrKL2JtUlrJ7J+ExDQOZTuvfa8V+q+177E1aNcggh++n8XwS9raULWIa3Lp14qUFHNl/FtvmSvlW7WCO+4w39e4sZ2VSTXm0pmoYJohJCIiIiIuy+Ew2HYwy7zy58gVQJv2Z3L8T7D+Pt60rh9+ZO
6PeQuY1r6LuKmCAnNY9BNPQHq6eeySS2DiRDjjDHtrE3FxmiHkIb7++muGDx9udxkiLkOZELFSJsQdHc4uYPXRK3+S0lidmEZmvvPa9wY1gkoaPx3iImldP5wA3/9e+65ciFi5VCYMw1wff/fdsHWreSw+HiZPhl697KxMPIhLZaKSqSHkxgoLC+0uQcSlKBMiVsqEuLrCYgcb92VaZv/sPJTjdF6Qnw/tYyNKGkDxcZFEh53a2nflQsTK5TLx+utmM6hOHXj6abjmGnOlvEgVcblMVCI1hNxYY903K2KhTIhYKRPiavZn5LFq1+GS2T9rd6eTX+RwOq9p7RDL7J/mdUIrbO27ciFiZXsmkpPNIdFRUeDlZW4NmzoVJkyAsDB7axOPZHsmqpAaQm6sUaNGdpcg4lKUCRErZUJchcNhMGn+Jqb8ts1p9k9EkB/xR2776hBXg/gGkUQEV95AT+VCxMq2TOTlmbeCPfMMjBljXhkE0Lat+UvEJp70OlEx/9Qitvj111/tLkHEpSgTIlbKhLiCzLxCbvh0BW8uNJtBreqFc+XZcUwa0Z6f7+pJwsP9+Ph/Xbjj/Ob0bF67UptBoFyIHK/KM2EY8NVX5sawBx6ArCxISIAi51lhInbwpNcJXSEkIiIiIpVi16Fsxn68gi0Hsgjw9Wbi8HYMiY+xuywRscvy5TB+PCxZYj6OiYHnnoMrrgBvXasgUtXUEHJj5513nt0liLgUZULESpkQOy3ZmsK4z1eRnltInfAA3hndmfaxkXaXpVyIHKfKMvHJJ+atYQDBwXDffeY2seDgqvn8ImXkSa8TasO6seTkZLtLEHEpyoSIlTIhdjAMg4+X7uTqD/4iPbeQ+NhIvr/1XJdoBoFyIXK8KsvEgAEQGQlXXw2bN8Mjj6gZJC7Jk14n1BByY1u2bLG7BBGXokyIWCkTUtUKihw8MPNvHv1uPcUOg0s7xPDlDecQHX5qK+Irg3IhYlUpmXA44LPP4Lrrjh2rXdtcJ//xx+atYiIuypNeJ3TLmBvz8vKyuwQRl6JMiFgpE1KVUrLyufmzlSzfeRhvL5gwoBVjz2vscn8OXa0eEbtVeCaWLoU77jDnBQGMHAn9+5tv16xZsZ9LpBJ40uuEl2Ecv/yzesvIyCAiIoL09HTCw8PtLkdERETE7a3fm84Nn6xkT1ouYQG+vHpFB3q3iLa7LBGpSrt2mXOBpk0zH4eGmlvExo+HQNe5SlCkuitPz0O3jLmxWbNm2V2CiEtRJkSslAmpCj/8vY/hU5axJy2XxrVCmHlLd5duBikXIlannYncXHjwQWjRwmwGeXnB2LGwZQtMmKBmkLgdT3qd0C1jbiw3N9fuEkRcijIhYqVMSGVyOAxe+XkLr/xszlo4r1ktXh/VkYhgP5srOznlQsTqtDPh4wNffQX5+dC7N7z0EsTHV0htInbwpNcJNYTcWGxsrN0liLgUZULESpmQypKdX8RdX61h3npzE8vYcxtz/4CW+Pq4/sXnyoWI1SllYvFiOPts8PMDf3+YMgWys+Hii80rhETcmCe9Trj+q7acUMuWLe0uQcSlKBMiVsqEVIak1ByGTVnKvPXJ+Pt4M3F4Ox4a3NotmkGgXIgcr1yZ2LoVLrkEzjvPbAIddf75MGSImkFSLXjS64R7vHJLqRYsWGB3CSIuRZkQsVImpKL9tSOVIW8sYWNyJrVCA5h6w9lc1tm9/iVVuRCxKlMm0tLg7ruhdWuYNcu8TezAgcouTcQWnvQ6oVvGREREROQ/Tf0rkYdnraPIYdAmJpx3RnemfmSQ3WWJSGUqKoJ334VHHoGUFPPYhRfCiy+azSERcWtqCLmxbt262V2CiEtRJkSslAmpCIXFDp6avYGPl+0CYHC7erwwvD1B/j42V3ZqlAsRq5Nm4pZb4J13zLdbtTIHRl94YdUUJmITT3qd0C1jbiw1NdXuEkRcijIhYqVMyOk6nF3AmA/+KmkG3XNBC14b1cFtm0GgXI
gczykThnHs7VtugehoeP11WLtWzSDxCJ70OmF7Q+jNN9+kcePGBAYG0qlTJ37//feTnv/555/Tvn17goODqVevHtdeey2HDh2qompdy8aNG+0uQcSlKBMiVsqEnI7N+zMZ8sYSlm47RIi/D++M7sQtvc/Ay82HxioXIlYlmTh0CG67zZwVdFS7dpCYaDaGfHVziXgGT3qdsLUhNG3aNO644w4efPBBEhISOO+88xgwYACJiYmlnr948WKuvvpqrrvuOtavX8/06dNZvnw5Y8eOreLKRURERKqvnzbs55I3lpCYmkNsVBAzxnWn/5l17S5LRCqBd1ERTJ4MZ5xhXgn06quQlHTshIAA+4oTkUrlZRj/viawap199tl07NiRKf9aWdiqVSuGDh3Ks88+63T+pEmTmDJlCtu2bSs59tprrzFx4kSS/v1N6yQyMjKIiIggPT2d8PDw0/8ibFRcXIyPj/tesi1S0ZQJEStlQsrLMAzeXLiNSfM3YRjQtUlN3ryyIzVC/O0urcIoFyJHGAZ8/z3G3XfjtWWLeaxdO3NOUN++9tYmYiN3f50oT8/DtiuECgoKWLlyJf3797cc79+/P0uXLi31Od26dWP37t3MnTsXwzDYv38/X3/9NYMGDTrh58nPzycjI8Pyq7qYO3eu3SWIuBRlQsRKmZDyyC0o5vYvV/PCj2Yz6OquDfnkui7VqhkEyoUIADt2QL9+MGSI2QyKjjaHR69apWaQeDxPep2w7UbQlJQUiouLqVOnjuV4nTp1SE5OLvU53bp14/PPP2fkyJHk5eVRVFTExRdfzGuvvXbCz/Pss8/y+OOPOx2fPn06wcHBXHrppfz888+kp6cTHR1Nly5dmD17NgAdO3bE4XCwevVqAIYMGcLixYs5dOgQUVFR9OjRg1mzZgHQrl07/Pz8WLlyJQCDBg1ixYoV7N+/n/DwcPr378/XX38NwJlnnkloaCh//vknABdccAHr1q1jz549hISEMHjwYKZNmwZAixYtqFWrFkuWLAHg/PPPZ/PmzSQmJrJv3z7AvPXO4XDQtGlTYmJiWLRoEQC9evUiMTGR7du34+vry4gRI/jmm28oKCigYcOGNG3alF9++QWAc889lwMHDrB582YARo0axbfffktOTg4NGjSgdevWzJ8/H4CuXbuSnp7Ohg0bABgxYgTz5s0jMzOTunXr0rFjx5IQnXXWWeTl5fH3338DcMkll7Bw4UIOHz5MrVq16Nq1K99//z0AHTp0ACAhIQGAiy66iGXLlpGSkkKNGjXo1asXM2fOBKBt27YEBgayfPlyAAYOHMiqVatITk4mLCyMCy+8kOnTpwPQunVrIiIiWLZsGWA2HTds2MDu3bsJDg5myJAhTJ06FYDmzZsTHR3N4sWLAejTpw/btm1j165d+Pv7M2zYMKZPn05RURFNmjQhLi6OhQsXAtCjRw/27NnDtm3b8Pb2ZuTIkcyYMYP8/Hzi4uJo3rw5P/30EwDdu3cnJSWFTZs2ATBy5Ehmz55NdnY2MTExtGnThh9//BEwr6TLyspi/fr1AAwfPpz58+eTkZFBnTp16Ny5M3PmzAGgU6dOFBYWsnbtWgCGDh3KokWLSE1NpWbNmpx77rl8++23AMTHx+Pt7c2qVasAGDx4MH/99RcHDhwgIiKCvn37MmPGDADatGlDcHAwf/31FwADBgxgzZo17N27l9DQUAYOHMhXX30FQMuWLYmKiipp7Pbr14+NGzeSlJREUFAQQ4cO5csvv8QwDJo1a0bdunVLZof17t2bnTt3smPHDvz8/Bg+fDhff/01hYWFNG7cmEaNGvHrr78CcN5555GcnMyWLVvw8vLi8ssvZ+vWrUydOpXY2FhatmzJggULAPN7R2pqasn9wJdddhlz584lKyuL+vXr0759e3744QcAunTpQk5ODuvWrQNw6+8RAQEBXHrppfoeged+j0hJSSEjI0PfI458j5g1axa5ubn6HlHK94jDeQZf7a
vB9rQivDG4uEEeN3epyaoVy6vd94g9e/aUzK/09O8R+jnCc79HzPrxRwYvXYqvnx/rzj+fTcOGURQczPmpqfo5Av0c4enfI/bs2UOHDh3c9ntETk4OZWXbLWN79+4lJiaGpUuX0rVr15LjTz/9NJ9++mmpg5w2bNjA+eefz/jx47ngggvYt28f99xzD2eddRbvv/9+qZ8nPz+f/Pz8kscZGRnExsZWi1vGfvvtN3r27Gl3GSIuQ5kQsVImpCxW7jrMjZ+uJCUrn6gQf6Zc2ZGzm9S0u6xKo1yIR8rPh1mzYOTIY8e+/x7atOG3xERlQuRf3P11ojy3jNl2hVCtWrXw8fFxuhrowIEDTlcNHfXss8/SvXt37rnnHsDsgoWEhHDeeefx1FNPUa9ePafnBAQEEFBNB6G1b9/e7hJEXIoyIWKlTMh/+Xrlbh6Y8TcFxQ5a1g3j3as7ExsVbHdZlUq5EI9iGPDNN3DvveZtYhERx1bHX3QRAO1r1LCxQBHX40mvE7bNEPL396dTp04ll1kdtWDBArp161bqc3JycvD2tpZ8dNiTjbOxbXP0sjMRMSkTIlbKhJxIUbGDp2Zv4O7paygodnDBmXX45uZu1b4ZBMqFeJCVK6FnTxgxwmwG1a8PhYVOpykTIlaelAnbrhACuPPOOxk9ejSdO3ema9euvPPOOyQmJnLTTTcBMGHCBPbs2cMnn3wCmPd5Xn/99UyZMqXklrE77riDLl26UL9+fTu/FBERERG3kJ5byG1TE1i0+SAA/9e3Gf/Xtxne3l42VyYiFWLvXnjgAfjkE/MKoaAguOce8yqhkBC7qxMRF2JrQ2jkyJEcOnSIJ554gn379tGmTRvmzp1Lw4YNAdi3bx+JiYkl519zzTVkZmby+uuvc9dddxEZGUmfPn14/vnn7foSbNWlSxe7SxBxKcqEiJUyIcfbdjCL6z9ewfaUbIL8fHjxsvYMbOt8y311plxItWYYMGAAHBn6y1VXwTPPQGzsCZ+iTIhYeVImbG0IAYwbN45x48aV+r6PPvrI6dhtt93GbbfdVslVuYfyTA8X8QTKhIiVMiH/tnDTAW6bmkBmXhExkUG8c3UnzqwfYXdZVU65kGrH4TAbQT4+4OUFjz0GL7wAL78MZfiLrTIhYuVJmbBthpCcvqOr6kTEpEyIWCkTAuacxXcXbed/Hy0nM6+Izg1r8O2t3T2yGQTKhVQzy5ZB167w1lvHjg0dCkuWlKkZBMqEyPE8KRO2XyEkIiIiIpUjr7CYB2eu45tVuwEY2TmWJ4e2wd9X/yYo4tZ27YL774cvvzQf798PN94Ivr7mVUIiImXgZXjYeq6MjAwiIiJIT08nPDzc7nJOS35+PgEBAXaXIeIylAkRK2XCsx3IyOPGz1aSkJiGj7cXDw9qxZhujfDy8L8sKhfi1jIz4bnn4KWXIC/PbP5cey089RTUO7V5YMqEiJW7Z6I8PQ/985Ab+/nnn+0uQcSlKBMiVsqE51q7O42LX19CQmIaEUF+fHxtF67p3tjjm0GgXIgbmzcPmjc3h0Tn5UGvXuZq+fffP+VmECgTIsfzpEzoljE3lp6ebncJIi5FmRCxUiY807er93Dv12vJL3JwRnQo713dmUa1tGr6KOVC3FbduuatYU2bwqRJMGRIhdwepkyIWHlSJtQQcmPR0dF2lyDiUpQJEStlwrMUOwwmzd/ElIXbAOjTMppXLo8nLNDP5spci3IhbmPbNli8GMaMMR/Hx8MPP5hXBlXg7SzKhIiVJ2VCM4TcWGZmJmFhYXaXIeIylAkRK2XCc2TmFXLHl6v5eeMBAG7u1ZS7+7fAx1u3iB1PuRCXl55uzgR69VVzpfy6ddCiRaV9OmVCxMrdM6EZQh5i9uzZdpcg4lKUCRErZcIz7EzJ5pI3l/LzxgME+HrzyuXx3HdhSzWDTkC5EJdVVGSuj2/WzLwlrKAA+vQB78r9K5syIWLlSZnQLWMiIiIibmrJ1hTGfb6K9N
xC6oQH8M7ozrSPjbS7LBEpr/nz4c47Yf1683GLFuYmsQEDtEZeRCqNGkJurGPHjnaXIOJSlAkRK2Wi+jIMg4+X7uTJOf9Q7DCIj43kndGdiA4PtLs0l6dciMtJS4Phw82V8lFR8NhjcNNN4Fc187+UCRErT8qEGkJuzOFw2F2CiEtRJkSslInqqaDIwSPfruPL5UkAXNoxhmcuaUugn4/NlbkH5UJcQmYmHJ1REhkJjz4KSUnwyCNmU6gKKRMiVp6UCc0QcmOrV6+2uwQRl6JMiFgpE9VPSlY+V773B18uT8LbCx4a1IoXR7RXM6gclAuxVWGhOSy6YUPzNrGj7roLXn65yptBoEyIHM+TMqErhERERETcwPq96dzwyUr2pOUSFujLa6M60KuF56zGFXFrhgFz5sDdd8OmTeax99+H/v3trUtEPJrWzruxnJwcgoOD7S5DxGUoEyJWykT1Mffvfdz11RpyC4tpUiuEd8d0pmntULvLckvKhVS5devMgdELFpiPa9c218pfdx342H91nzIhYuXumdDaeQ+xePFiu0sQcSnKhIiVMuH+HA6DlxZsZtznq8gtLKZH89rMHNddzaDToFxIlXrySWjf3mwG+fvDvffCli1www0u0QwCZULkeJ6UCd0y5sYOHTpkdwkiLkWZELFSJtxbdn4Rd321hnnrkwEYe25j7h/QEl8f/Xve6VAupEq1bg0Oh7lF7PnnoUkTuytyokyIWHlSJtQQcmNRNgydE3FlyoSIlTLhvpJSc7j+kxVsTM7E38ebpy9pw4jOsXaXVS0oF1JpDANmzoSCArj8cvPYpZfCypXgwmuslQkRK0/KhGYIubHc3FyCgoLsLkPEZSgTIlbKhHv6c/shbv58FanZBdQKDeDt0Z3o1LCG3WVVG8qFVIpVq2D8eFi0CGrWhK1bzXXybkCZELFy90xohpCHmDVrlt0liLgUZULESplwP1/8mciV7/1JanYBbWMi+P627moGVTDlQirU3r1w7bXQubPZDAoMhJtvBj8/uysrM2VCxMqTMqFbxkRERERsVljs4MnZG/hk2S4ALmpfn4nD2hHk7xpDZ0XkOLm58OKL8NxzkJ1tHrviCnj2WYiLs7c2EZEyUkPIjbVr187uEkRcijIhYqVMuIfD2QWM+3wVy7YfwssL7u7fgnG9muLl5WV3adWSciEVYtMmeOQRc27QOefA5Mnmf92QMiFi5UmZUEPIjfm50aWoIlVBmRCxUiZc36bkTMZ+spyk1FxC/H14+fIO9Gtdx+6yqjXlQk7Z7t3QoIH5dnw8PPAAnHmmOUDajRu4yoSIlSdlQjOE3NjKlSvtLkHEpSgTIlbKhGtbsGE/l765hKTUXOKigpl5S3c1g6qAciHllpQEV15prozfvPnY8aeeglGj3LoZBMqEyPE8KRO6QkhERESkChmGwZsLtzFp/iYMA7o2qcmbV3akRoi/3aWJyL9lZcHEifDCC5CXZzZ+FiyA5s3trkxEpEJo7bwby8jIcPuvQaQiKRMiVsqE68ktKOaer9cwe+0+AMZ0bchDg1vj56OLtquKciH/yeGATz+FCRNgn5lVevQw5wR17GhvbZVAmRCxcvdMaO28h1ixYoXdJYi4FGVCxEqZcC1703IZ8fZSZq/dh6+3F89c0pbHh7RRM6iKKRdyUoYBffvCNdeYzaDGjeGbb2DhwmrZDAJlQuR4npQJ3TLmxvbv3293CSIuRZkQsVImXMfKXYe58dOVpGTlExXiz5QrO3J2k5p2l+WRlAs5KS8v6N8fVq6Ehx+G22+HgAC7q6pUyoSIlSdlQg0hN+bOl7GJVAZlQsRKmXAN01ck8eDMdRQUO2hZN4x3r+5MbFSw3WV5LOVCLDIy4OmnzSZQ377msfHj4brrIDra3tqqiDIhYuVJmdAMITdWWFjoUSvxRP6LMiFipUzYq6jYwbM/bOT9xTsAuPDMurx4WXtCAvTvcXZSLgSA4mJ4/3146CE4eBDatI
HVq8HHx+7KqpwyIWLl7pnQDCEP8fXXX9tdgohLUSZErJQJ+6TnFHLtR8tLmkH/17cZb17ZUc0gF6BcCD/9BB06wI03ms2g5s3h2WfB2zP/aqRMiFh5Uib0U4mIiIhIBdp2MIvrP17B9pRsgvx8ePGy9gxsW8/uskRk82a4+274/nvzcY0a8OijMG4cuPHVACIip0oNITd25pln2l2CiEtRJkSslImq9+umA9w+NYHMvCJiIoN45+pOnFk/wu6y5F+UCw+2Zo3ZDPL1NZtAjz4KUVF2V2U7ZULEypMyoYaQGwsNDbW7BBGXokyIWCkTVccwDN77fQfP/vAPDgPOalSDKVd1olZo9d5O5I6UCw9SWAibNpnzgQCGD4f774cxY6BlS3trcyHKhIiVJ2XCM2+UrSb+/PNPu0sQcSnKhIiVMlE18gqLuWv6Gp6eazaDLj8rls/HnqNmkItSLjyAYcDcudCuHfTuDenp5nEvL3NWkJpBFsqEiJUnZUINIREREZFTdCAjj8vf+YMZq/bg4+3F4xefybOXtsXfVz9iidhi/Xq48EIYNAg2bjSPbdhgb00iIi5Ka+fdWGpqKlG671mkhDIhYqVMVK41SWnc8OkK9mfkExHkx5tXdqT7GbXsLkv+g3JRTR08aM4EevttcDjMIdF33AEPPggRmuN1MsqEiJW7Z0Jr5z3EunXr7C5BxKUoEyJWykTl+Xb1Hi57exn7M/JpFh3Kd7d2VzPITSgX1VBqKrRoAVOmmM2gSy+Ff/6BiRPVDCoDZULEypMyoaHSbmzPnj12lyDiUpQJEStlouIVOwwmzd/ElIXbAOjbMpqXL48nLFArq92FclENRUXBkCHmFrGXXoJeveyuyK0oEyJWnpQJNYTcWEhIiN0liLgUZULESpmoWJl5hdzx5Wp+3ngAgHG9mnJX/xb4eHvZXJmUh3JRDSQkwH33wZtvwhlnmMdefRWCg8HHx97a3JAyIWLlSZnQDCE35nA48PbWXX8iRykTIlbKRMXZmZLN2E9WsPVAFgG+3kwc3o4h8TF2lyWnQLlwY/v2mTOBPvrI3CQ2YgR89ZXdVbk9ZULEyt0zoRlCHmLatGl2lyDiUpQJEStlomIs2ZrCkDeWsPVAFnXDA5l+U1c1g9yYcuGGcnPh6aehWTP48EOzGXT55fDCC3ZXVi0oEyJWnpQJ3TImIiIiUgrDMPho6U6emvMPxQ6D+NhI3hndiejwQLtLE/EcM2bA+PGQmGg+PvtsmDwZuna1ty4RkWpADSE31qJFC7tLEHEpyoSIlTJx6vKLinlk1nqmrUgC4NKOMTxzSVsC/TSfxN0pF27mn3/MZlCDBvDcczBqFLjxrRyuSJkQsfKkTKgh5MZq1dJ6W5F/UyZErJSJU5OSlc9Nn65kxa7DeHvBAwNbcd25jfHy0vDo6kC5cHG7d5tr5Nu1Mx/feScEBsLNN5tDo6XCKRMiVp6UCbXX3diSJUvsLkHEpSgTIlbKRPmt35vOxa8tZsWuw4QF+vLBNWcx9rwmagZVI8qFi8rOhkcfhebNYfRoKC42jwcFwV13qRlUiZQJEStPyoSuEBIREREB5qzdx93T15BbWEyTWiG8O6YzTWuH2l2WSPXmcMBnn8GECbB3r3ksPBwOHYLoaHtrExGp5rR23o0dPHiQ2rVr212GiMtQJkSslImycTgMXv55C6/+vAWAHs1r89rlHYgI9rO5MqkMyoULWbzYHBi9YoX5uFEjc3PYsGGgq/KqjDIhYuXumdDaeQ+xefNmu0sQcSnKhIiVMvHfsvOLuPnzlSXNoLHnNuaDMZ3VDKrGlAsXsWQJnHee2QwKCzMHRv/zDwwfrmZQFVMmRKw8KRNqCLmxxKPrN0UEUCZEjqdMnFxSag7Dpizlx/X78ffxZtKI9jw0uDW+PvrxqDpTLmz07xsTunUzG0I33ABbtsB995nDo6XKKRMiVp6UCc0QcmMBAQF2lyDiUpQJEStl4sT+2H6IcZ+vIj
W7gNphAbw9uhMd42rYXZZUAeXCBsXF8MEH8Npr5m1i4eHmVUA//wx+uhrPbsqEiJUnZUIzhERERMSjfP7nLh79dj1FDoO2MRG8c3Un6kUE2V2WSPX0yy/mnKC1a83Hzz4L999vb00iItWYZgh5iGnTptldgohLUSZErJQJq8JiBw/PWseDM9dR5DC4uH19pt/UVc0gD6NcVJEtW2DoUOjb12wGRUbC5Mlw5512VybHUSZErDwpE7plzI05HA67SxBxKcqEiJUycUxqdgHjPl/JH9tT8fKCey5owc09m+Kl4bUeR7moZIYB99wDr74KhYXg4wPjxsGjj0LNmnZXJ6VQJkSsPCkTagi5saZNm9pdgohLUSZErJQJ06bkTMZ+spyk1FxC/H145fIOnN+6jt1liU2Ui0rm5QUHD5rNoIEDYdIkaNXK7qrkJJQJEStPyoQaQm4sJibG7hJEXIoyIWKlTMD89cmMn7aa7IJi4qKCeW9MZ5rXCbO7LLGRclEJ5s2D5s2hSRPz8TPPwBVXwAUX2FuXlIkyIWLlSZnQDCE3tmjRIrtLEHEpyoSIlSdnwjAMXv9lCzd8upLsgmK6Na3Jt7d0VzNIPDoXFW7DBhgwwPx1zz3HjsfEqBnkRpQJEStPyoSuEBIREZFqJbegmHu+XsPstfsAuKZbIx4c1Ao/H/07mEiFSEmBxx6Dt94yV8r7+UHjxuBwgLdyJiLiLtQQcmO9evWyuwQRl6JMiFh5Yib2puVyw6crWLcnAz8fL54Y0oZRXeLsLktciCfmosIUFMDrr8MTT0B6unls6FB44QU44wxbS5NTp0yIWHlSJtTCd2OJiYl2lyDiUpQJEStPy8TKXalc/PoS1u3JICrEn8/HnqNmkDjxtFxUqDffhLvuMptB8fHwyy8wc6aaQW5OmRCx8qRMqCHkxrZv3253CSIuRZkQsfKkTHy1IolR7/xJSlY+LeuG8d2t3enSOMrussQFeVIuKkRBwbG3b7gBzjoL3nsPVqyA3r3tq0sqjDIhYuVJmdAtY27M11f/+0T+TZkQsfKETBQVO3j2h428v3gHAAPa1GXSiPaEBFT/r11OjSfkokLs3w8PPQQJCfDnn+DjA8HB5tteXnZXJxVImRCx8qRMeBmGYdhdRFXKyMggIiKC9PR0wsPD7S5HRERETlF6TiG3Tl3F71tSALjj/Gbc3qcZ3t76y6rIKcvLg5dfNlfHZ2aax376Cfr2tbUsEREpm/L0PHTLmBv75ptv7C5BxKUoEyJW1TkTWw9kMfTNJfy+JYUgPx+mXNmRO85vrmaQ/KfqnIvTYhgwfTq0agUTJpjNoC5dYMkSNYOqOWVCxMqTMuE510JVQwX/vqdbRJQJkeNU10z8uukAt3+RQGZ+ETGRQbx7dWda19dVv1I21TUXp+XQIXNb2OLF5uOYGHjuObjiCq2R9wDKhIiVJ2VCDSE31rBhQ7tLEHEpyoSIVXXLhGEYvPv7dp79YSOGAV0aRfHmVR2pFRpgd2niRqpbLipEVBQUFZkzgu69F+6+G0JC7K5KqogyIWLlSZlQQ8iNNW3a1O4SRFyKMiFiVZ0ykVdYzAMz/mZGwh4ARnWJ5fGL2+Dvq6sXpHyqUy5OWU4OvPoqjBsH4eHmkOgPPoCwMGjQwO7qpIopEyJWnpQJ/RTlxn755Re7SxBxKcqEiFV1ycT+jDxGvvMHMxL24OPtxRNDzuSZS9qqGSSnpLrk4pQ4HPDZZ9C8uTkn6Nlnj72vVSs1gzyUR2dCpBSelAldISQiIiIua01SGjd8uoL9GflEBvvx5hUd6XZGLbvLEnE/S5fCHXfA8uXm44YN4ayzbC1JRETspYaQGzv33HPtLkHEpSgTIlbunolZCXu495u1FBQ5aBYdyntjOtOwpuaayOlx91yU265dcN99MG2a+Tg0FB580GwOBQbaWpq4Bo/LhMh/8KRMlOta602bNvHYY4/Rt29fmjZtSr169WjXrh1jxozhiy++ID
8/v7LqlFIcOHDA7hJEXIoyIWLlrpkodhg898NG7pi2moIiB+e3imbGuG5qBkmFcNdcnLLHHjObQV5eMHYsbNkC99+vZpCU8LhMiPwHT8pEmRpCCQkJ9OvXj/bt27No0SLOOuss7rjjDp588kmuuuoqDMPgwQcfpH79+jz//PNqDFWRzZs3212CiEtRJkSs3DETmXmFXP/JCt76bRsAt/RuyjujOxMW6GdzZVJduGMuyqW4GNLTjz1+8kkYOBBWrYJ334W6de2rTVxStc+ESDl5UibKdMvY0KFDueeee5g2bRpRUVEnPG/ZsmVMnjyZF198kQceeKDCihQREZHqb2dKNmM/WcHWA1kE+HozcXg7hsTH2F2WiPtYuBDGj4czzoDp081jDRrAnDm2liUiIq7JyzAM479OKigowN/fv8wftLznV6WMjAwiIiJIT08nPDzc7nJEREQEWLwlhVu+WEV6biF1wwN55+pOtGsQaXdZIu5h61a45x6YNct8HBkJGzdCnTp2ViUiIjYoT8+jTLeMlbW5s2fPnnKdL6fn22+/tbsEEZeiTIhYuUMmDMPgwyU7GPPhX6TnFtIhLpLvbu2uZpBUGnfIRZmlpcHdd0Pr1mYzyMcHbrnFnBOkZpCUUbXKhEgF8KRMVMiWseTkZJ5++mnee+89cnNzK+JDShnk5OTYXYKIS1EmRKxcPRP5RcU8Mms901YkATCsYwOevqQNgX4+Nlcm1Zmr56LMli83ZwOlpJiPL7wQXnzRbA6JlEO1yYRIBfGkTJR5y1haWhpXXnkltWvXpn79+rz66qs4HA4eeeQRmjRpwh9//MEHH3xQmbXKcRo0aGB3CSIuRZkQsXLlTKRk5XPlu38ybUUS3l7w0KBWTBrRTs0gqXSunItyad0aAgKgVSuYOxd++EHNIDkl1SYTIhXEkzJR5iuEHnjgARYtWsSYMWOYN28e48ePZ968eeTl5fHDDz/Qs2fPyqxTStFaL/oiFsqEiJWrZmL93nSu/3gFe9PzCAv05bVRHejVItrussRDuGou/tM//8Dbb8NLL4G3N4SEwM8/Q5Mm4KctfHLq3DYTIpXEkzJR5iuE5syZw4cffsikSZP47rvvMAyD5s2b88svv6gZZJP58+fbXYKIS1EmRKxcMRNz1u5j2JSl7E3Po0mtEGbd0l3NIKlSrpiLkzp0CG67Ddq2hVdegU8+Ofa+Fi3UDJLT5naZEKlknpSJMl8htHfv3pJOWZMmTQgMDGTs2LGVVpiIiIhUHw6Hwcs/bebVX7YC0KN5bV4b1YGIIP1lVqRUBQXw5pvw+OPm8GiAiy+Gbt1sLUtERKqPMjeEHA4Hfv/6FwgfHx9CQkIqpSgpm65du9pdgohLUSZErFwlE9n5RYyftpr5G/YDcP15jbl/QCt8vL1srkw8kavk4oQMA2bPhrvuMreFAbRrB5MnQ58+9tYm1ZLLZ0KkinlSJsrcEDIMg2uuuYaAgAAA8vLyuOmmm5yaQjNmzKjYCuWE0tPT7S5BxKUoEyJWrpCJpNQcrv9kBRuTM/H38eaZS9syvJPnDGsU1+MKufhPTz9tNoOio823r73WXCkvUgncIhMiVciTMlHmGUJjxowhOjqaiIgIIiIiuOqqq6hfv37J46O/pOps2LDB7hJEXIoyIWJldyaWbTvExa8vZmNyJrXDAvjyxnPUDBLb2Z2LUu3fD1lZ5tteXvDyy3D//WZTaOxYNYOkUrlkJkRs5EmZKPMVQh9++GFl1iEiIiLVyGd/7OKx79ZT5DBoGxPBO1d3ol5EkN1libiWvDxzUPTTT8Ptt8NTT5nHzznH/CUiIlKJvAzDMMp68q5du5g/fz6FhYX06tXLLdexZWRkEBERQXp6OuHh4XaXc1qKiorw9S1zT0+k2lMmRKzsyERhsYPHv1/PZ38kAnBx+/pMHN6OQD9d4SCuwSVeKwwDvvkG7r0Xduwwj517Lvz2m7lSXqQKuUQmRFyIu2eiPD2PMr/iLF
q0iDPPPJMbb7yRW2+9lfj4eKZOnXraxcqpmzdvnt0liLgUZULEqqozkZpdwOj3/+SzPxLx8oJ7L2zBK5fHqxkkLsX214qVK6FnTxgxwmwG1a8PH3+sZpDYxvZMiLgYT8pEmV91Hn74YXr37s3u3bs5dOgQ//vf/7j33nsrszb5D5mZmXaXIOJSlAkRq6rMxMbkDC5+fTF/bE8lNMCXd0d3ZlyvM/Dy0iYxcS22vla88w507gy//w5BQfDII7B5M1x9tZpBYhv9/CRi5UmZKPN1UH///TeLFi2ifv36ALz44ou8++67HD58mBo1alRagXJidevWtbsEEZeiTIhYVVUmflyfzPhpq8kpKKZhzWDeu7ozzeqEVcnnFikvW18rBgyA4GC49FJ45hmIjbWvFpEj9POTiJUnZaLMDaG0tDSio6NLHoeEhBAcHExaWpoaQjbp2LGj3SWIuBRlQsSqsjNhGAav/7KVFxdsBqD7GTV544qORAb7V+rnFTkdVfZa4XDA1KmwYgVMnmwei42FbdvAg/6yIa5PPz+JWHlSJsp1beqGDRtYu3ZtyS/DMPjnn38sx6TqzJ071+4SRFyKMiFiVZmZyC0o5tapCSXNoGu6NeKja7uoGSQur0peK5Ytg65d4aqrzBXyS5cee5+aQeJi9POTiJUnZaJco7P79u3L8UvJBg8ejJeXF4Zh4OXlRXFxcYUWKCIiIq5lb1ou13+ygvV7M/Dz8eKJIW0Y1SXO7rJE7LdrF9x/P3z5pfk4NBQeeAA6dLC3LhERkVKUuSG04+hKTHEZZ511lt0liLgUZULEqjIysXJXKjd+upKUrAJqhvgz5apOdGkcVeGfR6SyVMprRU6OORPoxRchLw+8vODaa+Gpp6BevYr/fCIVSD8/iVh5UibK3BD6+OOPufvuuwkODq7MeqQc8vLy7C5BxKUoEyJWFZ2Jr5Yn8eCsvyksNmhVL5x3r+5Egxr6uUDcS6W8VhgGfPih2Qzq1QteeklXBYnb0M9PIlaelIkyzxB6/PHHycrKqsxapJz+/vtvu0sQcSnKhIhVRWWiqNjBE99v4N5v1lJYbDCgTV2+ubmrmkHilirsteLPP83B0QAhIfDGGzBzJvzyi5pB4lb085OIlSdloswNoeNnB4mIiEj1l55TyLUfLeeDJeat4+PPb84bV3Qk2L9cYwhFqo9t22DYMDjnHPjss2PHhw41f3l52VWZiIhIuXgZZez0eHt7s3//fmrXrl3ZNVWqjIwMIiIiSE9PJzw83O5yTkteXh6BgYF2lyHiMpQJEavTzcTWA5mM/XgFOw/lEOTnw+SR7bmwjeahiHs75Vykp5szgV59FQoKwNsbHn4YHnuswmsUqUr6+UnEyt0zUZ6eR7nWzvft25eOHTue9JdUnYULF9pdgohLUSZErE4nE79uPMAlbyxl56EcYiKD+ObmbmoGSbVQ7lwUFcFbb0GzZjBpktkM6t8f1q5VM0iqBf38JGLlSZko1/XeF1xwAaGhoZVVi5TT4cOH7S5BxKUoEyJWp5IJwzB4Z9F2npu3EcOALo2jmHJlR2qGBlRChSJVr9y5+N//4NNPzbdbtjQ3iQ0YoFvDpNrQz08iVp6UiXI1hO655x6io6MrqxYpp1q1atldgohLUSZErMqbibzCYibM+JuZCXsAGNUljscvPhN/33JdUCzi0sr9WnHjjTB3rnk10I03gp9fpdQlYhf9/CRi5UmZKPMMIR8fH/bt2+f2DaHqNEMoKytLV2yJ/IsyIWJVnkzsz8jjhk9XsiYpDR9vLx67qDVXndMQL10FIdXMSXORmgqPPw41a8Ijjxw7np1tbhITqYb085OIlbtnolJmCGnLmOv5/vvv7S5BxKUoEyJWZc3E6qQ0LnptMWuS0ogM9uPT/3VhdNdGagZJtVRqLgoLzWHRZ5xh/vfZZ+HgwWPvVzNIqjH9/CRi5UmZKPMtYzt27PCoS6dEREQ8wcyE3dz3zd8UFDloXieU96
4+i7iawXaXJVI1DAPmzIG774ZNm8xjbdvC5Mng5pt1RURE/kuZrhB67rnnqF27Nt7e/336n3/+yZw5c8pcwJtvvknjxo0JDAykU6dO/P777yc9Pz8/nwcffJCGDRsSEBBA06ZN+eCDD8r8+aqTDh062F2CiEtRJkSsTpaJYofBsz/8w/hpaygocnB+qzrMGNddzSCp9kpysW0bXHABXHSR2QyqXRvefhsSEqBvX3uLFKlC+vlJxMqTMlGmK4Q2bNhAXFwcI0aM4OKLL6Zz587UPvKvJkVFRWzYsIHFixfz2WefsW/fPj755JMyffJp06Zxxx138Oabb9K9e3fefvttBgwYUPL5SnPZZZexf/9+3n//fc444wwOHDhAUVFRGb9cERERycgr5P+mJvDrJvOWmFt7n8Gd/Zrj7a1bxMSD+PrC77+Dvz/ccQc88ABERNhdlYiISJUp0xVCn3zyCb/88gsOh4Mrr7ySunXr4u/vT1hYGAEBAXTo0IEPPviAa665ho0bN3LeeeeV6ZO/9NJLXHfddYwdO5ZWrVrx8ssvExsby5QpU0o9f968efz222/MnTuX888/n0aNGtGlSxe6detW9q+4GklISLC7BBGXokyIWJWWiR0p2VzyxhJ+3XSQAF9vXh3VgbsvaKFmkFR/+fnw3XfHctGwIXz0EfzzDzz/vJpB4rH085OIlSdloswzhNq1a8fbb7/NW2+9xdq1a9m5cye5ubnUqlWL+Pj4cs8XKigoYOXKldx///2W4/3792fp0qWlPue7776jc+fOTJw4kU8//ZSQkBAuvvhinnzySYKCgkp9Tn5+Pvn5+SWPMzIyylWniIhIdfH7loPc8vkqMvKKqBseyLtXd6ZtA/0lWKo5w4AZM+Dee2H7dmo99tix940caVtZIiIiditzQ+goLy8v2rdvT/v27U/rE6ekpFBcXEydOnUsx+vUqUNycnKpz9m+fTuLFy8mMDCQmTNnkpKSwrhx40hNTT3hHKFnn32Wxx9/3On49OnTCQ4O5tJLL+Xnn38mPT2d6OhounTpwuzZswHo2LEjDoeD1atXAzBkyBAWL17MoUOHiIqKokePHsyaNQswG2Z+fn6sXLkSgEGDBrFixQr2799PeHg4/fv35+uvvwbgzDPPJDQ0lD///BOACy64gHXr1rFnzx5CQkIYPHgw06ZNA6BFixbUqlWLJUuWAHD++eezefNmEhMT8fHxAcxb7xwOB02bNiUmJoZFixYB0KtXLxITE9m+fTu+vr6MGDGCb775hoKCAho2bEjTpk355ZdfADj33HM5cOAAmzdvBmDUqFF8++235OTk0KBBA1q3bs38+fMB6Nq1K+np6WzYsAGAESNGMG/ePDIzM6lbty4dO3Zk7ty5AJx11lnk5eXx999/A3DJJZewcOFCDh8+TK1atejatWvJFPej92oe7chedNFFLFu2jJSUFGrUqEGvXr2YOXMmAG3btiUwMJDly5cDMHDgQFatWkVycjJhYWFceOGFTJ8+HYDWrVsTERHBsmXLALPpuGHDBnbv3k1wcDBDhgxh6tSpADRv3pzo6GgWL14MQJ8+fdi2bRu7du3C39+fYcOGMX36dIqKimjSpAlxcXEsXLgQgB49erBnzx62bduGt7c3I0eOZMaMGeTn5xMXF0fz5s356aefAOjevTspKSlsOjLAcuTIkcyePZvs7GxiYmJo06YNP/74IwBnn302WVlZrF+/HoDhw4czf/58MjIyqFOnDp07dy6Z29WpUycKCwtZu3YtAEOHDmXRokWkpqZSs2ZNzj33XL799lsA4uPj8fb2ZtWqVQAMHjyYv/76iwMHDhAREUHfvn2ZMWMGAG3atCE4OJi//voLgAEDBrBmzRr27t1LaGgoAwcO5KuvvgKgZcuWREVFlTR2+/Xrx8aNG0lKSiIoKIihQ4fy5ZdfYhgGzZo1o27duiWzw3r37s3OnTvZsWMHfn5+DB8+nK+//prCwkIaN25Mo0
aN+PXXXwE477zzSE5OZsuWLXh5eXH55Zfj6+vL1KlTiY2NpWXLlixYsACAbt26kZqaysaNGwHz1tO5c+eSlZVF/fr1ad++PT/88AMAXbp0IScnh3Xr1gG49feIgIAALr30Un2PwHO/R0RFRZGRkcHs2XNYmuLP3H2BOAyIDS5idIODnFHTnx9//NFjvkfMmjWL3NxcfY/woO8R6z75hCavvUb0kf+3OZGReGdmsm7dOn2P0M8R+h6B+T2iQ4cOJX+GPe17hH6O0PeI0r5HFBUVsXv3brf9HpGTk0NZeRk27ZPfu3cvMTExLF26lK5du5Ycf/rpp/n0009LfjP/rX///vz+++8kJycTceSy3hkzZjB8+HCys7NLvUqotCuEYmNjSU9PJzw8vBK+sqqzYMEC+vXrZ3cZIi5DmRCxWrBgAT169+GRWeuZtiIJgOGdGvD0JW0I8PWxuTqRSrR3Lzz4IHz8sXmFUGAg3HMP3HsvC5Yt02uFyL/o5ycRK3fPREZGBhEREWXqeZT7CqGKUqtWLXx8fJyuBjpw4IDTVUNH1atXj5iYmJJmEECrVq0wDIPdu3fTrFkzp+cEBAQQEBBQscW7iJSUFLtLEHEpyoSI1Y59h3j73T9Zsesw3l7wwMBWXHduY7y8NC9IqjGHA3r3hiNXInDllfDssxAbC+i1QuR4yoSIlSdlokxDpSuDv78/nTp1KrnM6qgFCxaccEh09+7d2bt3L1lZWSXHNm/ejLe3Nw0aNKjUel1RjRo17C5BxKUoEyLHrNuTzpRtYazYdZiwQF8+vLYLY89romaQVE+GYTaCALy9YcIEOOcc+OMP+OyzkmYQ6LVC5HjKhIiVJ2XCtlvGwJx9M3r0aN566y26du3KO++8w7vvvsv69etp2LAhEyZMYM+ePSVr7LOysmjVqhXnnHMOjz/+OCkpKYwdO5aePXvy7rvvlulzlufyKVeXl5dHYGCg3WWIuAxlQsQ0Z+0+7pq+mrxCB01qh/De1Z1pUjvU7rJEKscff8D48TBuHIwebR5zOMDLy/x1HL1WiFgpEyJW7p6J8vQ8TvsKoYyMDGbNmsU///xT7ueOHDmSl19+mSeeeIL4+HgWLVrE3LlzadiwIQD79u0jMTGx5PzQ0FAWLFhAWloanTt35sorr+Siiy7i1VdfPd0vwy0dHXomIiZlQjydw2Hw0vxN3PLFKvIKHTQPK2LmuO5qBkn1lJRk3g7WtavZFHr6aetVQie4Gk6vFSJWyoSIlSdlotwzhC677DJ69OjBrbfeSm5uLp07d2bnzp0YhsGXX37JsGHDyvXxxo0bx7hx40p930cffeR07N/TvEVERMSUnV/E+Gmrmb9hPwA39GhCw7TVRAT52VyZSAXLyoKJE+GFFyAvz2z8jBljNoS8bZuGICIi4nbK/aq5aNEizjvvPMDsnBmGQVpaGq+++ipPPfVUhRcoJ9a2bVu7SxBxKcqEeKqk1ByGTVnK/A378ffx5sUR7XlgYCvat1MmpJqZMweaN4cnnzSbQT16wIoV8OGHUL9+mT6EXitErJQJEStPykS5G0Lp6elERUUBMG/ePIYNG0ZwcDCDBg1iy5YtFV6gnJg739coUhmUCfFEy7Yd4uLXF7MxOZPaYQF8eeM5DOtkLlpQJqTaCQ+HffugcWP4+mtYuBA6dizXh1AuRKyUCRErT8pEuRtCsbGxLFu2jOzsbObNm0f//v0BOHz4sEf9xrmC5cuX212CiEtRJsTTfPrHLka//yeHcwpp1yCC7289l45xxzZjKBPi9rZvh+nTjz0+7zyYORP++QeGDTvhnKCTUS5ErJQJEStPykS5ZwjdcccdXHnllYSGhtKwYUN69eoFmLeSedKlVSIiInYpLHbw2Hfr+fxPc/HCkPj6PD+sHYF+PjZXJlJBMjLMmUAvvww+PuYK+aOr44cOtbMyERGRaqNMa+czMjIs68pWrlxJYmIi/fr1IzTU3FwyZ84cIiMj6d69e+VVWw
Gq09r59PR0IiIi7C5DxGUoE+IJUrMLuPmzlfy5IxUvL7j3gpbc1LMJXqVcKaFMiNspLob334eHHoKDB81j/frBlCnQtGmFfArlQsRKmRCxcvdMVPja+Ro1anDgwAEA+vTpQ9OmTbnkkktKmkEAgwYNcvlmUHWzatUqu0sQcSnKhFR3/+zL4OLXF/PnjlRCA3x57+rO3NyraanNIFAmxM38/DN06AA33mg2g1q0gNmz4ccfK6wZBMqFyPGUCRErT8pEmW4ZCw0N5dChQ0RHR7Nw4UIKCwsruy4pg+TkZLtLEHEpyoRUZz+uT2b8tNXkFBTTsGYw713dmWZ1wk76HGVC3Mb+/TBoEOTnQ40a8NhjcPPN4OdX4Z9KuRCxUiZErDwpE2VqCJ1//vn07t2bVq1aAXDJJZfg7+9f6rm//PJLxVUnJxUWdvK/CIh4GmVCqiPDMHj9l628uGAzAN3PqMkbV3QkMrj01+F/UybEpeXkQHCw+XadOnDffZCWBo8+Ckc22lYG5ULESpkQsfKkTJRphlBubi4ff/wx27Zt48UXX+T6668n+OgL+HEmT55c4UVWpOo0Q6ioqAhf33LPBReptpQJqW5yCoq4Z/pa5vy9D4BrujXiwUGt8PMp25JQZUJcUmEhvPUWPP64eUvYOedU6adXLkSslAkRK3fPRHl6HmX6KoOCgrjpppsAWLFiBc8//zyRkZGnXaicnunTpzNq1Ci7yxBxGcqEVCd70nK54ZMVrN+bgZ+PF08MacOoLnHl+hjKhLgUw4C5c+Huu2HjRvPYlClV3hBSLkSslAkRK0/KRLnbXr/++mtl1CEiIiJHrNiZyk2frSQlq4CaIf68NboTZzWqvFtoRCrdunVw110wf775uFYtePJJGDvW3rpEREQ8WJkaQnfeeSdPPvkkISEh3HnnnSc996WXXqqQwuS/tW7d2u4SRFyKMiHVwVfLk3hw1t8UFhu0rhfOu2M6ExMZdEofS5kQl/DII/D00+BwmEOi/+//4MEHwaarzZULEStlQsTKkzJRpoZQQkJCyWaxVatWnXC9rVStiIgIu0sQcSnKhLizomIHT8/9hw+X7ARgYNu6TBrRnmD/U7+HXZkQl9CokdkMuuQSmDgRzjjD1nKUCxErZULEypMyUaafMv99m9jChQsrqxYpp2XLltGoUSO7yxBxGcqEuKu0nAJu/SKBxVtTALizX3Nu63PGaf8DjDIhVc4w4NtvwccHLrrIPDZmDLRqBV272lvbEcqFiJUyIWLlSZko25qSf/nf//5HZmam0/Hs7Gz+97//VUhRIiIinmLrgUyGvrGExVtTCPb34a2rOnF732a6Glfcz+rV0KePeSXQuHHmWnkwm0Mu0gwSERGRY8q0dv7ffHx82LdvH9HR0ZbjKSkp1K1bl6KiogotsKJVp7Xzhw4dombNmnaXIeIylAlxN79s3M/tU1eTlV9ETGQQ743pTKt6FffapExIlUhOhocegg8+MK8QCgw0B0g/8AAEB9tdnRPlQsRKmRCxcvdMlKfnUeYrhDIyMkhPT8cwDDIzM8nIyCj5dfjwYebOnevUJJLKtWHDBrtLEHEpyoS4C8MweOu3bVz38Qqy8ovo0jiK727tXqHNIFAmpJLl5sIzz0CzZvD++2Yz6PLLzZXyTz3lks0gUC5EjqdMiFh5UibKPKkyMjISLy8vvLy8aN68udP7vby8ePzxxyu0ODm53bt3212CiEtRJsQd5BUWc/83a5m1ei8AV5wdx2MXnYm/b7nv4v5PyoRUqpUrzW1hAF26wOTJ0K2bvTWVgXIhYqVMiFh5UibK3BD69ddfMQyDPn368M033xAVFVXyPn9/fxo2bEj9+vUrpUgpXbCL/subiF2UCXF1yel53PjpCtbsTsfH24vHLj6T0ec0rLTPp0xIhdu/H+rUMd8+91y49VY45xwYNQq8K76pWRmUCxErZULEypMyUe4ZQrt27SIuLs5th11WpxlCIiLiPlYnpX
HDJys4kJlPZLAfb17ZkW5Na9ldlkjZJCXBhAkwaxZs2gQxMXZXJCIiIqWo8BlCa9euxeFwAJCens7ff//N2rVrS/0lVWfq1Kl2lyDiUpQJcVUzE3Zz2dvLOJCZT4s6YXx3y7lV0gxSJuS0ZWfDo49Cixbw+efm4x9+sLuq06JciFgpEyJWnpSJMt0yFh8fT3JyMtHR0cTHx+Pl5UVpFxZ5eXlRXFxc4UWKiIi4o2KHwcR5G3l70XYAzm9Vh5cvjyc0oMx3bIvYw+GATz81N4XtNeddce655pygzp3trU1EREQqRJl+It2xYwe1a9cueVtcQ2nDvUU8mTIhriQjr5D/m5rAr5sOAnBr7zO4s19zvL2r7pZrZUJOicMBPXvC4sXm48aNYeJEGDYM3HRkwL8pFyJWyoSIlSdlokwNoYYNG5b6ttgrOjra7hJEXIoyIa5iR0o2Yz9ezraD2QT6efPC8PZc1L7qFy8oE3JKvL2he3dYswYeeghuvx0CA+2uqsIoFyJWyoSIlSdlotzrID7++GPmzJlT8vjee+8lMjKSbt26sWvXrgotTk5u8dF/uRMRQJkQ1/D7loMMeX0x2w5mUy8ikOk3drOlGQTKhJRRRoY5MPqvv44de/BB2LIF7r23WjWDQLkQOZ4yIWLlSZkod0PomWeeISgoCIBly5bx+uuvM3HiRGrVqsX48eMrvEARERF3YBgGHyzewZgP/iIjr4iOcZF8e2t32jaIsLs0kdIVF8O770KzZvDcc3DHHXB0RmRY2LH18iIiIlItlXuqZVJSEmeccQYAs2bNYvjw4dxwww10796dXr16VXR9chJ9+vSxuwQRl6JMiF3yi4p5eNY6vlqxG4DhnRrw9CVtCPD1sbUuZUJO6JdfYPx4OLohtnlzc4C0B1AuRKyUCRErT8pEua8QCg0N5dChQwDMnz+f888/H4DAwEByc3Mrtjo5qW3bttldgohLUSbEDgcz87ni3T/5asVuvL3g4cGteWF4O9ubQaBMSCm2bIGhQ6FvX7MZFBlpbg77+28YPLhaDI3+L8qFiJUyIWLlSZkod0OoX79+jB07lrFjx7J582YGDRoEwPr162nUqFFF1ycnoZlNIlbKhFS1dXvSufj1xazcdZjwQF8+urYL153bGC8X+Uu1MiFOfvsNvv0WfHzgtttg61bzVjF/f7srqzLKhYiVMiFi5UmZKHdD6I033qBr164cPHiQb775hpo1awKwcuVKRo0aVeEFyon5e9APbyJloUxIVZq9di/D31rKvvQ8mtQOYdYt3enRvLbdZVkoE0JREWzefOzxtdeajaC//4ZXX4UjP8d5EuVCxEqZELHypEx4GcbR6YGeISMjg4iICNLT0wkPD7e7HBERcTMOh8Hknzbz2i9bAejVojavjupAeKCfzZWJHGfePLjzTsjKgk2b4MhSEBEREam+ytPzKPcVQgBpaWm8+OKLjB07luuvv56XXnqJ9PT0UypWTt306dPtLkHEpSgTUtmy8ou48bOVJc2gG3s04f0xZ7lsM0iZ8FAbNsCAAeavf/6BnBxYv97uqlyGciFipUyIWHlSJsrdEFqxYgVNmzZl8uTJpKamkpKSwuTJk2natCmrVq2qjBrlBIqKiuwuQcSlKBNSmZJScxj25lIWbNiPv683L13WngkDW+Hj7RrzgkqjTHiYlBS49VZo1868OsjPD+66y5wT1Lmz3dW5DOVCxEqZELHypEyUe+38+PHjufjii3n33Xfx9TWfXlRUxNixY7njjjtYtGhRhRcppWvSpIndJYi4FGVCKsuybYcY9/lKDucUEh0WwNujO9EhrobdZf0nZcKDJCdDq1aQlmY+HjoUXngBzjjDzqpcknIhYqVMiFh5UibK3RBasWKFpRkE4Ovry7333ktn/etTlYqLi7O7BBGXokxIZfj0j108/t16ihwG7RpE8M7oztSNCLS7rDJRJjxI3brQuzfs2AEvvWS+LaVSLkSslAkRK0/KRL
lvGQsPDycxMdHpeFJSEmFhYRVSlJTNwoUL7S5BxKUoE1KRCosdPDjzbx6etY4ih8GQ+Pp8dWNXt2kGgTJRra1ZA4MHw549x4598AGsWKFm0H9QLkSslAkRK0/KRLkbQiNHjuS6665j2rRpJCUlsXv3br788kvGjh2rtfMiIlItpGYXcNV7f/L5n4l4ecF9F7bk5ZHxBPr52F2aeLrkZLj+eujQAebMgUceOfa+yEjw0Z9RERERKZty3zI2adIkvLy8uPrqq0uGLfn5+XHzzTfz3HPPVXiBcmI9evSwuwQRl6JMSEX4Z18G13+ygt2HcwkN8OWVy+Pp26qO3WWdEmWiGsnLg8mT4ZlnzDXyACNHwsMP21uXG1IuRKyUCRErT8pEua8Q8vf355VXXuHw4cOsXr2ahIQEUlNTmTx5MgEBAZVRo5zAnn9fJi4iyoSctnnrkhk2ZSm7D+fSsGYwM8d1c9tmECgT1caMGebA6AceMJtBZ50FixfDl19Co0Z2V+d2lAsRK2VCxMqTMlHuhtBRwcHBREZGEhUVRXBwcEXWJGW0bds2u0sQcSnKhJwqwzB49ect3PTZSnIKiul+Rk2+vaU7zeq492w8ZaKa+OMP2LkTYmLg00/Nx927212V21IuRKyUCRErT8pEuRtCRUVFPPzww0RERNCoUSMaNmxIREQEDz30EIWFhZVRo5yAt/cp9/NEqiVlQk5FTkERt36RwEsLNgNwTbdGfHxtFyKD/W2u7PQpE25q927YtOnY4wcfNG8V27QJrroK9P/1tCgXIlbKhIiVJ2XCyzAMozxPuOmmm5g5cyZPPPEEXbt2BWDZsmU89thjDBkyhLfeeqtSCq0oGRkZREREkJ6eTnh4uN3liIiIjfak5XL9xyvYsC8DPx8vnhzShsu7eM6qUXExOTnwwgvw/PPQsSP8/jt4edldlYiIiLiR8vQ8yt36mjp1Kh999BE33ngj7dq1o127dtx444188MEHTJ069ZSLlvKbMWOG3SWIuBRlQspj+c5ULn5tMRv2ZVAzxJ8vrj+n2jWDlAk34XDAZ59B8+bw2GOQm2seP3zY1rKqK+VCxEqZELHypEyUe8tYYGAgjUoZYNioUSP8/d3/8np3kp+fb3cJIi5FmZCymrY8kYdmraOw2KB1vXDeHdOZmMggu8uqcMqEG1i6FO64A5YvNx83bAgTJ8KIEbo6qJIoFyJWyoSIlSdlotxXCN1yyy08+eSTlt+k/Px8nn76aW699dYKLU5OLi6uev1LtsjpUibkvxQVO3jsu/Xc983fFBYbDGxbl69v7lotm0GgTLi8n34yh0MvXw6hofDss7BxI1x2mZpBlUi5ELFSJkSsPCkT5b5CKCEhgZ9//pkGDRrQvn17ANasWUNBQQF9+/bl0ksvLTnXky61skPz5s3tLkHEpSgTcjJpOQXc+kUCi7emAHBnv+bc1ucMvKrxX7yVCRdkGMeaPb17Q3w8dO4MTz4JdevaWpqnUC5ErJQJEStPykS5rxCKjIxk2LBhDB48mNjYWGJjYxk8eDCXXnopERERll9SuX766Se7SxBxKcqEnMiW/ZkMeWMJi7emEOzvw1tXdeL2vs2qdTMIlAmXUlwM770HXbtCXp55zMfHXCH/7rtqBlUh5ULESpkQsfKkTJT7CqEPP/ywMuoQERGpFD//s5//+3I1WflFNKgRxLtXd6ZVPW2ZlCr0668wfjysWWM+fucduP128+2AAPvqEhEREY9W7oaQuI7u3bvbXYKIS1Em5N8Mw+Ct37Yz8ceNGAac3TiKKVd1IirEcxYgKBM227IF7rkHvv3WfBwRAY88AjfdZG9dHk65ELFSJkSsPCkT5b5lTFxHSkqK3SWIuBRlQo7KKyzmjmmreX6e2Qy68uw4Pht7tkc1g0CZsE1xMdx1F5x5ptkM8vGBceNg61a4807QVlZbKRciVsqEiJUnZUINITe2adMmu0sQcSnKhAAkp+dx2dvL+Hb1Xny9vX
hyaBuevqQtfj6e95KnTNjExwe2bYPCQrjwQli7Ft54A2rVsrsyQbkQOZ4yIWLlSZnQLWMiIlJtJCQe5sZPV3IgM58awX68cWVHujXVX8KlCvz4I7RrB/XqmY8nTYIbb4QBA+ytS0REROQEvAzDME73g6SlpREZGVkB5VS+jIwMIiIiSE9PJzzcvYeKOhwOvL0971+8RU5EmfBsM1bt5v4Zf1NQ5KBFnTDevbozcTWD7S7LVspEFfjnH7j7bpg7F669Fj74wO6K5D8oFyJWyoSIlbtnojw9j3J/lc8//zzTpk0reXzZZZdRs2ZNYmJiWHN0e4ZUidmzZ9tdgohLUSY8U7HD4Nm5/3DnV2soKHLQr3UdvhnXzeObQaBMVKpDh+C226BtW7MZ5OsLUVFw+v/OJpVMuRCxUiZErDwpE+VuCL399tvExsYCsGDBAhYsWMAPP/zAgAEDuOeeeyq8QDmx7Oxsu0sQcSnKhOfJyCvkuo+X8/ai7QDc1ucM3r6qE6EBuiMalIlKUVAAL78MZ5wBr79uDpAeMgQ2bDBvE/PysrtC+Q/KhYiVMiFi5UmZKPdPzPv27StpCM2ePZvLLruM/v3706hRI84+++wKL1BOLCYmxu4SRFyKMuFZth/MYuwnK9h+MJtAP29eGN6ei9rXt7ssl6JMVIIXXoCHHjLfbtcOJk+GPn3srUnKRbkQsVImRKw8KRPlvkKoRo0aJCUlATBv3jzOP/98AAzDoLi4uGKrk5Nq06aN3SWIuBRlwnMs2nyQoW8sYfvBbOpFBPL1Td3UDCqFMlFBioqOvX3LLeY6+XfegVWr1AxyQ8qFiJUyIWLlSZkod0Po0ksv5YorrqBfv34cOnTo/9m77/AoqjaMw7/0kISETuiE3lsQpAqKNEUQELBQVFCKItjLJ4IFbIAigig2EOlVRASVDtJ7aNJCCxACSSCk7nx/rETHpQWSzG72ua8rF9nZ2Z13F56d5OXMObT5e/WMbdu2Ua5cuUwvUK7t119/tboEEaeiTOR8hmHw9erD9Pp2A3GJqYSXysuCZxpTrViI1aU5JWXiNp0+DU89Bffe+8/cQHnywM6d0KePfXl5cTnKhYiZMiFi5k6ZyPAlY6NHj6Z06dIcO3aMDz/8kKCgIMB+KVn//v0zvUARERGApNQ0/jd3FzM3HwfgofDivPtgNfy89Uu5ZLLERPj0U3jvPYiPt29btw4aNrR/r3mCREREJAfIlGXnXUlOWnb+0KFDlClTxuoyRJyGMpFznYlPpO/kzWyJvICnB/zvvio83qg0HvrF/LqUiQwyDJg1C15+GY4csW8LD7fPE9SkiaWlSeZRLkTMlAkRM1fPREZ6Hjc1QmjBggW0adMGHx8fFixYcN19H3jggZuvVG7LxYsXrS5BxKkoEznTrhOx9Jm0iVOxiQT7ezP2kTo0rVDQ6rJcgjKRAadPQ+fOsHq1/XbRojBiBDz2GHhm+Ap7cWLKhYiZMiFi5k6ZuKmGUIcOHYiKiqJQoUJ06NDhmvt5eHhoYulstHv3bmrUqGF1GSJOQ5nIeX7afpKXZm0nMcVG2YKBTOx5B2EFAq0uy2UoExmQPz9cuAC5csFLL9lHCQXq31pOpFyImCkTImbulImbagjZbLarfi8iIpIVbDaDUUv3M3bZXwA0r1iQTx+uTbC/j8WVSY6RkABffAH9+4O/P3h7w+TJ9sZQiRJWVyciIiKS5TSHkAtLSUnBx0e/HIlcoUzkDBeTUhk8fRtLI04D8PRdZXi5VSW8PDVfUEYpE1dhs8HUqfDqq3D8OLz/PrzyitVVSTZSLkTMlAkRM1fPRKbPIfRfly5dYsWKFURGRpKcnGy6b+DAgbfylHILlixZwn333Wd1GSJOQ5lwfZHnEug9aSP7T1/E19uTDzpV58Haxa0uy2UpE/+xbh0MHgzr19tvlywJ5ctbW5NkO+VCxEyZEDFzp0xkuCG0detW2rZtS0JCApcuXSJfvnxER0cTEBBAoU
KF1BDKRnFxcVaXIOJUlAnXtvZgNP2nbOFCQgqFcvvxZY+61CqRx+qyXJoy8bejR+0jgqZNs98OCoLXXrM3h3LlsrY2yXbKhYiZMiFi5k6ZyPCyGYMHD6Zdu3bExMSQK1cu/vzzT44ePUp4eDgff/xxVtQo11C4cGGrSxBxKsqE65q87gjdv97AhYQUahYPYcEzjdUMygTKxN9eeMHeDPLwgCeegP374fXX1QxyU8qFiJkyIWLmTpnI8BxCefLkYf369VSsWJE8efKwbt06KleuzPr16+nZsyd79+7NqlozRU6aQyguLs7lX4NIZlImXE9yqo1hP+1myvpIADrUKsr7nWrg7+NlcWU5g9tmIi0NLl+2jwQCewPomWfggw+gdm1raxPLuW0uRK5BmRAxc/VMZKTnkeERQj4+Pnh42Cf2LFy4MJGR9h/iQ0JC0r+X7PHzzz9bXYKIU1EmXMu5i0l0/3o9U9ZH4uEBr7apxOiutdQMykRumYkVK+COO2DQoH+2VagAS5aoGSSAm+ZC5DqUCREzd8pEhucQql27Nps2baJChQo0b96cIUOGEB0dzeTJk6levXpW1CgiIjnMnlNx9Jm0iePnLxPk582Yh2txdyX3GZ4rWeDgQXj5ZZgzx377yBH46CPIm9fSskREREScVYZHCA0fPpwiRYoA8M4775A/f3769evHmTNn+PLLLzO9QLm28PBwq0sQcSrKhGtYvCuKTuPXcvz8ZUrnD2DegIZqBmURt8hEbKy9EVSlir0Z5OkJ/frBvn1qBslVuUUuRDJAmRAxc6dMZGiEkGEYFCxYkKpVqwJQsGBBFi1alCWFyY2lpKRYXYKIU1EmnJthGHz2x1+MWrofgMblCjD2kdrkCfC1uLKcK8dnYu1a6NABzp61327ZEkaNgr9/ThG5mhyfC5EMUiZEzNwpExkaIWQYBuXLl+f48eNZVY9kwI4dO6wuQcSpKBPOKyE5lWd+3JreDHq8UWm+e/wONYOyWI7PROXKYLNBpUrw88+weLGaQXJDOT4XIhmkTIiYuVMmMtQQ8vT0pHz58pw7dy6r6hERkRzmxIXLdB6/jp93nsLHy4MPOlXnrXZV8fbK8FXL4u727oU33oArC6TmzQvLlsGOHdC2rX1ZeRERERG5KRledv7nn3/m/fffZ/z48VSrVi2r6soyOWnZ+cuXL5MrVy6ryxBxGsqE89l4JIa+kzdz7lIyBYJ8+eKxcOqWzmd1WW4jx2QiJgaGDYNx4yA11T5X0IMPWl2VuKgckwuRTKJMiJi5eiaydNn5xx57jA0bNlCzZk1y5cpFvnz5TF+SfVauXGl1CSJORZlwLtM2RPLIV39y7lIyVYoEM/+ZxmoGZTOXz0RKCowZA+XK2f9MTYV27XRZmNwWl8+FSCZTJkTM3CkTGV52fvTo0XhoSLZTiImJsboEEaeiTDiH1DQb7/68h+/WHgHgvupF+OihGgT4ZviUI7fJZTNhGPY5gV580b5aGED16vYJo1u0sLY2cXkumwuRLKJMiJi5UyYy/NN5r169sqAMuRX58+e3ugQRp6JMWO9CQjIDftzCmr/sc829cG8Fnrm7nP4jwSIumwmbDV591d4MKlgQ3n0XnnwSvLysrkxyAJfNhUgWUSZEzNwpExmeQ6h58+Y89thjdO7cmZCQkKyqK8vkpDmEEhISCAgIsLoMEaehTFjrwOl4ek/axNFzCQT4ejGqSy1aVwu1uiy35lKZOHsWcucGf3/77aVL4bff4PXXwQV/3hDn5VK5EMkGyoSImatnIkvnEKpevTr/+9//CA0NpVOnTsybN4/k5ORbLlZu3fz5860uQcSpKBPW+X3PaR4ct5aj5xIonjcXc/o3VDPICbhEJpKS4KOP/pkn6Ip774UPPlAzSDKdS+RCJBspEyJm7pSJDDeExowZw4kTJ5g/fz65c+emZ8+ehIaG8tRTT7FixYqsqFFERJyUYRiMX36Q3pM2cTEplfph+VjwTGMqhb
r2CEzJBoZhXy2sShV4+WWIi4PFi/9ZUl5EREREslSGG0IAnp6etGzZku+++47Tp08zYcIENmzYwN13353Z9cl11KpVy+oSRJyKMpG9ElPSGDR9Gx8s3othwKP1S/JD7/rkC/S1ujT5m9NmYssWaN4cOnWCQ4egSBH47jv7JWKab0qymNPmQsQiyoSImTtl4raWfImKimLatGn88MMP7NixgzvuuCOz6pKb4Ol5S/08kRxLmcg+UbGJPDV5EzuOx+Lt6cFbD1Sl+52lrC5L/sMpMzF2LAwcaB8J5O8PL71kHyEUFGR1ZeImnDIXIhZSJkTM3CkTGX6lcXFxfPvtt9x7772UKFGC8ePH065dO/bv38/69euzoka5hi1btlhdgohTUSayx9bI87Qbu5odx2PJG+DDpCfrqRnkpJwyEy1agLc3PPKIfRWxt99WM0iylVPmQsRCyoSImTtlIsMjhAoXLkzevHnp0qULw4cP16ggERE3MnvzcV6bu5PkVBsVC+fmqx51KZnfdVdhkCxmGDBtGuzZY2/8AFSqBH/9BSVLWlubiIiIiJvL8LLzS5YsoUWLFi47jConLTsfHx9P7ty5rS5DxGkoE1knzWbwweK9fLnyEAAtqxRmVNdaBPnd1pXHksUszcSff8LgwfY/PTxg2zaoUcOaWkT+RecKETNlQsTM1TORpcvOt2zZ0mWbQTnNhg0brC5BxKkoE1kj9nIKT3y3Mb0ZNPDucnzxWLiaQS7AkkwcOwaPPgoNGtibQYGB9tFB5ctnfy0iV6FzhYiZMiFi5k6Z0E/zLuzMmTNWlyDiVJSJzHfo7EV6T9rEobOX8Pfx5OOHanJ/jaJWlyU3KVszcekSfPABfPQRJCbaRwX16gXvvgtF9W9GnIfOFSJmyoSImTtlQg0hFxYSEmJ1CSJORZnIXCv2n+WZH7cQn5hK0RB/vuxRl2rF9B67kmzNRHIyfP65vRnUtCmMHg116mTf8UVuks4VImbKhIiZO2Uiw3MIubqcNIdQUlISfn5+Vpch4jSUicxhGAZfrz7M8EV7sBkQXiovXzwWTsHcem9dTZZnYutWqFXLPhoIYMoUyJULHnzwn20iTkbnChEzZULEzNUzkaVzCP1bYmLi7TxcbtOcOXOsLkHEqSgTty8pNY2XZu3g3Z/tzaAudYvzY5/6aga5qCzLxKFD0LmzfQTQggX/bH/0UejYUc0gcWo6V4iYKRMiZu6UiQw3hGw2G++88w7FihUjKCiIQ4fsk4y++eabfP3115leoIiIZI8z8Yk8/OWfzNp8HE8PeKtdFT7oVAM/by+rSxNnERcHr7wClSvD7Nng6Qk7d1pdlYiIiIjcggw3hN59912+++47PvzwQ3x9fdO3V69enYkTJ2ZqcXJ91apVs7oEEaeiTNy6ncdjaT92DVsiLxDs7833T9Tj8UZheGikh0vLtEykpcGXX0K5cvDhh/b5gu69176U/P/+lznHEMkmOleImCkTImbulIkMN4QmTZrEl19+yaOPPoqX1z//a1yjRg327t2bqcXJ9QUEBFhdgohTUSZuzYLtJ+n8xVpOxSZStmAg859pTJPyBa0uSzJBpmXi4Yfh6afh7FmoUAEWLoRff4Xq1TPn+UWykc4VImbKhIiZO2Uiww2hEydOUK5cOYftNpuNlJSUTClKbs6GDRusLkHEqSgTGWOzGXz0614GTt1KUqqN5hULMndAI8IKBFpdmmSSTMvE449D3rzwySewaxfcd5/mCRKXpXOFiJkyIWLmTpnI8LLzVatWZdWqVZQqVcq0febMmdSuXTvTChMRkaxzMSmVQdO28due0wA8fVcZXm5VCS9P/ZLv9mJi4O23ISwMnnvOvq1NGzhyBFx8dU4RERER+UeGG0JvvfUW3bt358SJE9hsNubMmcO+ffuYNGkSCxcuzIoa5RratGljdQkiTkWZuDmR5xLoPWkj+09fxNfbkw86VefB2sWtLkuyQIYykZICX3wBQ4fam0
LBwdCzJ+TJY79fzSDJIXSuEDFTJkTM3CkTGb5krF27dkyfPp1Fixbh4eHBkCFD2LNnDz/99BP33ntvVtQo17B9+3arSxBxKsrEja09GM0Dn69m/+mLFMrtx4ynG6gZlIPdVCYMAxYtgho1YOBAezOoWjWYNeufZpBIDqJzhYiZMiFi5k6ZyPAIIYBWrVrRqlWrzK5FMujkyZNWlyDiVJSJazMMgx/+PMrQnyJIsxnULB7Clz3qUjjY3+rSJAvdMBMHDsCzz9oniAYoUADeeQd69wbvW/oRQcTp6VwhYqZMiJi5Uyb0054LCwoKsroEEaeiTFxdcqqNoT/t5sf1kQA8WLsYIzpWx9/H6waPFFd3w0ykpsJvv4GPDwwaBG+8ASEh2VKbiFV0rhAxUyZEzNwpEx6GYRg32ilv3rx43ORqIjExMbddVFaKi4sjJCSE2NhYgl18PoS0tDS8vPQLncgVyoSjcxeT6DdlCxsOx+DhAa+2rsRTTcvc9Ge6uDaHTCQlwcqV8O9LvL/9Fpo2hbJls79AEQvoXCFipkyImLl6JjLS87ipOYQ++eQTRo8ezejRo/nf//4H2C8bGzp0KEOHDk2/fOzNN9/McLHjxo0jLCwMf39/wsPDWbVq1U09bs2aNXh7e1OrVq0MHzOnmDFjhtUliDgVZcJsz6k4Hhi7hg2HY8jt5803Pe/g6bvKqhnkRtIzYRgwdy5UrQqtW8POnf/s9PjjagaJW9G5QsRMmRAxc6dM3NQlYz179kz/vlOnTrz99ts888wz6dsGDhzI2LFj+e233xg8ePBNH3z69OkMGjSIcePG0ahRIyZMmECbNm2IiIigZMmS13xcbGwsPXr04J577uH06dM3fTwREXexeNcpnp+xnYTkNErnD2Biz7qUK5Tb6rLEClu3wvPPw/Ll9tuhoXDyJFSvbmlZIiIiImKtDK8y9uuvv9K6dWuH7a1ateK3337L0HONGjWKJ598kt69e1O5cmU++eQTSpQowfjx46/7uKeffppHHnmEBg0aZOh4OU2lSpWsLkHEqSgTYLMZfPrbAfr+sIWE5DSalC/A/AGN1QxyR6dO0XL6dAgPtzeD/P3tcwTt3w9aGELcmM4VImbKhIiZO2Uiww2h/PnzM3fuXIft8+bNI3/+/Df9PMnJyWzevJmWLVuatrds2ZK1a9de83HffvstBw8e5K233rqp4yQlJREXF2f6yiny5ctndQkiTsXdM5GQnMozU7cw+rf9ADzRKIxve91BSICPxZVJtktNhTvvJP/8+fbLxbp1g7174d13Ibeag+Le3P1cIfJfyoSImTtlIsOrjA0bNownn3yS5cuXp4/Q+fPPP1m8eDETJ0686eeJjo4mLS2NwoULm7YXLlyYqKioqz7mwIEDvPrqq6xatQrvm1wOd8SIEQwbNsxh+8yZMwkICKBjx478/vvvxMbGUqhQIerVq8fChQsBqFOnDjabjW3btgHQvn17Vq9ezblz58iXLx9NmzZl3rx5ANSoUQMfHx82b94MwH333cemTZs4ffo0wcHBtGzZklmzZgFQtWpVgoKCWL9+PWAfXbVr1y5OnDhBYGAg999/P9OnTwegYsWKFChQgDVr1gDQokUL9u/fT2RkJKdOneL5559n+vTp2Gw2ypYtS7FixVi5ciUAzZo1IzIykkOHDuHt7c1DDz3E7NmzSU5OplSpUpQtW5Y//vgDgMaNG3PmzBn277f/Ivnwww8zf/58EhISKF68OFWqVGHJkiUANGjQgNjYWCIiIgB46KGHWLx4MfHx8YSGhlKnTh0WLVoEwB133EFiYiI7/56v4sEHH2T58uWcP3+eAgUK0KBBA3766ScAateuDcDWrVsBaNeuHevWrSM6Opq8efPSrFmz9GZk9erV8ff3Z+PGjQC0bduWLVu2EBUVRe7cuWndujUzZ84EoEqVKoSEhLBu3TrA3nSMiIjg+PHjBAQE0L59e6ZOnQpAhQoVKFSoEKtXrwbg7r
vv5uDBgxw9ehRfX186derEzJkzSU1NpUyZMpQsWZLlf1+G0bRpU06cOMHBgwfx9PSka9euzJkzh6SkJEqWLEmFChXSR9E1atSI6Oho9u3bB0DXrl1ZuHAhly5dolixYlSrVo1f/14Gun79+ly8eJHdu3cD0LlzZ5YsWUJcXByFCxembt26/PzzzwCEh4eTkpLCjh07AOjQoQMrV64kJiaG/Pnz07hxY+bPnw9ArVq18PT0ZMuWLQDcf//9bNiwgTNnzhASEsI999zDnDlzAKhWrRoBAQFs2LABgDZt2rB9+3ZOnjxJUFAQbdu2Tb/WtlKlSuTLly+9sXvvvfeyd+9ejh07Rq5cuejQoQPTpk3DMAzKly9PaGho+txhzZs358iRIxw+fBgfHx86d+7MrFmzSElJISwsjNKlS7Ns2TIAmjRpQlRUFAcOHMDDw4Nu3boxZ84cQkNDKVGiBJUqVWLp0qUANGzYkJiYGPbu3QtAly5dWLRoERcvXqRo0aLUrFmTX375BYB69eqRkJDArl27AFzmM2LhsrVMPhzAqUQvvDygQ7HLVEvZi7dXFX1G4CafEX//m+380EMsWbKEInfdRdGVK8k9cSILzp6FtWsJT0py68+IefPmcfnyZbf8jLjazxF+fn507NjR7T4jTpw4QevWrd3vM0I/R+gz4hqfEdu2bSNXrlyAPiPc+ucIfUakf0acOHGCbt26uexnREJCAjfrplYZ+6/169czZswY9uzZg2EYVKlShYEDB1K/fv2bfo6TJ09SrFgx1q5da7r067333mPy5Mnpb+YVaWlp3HnnnTz55JP07dsXgKFDhzJv3rz0N+hqkpKSSEpKSr8dFxdHiRIlcsQqY1OnTuXhhx+2ugwRp+Gumdh4JIa+kzdz7lIyBYJ8+eKxcOqWdp//2RBgwwYYPBheeQUeeMC+LS2NqdOm8fCjj1pbm4iTcddzhci1KBMiZq6eiYysMnZLDaHMkJycTEBAADNnzuTBBx9M3/7cc8+xbds2VqxYYdr/woUL5M2b17T8m81mwzAMvLy8WLJkCXffffcNj5uTlp2Pjo6mQIECVpch4jTcMRPTNkTy5vxdpKQZVC0azJc96lIsTy6ry5Lscvw4vPYa/PCD/Xbt2rB5M/y9kpw7ZkLkRpQLETNlQsTM1TOR6cvOZwVfX1/Cw8PTh1ldsXTpUho2bOiwf3BwMDt37mTbtm3pX3379k0f5piR0Uk5xX9HUYm4O3fKRGqajaELdvPqnJ2kpBncV6MIs/o2VDPIXVy6BG+9BRUq/NMM6tULFi5MbwaBe2VC5GYpFyJmyoSImTtlIsNzCGWm559/nu7du1O3bl0aNGjAl19+SWRkZPolYa+99honTpxg0qRJeHp6Uq1aNdPjCxUqhL+/v8N2d3Hs2DGrSxBxKu6SiQsJyQz4cQtr/joHwIstKzCgeTk8/tUIkBzsp5+gb1/70vEATZrA6NH21cT+w10yIZIRyoWImTIhYuZOmbC0IdS1a1fOnTvH22+/zalTp6hWrRqLFi2iVKlSAJw6dYrIyEgrS3RqVyZ/ExE7d8jE/tPx9Jm0iaPnEgjw9WJ011q0qhpqdVmSnTw97c2gsDD46CPo2NE0Kujf3CETIhmlXIiYKRMiZu6UCcvmELJKTppDSETcy28Rpxk0fRsXk1IpnjcXE3vWpVKoPsdyvMOHISIC7rvPftswYNo0ePBB8Pe3tjYRERERcSpZOofQ6dOnr3nfleXnJHtMmzbN6hJEnEpOzYRhGIxb/hd9Jm/iYlIq9cPyseCZxmoG5XRxcfYJoytXhkcfheho+3YPD3j44ZtqBuXUTIjcDuVCxEyZEDFzp0xkuCFUvXp1FixY4LD9448/dsuJna3kZoO7RG4oJ2YiMSWN56Zt48PF+zAMeOzOkvzQuz75An2tLk2ySloafPUVlC8P778PSUlQty7Ex2f4qXJiJkRul3IhYqZMiJi5UyYyPIfQK6+8QteuXenZsy
ejR48mJiaG7t27s3v3bqZPn54VNco1lC9f3uoSRJxKTsvEqdjLPDVpMztPxOLt6cFbD1Sl+52lrC5LstIff8DgwXBlxG2FCvDxx3D//decJ+h6clomRDKDciFipkyImLlTJjLcEHrhhRdo0aIFjz32GDVq1CAmJoY777yTHTt2ULhw4ayoUa4hNFQTyYr8W07KxJbI8zw9eTNn45PIG+DDuEfDaVA2v9VlSVaKjISWLe0jhPLksS8r378/+N76aLCclAmRzKJciJgpEyJm7pSJDF8yBlCmTBmqVq3KkSNHiIuLo0uXLmoGWWDVqlVWlyDiVHJKJmZtPk63CX9yNj6JioVzs+CZxmoG5VRJSf98X7IkPPMMPPss/PUXDBp0W80gyDmZEMlMyoWImTIhYuZOmchwQ2jNmjXUqFGDv/76ix07djB+/HieffZZunTpwvnz57OiRhERt5BmM3jv5whenLmd5DQbLasUZnb/hpTIF2B1aZLZUlPh88+hVCnYteuf7aNHw5gxkF8NQBERERHJWhledt7Pz4/Bgwfzzjvv4OPjA8DBgwfp3r07kZGRHD9+PEsKzSw5adn5qKgotxrOJnIjrpyJ2MspDJy6lRX7zwIw8O5yDGpRAU/PjM8bI05u8WJ4/nnYs8d+++mn4YsvsuRQrpwJkayiXIiYKRMiZq6eiSxddn7JkiW8//776c0ggLJly7J69WqefvrpjFcrt+zIkSNWlyDiVFw1EwfPXuTBz9ewYv9Z/H08GftIbZ5vWVHNoJwmIgLatLF/7dljHwU0bhyMHZtlh3TVTIhkJeVCxEyZEDFzp0xkuCF01113Xf2JPD158803b7sguXmHDx+2ugQRp+KKmVix/ywdPl/DoehLFA3xZ1bfhtxfo6jVZUlme+MNqFHDPjrIxwdeeME+T1C/fuCd4fUdbporZkIkqykXImbKhIiZO2Uiwz+Fvv3229e9f8iQIbdcjGTMv0dpiYhrZcIwDL5efZjhi/ZgMyC8VF6+eCycgrn9rC5NskKBAvbVwzp0gI8+gnLlsuWwrpQJkeyiXIiYKRMiZu6UiQzPIVS7dm3T7ZSUFA4fPoy3tzdly5Zly5YtmVpgZstJcwiJiGtKTEnjjbm7mL3FPuda17oleLtDVfy8vSyuTDKFYcCCBZA7N9x9t31bcjKsXw9Nmlhbm4iIiIjkaFk6h9DWrVtNX7t27eLUqVPcc889DB48+JaLloybNWuW1SWIOBVXyMSZuEQe/upPZm85jpenB0PbVeH9TtXVDMoptm+He+6xjwTq3x9SUuzbfX0taQa5QiZEsptyIWKmTIiYuVMmMtwQuprg4GDefvttzSGUzVKu/KIhIoDzZ2LH8Qs8MHYNWyMvEJLLh+8fr0evRmF4eGjyaJcXFQV9+kDt2rBsGfj5QceO9uXlLeTsmRCxgnIhYqZMiJi5UyYybSbLCxcuEBsbm1lPJzchLCzM6hJEnIozZ2LB9pO8NHM7Sak2yhUK4qsedQkrEGh1WXK7EhNh9GgYPhwuXrRv69oV3n8fSpe2tDRw7kyIWEW5EDFTJkTM3CkTGW4IjRkzxnTbMAxOnTrF5MmTad26daYVJjdW2gl+2RBxJs6YCZvNYOTSfXy+7CAAd1cqxCfdahHs7z6T1eVoy5fD66/bv69Xz94catjQ0pL+zRkzIWI15ULETJkQMXOnTGT4krHRo0ebvsaMGcPy5cvp2bMnX375ZVbUKNewbNkyq0sQcSrOlon4xBSemrwpvRnU966yfNWjrppBri4m5p/vW7WCnj1h8mRYt86pmkHgfJkQcQbKhYiZMiFi5k6ZyPAIocOHD2dFHSIiOcrRc5foM2kT+09fxNfbkw871aBD7WJWlyW348QJ+2ighQth/37Inx88POC776yuTEREREQkwzJlUmmxRhMtXyxi4iyZWPtXNO0/X8P+0xcpHOzHzKcbqBnkyhISYNgwqFABJk2yjxD6+Werq7opzpIJEWeiXI
iYKRMiZu6UiVuaVHrjxo3MnDmTyMhIkpOTTffNmTMnUwqTG4uKiqJ48eJWlyHiNKzOhGEYTP7zKMN+iiDNZlCzRB6+7B5O4WB/y2qS22CzwY8/wquv2kcHgf2SsNGj7fMFuQCrMyHijJQLETNlQsTMnTKR4RFC06ZNo1GjRkRERDB37lxSUlKIiIjgjz/+ICQkJCtqlGs4cOCA1SWIOBUrM5GcauP1ubsYMn83aTaDjrWLMf2pO9UMclUpKdC4MXTvbm8GlSoF06fD6tUu0wwCnSdErka5EDFTJkTM3CkTGW4IDR8+nNGjR7Nw4UJ8fX359NNP2bNnD126dKFkyZJZUaNcg4eHh9UliDgVqzJx7mISj01cz9QNkXh4wOttKzGyS038fbwsqUcygY8P1KwJQUH2JeX37oUuXexzBrkQnSdEHCkXImbKhIiZO2XCwzAMIyMPCAwMZPfu3ZQuXZoCBQqwbNkyqlevzp49e7j77rs5depUVtWaKeLi4ggJCSE2Npbg4GCryxERFxdxMo4+kzZx4sJlcvt5M+bh2jSvVMjqsiSj4uNhxAh47DGoUsW+7dw5+0ih0FBraxMRERERuUkZ6XlkeIRQvnz5iI+PB6BYsWLs2rULgAsXLpCQkHAL5cqtmjdvntUliDiV7M7ELztP0Wn8Wk5cuExYgUDmDmikZpCrSUuDr7+G8uXtDaEXXvjnvvz5Xb4ZpPOEiCPlQsRMmRAxc6dM3HRD6IknniA+Pp4mTZqwdOlSALp06cJzzz1Hnz59ePjhh7nnnnuyrFBxdPnyZatLEHEq2ZUJm83gk9/202/KFi6npNGkfAHm9W9EuUJB2XJ8ySTLlkHdutC7N5w+DeXKQd++kLGBs05N5wkRR8qFiJkyIWLmTpm46VXGvv/+e95//33Gjh1LYmIiAK+99ho+Pj6sXr2ajh078uabb2ZZoeKoRIkSVpcg4lSyIxMJyam8MGM7v+yKAuDJxmG81qYS3l4ZHnApVvnrL3jpJbjyvz8hIfDWWzBgAPj6WlpaZtN5QsSRciFipkyImLlTJm56DiFPT0+ioqIoVMi1L4fISXMIRUdHU6BAAavLEHEaWZ2J4+cT6DNpM3tOxeHr5cm7D1ajS133OWHkGCNHwosvgpeXfUTQ0KGQQz9LdZ4QcaRciJgpEyJmrp6JLJtDyJ1m23YFVy7dExG7rMzEhsMxtB+7hj2n4igQ5MfUp+qrGeQqUlPhyJF/bj/7rP0ysR07YOzYHNsMAp0nRK5GuRAxUyZEzNwpEzd9yRhAhQoVbtgUiomJua2CRESczdQNkQyZv4uUNINqxYL5sntdiubJZXVZcjN+/RWefx5sNnsDyMfHflnYV19ZXZmIiIiIiKUy1BAaNmwYISEhWVWLZFDDhg2tLkHEqWR2JlLSbLy7MILv1x0F4L4aRfi4c01y+Xpl6nEkC+zZY18x7Jdf7Lfz5bNvq1HD2rqymc4TIo6UCxEzZULEzJ0ykaGGULdu3Vx+DqGcJCYmhlKlSlldhojTyMxMnL+UzIAft7D24DkAXmxZgQHNy+nSWWd37px9TqDx4+1Lynt72y8Re/NNyJvX6uqync4TIo6UCxEzZULEzJ0ycdNzCOmXIOezd+9eq0sQcSqZlYn9p+PpMG4Naw+eI9DXiy+7h/PM3eX1OejsjhyxLx0/dqy9GfTAA7B7N4wa5ZbNINB5QuRqlAsRM2VCxMydMnHTI4RucjEyERGX9lvEaQZN38bFpFRK5MvFVz3qUinUtVckdBulSkGdOhAdbW8C3XOP1RWJiIiIiDitm152PqfIScvOp6Wl4eWluUxErridTBiGwfgVB/no130YBtxZJh/jHg0nX6BvJlcpmWbHDvvlYRMn2ucIAjh71v69PhsBnSdErka5EDFTJkTMXD0TWbbsvDiXRYsWWV2CiFO51UwkpqTx3LRtfLjY3gzqfmcpJj9ZX80gZ3X6NDz1FNSuDXPnwttv/3NfwYJqBv2Lzh
MijpQLETNlQsTMnTKRoUmlxblcvHjR6hJEnMqtZOJU7GWemrSZnSdi8fb0YOgDVXnsTveYRM7lJCbCp5/Ce+9BfLx920MPwXPPWVuXE9N5QsSRciFipkyImLlTJtQQcmFFixa1ugQRp5LRTGyJPM/TkzdzNj6JvAE+jH8snDvL5M+i6uS2zJkDL74Ihw/bb9etC6NHQ+PG1tbl5HSeEHGkXIiYKRMiZu6UCTWEXFjNmjWtLkHEqWQkE7M2H+f1OTtJTrNRKTQ3X/WoS4l8AVlYndyWX3+1N4OKFoURI+Cxx8BTVz3fiM4TIo6UCxEzZULEzJ0yoZ+mXdgvv/xidQkiTuVmMpGaZuPdhRG8OHM7yWk2WlUtzOx+DdUMcjYnT9qXkb/i7bdh2DDYvx969FAz6CbpPCHiSLkQMVMmRMzcKRP6iVpE3Ebs5RSe+H4TE1fbLzsaeE95xj8aTqCfBks6jYQEe/OnfHkYMOCf7YULw5AhEBhoXW0iIiIiIjmIfgtyYfXq1bO6BBGncr1MHDx7kT7fb+JQ9CX8fTwZ+VAt7qtRJBurk+uy2WDqVHj1VTh+3L7t/Hn75NG5c1tbmwvTeULEkXIhYqZMiJi5UybUEHJhCQkJVpcg4lSulYnl+87w7NStxCemUjTEny971KVasZBsrk6uad06GDQINmyw3y5ZEj78ELp0AQ8PS0tzdTpPiDhSLkTMlAkRM3fKhC4Zc2G7du2yugQRp/LfTBiGwVcrD/HEdxuJT0ylbqm8zH+msZpBzuSnn6BhQ3szKCjIvqT83r3QtauaQZlA5wkRR8qFiJkyIWLmTpnQCCERyZESU9J4Y+4uZm+xX37UtW4J3u5QFT9vL4srE5OWLaFcOWjaFN59F4roMj4RERERkezgYRiGYXUR2SkuLo6QkBBiY2MJDg62upzbkpSUhJ+fn9VliDiNK5k4E5fI0z9sZmvkBbw8PXjzvsr0bFgaD404sZbNBt9/D1OmwOLF4P33/0kkJECAVnnLCjpPiDhSLkTMlAkRM1fPREZ6HrpkzIX9/vvvVpcg4lR+//13dhy/wANj17A18gIhuXz4/vF69GoUpmaQ1VauhDvugCeegN9/h0mT/rlPzaAso/OEiCPlQsRMmRAxc6dM6JIxFxYbG2t1CSJOZcWRS8xbu46kVBvlCgXxVY+6hBXQMuWWOngQXn4Z5syx3w4JgTffhEcftbYuN6HzhIgj5ULETJkQMXOnTKgh5MIKFSpkdQkiTsFmM/h4yT6mRwYANu6uVIhPutUi2N/H6tLcV0oKvPEGfPopJCeDpyc8/TQMGwYFC1pdndvQeULEkXIhYqZMiJi5UybUEHJh9erVs7oEEcvFJ6YwePo2fttzBoC+d5XlpVYV8fLUJWKW8vaGjRvtzaCWLWHkSKhWzeqq3I7OEyKOlAsRM2VCxMydMqE5hFzYwoULrS5BxFJHz12i47i1/LbnDH7ennQtmcCrbSqpGWSVpUshJsb+vYcHjBkDP/9sn0BazSBL6Dwh4ki5EDFTJkTM3CkTagiJiEta81c0D4xdw4EzFykc7MeMpxtQK2+q1WW5p7174f777SOB3nnnn+3Vq0PbtvbmkIiIiIiIOBVdMubC6tSpY3UJItnOMAwmrTvK2wsjSLMZ1CqRhy+7h1Mo2B9/ZSJ7xcTY5wQaNw5SU+2Xiflo3iZnovOEiCPlQsRMmRAxc6dMqCHkwmw2m9UliGSr5FQbby3YxdQNxwDoWLsYwztWx9/HC1Amsk1KCowfD0OHwvnz9m3t2sFHH0HFipaWJmbKhIgj5ULETJkQMXOnTOiSMRe2bds2q0sQyTbRF5N4dOKfTN1wDE8PeKNtZUZ2qZneDAJlItsMHQrPPWdvBlWvbp87aMECNYOckDIh4ki5EDFTJkTM3CkTGiEkIk5v98lYnpq0mRMXLpPbz5sxj9SmeUX3WQ7SKdhs9qXjAQYOhBkz4KWX4Mknwcvr+o8VERERERGn42EYhm
F1EdkpLi6OkJAQYmNjCQ4Otrqc25KQkEBAQIDVZYhkqV92nuL5Gdu5nJJGWIFAvupRl3KFgq66rzKRBc6cgSFDICoK5s37Z/u/G0TitJQJEUfKhYiZMiFi5uqZyEjPQz/Nu7DVq1dbXYJIlrHZDEYv3U+/KVu4nJJGk/IFmNe/0TWbQaBMZKqkJPucQOXLw4QJMH8+/Hv4rJpBLkGZEHGkXIiYKRMiZu6UCV0y5sLOnTtndQkiWeJSUiovzNjO4t1RAPRuHMarbSrh7XX9JoQykQkMA+bMgZdfhkOH7Nvq1IHRo6FWLUtLk4xTJkQcKRciZsqEiJk7ZUINIReWL18+q0sQyXTHYhLoM2kTe6Pi8fXy5N0Hq9GlbombeqwycZtOnIBHHoGVK+23ixSB4cOhRw+NCHJRyoSII+VCxEyZEDFzp0xoDiEXdvnyZXLlymV1GSKZZsPhGPr+sJmYS8kUCPJjQvc6hJe6+Q9kZeI2JSVB5cpw6pR9wuiXX4aga1+iJ85PmRBxpFyImCkTImaungnNIeQm5v17glcRFzd1QySPfPUnMZeSqVYsmAXPNMpQMwiUiQy7fBnGjoXUVPttPz/48UfYtw/eflvNoBxAmRBxpFyImCkTImbulAldMiYilkpJs/Huwgi+X3cUgPtrFOGjzjXJ5aulzLOMYcC0afDKK3DsmH3Z+H797Pfdeae1tYmIiIiISLZQQ8iF1ahRw+oSRG7L+UvJDPhxC2sP2idue6lVRfo3K4uHh8ctPZ8ycRP+/BMGD7b/CVCiBISGWluTZBllQsSRciFipkyImLlTJtQQcmE+Pj5WlyByy/afjqf395uIjEkg0NeL0V1r0bLq7TUmlInrOHYMXn3VfkkYQGAgvPYaPP88uPA10nJ9yoSII+VCxEyZEDFzp0xoDiEXtnnzZqtLELklv0Wc5sHP1xAZk0CJfLmY07/RbTeDQJm4rj597M0gDw94/HE4cADeeEPNoBxOmRBxpFyImCkTImbulAmNEBKRbGMYBuOWH+TjJfswDGhQJj/jHq1D3kBfq0vLeWw2SE4Gf3/77eHD7auIjRwJdepYW5uIiIiIiFhOy867sLi4OJd/DeI+Lien8fLsHfy0/SQAPRqU4s37q+DjlXkDFZWJv61caZ8nqFkzewNI3JYyIeJIuRAxUyZEzFw9E1p23k1s2rTJ6hJEbsqp2Mt0mbCOn7afxNvTg/cerMbb7atlajMIlAkOHYLOneGuu2DLFpg0CS5etLoqsZDbZ0LkKpQLETNlQsTMnTKhhpALO336tNUliNzQ5qPnaffZGnaeiCVfoC9Tetfn0fqlsuRYbpuJ2Fh4+WWoXBlmzwZPT3j6adi9G4KCrK5OLOS2mRC5DuVCxEyZEDFzp0xoDiEX5srD2MQ9zNp8nNfn7CQ5zUal0Nx81aMuJfIFZNnx3DITK1bAQw/B2bP22y1awKhRUL26tXWJU3DLTIjcgHIhYqZMiJi5UyY0h5ALS0lJcasl8cR1pKbZeP+XvUxcfRiAVlULM6pLLQL9srYH7ZaZOHUKKlSAokXt8wXdd599JTER3DQTIjegXIiYKRMiZq6eCc0h5CZmzZpldQkiDmIvp/DE95vSm0HP3VOe8Y+GZ3kzCNwkE/v3w4gR/9wuUgT++AN27YL771czSEzcIhMiGaRciJgpEyJm7pQJXTImIpnm4NmL9Pl+E4eiL5HLx4uRXWrStnoRq8vKGc6fh7ffhrFjITUV7rjDfnkY2L8XERERERHJADWEXFjVqlWtLkEk3bJ9Zxg4dSvxiakUy5OLL3uEU7VoSLbWkCMzkZICX3wBQ4dCTIx92333QcmSlpYlriFHZkLkNikXImbKhIiZO2VCDSEXFqTVg8QJGIbBxFWHGfHLHmwG1C2Vly+6h1MgyC/ba8lRmTAM+OUXeOEF2LvXvq1aNfuE0ffea21t4jJyVCZEMolyIWKmTIiYuVMmNIeQC1u/fr3VJYibS0xJ44WZ23
lvkb0Z1LVuCX7sc6clzSDIYZlISYEBA+zNoAIFYPx42LpVzSDJkByVCZFMolyImCkTImbulAmNEBKRW3ImLpGnJm9m27ELeHl68OZ9lenZsDQemtT41kVHQ5484O0Nvr72VcPWrYM33rBvFxERERERySRadt6FxcTEkC9fPqvLEDe0/dgFnpq8idNxSYTk8uHzR+rQuHwBq8ty3UwkJcFnn8E778CHH8LTT1tdkeQQLpsJkSykXIiYKRMiZq6eCS077yZ27dpldQnihuZvO0GXCes4HZdEuUJBzB/QyCmaQeCCmTAMmDsXqlaFl16CuDj7bZFM4nKZEMkGyoWImTIhYuZOmdAlYy7sxIkTVpcgbiTNZvDxkn2MX34QgLsrFeLTbrXI7e9jcWX/cKlMbN0KgwfDihX226GhMHw49OhhbV2So7hUJkSyiXIhYqZMiJi5UybUEHJhgYGBVpcgbiI+MYVB07bx+94zAPRrVpYXW1bEy9O55gtymUyMHGkfEWQY4O9vX0ns1VfBjVY0kOzhMpkQyUbKhYiZMiFi5k6Z0BxCLsxms+Hpqav+JGsdib5E70mb+OvMRfy8Pfmwcw3a1ypmdVlX5TKZ2LQJ6tWDbt3g/fehZEmrK5IcymUyIZKNlAsRM2VCxMzVM6E5hNzE9OnTrS5Bcrg1f0XT/vM1/HXmIoWD/ZjxdAOnbQaBk2bCMGD6dPj443+21a0LBw7Ajz+qGSRZyikzIWIx5ULETJkQMXOnTOiSMRFxYBgG3689wjs/7yHNZlCrRB6+7B5OoWB/q0tzLRs22OcJWrsWfHzgwQehbFn7fVf+FBERERERsYAaQi6sYsWKVpcgOVByqo0h83cxbeMxADrWLsbwjtXx9/GyuLIbc5pMHD8Or70GP/xgvx0QYJ8jqEgRa+sSt+M0mRBxIsqFiJkyIWLmTplQQ8iFFSjgHEt9S84RfTGJfj9sZuOR83h6wGttKtO7SRgeHs41efS1WJ6JS5fgo4/gww/h8mX7tp494b33oJjzXmonOZflmRBxQsqFiJkyIWLmTpnQHEIubM2aNVaXIDnI7pOxtB+7ho1HzpPb35tvet1Bn6ZlXKYZBE6QidhY+1xBly9D48awcSN8952aQWIZyzMh4oSUCxEzZULEzJ0yoRFCIsKinad4YcZ2LqekUaZAIF/1rEvZgloC/aZERECVKvbvixa1LymfPz906gQu1EwTERERERH3omXnXdjZs2cpWLCg1WWIC7PZDD75/QBjfj8AQNMKBfmsW21CAnwsruzWZGsmDh+GV16BmTPhjz+gefPsOa5IBug8IeJIuRAxUyZEzFw9E1p23k3s37/f6hLEhV1KSqX/lC3pzaDejcP4pmddl20GQTZlIi7OPmF05cr2ZpCnp301MREnpPOEiCPlQsRMmRAxc6dMqCHkwiIjI60uQVzUsZgEOo1fy+LdUfh6efJR5xr87/4qeHu59kdClmYiLQ0mToTy5eH99yEpCe65B7ZutY8UEnFCOk+IOFIuRMyUCREzd8qE5hByYX5+flaXIC5o/aFz9JuyhZhLyRQI8mNC93DCS+W1uqxMkaWZ6NgRFiywf1+hgn3y6Pvv1zxB4tR0nhBxpFyImCkTImbulAnNISTiRn5cH8mQ+btItRlULxbClz3CKRKSy+qyXMO0adCvH7z1FvTvD76+VlckIiIiIiJiojmE3MT06dOtLkFcREqajSHzd/H63J2k2gza1SzKjKcb5LhmUKZl4vx5eP55+Pbbf7Z17QqHDsGgQWoGicvQeULEkXIhYqZMiJi5UyZ0yZgLs9lsVpcgLuD8pWT6T9nCukPn8PCAF1tWpH+zsnjkwEudbjsTqakwYYJ9FNC5c1CoEHTpAoGB9kvD8uaMS+vEfeg8IeJIuRAxUyZEzNwpE2oIubCyZctaXYI4uX1R8fSetJFjMZcJ9PXik261ubdKYavLyjK3lYnFi+2jgvbssd+uUgVGjbI3g0RclM4TIo
6UCxEzZULEzJ0yoYaQCytWrJjVJYgTWxpxmkHTtnIpOY2S+QKY2LMuFQrntrqsLHVLmdi/H557zt4QAsifH955B/r0AW99RIpr03lCxJFyIWKmTIiYuVMmNIeQC1u5cqXVJYgTMgyDz5f9xVOTN3EpOY0GZfIzf0CjHN8MglvMRGysvRnk4wMvvAB//WWfPFrNIMkBdJ4QcaRciJgpEyJm7pQJ/cYjkoNcTk7jpVnbWbjjFAA9G5Tif/dXwcdLvd90ycmwfj00aWK/fccd8Nln0Lo1lCtnbW0iIiIiIiLZRMvOu7BTp05RpEgRq8sQJ3HywmWemryJXSfi8Pb04O321Xikfkmry8pW182EYcCCBfDiixAZaZ8rqEyZ7C1QJJvpPCHiSLkQMVMmRMxcPRNadt5NREZGWl2COInNR8/zwNg17DoRR75AX6b0ru92zSC4Tia2b4d77oEOHeyXhOXNC4cPZ2ttIlbQeULEkXIhYqZMiJi5UybUEHJhhw4dsroEcQIzNx3j4S//JPpiEpVCczN/QCPql8lvdVmWcMjE6dP2yaFr14Zly8DPD15/HQ4csDeIRHI4nSdEHCkXImbKhIiZO2XC8obQuHHjCAsLw9/fn/DwcFatWnXNfefMmcO9995LwYIFCQ4OpkGDBvz666/ZWK1z8dakt24tNc3GOwsjeGnWDpLTbLSuGsrsfg0pkS/A6tIsY8pEUhLUqgUTJ9ovF+vaFfbuhffeg9w5f4JtEdB5QuRqlAsRM2VCxMydMmHpHELTp0+ne/fujBs3jkaNGjFhwgQmTpxIREQEJUs6Xu4yaNAgihYtSvPmzcmTJw/ffvstH3/8MevXr6d27do3dcycNIeQuK/YhBSembqFVQeiAXjunvI8d095PD09LK7MYoYBHv96D95+GxYuhNGjoVEj6+oSERERERHJBi4zh9CoUaN48skn6d27N5UrV+aTTz6hRIkSjB8//qr7f/LJJ7z88svccccdlC9fnuHDh1O+fHl++umnbK7cOcyePdvqEsQCB89e5MFxa1h1IJpcPl6Me7QOg++toGbQxo1EV6kCy5f/s+3VV+HPP9UMErel84SII+VCxEyZEDFzp0xY1hBKTk5m8+bNtGzZ0rS9ZcuWrF279qaew2azER8fT758+a65T1JSEnFxcaavnCI5OdnqEiSbLdt3hg6fr+FQ9CWK5cnFrH4NaFvddWfAzxTHj0OPHlCvHgX27oX//e+f+3x9wdPyK2NFLKPzhIgj5ULETJkQMXOnTFh2cVx0dDRpaWkULlzYtL1w4cJERUXd1HOMHDmSS5cu0aVLl2vuM2LECIYNG+awfebMmQQEBNCxY0d+//13YmNjKVSoEPXq1WPhwoUA1KlTB5vNxrZt2wBo3749q1ev5ty5c+TLl4+mTZsyb948AGrUqIGPjw+bN28G4L777mPTpk2cPn2a4OBgWrZsyaxZswCoWrUqQUFBrF+/HoBWrVqxa9cuTpw4QWBgIPfffz/Tp08HoGLFihQoUIA1a9YA0KJFC/bv309kZGR6c2v69OnYbDbKli1LsWLFWLlyJQDNmjUjMjKSQ4cO4e3tzUMPPcTs2bNJTk6mVKlSlC1blj/++AOAxo0bc+bMGfbv3w/Aww8/zPz580lISKB48eJUqVKFJUuWANCgQQNiY2OJiIgA4KGHHmLx4sXEx8cTGhpKnTp1WLRoEQB33HEHiYmJ7Ny5E4AHH3yQ5cuXc/78eQoUKECDBg3SR3hduexv69atALRr145169YRHR1N3rx5adasGXPnzgWgevXq+Pv7s3HjRgDatm3Lli1biIqKInfu3LRu3ZqZM2cCUKVKFUJCQli3bh1gbzpGRERw/PhxAgICaN++PVOnTgWgQoUKFCpUiNWrVwNw9913c/DgQY4ePYqvry+dOnVi5syZpKamUqZMGUqWLMnyv0ekNG3alBMnTnDw4EE8PT3p2rUrc+bMISkpiZIlS1KhQgV+++03AB
o1akR0dDT79u0DoGvXrixcuJBLly5RrFgxqlWrlj4/Vv369YmPj2fC8r/45ZQfBh6UDYZuRc9weu9mSgTV5eeffwYgPDyclJQUduzYAUCHDh1YuXIlMTEx5M+fn8aNGzN//nwAatWqhaenJ1u2bAHg/vvvZ8OGDZw5c4aQkBDuuece5syZA0C1atUICAhgw4YNALRp04bt27dz8uRJgoKCaNu2LTNmzACgUqVK5MuXL72xe++997J3716OHTtGrly56NChA9OmTcMwDMqXL09oaGj63GHNmzfnyJEjHD58GB8fHzp37sysWbNISUkhLCyM0qVLs2zZMvv7HR4OI0dS+Lvv8P77Q3v/nXcS0a0bBVavplKlSixduhSAhg0bEhMTw969ewHo0qULixYt4uLFixQtWpSaNWvyyy+/AFCvXj0SEhLYtWsXgEt/Rvj5+dGxY0d9RpDzPyMuXrzI7t27AejcuTNLliwhLi6OlJQU4uLi3PIzokmTJkRFRXHgwAE8PDzo1q0b8+bN4/Lly5QoUUKfEW78GRETE8OuXbv0GREXR+HChalb1z1/jtBnxD+fEcHBwen/hvUZoZ8j9BmxjJiYGI4fP+6ynxEJCQncLMvmEDp58iTFihVj7dq1NGjQIH37e++9x+TJk9PfzGuZOnUqvXv3Zv78+bRo0eKa+yUlJZGUlJR+Oy4ujhIlSuSIOYROnz7t0FCTnCcxJY3X5+5kzpYTAHS7owRvt6+Gr7cbj3yZNw+eeQZO2N8TGjWC0aM5XbKkMiHyLzpPiDhSLkTMlAkRM1fPhEvMIVSgQAG8vLwcRgOdOXPmhm/+9OnTefLJJ5kxY8Z1m0EAfn5+BAcHm75yiisdd8m5Tscl0u3LP5mz5QRenh4Me6AqIzpWd+9mEEBCgr0ZVKoUTJ8Oq1bBHXcoEyL/oUyIOFIuRMyUCREzd8qEZb9V+vr6Eh4enj7M6oqlS5fSsGHDaz5u6tSp9OrVix9//JH77rsvq8sUscz2Yxd4YOxqth27QEguHyY9UY+eDUvj4eGGk0cfPQp/D/EG4OGH4euv7cvId+liXllMREREREREbsiyOYQAnn/+ebp3707dunVp0KABX375JZGRkfTt2xeA1157jRMnTjBp0iTA3gzq0aMHn376KXfeeWf66KJcuXIREhJi2euwSuPGja0uQbLIvK0neHn2DpJTbZQvFMTEnnUplT/Q6rKyX3w8jBgBo0ZBnjxw4ADkzm1vAD3xhMPuyoSImTIh4ki5EDFTJkTM3CkTll530rVrVz755BPefvttatWqxcqVK1m0aBGlSpUC4NSpU0RGRqbvP2HCBFJTUxkwYABFihRJ/3ruueesegmWOnPmjNUlSCZLsxm8/8teBk3fRnKqjXsqFWJO/4bu1wxKS7OPACpf3t4QSkqCKlXg/PnrPkyZEDFTJkQcKRciZsqEiJk7ZcLyiUj69+/PkSNHSEpKYvPmzTRt2jT9vu+++y59VnWA5cuXYxiGw9d3332X/YU7gSuz9EvOEJ+YQp9Jm/hixUEA+jcry5c96pLb38fiyrLZsmVQty707g2nT0O5cvZJpH//HUqWvO5DlQkRM2VCxJFyIWKmTIiYuVMmLL1kTETsjkRfovekTfx15iJ+3p582LkG7WsVs7qs7Ld/P9x9t/37kBAYMsS+mpivr7V1iYiIiIiI5DCWLTtvlYwswSaSHVYfiGbAj1uIvZxCaLA/X/YIp0bxPFaXlX1SUsDnX6OgevWCwEAYNgwKFLCsLBEREREREVfjEsvOy+2bP3++1SXIbTAMg2/XHKbntxuIvZxCrRJ5WPBMI/dpBqWmwrhxEBYGhw//s/3bb+Hzz2+pGaRMiJgpEyKOlAsRM2VCxMydMqGGkAtLSEiwugS5RUmpabw6eyfDfoogzWbQsU4xpj11J4WC/a0uLXv8+ivUrAkDBsCJEzBmzD/33cYS8sqEiJkyIeJIuRAxUyZEzNwpE5pDyIUVL17c6h
LkFkRfTKLv5M1sOnoeTw94vW1lnmwchsdtNEJcxp498OKLsGiR/Xb+/PZLw556KlOeXpkQMVMmRBwpFyJmyoSImTtlQg0hF1alShWrS5AM2n0ylj7fb+JkbCK5/b357OHaNKtYyOqysserr8LHH9uXlPf2hmefhTffhLx5M+0QyoSImTIh4ki5EDFTJkTM3CkTumTMhS1ZssTqEiQDft5xis7j13EyNpEyBQKZN6CR+zSDAHLlsjeD2reHiAgYNSpTm0GgTIj8lzIh4ki5EDFTJkTM3CkTGiEkksVsNoNPfj/AmN8PANC0QkE+61abkACfGzzShRkGLFwIhQpB/fr2bS+9BE2bQvPm1tYmIiIiIiIiagi5sgYNGlhdgtzApaRUnp+xjV93nwagd+MwXm1TCW+vHDw4b8cOeP55+P13CA+HDRvA0xMCArK8GaRMiJgpEyKOlAsRM2VCxMydMqGGkAuLjY21ugS5jmMxCfSZtIm9UfH4enny3oPVeKhuCavLyjqnT9vnBPr6a7DZwM8P7r0XUlLs32cDZULETJkQcaRciJgpEyJm7pSJHDxMIeeLiIiwugS5hj8PnaP952vYGxVPwdx+TH3qzpzbDEpMhA8+gPLl4auv7M2ghx6yryg2YkS2NYNAmRD5L2VCxJFyIWKmTIiYuVMmNEJIJJNNWX+Ut+bvJtVmUL1YCF/2CKdISC6ry8o6CxbYVxADqFsXRo+Gxo2trUlERERERESuy8MwDMPqIrJTXFwcISEhxMbGEhwcbHU5tyU1NRVvb/X0nEVKmo23f4pg8p9HAXigZlE+7FwDfx8viyvLAnFxcCU/Nht07gwdOsBjj9nnC7KIMiFipkyIOFIuRMyUCREzV89ERnoeumTMhS1evNjqEuRvMZeS6f71eib/eRQPD3i5dUU+7VYr5zWDTp6EXr2gcmWIj7dv8/SEOXOgRw9Lm0GgTIj8lzIh4ki5EDFTJkTM3CkTrtv2EuKv/EIultoXFU/vSRs5FnOZQF8vPu1WmxZVCltdVuZKSICRI+H99+3fAyxebJ8ryIkoEyJmyoSII+VCxEyZEDFzp0yoIeTCQkNDrS7B7S3ZHcXg6du4lJxGyXwBTOxZlwqFc1tdVuax2WDqVPscQceP27c1bGifJ6hePWtruwplQsRMmRBxpFyImCkTImbulAnNIeTCYmNjCQkJsboMt2QYBp8v+4uPl+wHoGHZ/Hz+SB3yBvpaXFkmSkyE5s3hzz/tt0uVsq8m1qULeHhYW9s1KBMiZsqEiCPlQsRMmRAxc/VMaA4hN7Fo0SKrS3BLl5PTeHbq1vRmUK+Gpfn+iXo5qxkE4O8PYWEQFATDh9uXke/a1WmbQaBMiPyXMiHiSLkQMVMmRMzcKRO6ZEwkA05euMxTkzex60QcPl4evN2+Gg/XK2l1WZnj4kX7CKAnn4TSpe3bRo6EUaPAjYZNioiIiIiIuAM1hFzYHXfcYXUJbmXz0RienryF6ItJ5Av05YvHwqkXls/qsm6fzQbffw+vvw5RUXDgAEybZr+vSBFra8sgZULETJkQcaRciJgpEyJm7pQJNYRcWGJiotUluI0Zm47xv7m7SE6zUSk0NxN71qV43gCry7p9K1bA4MGwdav9dtmy9svCXJQyIWKmTIg4Ui5EzJQJETN3yoTmEHJhO3futLqEHC81zcY7CyN4edYOktNstKkWyux+DV2/GXTwIHTqBM2a2ZtBISHw8cewezc8+KDV1d0yZULETJkQcaRciJgpEyJm7pQJjRASuYbYhBSembqFVQeiARjUojwD7y6Pp6fzTqp80yZNgjlzwNMTnn4ahg2DggWtrkpERERERESyiZadd2GJiYn4+/tbXUaO9NeZi/SZtInD0ZfI5ePFqC41aVPdtebTMUlNhTNnoGhR++2LF6FvX3j1VahWzdraMpEyIWKmTIg4Ui5EzJQJETNXz4SWnXcTy5cvt7qEHGnZvjM8+PkaDkdfolieXMzu19
C1m0FLl0Lt2tC+vX0CabAvJf/DDzmqGQTKhMh/KRMijpQLETNlQsTMnTKhS8Zc2Pnz560uIUcxDIOvVh1ixC97MQyoVzof4x6rQ4EgP6tLuzV798KLL8LPP9tv58tnX0GsYkVr68pCyoSImTIh4ki5EDFTJkTM3CkTGiHkwgoUKGB1CTlGYkoaL8zYzvBF9mbQw/VK8EPv+q7ZDIqJgeeeg+rV7c0gb28YNAj++itHN4NAmRD5L2VCxJFyIWKmTIiYuVMmNIeQC7t48SJBQUFWl+HyTscl8tTkzWw/dgEvTw/ealeF7neWwsPDBSeP3rcPGjSAK13tdu3go49yfCPoCmVCxEyZEHGkXIiYKRMiZq6eCc0h5CZ++uknq0tweduPXeCBsavZfuwCeQJ8mPxEPXo0KO2azSCA8uWhbFn73EBLl8KCBW7TDAJlQuS/lAkRR8qFiJkyIWLmTplQQ0jc1rytJ3howjpOxyVRvlAQ8wc0omE5FxseuGsXPPKIfdUwsC8jP38+bN0KLVpYW5uIiIiIiIg4LU0q7cJq165tdQkuKc1m8NGv+/hixUEAWlQuxOiutcjt72NxZRlw5gwMGQJffWVfOaxcOXj7bft9V5aWd0PKhIiZMiHiSLkQMVMmRMzcKRNqCIlbiU9M4blp2/hj7xkABjQvywv3VsTT00UuEUtKgjFj4N13IS7Ovq1zZ+jVy9KyRERERERExLXokjEXtnXrVqtLcCmHoy/x4Li1/LH3DH7ennzarRYvtarkOs2gOXOgShV4+WV7M6hOHVixAmbOhDJlrK7OKSgTImbKhIgj5ULETJkQMXOnTGiEkLiFVQfOMmDKFuISUwkN9ufLHuHUKJ7H6rIyZuZMOHQIihSBESOge3f7nEEiIiIiIiIiGaRl512Yqy+Hlx0Mw+C7tUd49+c9pNkMapfMw4THwikU7G91aTd26pT9zyJF7H8ePQrffAMvvQT6e78qZULETJkQcaRciJgpEyJmrp4JLTvvJtatW2d1CU4tKTWNV2fvZNhPEaTZDDrVKc7UPnc6fzPo8mX7HEHly8MLL/yzvVQpGDZMzaDrUCZEzJQJEUfKhYiZMiFi5k6Z0CVjLiw6OtrqEpzW2fgk+v2wmU1Hz+PpAa+3rcyTjcPw8HDi+YIMA6ZNg1degWPH7NuOHIHERPB38iaWk1AmRMyUCRFHyoWImTIhYuZOmVBDyIXlzZvX6hKc0q4TsTw1aRMnYxPJ7e/NZw/XplnFQlaXdX1//gmDB9v/BChRAt5/Hx5+GJy5ieVklAkRM2VCxJFyIWKmTIiYuVMmNIeQC0tMTMRfI0dMft5xihdmbiMxxUaZAoF81bMuZQs6+SVWM2ZA16727wMD4dVX4fnnISDA2rpckDIhYqZMiDhSLkTMlAkRM1fPhOYQchNz5861ugSnYbMZjFqyjwE/biExxUbTCgWZO6CR8zeDANq2hWLFoFcv2L8f/vc/NYNukTIhYqZMiDhSLkTMlAkRM3fKhC4ZE5d3KSmVwdO3sSTiNAB9moTxapvKeHk64aVWNhtMmgTz5sGcOfZl44OCICICXHzEmoiIiIiIiLgONYRcWPXq1a0uwXLHYhLoM2kTe6Pi8fXyZHjH6nQOL251WVe3cqV9nqAtW+y3Z82CLl3s36sZlCmUCREzZULEkXIhYqZMiJi5UybUEHJhrnxdY2ZYd/Ac/ads5nxCCgVz+zGhezh1SjrhBGCHDsHLL8Ps2fbbwcH2y8Lat7e2rhzI3TMh8l/KhIgj5ULETJkQMXOnTGgOIRe2ceNGq0uwzA9/HqX71+s5n5BC9WIhLHimkfM1gxIT7UvIV65sbwZ5ekLfvnDgALz0Evj5WV1hjuPOmRC5GmVCxJFyIWKmTIiYuVMmNEJIXEpKmo1hP+3mhz8jAXigZlE+7FwDfx8viyu7Cl9f+O03SE6Ge++FkSPBjYYfioiIiIiIiPPSsvMuLDY2lpCQEKvLyDYxl5LpP2Uzfx
6KwcMDXmpVkX53lcXDw4kmj/7jD6hf3758PMD69RAdbV9JzJnqzKHcLRMiN6JMiDhSLkTMlAkRM1fPhJaddxNbrkxO7Ab2RsXxwNjV/HkohkBfL77qXpf+zco5TzNo3z5o1w7uuQc++uif7fXrw333qRmUTdwpEyI3Q5kQcaRciJgpEyJm7pQJXTLmwqKioqwuIVv8ujuKwdO3kZCcRqn8AXzVoy4VCue2uiy7mBh4+234/HNITQUvL/vcQWIJd8mEyM1SJkQcKRciZsqEiJk7ZUINIReWO7eTNEWyiGEYjP3jL0Yu3Q9Ao3L5+fyROuQJ8LW4MiAlBb74AoYOtTeFwD4S6OOPoVIlS0tzZzk9EyIZpUyIOFIuRMyUCREzd8qE5hByYampqXh758ye3uXkNF6ctZ2fd5wCoFfD0rxxX2V8vJzkKsfBg+GTT+zfV60Ko0ZBy5aWliQ5OxMit0KZEHGkXIiYKRMiZq6eCc0h5CZmzpxpdQlZ4uSFy3T+Yi0/7ziFj5cHIzpWZ+gDVa1vBv27dzpwIBQvDuPHw7ZtagY5iZyaCZFbpUyIOFIuRMyUCREzd8qE67a9JEfafDSGpydvJvpiMvkDfRn/WDj1wvJZW9TZszBkCCQkwPff27eFhcHhw+DCnWMRERERERFxX/pt1oVVqVLF6hIy1YyNx3hj3k5S0gwqFwnmqx7hFM8bYF1BSUnw2WfwzjsQF2ff9sYbUKGC/Xs1g5xOTsuEyO1SJkQcKRciZsqEiJk7ZUK/0bqwkJAQq0vIFKlpNoYv2ss3aw4D0KZaKCO71CTA16J/noYB8+bBSy/BwYP2bbVrw+jR/zSDxCnllEyIZBZlQsSRciFipkyImLlTJjSHkAtbt26d1SXcttiEFB7/bmN6M2hwiwp8/kgd65pBkZFw993QsaO9GRQaCt98Axs3wl13WVOT3LSckAmRzKRMiDhSLkTMlAkRM3fKhEYIiWX+OhNP7+83ceRcArl8vBjdtSatqxWxtqi8eWHPHvD3hxdegFdfhaAga2sSERERERERyWRadt6FnTt3jvz581tdxi1ZtvcMA6duJT4plWJ5cvFVj7pUKWrB38flyzBlCjzxBHj+PWDujz+gbFkoVSr765Hb4sqZEMkKyoSII+VCxEyZEDFz9Uxo2Xk3ERERYXUJGWYYBhNWHOSJ7zcSn5RKvbB8LHimUfY3gwwDpk2DSpWgTx/791fcfbeaQS7KFTMhkpWUCRFHyoWImTIhYuZOmdAlYy7s+PHjVpeQIYkpabw2Zydzt54A4OF6JRn2QFV8vbO5L7lhAwweDGvX2m8XLw6Bgdlbg2QJV8uESFZTJkQcKRciZsqEiJk7ZUINIRcWEGDhkuwZdDoukacmb2b7sQt4eXowtF0VHruzFB4eHtlXxLFj8Npr9kvEAAIC7HMEvfCC/Xtxea6UCZHsoEyIOFIuRMyUCREzd8qE5hCSLLft2AWemrSJM/FJ5AnwYdwjdWhYrkD2F9KkCaxebf++Z0947z0oViz76xARERERERHJAppDyE1MnTrV6hJuaO7W43SZsI4z8UlUKBzEggGNs68ZZLNBcvI/t999194U2rgRvvtOzaAcyBUyIZKdlAkRR8qFiJkyIWLmTplQQ0iyRJrNYMQvexg8fTvJqTZaVC7MnP6NKJk/m4bfrVkD9evD++//s+2uu2DFCqhbN3tqEBEREREREXFSmkPIhVWoUMHqEq4qLjGF56ZuZdm+swA807wcz99bAU/PbJgv6MgReOUVmDHDfvv4cfttPz/77eycs0iynbNmQsQqyoSII+VCxEyZEDFzp0xohJALK1SokNUlODgcfYkHP1/Dsn1n8fP2ZMzDtXmxVcWsbwbFxdknjK5Uyd4M8vSEp56Cbdv+aQZJjueMmRCxkjIh4ki5EDFTJkTM3CkTagi5sNVXJkh2EqsOnKX92NUcPHuJ0GB/ZvVtyAM1i2b9gX/7DcqXt18elpQE99wDW7fChA
lQuHDWH1+chrNlQsRqyoSII+VCxEyZEDFzp0zokjG5bYZh8N3aI7z78x7SbAa1S+ZhQvdwCuX2z54CypSBCxfsTaGRI+H++3VpmIiIiIiIiMh1aNl5F3b69GkKWzwCJik1jSHzdjN90zEAOocX570Hq+Hn7ZV1Bz1wAH75BQYO/Gfb6tVQrx74+mbdccXpOUMmRJyJMiHiSLkQMVMmRMxcPRNadt5NHDx40NLjn41P4pGv1jN90zE8PeB/91Xmo841sq4ZdP48PP88VK0Kzz0HGzb8c1/jxmoGieWZEHE2yoSII+VCxEyZEDFzp0yoIeTCjh49atmxd52I5YGxq9l89Dy5/b359vF69G5SBo+suFQrJQXGjrVfEjZ6tP12mzaQJ0/mH0tcmpWZEHFGyoSII+VCxEyZEDFzp0xoDiEX5mvRiJiFO07y4sztJKbYKFMwkIk96lKmYFDWHOyXX+CFF2DPHvvtKlVg1Cho1SprjicuzapMiDgrZULEkXIhYqZMiJi5UyY0h5DcNJvNYPRv+/nsj78AuKtCQcY8XJuQXD5Zc8CEBAgLgzNnIH9+eOcd6NMHvNXHFBEREREREfkvzSHkJmbOnJltx7qYlMrTP2xObwY91bQM3/S6I/ObQTExcKVHGRAAH3xgHyH011/Qr5+aQXJd2ZkJEVegTIg4Ui5EzJQJETN3yoR+u3Zhqamp2XKcYzEJ9P5+E/tOx+Pr5cmIjtXpFF48cw+SnGyfJ+idd2D8eOjWzb69V6/MPY7kaNmVCRFXoUyIOFIuRMyUCREzd8qERgi5sDJlymT5MdYdPMcDY1ez73Q8BXP7Me3pOzO3GWQYMH++feWwF16ACxfgxx8z7/nFrWRHJkRciTIh4ki5EDFTJkTM3CkTGiHkwkqWLJmlzz/5z6MMW7CbVJtBjeIhfNm9LqEh/pl3gO3bYfBgWLbMfrtwYXjvPY0KkluW1ZkQcTXKhIgj5ULETJkQMXOnTGiEkAtbvnx5ljxvSpqNN+bu5M15u0i1GbSvVZQZTzfI3GbQBx9A7dr2ZpCfH7z2Ghw4AE8+CV5emXcccStZlQkRV6VMiDhSLkTMlAkRM3fKhEYIiUnMpWT6/bCZ9Ydj8PCAl1tVou9dZfDw8MjcA9Wvb79crGtXeP99KF06c59fRERERERERK5JDSEX1rRp00x9vj2n4ugzaRPHz18myM+bT7vV4p7KhW//iQ0DZsyA8+ehb1/7tmbNYPduqFLl9p9f5G+ZnQkRV6dMiDhSLkTMlAkRM3fKhBpCLuzEiRMUK1YsU57r191RDJ6+jYTkNErlD2Bij7qUL5z79p9440b7PEFr1tiXkX/gASha1H6fmkGSyTIzEyI5gTIh4ki5EDG7nUykpaWRkpKSyRWJWOvEiRPkz5/f6jKuy9fXF0/P258BSA0hF3bw4EHq1at3W89hGAZj//iLkUv3A9CoXH4+f6QOeQJ8b6+448fh9ddh8mT77YAAePllCAm5vecVuY7MyIRITqJMiDhSLkTMbiUThmEQFRXFhQsXsqYoEQt5enpy+PBhq8u4Lk9PT8LCwvD1vb3f29UQcmG32xFMSE7lpZk7+HnnKQB6NSzNG/dVxsfrNp43IQE++sg+afTly/Zt3bvD8OFQPBOXqxe5iszokovkJMqEiCPlQsTsVjJxpRlUqFAhAgICMn++URELXbhwgTx58lhdxjXZbDZOnjzJqVOnKFmy5G3lz8MwDCMTa3N6cXFxhISEEBsbS3BwsNXlWObEhcs8NWkTu0/G4ePlwdvtq/FwvUxYXu/gQfulYMnJ0KgRjB4Nd9xx+88rIiIiIiKWS0tLY//+/RQqVMjpL6sRyaliY2M5efIk5cqVw8fHx3RfRnoe+i8SFzZnzpxbetymIzG0H7ua3SfjyB/oy5Ted95eM+ivv/75vmxZ+2igGTNg1So1gyRb3WomRHIqZULEkXIhYpbRTFyZMyggICAryhGx3Pnz560u4YauXCqWlp
Z2W8+jhpALS0pKyvBjZmw8xsNf/Un0xWSqFAlm/jONqBeW79YKOHLEvmx8hQqwadM/2194AR56CDR0VLLZrWRCJCdTJkQcKRciZreaCV0mJjmVzWazuoQbyqz8aQ4hF1ay5M2P6klNs/Heoj18u+YIAG2rh/LxQzUJ8L2FfwLx8TBiBIwaBUlJ9sbPypVQt27Gn0skE2UkEyLuQJkQcaRciJgpEyJmfn5+VpeQbTRCyIVVqFDhpva7kJBMr283pjeDnr+3Ap8/UifjzaC0NJg4EcqXtzeEkpKgeXPYsgWefz6D1YtkvpvNhIi7UCZEHCkXImbKhFzN119/TcuWLa0uwxJWN4SSkpIoWbIkmzdvzvJjqSHkwn777bcb7vPXmXg6fL6G1X9FE+DrxRePhTPwnvK3NsSsbVvo0wdOn4Zy5WDePPj9d6hVK+PPJZIFbiYTIu5EmRBxpFyImLlTJnr16oWHhwceHh54e3tTsmRJ+vXrd9U5Y9auXUvbtm3Jmzcv/v7+VK9enZEjR151zpZly5bRtm1b8ufPT0BAAFWqVOGFF17gxIkT2fGyMl1SUhJDhgzhzTfftLqULGMYBkOHDqVo0aLkypWLZs2asXv3bsA+KfO1XLhwgQEDBlCkSBH8/f2pXLkyixYtSr9//Pjx1KhRg+DgYIKDg2nQoAG//PKL6Tmu/Bv879dHH30E2BtSL774Iq+88koWvHIzNYRysD/2nqbD52s5ci6BYnlyMbtfQ1pXC731J+zUCUJCYORI2L0b2rfXPEEiIiIiIuIyWrduzalTpzhy5AgTJ07kp59+on///qZ95s6dy1133UXx4sVZtmwZe/fu5bnnnuO9996jW7du/Huh7gkTJtCiRQtCQ0OZPXs2ERERfPHFF8TGxjJy5Mhse13JycmZ9lyzZ88mKCiIJk2a3NbzXJmA3Bl9+OGHjBo1irFjx7Jx40ZCQ0O59957iY+Pv+ZjkpOTuffeezly5AizZs1i3759fPXVVxQrVix9n+LFi/P++++zadMmNm3axN1330379u3Tm00Ap06dMn198803eHh40KlTp/R9Hn30UVatWsWePXuy5g24wnAzsbGxBmDExsZaXcptO3r06FW322w2Y/zyv4zSry40Sr2y0Hjoi7VGdHxixp78/HnDeOEFw5g1659tqamGcfbsrRcsksWulQkRd6VMiDhSLkTMMpqJy5cvGxEREcbly5fTt9lsNuNSUoolXzab7aZr79mzp9G+fXvTtueff97Ily9f+u2LFy8a+fPnNzp27Ojw+AULFhiAMW3aNMMwDOPYsWOGr6+vMWjQoKse7/z589es5fz580afPn2MQoUKGX5+fkbVqlWNn376yTAMw3jrrbeMmjVrmvYfPXq0UapUKYfXMnz4cKNIkSJGqVKljFdffdWoX7++w7GqV69uDBkyJP32N998Y1SqVMnw8/MzKlasaHz++eem/du1a2e8+OKLpm0bNmwwWrRoYeTPn98IDg42mjZtamzevNm0D2CMHz/eeOCBB4yAgID0Yy5YsMCoU6eO4efnZ4SFhRlDhw41UlJS0h83cuRIo1q1akZAQIBRvHhxo1+/fkZ8fPw137vbZbPZjNDQUOP9999P35aYmGiEhIQYX3zxhZGYePXfncePH2+UKVPGSE5OztDx8ubNa0ycOPGa97dv3964++67HbY3a9bMePPNN6/6mKvl8IqM9Dw0qbQLi46OdpgELjEljVdn72DetpMAPFK/JEPbVcXX+yYHg6WmwldfwZAhEB0NpUrB/feDnx94eUGBApn9MkQyzdUyIeLOlAkRR8qFiFlmZOJyShpVhvyaSRVlTMTbrW5toRzg0KFDLF68GB8fn/RtS5Ys4dy5c7z44osO+7dr144KFSowdepUunbtysyZM0lOTubll1++6vPnyZPnqtttNhtt2rQhPj6eH374gbJlyxIREYGXl1eG6v/9998JDg5m6dKl6aOW3n//fQ4ePEjZsm
UB2L17Nzt37mTWrFkAfPXVV7z11luMHTuW2rVrs3XrVvr06UNgYCA9e/YEYNWqVTz66KOmY8XHx9OzZ0/GjBkDwMiRI2nbti0HDhwgd+7c6fu99dZbjBgxgtGjR+Pl5cWvv/7KY489xpgxY2jSpAkHDx7kqaeeSt8XwNPTkzFjxlC6dGkOHz5M//79efnllxk3btw1X3ubNm1YtWrVdd+fixcvXnX74cOHiYqKMs2R5Ofnx1133cXatWt57LHHrjqP0IIFC2jQoAEDBgxg/vz5FCxYkEceeYRXXnnlqn93aWlpzJw5k0uXLtGgQYOr1nL69Gl+/vlnvv/+e4f76tWrd8PXeLvUEHJh+/bto06dOum3o2ITeXryJrYfj8XL04OhD1Sl+52lbv4Jf/3VvmT8leFslSvbLw9zo1nWxbX9NxMi7k6ZEHGkXIiYuVsmFi5cSFBQEGlpaSQmJgIwatSo9Pv3798PQOXKla/6+EqVKqXvc+DAAYKDgylSpEiGavjtt9/YsGEDe/bsSZ/Uu0yZMhl+LYGBgUycOBFfX9/0bTVq1ODHH39Mn/9nypQp3HHHHenHeeeddxg5ciQdO3YEICwsjIiICCZMmEDPnj25cOECFy5coGjRoqZj3X333abbEyZMIG/evKxYsYL7778/ffsjjzzCE088kX67e/fuvPrqq+nNpjJlyvDOO+/w8ssvpzeEBg0alL5/WFgY77zzDv369btuQ2jixIlcvnz5pt+rf4uKigKgcOHCpu2FCxfm6NGjJCYmEhgY6PC4Q4cO8ccff/Doo4+yaNEiDhw4wIABA0hNTWXIkCHp++3cuZMGDRqQmJhIUFAQc+fOpUqVKlet5fvvvyd37tzpfx//VqxYMY4cOXJLr/FmqSGUQ2w7doGnJm3iTHwSeQJ8GPdoHRqWvcnRPPv3w+DBcGUyrPz5YdgweOop+Fe3XERERERE5L9y+XgR8XYry46dEc2bN2f8+PEkJCQwceJE9u/fz7PPPuuwn/GveYL+u/3KAj3//j4jtm3bRvHixW97hbfq1aubmkFgn3vmm2++4c0338QwDKZOnZrecDl79izHjh3jySefpE+fPumPSU1NJSQkBCC9yeLv72963jNnzjBkyBD++OMPTp8+TVpaGgkJCURGRpr2q1u3run25s2b2bhxI++99176tivNuISEBAICAli2bBnDhw8nIiKCuLg4UlNTSUxM5NKlS1dtzACmeXtu1X//7m7092mz2ShUqBBffvklXl5ehIeHc/LkST766CNTQ6hixYps27aNCxcuMHv2bHr27MmKFSuu2hT65ptvePTRRx3eb4BcuXKRkJBwG6/wxtQQcmFdu3YFYNHOUwyavo3kVBsVC+fmqx51KZk/4Oaf6NgxezPI2xuefRbefBPy5s2iqkWyzpVMiIidMiHiSLkQMcuMTHh4eNzyZVvZLTAwkHLlygEwZswYmjdvzrBhw3jnnXcA0ps0e/bsoWHDhg6P37t3b/ov9hUqVCA2NpZTp05laJRQrly5rnu/p6enQ0PqahM0X61Z8sgjj/Dqq6+yZcsWLl++zLFjx+jWrRtgb2iA/bKx+vXrmx535ZKn/Pnz4+Hh4bDyWq9evTh79iyffPIJpUqVws/PjwYNGjhMZv3fmmw2G8OGDbvqCBh/f3+OHj1K27Zt6du3L++88w758uVj9erVPPnkk9edlPp2LhkLDbUvtBQVFWX6eztz5gyFCxcmX758V31ckSJF8PHxMV0eVrlyZaKiokhOTk5vzvn6+qb/G6tbty4bN27k008/ZcKECabnW7VqFfv27WP69OlXPV5MTAwFCxa87mu8XVplzIUtXLiQ5FQbr87eQXKqjRaVCzO7f8MbN4OSk2HDhn9u33MPDB9uv1Rs1Cg1g8RlLVy40OoSRJyKMiHiSLkQMXP3TLz11lt8/PHHnDxpn4O1ZcuW5MuX76orhC1YsIADBw7w8MMPA9C5c2d8fX358MMPr/rcFy
5cuOr2GjVqcPz48fRLz/6rYMGCREVFmZpC27Ztu6nXU7x4cZo2bcqUKVOYMmUKLVq0SL80qnDhwhQrVoxDhw5Rrlw501dYWBhgb2ZUqVKFiIgI0/OuWrWKgQMH0rZtW6pWrYqfnx/R0dE3rKdOnTrs27fP4XjlypXD09OTTZs2kZqaysiRI7nzzjupUKFC+t/F9UycOJFt27Zd9+tawsLCCA0NZenSpenbkpOTWbFiBQ0bNiQ2Nvaqj2vUqBF//fVXemMN7JcYFilSxGGk1r8ZhkFSUpLD9q+//prw8HBq1qx51cft2rWL2rVrX/N5M4NrtHHlqi5dusSag9HEJaZSMLcfE7qH4+V5nSGLhgE//QQvvggnT8KBA3ClI/raa9lTtEgWunTpktUliDgVZULEkXIhYubumWjWrBlVq1Zl+PDhjB07lsDAQCZMmEC3bt146qmneOaZZwgODub333/npZdeonPnznTp0gWAEiVKMHr0aJ555hni4uLo0aMHpUuX5vjx40yaNImgoKCrNpbuuusumjZtSqdOnRg1ahTlypVj7969eHh40Lp1a5o1a8bZs2f58MMP6dy5M4sXL+aXX34hODj4pl7To48+ytChQ0lOTmb06NGm+4YOHcrAgQMJDg6mTZs2JCUlsWnTJs6fP8/zzz8PQKtWrVi9erVpbp9y5coxefJk6tatS1xcHC+99NINRzoBDBkyhPvvv58SJUrw0EMP4enpyY4dO9i5cyfvvvsuZcuWJTU1lc8++4x27dqxZs0avvjiixs+7+1cMubh4cGgQYMYPnw45cuXp3z58gwfPpyAgAAeeeSR9FFPPXr0oFixYowYMQKAfv368dlnn/Hcc8/x7LPPcuDAAYYPH87AgQPTn/v111+nTZs2lChRgvj4eKZNm8by5ctZvHixqYa4uDhmzpx51X8fV6xatSp95FqWueE6ZDlMTlp2fsWKFcZLM7cZpV5ZaLw5b+f1d96+3TDuvtsw7G0hwyhUyDCWL8+eQkWyyYoVK6wuQcSpKBMijpQLEbOMZuJ6y107u6stO28YhjFlyhTD19fXiIyMTN+2cuVKo3Xr1kZISIjh6+trVKlSxfj444+N1NRUh8cvXbrUaNWqlZE3b17D39/fqFSpkvHiiy8aJ0+evGYt586dMx5//HEjf/78hr+/v1GtWjVj4cKF6fePHz/eKFGihBEYGGj06NHDeO+996667PzVnD9/3vDz8zMCAgKuunz7lClTjFq1ahm+vr5G3rx5jaZNmxpz5sxJv3/Pnj1Grly5jAsXLqRv27Jli1G3bl3Dz8/PKF++vDFz5kyjVKlSxujRo9P3AYy5c+c6HG/x4sVGw4YNjVy5chnBwcFGvXr1jC+//DL9/lGjRhlFihQxcuXKZbRq1cqYNGmSARjnz5+/5vt3u2w2m/HWW28ZoaGhhp+fn9G0aVNj507779RxcXGGYRjGXXfdZfTs2dP0uLVr1xr169c3/Pz8jDJlyhjvvfee6d/EE088YZQqVcrw9fU1ChYsaNxzzz3GkiVLHI4/YcIEh/f4v8fJkyePkZCQcNX7M2vZeQ/DuMZsWTlUXFwcISEhxMbG3nSH1VmdPhtNq/FbuJCQwtQ+d9KgbP6r7HTaPifQ11+DzWZfMWzwYPuIIBd//SL/FRMTc81rfkXckTIh4ki5EDHLaCYSExM5fPgwYWFhV50IV3KGLl26ULt2bV5zwytJUlNT8fa29mKqhx56iNq1a/P6669f9f7r5TAjPQ/NIeTCvpj7BxcSUigQ5Eu9sKt8iF+6BFWrwldf2ZtBDz0Ee/bAiBFqBkmO9Ouvv1pdgohTUSZEHCkXImbKhFzNRx99RFBQkNVlWOJacwhll6SkJGrWrMngwYOz/FiaQ8iF7bpgXxK+VdXQq88dFBgIPXvCypUwejQ0bpzNFYqIiIiIiIirKVWqFM8++6zVZbglPz8//ve//2XLsTRCyEWlptnYn2CfxKtt9b8nht68GZ
o1gy1b/tnxvfdg/Xo1g8Qt/Hf5TBF3p0yIOFIuRMyUCREzdxoZZXlDaNy4cenXvYWHh7Nq1arr7r9ixQrCw8Px9/enTJkyNzUDeU60/nAMsYlp5Av0pb5fIvTqBXfcAStWwL+vM/T3B0/L/5pFssXFixetLkHEqSgTIo6UCxEzZULELC0tzeoSso2lnYLp06czaNAg3njjDbZu3UqTJk1o06YNkZGRV93/8OHDtG3bliZNmrB161Zef/11Bg4cyOzZs7O5cust2nkK/5REPoiYh3elivD99/b1wx57zD5nkIgb2r17t9UliDgVZULEkXIhYqZMiJhdvnzZ6hKyjaVzCI0aNYonn3yS3r17A/DJJ5/w66+/Mn78eEaMGOGw/xdffEHJkiX55JNPAKhcuTKbNm3i448/plOnTtlZuqXSbAZJM2fxx8JxFI2Ptm9s0AA++QTq1bO0NhERERERERFxfpaNEEpOTmbz5s20bNnStL1ly5asXbv2qo9Zt26dw/6tWrVi06ZNpKSkXPUxSUlJxMXFmb5c3aR1R/A/F03R+GiMkiVh2jRYs0bNIHF7nTt3troEEaeiTIg4Ui5EzJQJEbO8efNaXUK2sWyEUHR0NGlpaRQuXNi0vXDhwkRFRV31MVFRUVfdPzU1lejoaIoUKeLwmBEjRjBs2DCH7TNnziQgIICOHTvy+++/ExsbS6FChahXrx4LFy4EoE6dOthsNrZt2wZA+/btWb16NefOnSNfvnw0bdqUefPmAVCjRg18fHzYvHkzAPfddx+bNm3i9OnTBAcH07JlS2bNmgVA1apVCQoKYv369YC9qbVr1y5OnDhBYGAg999/P9OnTwegYsWKFChQgDVr1gDQokULcl88xh/1WzDJP54e0yYwfcECbNOmUbZsWYoVK8bKlSsBaNasGZGRkRw6dAhvb28eeughZs+eTXJyMqVKlaJs2bL88ccfADRu3JgzZ86wf/9+AB5++GHmz59PQkICxYsXp0qVKixZsgSABg0aEBsbS0REBAAPPfQQixcvJj4+ntDQUOrUqcOiRYsAuOOOO0hMTGTnzp0APPjggyxfvpzz589ToEABGjRowE8//QRA7dq1Adi6dSsA7dq1Y926dURHR5M3b16aNWvG3LlzAahevTr+/v5s3LgRgLZt27JlyxaioqLInTs3rVu3ZubMmQBUqVKFkJAQ1q1bB9ibjhERERw/fpyAgADat2/P1KlTAahQoQKFChVi9erVANx9990cPHiQo0eP4uvrS6dOnZg5cyapqamUKVOGkiVLsnz5cgCaNm3KiRMnOHjwIJ6ennTt2pU5c+aQlJREyZIlqVChAr/99hsAjRo1Ijo6mn379gHQtWtXFi5cyKVLlyhWrBjVqlVLXwK0fv36XLx4MX04b+fOnVmyZAlxcXEULlyYunXr8vPPPwMQHh5OSkoKO3bsAKBDhw6sXLmSmJgY8ufPT+PGjZk/fz4AtWrVwtPTky1/T0J+//33s2HDBs6cOUNISAj33HMPc+bMAaBatWoEBASwYcMGANq0acP27ds5efIkQUFBtG3blhkzZgBQqVIl8uXLl97Yvffee9m7dy/Hjh0jV65cdOjQgWnTpmEYBuXLlyc0NDR97rDmzZtz5MgRDh8+jI+PD507d2bWrFmkpKQQFhZG6dKlWbZsGQBNmjQhKiqKAwcO4OHhQbdu3ZgwYQL58+enRIkSVKpUiaVLlwLQsGFDYmJi2Lt3LwBdunRh0aJFXLx4kaJFi1KzZk1++eUXAOrVq0dCQgK7du0CcMnPiP379xMZGYmfnx8dO3Zk+vTp2Gw2fUa44WdEfHw83bp102fE358R8+bN4/Lly/qMcPPPiNOnT9OiRQt9RujnCH1GYP+M2L9/P4ZhADf3GbFu3TpKly5NcnIyqampJCUl4eHhQb58+Th//jw2mw0/Pz/8/PzS/zM+d+7cpKSkkJiYCED+/Pm5cOECaWlp+Pr6kitXrvSlvoOCgk
hLS0u/bCdfvnzExsaSlpaGj48PAQEB6fsGBgZiGAYJCQmA/Rf5+Ph4UlNT8fHxITAwkAsXLgAQEBAAkL5vnjx5uHTpEikpKXh7e5M7d27Onz+fvq+HhweXLl0CICQkhISEBFJSUvDy8iIkJISYmBgAcuXKhZeXV/pcTCEhIVy+fJnk5GS8vLzIkycP586dA8Df3x8fHx/i4+MBCA4OJikpiaSkJDw9PcmbNy8xMTEYhoGfnx++vr7p+/77PbzR+x0UFERqamr6+/3v9/BG73fevHmJi4tLf7///R5e7/329vYmKCjI9H7/+z283vudK1cuPD09Te/3v9/D673fwcHBJCYmkpyc7PAeXu/9vvIe/vv9vvIepqamkjdv3mu+376+vvj7+5ve72v9m/3v+x0YGIjNZjO939f6N/vf9ztPnjxcvHiR1NRUbDYbhmGwcOFCUlJSTJ8RV/a/GR7GlfRns5MnT1KsWDHWrl1LgwYN0re/9957TJ48Of0D998qVKjA448/zmuvvZa+bc2aNTRu3JhTp04RGhrq8Jgrf+FXxMXFUaJECWJjYwkODs7kV5V9ElPSmDNzBo888rDVpYg4jalTp/Lww8qEyBXKhIgj5ULELKOZSExM5PDhw+kLA4nkNOfOnSN//vxWl3Fd18thXFwcISEhN9XzsOySsQIFCuDl5eUwGujMmTMOo4CuCA0Nver+3t7e1/wL8/PzIzg42PSVE/j7eBEaevX3ScRdXeuzQ8RdKRMijpQLETNlIvuULl06fT5cd9SsWTMGDRqUfttZ3w8fHx+rS8g2ljWEfH19CQ8PTx+KecXSpUtp2LDhVR/ToEEDh/2XLFlC3bp13eov7Yq6detaXYKIU1EmRMyUCRFHyoWImTtlolevXnh4eODh4YG3tzclS5akX79+6Zcu5VRDhw5Nf90eHh6EhITQpEkTVqxYYWldGzdu5KmnnrK0hqsJDAy0uoRsY+my888//zwTJ07km2++Yc+ePQwePJjIyEj69u0LwGuvvUaPHj3S9+/bty9Hjx7l+eefZ8+ePXzzzTd8/fXXvPjii1a9BEtdud5bROyUCREzZULEkXIhYuZumWjdujWnTp3iyJEjTJw4kZ9++on+/ftbXVaWq1q1KqdOneLUqVOsW7eO8uXLc//996fPbWOFggULps/t5EyuzOHjDixtCHXt2pVPPvmEt99+m1q1arFy5UoWLVpEqVKlADh16hSRkZHp+4eFhbFo0SKWL19OrVq1eOeddxgzZoxbLTkvIiIiIiLilC5duvbX3xMc39S+f0+4e8N9b4Gfnx+hoaEUL16cli1b0rVr1/RJ7wHS0tJ48sknCQsLI1euXFSsWJFPP/3U9By9evWiQ4cOfPzxxxQpUoT8+fMzYMAA08rXZ86coV27duTKlYuwsDCmTJniUEtkZCTt27cnKCiI4OBgunTpwunTp9PvHzp0KLVq1eKbb76hZMmSBAUF0a9fP9LS0vjwww8JDQ2lUKFCvPfeezd83d7e3oSGhhIaGkqVKlUYNmwYFy9eTF8MAGDUqFFUr16dwMBASpQoQf/+BKpB8gAAH4tJREFU/dMnbwY4evQo7dq1I2/evAQGBlK1atX0RQAAIiIiaNu2LUFBQRQuXJju3bsTHR19zZr+e8mYh4cHEydO5MEHHyQgIIDy5cuzYMEC02Myegy5PksbQgD9+/fnyJEjJCUlsXnzZpo2bZp+33fffZe+8sIVd911F1u2bCEpKYnDhw+njyZyR+Hh4VaXIOJUlAkRM2VCxJFyIWKWqZkICrr213//E79QoWvv26aNed/Spa++3206dOgQixcvNk0/YrPZKF68ODNmzCAiIoIhQ4bw+uuvp6+Ed8WyZcs4ePAgy5Yt4/vvv+e7777ju+++S7+/V69eHDlyhD/++INZs2Yxbtw4zpw5k36/YRh06NCBmJgYVqxYwdKlSzl48CBdu3Y1HefgwYP88ssvLF68mKlTp/LNN99w33
33cfz4cVasWMEHH3zA//73P/7888+bft1JSUl899135MmTh4oVK6Zv9/T0ZMyYMezatYvvv/+eP/74g5dffjn9/gEDBpCUlMTKlSvZuXMnH3zwAUF//z2cOnWKu+66i1q1arFp0yYWL17M6dOn6dKly03XBTBs2DC6dOnCjh07aNu2LY8++mj6CmOZdYwbcadLxixbdl5u37870CKiTIj8lzIh4ki5EDFzt0wsXLgwfZnwK8uyjxo1Kv1+Hx8fhg0bln47LCyMtWvXMmPGDFPjIW/evIwdOxYvLy8qVarEfffdx++//06fPn3Yv38/v/zyC3/++Sf169cH4Ouvv6Zy5crpj//tt9/YsWMHhw8fpkSJEgBMnjyZqlWrsnHjRu644w7A3qD65ptvyJ07N1WqVKF58+bs27ePRYsW4enpScWKFfnggw9Yvnw5d9555zVf986dO9ObNwkJCeTOnZvp06ebFl3694TPYWFhvPPOO/Tr149x48YB9hFNnTp1onr16gCUKVMmff/x48dTp04dhg8fnr7tm2++oUSJEuzfv58KFSpc9+/lil69eqWvejd8+HA+++wzNmzYQOvWrTPtGDdi0ULsllBDyIXt2LGDqlWrWl2GiNNQJkTMlAkRR8qFiFmmZuJflxc58PIy3/7XaBkHnv+5kOXIkVsu6b+aN2/O+PHjSUhIYOLEiezfv59nn33WtM8XX3zBxIkTOXr0KJcvXyY5OZlatWqZ9qlatSpe/3pNRYoUYefOnQDs2bMHb29v04TdlSpVIk+ePOm39+zZQ4kSJdKbQQBVqlQhT5487NmzJ70hVLp0aXLnzp2+T+HChfHy8sLzX+9R4cKFTaOPrqZixYrpl1/Fx8czffp0HnroIZYtW5Ze57Jlyxg+fDgRERHExcWRmppKYmIily5dIjAwkIEDB9KvXz+WLFlCixYt6NSpEzVq1ABg8+bNLFu2LL3p9G8HDx686WbNlecD+0id3Llzp7+2zDrGjSQkJJArV65MeS5nZ/klYyIiIiIiIpIDBAZe+8vf/+b3/e8v49fa75ZKDKRcuXLUqFGDMWPGkJSUZBoRNGPGDAYPHswTTzzBkiVL2LZtG48//jjJycmm5/nvKtceHh7YbDbgnxEmHh4e16zDMIyr3v/f7Vc7zvWOfS2+vr6UK1eOcuXKUbt2bd5//32KFSuWPofP0aNHadu2LdWqVWP27Nls3ryZzz//HPhnFFnv3r05dOgQ3bt3Z+fOndStW5fPPvsMsI9kateuHdu2bTN9HThwwDQtzI1c77Vl1jHkHxoh5MI6dOhgdQkiTkWZEDFTJkQcKRciZu6eibfeeos2bdrQr18/ihYtyqpVq2jYsKFp5bGDBw9m6DkrV65MamoqmzZtol69egDs27fPtHpVlSpViIyM5NixY+mjhCIiIoiNjTVdWpaVvLy8uPz3BN6bNm0iNTWVkSNHpo8++u+8SQAlSpSgb9++9O3bl9dee42vvvqKZ599ljp16jB79mxKly6Nt3fWtBmy4xhgvxzQXWiEkAtbuXKl1SWIOBVlQsRMmRBxpFyImLl7Jpo1a0bVqlXT56UpV64cmzZt4tdff2X//v28+eabbNy4MUPPWbFiRVq3bk2fPn1Yv349mzdvpnfv3qbLkFq0aEGNGjV49NFH2bJlCxs2bKBHjx7cddddpkvNMktqaipRUVFERUVx4MAB3n33XSIiImjfvj0AZcuWJTU1lc8++4xDhw4xefJkvvjiC9NzDBo0iF9//ZXDhw+zZcsW/vjjj/Tm1YABA4iJieHhhx9mw4YNHDp0iCVLlvDEE0+QlpaWKa8hO44B9kvq3IUaQi7symzrImKnTIiYKRMijpQLETNlAp5//nm++uorjh07Rt++fenYsSNdu3alfv36nDt3zjRa6GZ9++23lChRgrvuuouOHTvy1FNPUahQofT7PTw8mDdvHnnz5qVp06a0aNGCMmXKMH369Mx8ael2795NkSJFKF
KkCLVq1WLGjBmMHz+eHj16AFCrVi1GjRrFBx98QLVq1ZgyZQojRowwPUdaWhoDBgygcuXKtG7dmooVK6ZPOF20aFHWrFlDWloarVq1olq1ajz33HOEhISY5ju6HdlxDLA3z9yFh+FOU2gDcXFxhISEEBsba5pR3RUtWbKEli1bWl2GiNNQJkTMlAkRR8qFiFlGM5GYmMjhw4cJCwvD/7/zAonkALGxsYSEhFhdxnVdL4cZ6XlohJALa9y4sdUliDgVZULETJkQcaRciJgpEyJmV1vFLKdSQ8iFzZ8/3+oSRJyKMiFipkyIOFIuRMyUCRGzf0/+ndOpISQiIiIiIiIi4mbUEHJhtWrVsroEEaeiTIiYKRMijpQLETNlQsQsICDA6hKyjRpCLiwzZ1IXyQmUCREzZULEkXIhYnarmXCztYnEjXh4eFhdwg1lVv50RnRhW7ZssboEEaeiTIiYKRMijpQLEbOMZsLHxweAhISErChHxHKXLl2yuoQbSk5OBsDLy+u2nsc7M4oRERERERGRnM/Ly4s8efJw5swZwH55jSuMqBC5WcnJySQmJlpdxjXZbDbOnj1LQEAA3t6319LxMNxsrF9cXBwhISHExsYSHBxsdTm3JT4+nty5c1tdhojTUCZEzJQJEUfKhYjZrWTCMAyioqLcajUmcR82m83pLy/29PQkLCwMX19fh/sy0vPQCCEXtmHDBu655x6ryxBxGsqEiJkyIeJIuRAxu5VMeHh4UKRIEQoVKkRKSkoWVSZijXXr1tGgQQOry7guX1/fTGlaqSHkwq4M0xQRO2VCxEyZEHGkXIiY3U4mvLy8bnsOExFnExUVhb+/v9VlZAvnHgcl1xUSEmJ1CSJORZkQMVMmRBwpFyJmyoSImTtlQnMIubCkpCT8/PysLkPEaSgTImbKhIgj5ULETJkQMXP1TGSk56ERQi5szpw5Vpcg4lSUCREzZULEkXIhYqZMiJi5Uybcbg6hKwOi4uLiLK7k9iUkJOSI1yGSWZQJETNlQsSRciFipkyImLl6Jq7UfjMXg7ndJWPHjx+nRIkSVpchIiIiIiIiIpIljh07RvHixa+7j9s1hGw2GydPniR37tx4eHhYXc4ti4uLo0SJEhw7dszl50ISyQzKhIiZMiHiSLkQMVMmRMxyQiYMwyA+Pp6iRYvecGl6t7tkzNPT84ZdMlcSHBzssv9QRbKCMiFipkyIOFIuRMyUCREzV8/Eza6UpkmlRURERERERETcjBpCIiIiIiIiIiJuRg0hF+Xn58dbb72Fn5+f1aWIOAVlQsRMmRBxpFyImCkTImbulgm3m1RaRERERERERMTdaYSQiIiIiIiIiIibUUNIRERERERERMTNqCEkIiIiIiIiIuJm1BASEREREREREXEzagg5sXHjxhEWFoa/vz/h4eGsWrXquvuvWLGC8PBw/P39KVOmDF988UU2VSqSPTKSiTlz5nDvvfdSsGBBgoODadCgAb/++ms2ViuS9TJ6nrhizZo1eHt7U6tWrawtUCSbZTQTSUlJvPHGG5QqVQo/Pz/Kli3LN998k03VimSPjOZiypQp1KxZk4CAAIoUKcLjjz/OuXPnsqlakay1cuVK2rVrR9GiRfHw8GDevHk3fExO/j1bDSEnNX36dAYNGsQbb7zB1q1badKkCW3atCEyMvKq+x8+fJi2bdvSpEkTtm7dyuuvv87AgQOZPXt2NlcukjUymomVK1dy7733smjRIjZv3kzz5s1p164dW7duzebKRbJGRjNxRWxsLD169OCee+7JpkpFssetZKJLly78/vvvfP311+zbt4+pU6dSqVKlbKxaJGtlNBerV6+mR48ePPnkk+zevZuZM2eyceNGevfunc2Vi2SNS5cuUbNmTcaOHXtT++f037O17LyTql+/PnXq1GH8+PHp2ypXrkyHDh0YMWKEw/6vvPIKCxYsYM+ePenb+vbty/bt21m3bl221CySlTKaiaupWrUqXbt2Zc
iQIVlVpki2udVMdOvWjfLly+Pl5cW8efPYtm1bNlQrkvUymonFixfTrVs3Dh06RL58+bKzVJFsk9FcfPzxx4wfP56DBw+mb/vss8/48MMPOXbsWLbULJJdPDw8mDt3Lh06dLjmPjn992yNEHJCycnJbN68mZYtW5q2t2zZkrVr1171MevWrXPYv1WrVmzatImUlJQsq1UkO9xKJv7LZrMRHx+vH/olR7jVTHz77bccPHiQt956K6tLFMlWt5KJBQsWULduXT788EOKFStGhQoVePHFF7l8+XJ2lCyS5W4lFw0bNuT48eMsWrQIwzA4ffo0s2bN4r777suOkkWcTk7/Pdvb6gLEUXR0NGlpaRQuXNi0vXDhwkRFRV31MVFRUVfdPzU1lejoaIoUKZJl9YpktVvJxH+NHDmSS5cu0aVLl6woUSRb3UomDhw4wKuvvsqqVavw9tbpX3KWW8nEoUOHWL16Nf7+/sydO5fo6Gj69+9PTEyM5hGSHOFWctGwYUOmTJlC165dSUxMJDU1lQceeIDPPvssO0oWcTo5/fdsjRByYh4eHqbbhmE4bLvR/lfbLuKqMpqJK6ZOncrQoUOZPn06hQoVyqryRLLdzWYiLS2NRx55hGHDhlGhQoXsKk8k22XkPGGz2fDw8GDKlCnUq1ePtm3bMmrUKL777juNEpIcJSO5iIiIYODAgQwZMoTNmzezePFiDh8+TN++fbOjVBGnlJN/z9Z/ETqhAgUK4OXl5dC5P3PmjEN38orQ0NCr7u/t7U3+/PmzrFaR7HArmbhi+vTpPPnkk8ycOZMWLf7f3p0HVVW+cQD/XuCyCqgJXhSDCHRcUSACF0DcLRXSdBq8gbsWIZrriALmQo4omWtqoKjolGgjjgslCOboYIKE3MEkIa1bWgKCyCK8vz8czq8ji6KC1f1+Zs4f57zvOe/zXu6Zy3nmfd8ztCXDJGo1zb0nSktLcenSJWRmZiI4OBjAo4dhIQQMDAxw+vRp+Pr6tkrsRC3hWX4nbGxs0LlzZ1haWkrHunfvDiEEbt26BScnpxaNmailPct9sXbtWgwYMAALFy4EAPTp0wdmZmYYNGgQVq1a9a8fDUHUXP/152yOEPoHMjQ0hKurK5KTk2XHk5OT0b9//wbP8fT0rFf/9OnTcHNzg1KpbLFYiVrDs9wTwKORQUFBQThw4ADnvtN/SnPvCQsLC/z444/IysqSttmzZ6Nbt27IysrCm2++2VqhE7WIZ/mdGDBgAH777TeUlZVJx65duwY9PT3Y2tq2aLxEreFZ7ovy8nLo6ckfEfX19QH8f1QEkS75zz9nC/pHOnjwoFAqlWL37t0iNzdXhIaGCjMzM1FQUCCEEGLJkiVCrVZL9X/++Wdhamoq5s2bJ3Jzc8Xu3buFUqkUX3/99cvqAtEL1dx74sCBA8LAwEBs2bJFaLVaaSsuLn5ZXSB6oZp7TzwuPDxcODs7t1K0RC2vufdEaWmpsLW1FRMmTBBXr14VZ8+eFU5OTmL69OkvqwtEL1xz74vY2FhhYGAgtm7dKvLz88W5c+eEm5ubcHd3f1ldIHqhSktLRWZmpsjMzBQAxIYNG0RmZqYoLCwUQujeczYTQv9gW7ZsEXZ2dsLQ0FC4uLiIs2fPSmWBgYHC29tbVj81NVX069dPGBoaCnt7e7Ft27ZWjpioZTXnnvD29hYA6m2BgYGtHzhRC2nu78TfMSFE/0XNvSc0Go0YOnSoMDExEba2tmL+/PmivLy8laMmalnNvS82bdokevToIUxMTISNjY0ICAgQt27dauWoiVpGSkpKk88IuvacrRCCY/+IiIiIiIiIiHQJ1xAiIiIiIiIiItIxTAgREREREREREekYJoSIiIiIiIiIiHQME0JERERERERERDqGCSEiIiIiIiIiIh3DhBARERERERERkY5hQoiIiIiIiIiISMcwIUREREREREREpGOYECIiIqJ/rLi4OLRt2/a5rxMREYGOHTtCoVDg6N
Gjz329f6qCggIoFApkZWU1Wc/HxwehoaHSfnl5OcaPHw8LCwsoFAoUFxc/U/tqtRpr1qx5pnOfx4IFCxASEtLq7RIREf2bMSFERESkgxQKRZNbUFDQyw7xhdFoNIiMjMSOHTug1WoxatSolx1Si+nSpQu0Wi169eoFAEhNTW0wwZOYmIhPPvlE2t+zZw/S09Nx/vx5aLVaWFpaNrvt7OxsHD9+HB999JF0zMfHp8Hv18OHD+uVGxkZoWvXrlizZg1qampk8ddtr7zyCnx9ffH999/L2l60aBFiY2Nx48aNZsdNRESkq5gQIiIi0kFarVbaYmJiYGFhITv22WefvewQX5j8/HwAwLhx46BSqWBkZPSSI2o5+vr6UKlUMDAwaLJe+/btYW5uLu3n5+eje/fu6NWrF1QqFRQKRbPb3rx5M959913ZdQFgxowZsu+WVquVxVdXnpeXh5CQEISFhWH9+vWya+Tl5UGr1SI1NRVWVlZ46623cPv2banc2toaw4cPx/bt25sdNxERka5iQoiIiEgHqVQqabO0tIRCoZD2lUolZs+eDVtbW5iamqJ3795ISEiQnW9vb4+YmBjZsb59+yIiIgLAo5EdhoaGSE9Pl8qjo6PRoUMHaLXaRuOKi4vDq6++ClNTU/j7++Ovv/6qV+fYsWNwdXWFsbExHBwcEBkZKY04eVxERATGjBkDANDT05MSHRkZGRg2bBg6dOgAS0tLeHt74/Lly9J5DU29Ki4uhkKhQGpqKgBg5cqV6NSpkyzGsWPHwsvLC7W1tQ3GExQUBD8/P0RGRsLa2hoWFhaYNWsWqqqqpDqVlZUICQmBtbU1jI2NMXDgQGRkZEjlRUVFCAgIgJWVFUxMTODk5ITY2Nh6cRcUFGDw4MEAgHbt2slGfv19ypiPjw+io6ORlpYGhUIBHx8fAMDWrVvh5OQEY2NjdOzYERMmTGiwTwBQW1uLr776CmPHjq1XZmpqKvu+qVSqBsvt7e0RHByMIUOG1JvWZ21tDZVKhd69eyMsLAwlJSW4ePGirM7YsWPrfU+JiIiocUwIERERkUxFRQVcXV2RlJSEnJwczJw5E2q1ut4DeFPqEg5qtRolJSW4cuUKli1bhp07d8LGxqbBcy5evIipU6figw8+QFZWFgYPHoxVq1bJ6pw6dQqTJ09GSEgIcnNzsWPHDsTFxWH16tUNXnPBggVSsqRudAoAlJaWIjAwEOnp6bhw4QKcnJwwevRolJaWPnUfly1bBnt7e0yfPh0AsH37dqSlpSE+Ph56eo3/i/Xdd99Bo9EgJSUFCQkJOHLkCCIjI6XyRYsW4fDhw9izZw8uX74MR0dHjBgxAnfv3gUALF++HLm5uThx4gQ0Gg22bduGDh061GunS5cuOHz4MID/j7BpaORXYmIiZsyYAU9PT2i1WiQmJuLSpUsICQnBypUrkZeXh5MnT8LLy6vRPmVnZ6O4uBhubm5P9+E1wcTEBNXV1Q2WlZeXS39PpVIpK3N3d8fNmzdRWFj43DEQERHpBEFEREQ6LTY2VlhaWjZZZ/To0eLjjz+W9u3s7MTGjRtldZydnUV4eLi0X1lZKfr16ycmTpwoevbsKaZPn95kG++9954YOXKk7NikSZNksQ0aNEisWbNGVic+Pl7Y2Ng0et0jR46IJ/3L8/DhQ2Fubi6OHTsmhBDixo0bAoDIzMyU6hQVFQkAIiUlRTqWn58vzM3NxeLFi4WpqanYt29fk+0EBgaK9u3bi/v370vHtm3bJtq0aSNqampEWVmZUCqVYv/+/VJ5VVWV6NSpk1i3bp0QQogxY8aIKVOmNHj9x+NOSUkRAERRUZGsnre3t5g7d660P3fuXOHt7S3tHz58WFhYWIh79+412Z86R44cEfr6+qK2trZeO0qlUpiZmUnb/PnzG4yjpqZGnDhxQhgaGopFixbJ4q87V6FQCADC1dVVVFVVydoqKSkRAERqaupTxUxERKTrmp5gTkRERDqnpqYGUV
FROHToEH799VdUVlaisrISZmZmzbqOoaEh9u3bhz59+sDOzq7eFLPHaTQa+Pv7y455enri5MmT0v4PP/yAjIwM2YigmpoaVFRUoLy8HKampk8V2+3bt7FixQqcOXMGf/zxB2pqalBeXo5ffvnl6TsIwMHBAevXr8esWbMwadIkBAQEPPEcZ2dnWZyenp4oKyvDzZs3UVJSgurqagwYMEAqVyqVcHd3h0ajAQDMmTMH48ePx+XLlzF8+HD4+fmhf//+zYr7SYYNGwY7Ozs4ODhg5MiRGDlyJPz9/Rv9fB88eAAjI6MG1x4KCAjAsmXLpP3H3xq3detW7Nq1S5o2p1arER4eLquTnp4OMzMzZGZmYvHixYiLi6s3QsjExATAo1FERERE9GRMCBEREZFMdHQ0Nm7ciJiYGPTu3RtmZmYIDQ2VrXOjp6cHIYTsvIam+Zw/fx4AcPfuXdy9e7fJpNLj12tIbW0tIiMj8c4779QrMzY2fuL5dYKCgnDnzh3ExMTAzs4ORkZG8PT0lPpYN+Xr7zE1No0pLS0N+vr6KCgowMOHD5+4oHNjFAqF1N7jiRUhhHRs1KhRKCwsxPHjx/Htt99iyJAh+PDDD+stxPw8zM3NcfnyZaSmpuL06dNYsWIFIiIikJGRUS+hAwAdOnRAeXk5qqqqYGhoKCuztLSEo6Njo23VJYyMjIzQqVMn6Ovr16vz2muvoW3btujatSsqKirg7++PnJwc2QLhdVPqrKysnrHXREREuoVrCBEREZFMeno6xo0bh8mTJ8PZ2RkODg746aefZHWsrKxki0Pfu3ev3iu/8/PzMW/ePOzcuRMeHh54//33G11sGQB69OiBCxcuyI49vu/i4oK8vDw4OjrW25pat6ehPoaEhGD06NHo2bMnjIyM8Oeff8r6B0DWx78vMF3n0KFDSExMRGpqKm7evCl7lXtjrly5ggcPHsj62KZNG9ja2sLR0RGGhoY4d+6cVF5dXY1Lly6he/fusviCgoKwb98+xMTE4IsvvmiwrbrkTN1r3JvDwMAAQ4cOxbp165CdnY2CggKcOXOmwbp9+/YFAOTm5ja7nbqEUZcuXRpMBj1OrVajtrYWW7dulR3PycmBUqlEz549mx0DERGRLmJCiIiIiGQcHR2RnJyM8+fPQ6PRYNasWfj9999ldXx9fREfH4/09HTk5OQgMDBQ9jBfU1MDtVqN4cOHY8qUKYiNjUVOTg6io6MbbTckJAQnT57EunXrcO3aNWzevFk2XQwAVqxYgb179yIiIgJXr16FRqPBoUOHEBYW1uw+xsfHQ6PR4OLFiwgICJCmHAGPph95eHggKioKubm5SEtLq9fGrVu3MGfOHHz66acYOHAg4uLisHbt2npJrMdVVVVh2rRp0sLQ4eHhCA4Ohp6eHszMzDBnzhwsXLgQJ0+eRG5uLmbMmIHy8nJMmzZN+gy++eYbXL9+HVevXkVSUpIsWfR3dnZ2UCgUSEpKwp07d1BWVvZUn09SUhI2bdqErKwsFBYWYu/evaitrUW3bt0arG9lZQUXFxdZIqul6OnpITQ0FFFRUbLpYenp6Rg0aJDs70hERESNY0KIiIiIZJYvXw4XFxeMGDECPj4+UKlU8PPzk9VZunQpvLy88Pbbb2P06NHw8/PD66+/LpWvXr0aBQUF0sgVlUqFXbt2ISwsrMGRNgDg4eGBXbt24fPPP0ffvn1x+vTpekmYESNGICkpCcnJyXjjjTfg4eGBDRs2wM7Orll9/PLLL1FUVIR+/fpBrVZLr3l/vE51dTXc3Nwwd+5c2RvPhBAICgqCu7s7goODATxadyc4OBiTJ09uMvEyZMgQODk5wcvLCxMnTsSYMWMQEREhlUdFRWH8+PFQq9VwcXHB9evXcerUKbRr1w7Ao1E/S5cuRZ8+feDl5QV9fX0cPHiwwbY6d+6MyMhILFmyBB07dpRifZK2bdsiMTERvr6+6N69O7Zv346EhIQmR9/MnDkT+/fvf6
rrP6+pU6eiuroamzdvlo4lJCRgxowZrdI+ERHRf4FCPM2EfSIiIiJ6bkFBQSguLsbRo0dfdigvXEVFBbp164aDBw/C09OzVds+fvw4Fi5ciOzs7Gdew4mIiEjXcIQQERERET03Y2Nj7N27V7YWU2u5f/8+YmNjmQwiIiJqBv5qEhEREdEL4e3t/VLanThx4ktpl4iI6N+MU8aIiIiIiIiIiHQMp4wREREREREREekYJoSIiIiIiIiIiHQME0JERERERERERDqGCSEiIiIiIiIiIh3DhBARERERERERkY5hQoiIiIiIiIiISMcwIUREREREREREpGOYECIiIiIiIiIi0jH/A8sC86Gbo4MIAAAAAElFTkSuQmCC",
|
||
"text/plain": [
|
||
"<Figure size 1400x800 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# ROC curve\n",
|
||
"\n",
|
||
"# Compute the false positive rate (FPR) and true positive rate (TPR)\n",
|
||
"y_pred_prob = random_forest_grid.predict_proba(X_test)[:, 1]\n",
|
||
"\n",
|
||
"fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob, pos_label=1)\n",
|
||
"\n",
|
||
"# Compute the area under the ROC curve (AUC)\n",
|
||
"roc_auc = auc(fpr, tpr)\n",
|
||
"\n",
|
||
"plt.figure(figsize = (14, 8))\n",
|
||
"plt.plot(fpr, tpr, label=\"ROC curve(area = %0.3f)\" % roc_auc)\n",
|
||
"plt.plot([0, 1], [0, 1], color=\"red\",label=\"Random Baseline\", linestyle=\"--\")\n",
|
||
"plt.grid(color='gray', linestyle='--', linewidth=0.5)\n",
|
||
"plt.xlabel('Taux de faux positifs (FPR)')\n",
|
||
"plt.ylabel('Taux de vrais positifs (TPR)')\n",
|
||
"plt.title('Courbe ROC : random forest')\n",
|
||
"plt.legend(loc=\"lower right\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "854f6242-813f-400a-be43-7414a859b355",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Naive Bayes "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 219,
|
||
"id": "b083d10d-8510-4a07-974b-e0c324175d7f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-5 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-5 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-5 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-5 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-5 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-5 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-5 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-5 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-5 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GaussianNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-24\" type=\"checkbox\" checked><label for=\"sk-estimator-id-24\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> GaussianNB<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.naive_bayes.GaussianNB.html\">?<span>Documentation for GaussianNB</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>GaussianNB()</pre></div> </div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"GaussianNB()"
|
||
]
|
||
},
|
||
"execution_count": 219,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"clf = GaussianNB()\n",
|
||
"clf.fit(X_train, y_train)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 234,
|
||
"id": "a5459639-be3d-4292-89d2-061f276dc9a8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Accuracy Score: 0.8780906593406593\n",
|
||
"F1 Score: 0.3673381217259815\n",
|
||
"Recall Score: 0.24842951059167276\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# print results for the best model\n",
|
||
"\n",
|
||
"y_pred = clf.predict(X_test)\n",
|
||
"\n",
|
||
"# Compute accuracy, F1 and recall on the test set\n",
|
||
"acc = accuracy_score(y_test, y_pred)\n",
|
||
"print(f\"Accuracy Score: {acc}\")\n",
|
||
"\n",
|
||
"f1 = f1_score(y_test, y_pred)\n",
|
||
"print(f\"F1 Score: {f1}\")\n",
|
||
"\n",
|
||
"recall = recall_score(y_test, y_pred)\n",
|
||
"print(f\"Recall Score: {recall}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 239,
|
||
"id": "22d3d4d0-36b4-4561-9bc7-3a408914f089",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"somme des probas de y prédites : 4889.8913137503505\n",
|
||
"nombre de y valant 1 : y_has_purchased 13690.0\n",
|
||
"dtype: float64\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# le bayes naif sous-estime les probas d'achat (les autres modèles surestiment pr avoir un bon recall) w\n",
|
||
"print(f\"somme des probas de y prédites : {y_pred_prob.sum()}\")\n",
|
||
"print(f\"nombre de y valant 1 : {y_test.sum()}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 236,
|
||
"id": "e962eeed-4099-407b-a619-a34a539a404a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1400x800 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# ROC curve\n",
|
||
"\n",
|
||
"# Calcul des taux de faux positifs (FPR) et de vrais positifs (TPR)\n",
|
||
"y_pred_prob = clf.predict_proba(X_test)[:, 1]\n",
|
||
"\n",
|
||
"fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob, pos_label=1)\n",
|
||
"\n",
|
||
"# Calcul de l'aire sous la courbe ROC (AUC)\n",
|
||
"roc_auc = auc(fpr, tpr)\n",
|
||
"\n",
|
||
"plt.figure(figsize = (14, 8))\n",
|
||
"plt.plot(fpr, tpr, label=\"ROC curve(area = %0.3f)\" % roc_auc)\n",
|
||
"plt.plot([0, 1], [0, 1], color=\"red\",label=\"Random Baseline\", linestyle=\"--\")\n",
|
||
"plt.grid(color='gray', linestyle='--', linewidth=0.5)\n",
|
||
"plt.xlabel('Taux de faux positifs (FPR)')\n",
|
||
"plt.ylabel('Taux de vrais positifs (TPR)')\n",
|
||
"plt.title('Courbe ROC : naive Bayes')\n",
|
||
"plt.legend(loc=\"lower right\")\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "ad1a0b57-e382-4ae3-90b6-1f790099711b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/opt/mamba/lib/python3.11/site-packages/numpy/core/fromnumeric.py:86: FutureWarning: The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis)\n",
|
||
" return reduction(axis=axis, out=out, **passkwargs)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# utilisation d'une métrique plus adaptée aux modèles de marketing : courbe de lift\n",
|
||
"\n",
|
||
"# Tri des prédictions de probabilités et des vraies valeurs\n",
|
||
"sorted_indices = np.argsort(y_pred_prob)[::-1]\n",
|
||
"y_pred_prob_sorted = y_pred_prob[sorted_indices]\n",
|
||
"y_test_sorted = y_test.iloc[sorted_indices]\n",
|
||
"\n",
|
||
"# Calcul du gain cumulatif\n",
|
||
"cumulative_gain = np.cumsum(y_test_sorted) / np.sum(y_test_sorted)\n",
|
||
"\n",
|
||
"# Tracé de la courbe de lift\n",
|
||
"plt.plot(np.linspace(0, 1, len(cumulative_gain)), cumulative_gain, label='Courbe de lift')\n",
|
||
"plt.xlabel('Part de clients identifiés sans modèle ')\n",
|
||
"plt.ylabel('Part de clients identifiés avec modèle')\n",
|
||
"plt.title('Courbe de Lift')\n",
|
||
"plt.legend()\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "7cbb1fec-97b9-4780-9488-5b8eff5aee0d",
|
||
"metadata": {},
|
||
"source": [
|
||
"## From model to segmentation"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"id": "d97ca3df-3778-469c-a077-495b3ee25051",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(array([9.0362e+04, 2.7200e+02, 1.6700e+02, 1.0000e+02, 8.6000e+01,\n",
|
||
" 5.7000e+01, 6.6000e+01, 6.3000e+01, 4.5000e+01, 5.1000e+01,\n",
|
||
" 5.4000e+01, 3.6000e+01, 5.3000e+01, 5.3000e+01, 5.3000e+01,\n",
|
||
" 5.1000e+01, 7.7000e+01, 1.1800e+02, 1.2700e+02, 4.2050e+03]),\n",
|
||
" array([8.76852176e-09, 5.00000083e-02, 1.00000008e-01, 1.50000007e-01,\n",
|
||
" 2.00000007e-01, 2.50000007e-01, 3.00000006e-01, 3.50000006e-01,\n",
|
||
" 4.00000005e-01, 4.50000005e-01, 5.00000004e-01, 5.50000004e-01,\n",
|
||
" 6.00000004e-01, 6.50000003e-01, 7.00000003e-01, 7.50000002e-01,\n",
|
||
" 8.00000002e-01, 8.50000001e-01, 9.00000001e-01, 9.50000000e-01,\n",
|
||
" 1.00000000e+00]),\n",
|
||
" <BarContainer object of 20 artists>)"
|
||
]
|
||
},
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"plt.hist(y_pred_prob, bins=20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"id": "b4ae4508-d5ac-4b22-a546-6c724278f8c3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([8.76852176e-09, 8.76852176e-09, 8.76852176e-09, ...,\n",
|
||
" 1.00000000e+00, 1.00000000e+00, 1.00000000e+00])"
|
||
]
|
||
},
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"np.sort(y_pred_prob)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"id": "ace9c778-0ab4-4e28-8ca0-364040d122e6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"4527"
|
||
]
|
||
},
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"(y_pred_prob>0.8).sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"id": "4a202a7e-e7fe-479c-8be3-7b2b93fe9d7b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# number of observations\n",
|
||
"N = len(y_pred_prob)\n",
|
||
"\n",
|
||
"# sort the data in ascending order \n",
|
||
"y_pred_prob_sorted = np.sort(y_pred_prob) \n",
|
||
"\n",
|
||
"# get the cdf values of y \n",
|
||
"steps = np.arange(N) / N\n",
|
||
" \n",
|
||
"# plotting \n",
|
||
"plt.xlabel('X') \n",
|
||
"plt.ylabel('P(score<=X)') \n",
|
||
" \n",
|
||
"plt.title('CDF curve of the predicted probability of purchase (score) for sports companies') \n",
|
||
" \n",
|
||
"plt.plot(y_pred_prob_sorted, steps) \n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"id": "e87efb96-71e6-4571-9a48-576ff5ebcbdc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([0. , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,\n",
|
||
" 0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1. ])"
|
||
]
|
||
},
|
||
"execution_count": 60,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# on regarde de plus près les quantiles (on identifie 2 clusters, où est le cut-off ?)\n",
|
||
"\n",
|
||
"np.linspace(0,1, 21)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"id": "ccd8373c-85c4-451d-b918-7bb84713c9ea",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(90634,)"
|
||
]
|
||
},
|
||
"execution_count": 59,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_pred_prob_sorted[y_pred_prob < 0.1].shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"id": "75a2c582-3020-4e2e-9a41-0da75c5dbbed",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"score du quantile 0.0 : 1.0\n",
|
||
"score du quantile 0.05 : 1.1703610048497538e-08\n",
|
||
"score du quantile 0.1 : 1.1916538583855572e-08\n",
|
||
"score du quantile 0.15000000000000002 : 1.672960453020865e-08\n",
|
||
"score du quantile 0.2 : 2.261530896018714e-08\n",
|
||
"score du quantile 0.25 : 4.429426100901144e-08\n",
|
||
"score du quantile 0.30000000000000004 : 5.527720441770875e-08\n",
|
||
"score du quantile 0.35000000000000003 : 6.583003552085313e-08\n",
|
||
"score du quantile 0.4 : 1.0150014636815537e-07\n",
|
||
"score du quantile 0.45 : 1.045553983975125e-07\n",
|
||
"score du quantile 0.5 : 1.8254643649033717e-07\n",
|
||
"score du quantile 0.55 : 1.0036337913333724e-06\n",
|
||
"score du quantile 0.6000000000000001 : 3.6006418270834777e-06\n",
|
||
"score du quantile 0.65 : 8.750051427856617e-06\n",
|
||
"score du quantile 0.7000000000000001 : 1.7761176996762073e-05\n",
|
||
"score du quantile 0.75 : 3.658511676930477e-05\n",
|
||
"score du quantile 0.8 : 7.449089979671675e-05\n",
|
||
"score du quantile 0.8500000000000001 : 0.0001599334998042523\n",
|
||
"score du quantile 0.9 : 0.0006156933309033692\n",
|
||
"score du quantile 0.9500000000000001 : 0.5161846499348189\n",
|
||
"score du quantile 1.0 : 1.0\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"for step in np.linspace(0,1, 21) :\n",
|
||
" score_reached = y_pred_prob_sorted[int(step*N)-1]\n",
|
||
" print(f\"score du quantile {step} : {score_reached}\")\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"id": "3e7d04c4-1add-4ef3-bca5-c2f68356b669",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"score du quantile 0.94 : 0.046364832132301186\n",
|
||
"score du quantile 0.941 : 0.060426331367796585\n",
|
||
"score du quantile 0.942 : 0.07560789365683944\n",
|
||
"score du quantile 0.943 : 0.0961854989484283\n",
|
||
"score du quantile 0.944 : 0.12036366182214445\n",
|
||
"score du quantile 0.945 : 0.15326229828189683\n",
|
||
"score du quantile 0.946 : 0.20141929276940546\n",
|
||
"score du quantile 0.947 : 0.26129057078459816\n",
|
||
"score du quantile 0.948 : 0.34459110917836233\n",
|
||
"score du quantile 0.949 : 0.42441766527261676\n",
|
||
"score du quantile 0.95 : 0.5161846499348189\n",
|
||
"score du quantile 0.951 : 0.6281715747542238\n",
|
||
"score du quantile 0.952 : 0.7161294443763133\n",
|
||
"score du quantile 0.953 : 0.8098274658632696\n",
|
||
"score du quantile 0.954 : 0.8628210594682936\n",
|
||
"score du quantile 0.955 : 0.9031546758694196\n",
|
||
"score du quantile 0.956 : 0.9406325197642711\n",
|
||
"score du quantile 0.957 : 0.9717094630837765\n",
|
||
"score du quantile 0.958 : 0.9853416074407844\n",
|
||
"score du quantile 0.959 : 0.99263528504162\n",
|
||
"score du quantile 0.96 : 0.9965103675841931\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# le saut survient entre le quantile 0.94 et 0.955\n",
|
||
"# on peut prendre le quantile 0.95 / score = 0.52 comme cut-off approximatif\n",
|
||
"for step in np.linspace(0.94,0.96, 21) :\n",
|
||
" score_reached = y_pred_prob_sorted[int(step*N)-1]\n",
|
||
" print(f\"score du quantile {step} : {score_reached}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 90,
|
||
"id": "5d8bb4ea-0030-4d23-8cff-26c9ed54ca71",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-4 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-4 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-4 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-4 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-4 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-4 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-4 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KMeans(n_clusters=2, random_state=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> KMeans<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.cluster.KMeans.html\">?<span>Documentation for KMeans</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>KMeans(n_clusters=2, random_state=0)</pre></div> </div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"KMeans(n_clusters=2, random_state=0)"
|
||
]
|
||
},
|
||
"execution_count": 90,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# simple K-means pour déterminer le seuil qui sépare les 2 clusters apparents\n",
|
||
"\n",
|
||
"from sklearn.cluster import KMeans\n",
|
||
"\n",
|
||
"kmeans = KMeans(n_clusters=2, random_state=0)\n",
|
||
"\n",
|
||
"kmeans.fit(y_pred_prob.reshape(-1,1))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 91,
|
||
"id": "afbf8247-4cb1-455b-96df-7e9a87407413",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([0, 0, 0, ..., 0, 0, 0], dtype=int32)"
|
||
]
|
||
},
|
||
"execution_count": 91,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_clusters = kmeans.predict(y_pred_prob.reshape(-1,1))\n",
|
||
"y_clusters"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 93,
|
||
"id": "e4747b82-1967-4043-bcd1-7659dbd87a2a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"4846"
|
||
]
|
||
},
|
||
"execution_count": 93,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_clusters[y_clusters==1].size"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 94,
|
||
"id": "2853083a-99a4-4ae9-9e8d-ddf175cca7ee",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0.9495712620712621"
|
||
]
|
||
},
|
||
"execution_count": 94,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 5% des individus sont dans le cluster 1\n",
|
||
"1 - y_clusters.mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "d18c8a4c-7d19-4d24-a304-cb26a533303e",
|
||
"metadata": {},
|
||
"source": [
|
||
"Intérêt du K-means : permet d'identifier un seuil de passage d'un cluster à l'autre quand le cluster est restreint, comme ici où on isole les clients avec la proba d'achat dans le quantile 0.95, et on les sépare des 95% restant"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 95,
|
||
"id": "77f59f30-1dc6-43b8-98b7-d179a966786a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"part d'individus dans le cluster 0 : 0.9495712620712621\n",
|
||
"seuil de passage du cluster 0 au cluster 1 : 0.4855790414879801\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# seuil de split \n",
|
||
"\n",
|
||
"size_cluster_0 = 1 - y_clusters.mean()\n",
|
||
"seuil_cluster = y_pred_prob_sorted[int(1 - y_clusters.mean()*N)]\n",
|
||
"\n",
|
||
"print(f\"part d'individus dans le cluster 0 : {size_cluster_0}\")\n",
|
||
"print(f\"seuil de passage du cluster 0 au cluster 1 : {seuil_cluster}\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|