2878 lines
391 KiB
Plaintext
2878 lines
391 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "ac01a6ea-bef6-4ace-89ff-1dc03a4215c2",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Segmentation des clients par régression logistique"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "bca785be-39f7-4583-9bd8-67c1134ae275",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"import re\n",
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n",
|
||
"from sklearn.preprocessing import StandardScaler\n",
|
||
"import seaborn as sns\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
|
||
"import pickle"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "14378e7b-240f-4df7-9ce8-5e60920a7729",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'1.12.0'"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import scipy\n",
|
||
"scipy.__version__ # il faut cette version pr eviter les pb"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "3bf57816-b023-4e84-9450-095620bddebc",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "27002f2f-a78a-414c-8e4f-b15bf6dd9e40",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_3769/2190163697.py:7: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" dataset_train = pd.read_csv(file_in, sep=\",\")\n",
|
||
"/tmp/ipykernel_3769/2190163697.py:12: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" dataset_test = pd.read_csv(file_in, sep=\",\")\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Importation des données\n",
|
||
"BUCKET = \"projet-bdc2324-team1/Generalization/musee\"\n",
|
||
"\n",
|
||
"FILE_PATH_S3 = BUCKET + \"/\" + \"Train_set.csv\"\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" dataset_train = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"FILE_PATH_S3 = BUCKET + \"/\" + \"Test_set.csv\"\n",
|
||
"\n",
|
||
"with fs.open(FILE_PATH_S3, mode=\"rb\") as file_in:\n",
|
||
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
|
||
"\n",
|
||
"# Choosed features\n",
|
||
"features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n",
|
||
" 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_partner', 'is_email_true', \n",
|
||
" 'opt_in', 'gender_female', 'gender_male', 'country_fr', 'nb_campaigns', 'nb_campaigns_opened', 'y_has_purchased'] \n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "3964cd4a-ab47-4b5b-8a19-cfc9286d0083",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "f3a32a9e-feff-4112-8d86-57a61b60f564",
|
||
"metadata": {
|
||
"editable": true,
|
||
"slideshow": {
|
||
"slide_type": ""
|
||
},
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['customer_id', 'nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
" 'nb_suppliers', 'vente_internet_max', 'purchase_date_min',\n",
|
||
" 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
|
||
" 'street_id', 'structure_id', 'mcp_contact_id', 'fidelity', 'tenant_id',\n",
|
||
" 'is_partner', 'deleted_at', 'gender', 'is_email_true', 'opt_in',\n",
|
||
" 'last_buying_date', 'max_price', 'ticket_sum', 'average_price',\n",
|
||
" 'average_purchase_delay', 'average_price_basket',\n",
|
||
" 'average_ticket_basket', 'total_price', 'purchase_count',\n",
|
||
" 'first_buying_date', 'country', 'gender_label', 'gender_female',\n",
|
||
" 'gender_male', 'gender_other', 'country_fr', 'nb_campaigns',\n",
|
||
" 'nb_campaigns_opened', 'time_to_open', 'y_has_purchased'],\n",
|
||
" dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dataset_train.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "3c225a17-26d0-4764-a8fe-59db216d4498",
|
||
"metadata": {
|
||
"editable": true,
|
||
"slideshow": {
|
||
"slide_type": ""
|
||
},
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"customer_id 0.000000\n",
|
||
"nb_tickets 0.000000\n",
|
||
"nb_purchases 0.000000\n",
|
||
"total_amount 0.000000\n",
|
||
"nb_suppliers 0.000000\n",
|
||
"vente_internet_max 0.000000\n",
|
||
"purchase_date_min 0.000000\n",
|
||
"purchase_date_max 0.000000\n",
|
||
"time_between_purchase 0.000000\n",
|
||
"nb_tickets_internet 0.000000\n",
|
||
"street_id 0.000000\n",
|
||
"structure_id 0.908249\n",
|
||
"mcp_contact_id 0.548870\n",
|
||
"fidelity 0.000000\n",
|
||
"tenant_id 0.000000\n",
|
||
"is_partner 0.000000\n",
|
||
"deleted_at 1.000000\n",
|
||
"gender 0.000000\n",
|
||
"is_email_true 0.000000\n",
|
||
"opt_in 0.000000\n",
|
||
"last_buying_date 0.377696\n",
|
||
"max_price 0.377696\n",
|
||
"ticket_sum 0.000000\n",
|
||
"average_price 0.212994\n",
|
||
"average_purchase_delay 0.377696\n",
|
||
"average_price_basket 0.377696\n",
|
||
"average_ticket_basket 0.377696\n",
|
||
"total_price 0.164701\n",
|
||
"purchase_count 0.000000\n",
|
||
"first_buying_date 0.377696\n",
|
||
"country 0.410248\n",
|
||
"gender_label 0.000000\n",
|
||
"gender_female 0.000000\n",
|
||
"gender_male 0.000000\n",
|
||
"gender_other 0.000000\n",
|
||
"country_fr 0.410248\n",
|
||
"nb_campaigns 0.000000\n",
|
||
"nb_campaigns_opened 0.000000\n",
|
||
"time_to_open 0.750637\n",
|
||
"y_has_purchased 0.000000\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dataset_test.isna().sum() / len(dataset_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "c3928b55-8821-46da-b3b5-a036efd6d2cf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>name_event_types</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>offre muséale individuel</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>spectacle vivant</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>offre muséale groupe</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" event_type_id name_event_types\n",
|
||
"0 2.0 offre muséale individuel\n",
|
||
"1 4.0 spectacle vivant\n",
|
||
"2 5.0 offre muséale groupe\n",
|
||
"3 NaN NaN"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dataset_train[['event_type_id', 'name_event_types']].drop_duplicates()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "7e8a9d4d-7e55-4173-a7f4-8b8baa9610d2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#Choose type of event \n",
|
||
"type_event_choosed = 5\n",
|
||
"\n",
|
||
"dataset_test = dataset_test[(dataset_test['event_type_id'] == type_event_choosed) | np.isnan(dataset_test['event_type_id'])]\n",
|
||
"dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n",
|
||
"dataset_train = dataset_train[(dataset_train['event_type_id'] == type_event_choosed) | np.isnan(dataset_train['event_type_id'])]\n",
|
||
"dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "e20ced8f-df1c-43bb-8d15-79f414c8225c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"customer_id 0.000000\n",
|
||
"event_type_id 0.967882\n",
|
||
"nb_tickets 0.000000\n",
|
||
"nb_purchases 0.000000\n",
|
||
"total_amount 0.000000\n",
|
||
"nb_suppliers 0.000000\n",
|
||
"vente_internet_max 0.000000\n",
|
||
"purchase_date_min 0.967882\n",
|
||
"purchase_date_max 0.967882\n",
|
||
"time_between_purchase 0.967882\n",
|
||
"nb_tickets_internet 0.000000\n",
|
||
"name_event_types 0.967882\n",
|
||
"avg_amount 0.967882\n",
|
||
"street_id 0.000000\n",
|
||
"is_partner 0.000000\n",
|
||
"gender 0.000000\n",
|
||
"is_email_true 0.000000\n",
|
||
"opt_in 0.000000\n",
|
||
"structure_id 0.856471\n",
|
||
"mcp_contact_id 0.297844\n",
|
||
"last_buying_date 0.642312\n",
|
||
"max_price 0.642312\n",
|
||
"ticket_sum 0.000000\n",
|
||
"average_price 0.107403\n",
|
||
"fidelity 0.000000\n",
|
||
"average_purchase_delay 0.642312\n",
|
||
"average_price_basket 0.642312\n",
|
||
"average_ticket_basket 0.642312\n",
|
||
"total_price 0.534909\n",
|
||
"purchase_count 0.000000\n",
|
||
"first_buying_date 0.642312\n",
|
||
"country 0.066622\n",
|
||
"tenant_id 0.000000\n",
|
||
"gender_label 0.000000\n",
|
||
"gender_female 0.000000\n",
|
||
"gender_male 0.000000\n",
|
||
"gender_other 0.000000\n",
|
||
"country_fr 0.066622\n",
|
||
"nb_campaigns 0.000000\n",
|
||
"nb_campaigns_opened 0.000000\n",
|
||
"time_to_open 0.553988\n",
|
||
"y_has_purchased 0.000000\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dataset_train.isna().sum()/len(dataset_train)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "05e29adb-7eef-416f-8f7b-248229eee0fe",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"nb_tickets 0\n",
|
||
"nb_purchases 0\n",
|
||
"total_amount 0\n",
|
||
"nb_suppliers 0\n",
|
||
"vente_internet_max 0\n",
|
||
"nb_tickets_internet 0\n",
|
||
"opt_in 0\n",
|
||
"fidelity 0\n",
|
||
"nb_campaigns 0\n",
|
||
"nb_campaigns_opened 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dataset_train[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet', 'opt_in', 'fidelity', 'nb_campaigns', 'nb_campaigns_opened']].isna().sum()\n",
|
||
"# pas de NaN, OK !"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "2ce94258-e2d1-472a-81fc-fc11e247b423",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"228.0"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dataset_train['y_has_purchased'].sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "34bae3f7-d579-4f80-a38d-a83eb5ea8a7b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Accuracy: 0.9986037223669636\n",
|
||
"Confusion Matrix:\n",
|
||
" [[128000 37]\n",
|
||
" [ 142 19]]\n",
|
||
"Classification Report:\n",
|
||
" precision recall f1-score support\n",
|
||
"\n",
|
||
" 0.0 1.00 1.00 1.00 128037\n",
|
||
" 1.0 0.34 0.12 0.18 161\n",
|
||
"\n",
|
||
" accuracy 1.00 128198\n",
|
||
" macro avg 0.67 0.56 0.59 128198\n",
|
||
"weighted avg 1.00 1.00 1.00 128198\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"\n",
|
||
"reg_columns = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet', 'opt_in', 'fidelity', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||
"\n",
|
||
"X_train = dataset_train[reg_columns]\n",
|
||
"y_train = dataset_train['y_has_purchased']\n",
|
||
"X_test = dataset_test[reg_columns]\n",
|
||
"y_test = dataset_test['y_has_purchased']\n",
|
||
"\n",
|
||
"# Fit and transform the scaler on the training data\n",
|
||
"scaler = StandardScaler()\n",
|
||
"\n",
|
||
"# Transform the test data using the same scaler\n",
|
||
"X_train_scaled = scaler.fit_transform(X_train)\n",
|
||
"X_test_scaled = scaler.fit_transform(X_test)\n",
|
||
"\n",
|
||
"# Create and fit the linear regression model\n",
|
||
"logit_model = LogisticRegression(penalty='l1', solver='liblinear', C=1.0)\n",
|
||
"logit_model.fit(X_train_scaled, y_train)\n",
|
||
"\n",
|
||
"y_pred = logit_model.predict(X_test_scaled)\n",
|
||
"\n",
|
||
"#Evaluation du modèle \n",
|
||
"accuracy = accuracy_score(y_test, y_pred)\n",
|
||
"conf_matrix = confusion_matrix(y_test, y_pred)\n",
|
||
"class_report = classification_report(y_test, y_pred)\n",
|
||
"\n",
|
||
"print(\"Accuracy:\", accuracy)\n",
|
||
"print(\"Confusion Matrix:\\n\", conf_matrix)\n",
|
||
"print(\"Classification Report:\\n\", class_report)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "ccc78c36-3287-46e6-89ac-7494c1a7106a",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 2 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Class 0', 'Class 1'], yticklabels=['Class 0', 'Class 1'])\n",
|
||
"plt.xlabel('Predicted')\n",
|
||
"plt.ylabel('Actual')\n",
|
||
"plt.title('Confusion Matrix')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "fe6e14d2-001d-4585-9344-f240b84ce4af",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Ajout TP : test d'une nouvelle pipeline"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "3782988b-52f9-4172-92d4-68948bf259c9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# etape supp : suppression du client 1 (outlier car client anonyme)\n",
|
||
"\n",
|
||
"dataset_train = dataset_train[dataset_train[\"customer_id\"]!=1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "9d19f8c0-ed31-46cd-8879-47810fa099d6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# definition des variables utilisées\n",
|
||
"\n",
|
||
"numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'nb_tickets_internet', 'fidelity', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||
"# categorical_features = [\"opt_in\"]\n",
|
||
"encoded_features = [\"opt_in\", \"vente_internet_max\"]\n",
|
||
"features = numeric_features + encoded_features\n",
|
||
"X_train = dataset_train[features]\n",
|
||
"y_train = dataset_train['y_has_purchased']\n",
|
||
"X_test = dataset_test[features]\n",
|
||
"y_test = dataset_test['y_has_purchased']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "412ddfad-3d20-4fa0-afaa-79ec87b3122d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 122016.000000\n",
|
||
"mean 0.307656\n",
|
||
"std 3.135563\n",
|
||
"min 0.000000\n",
|
||
"25% 0.000000\n",
|
||
"50% 0.000000\n",
|
||
"75% 0.000000\n",
|
||
"max 907.000000\n",
|
||
"Name: fidelity, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"### variable fidelity\n",
|
||
"\n",
|
||
"X_train[\"fidelity\"].describe() # sûrement un problème d'outlier pour fidelity\n",
|
||
"# X_train[\"total_amount\"].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"id": "97e1cd25-0961-45dd-af7f-78ab1d8088ee",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>173</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>2233.0</td>\n",
|
||
" <td>66.0</td>\n",
|
||
" <td>25703.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>94</td>\n",
|
||
" <td>130.0</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>24</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>224</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>28</th>\n",
|
||
" <td>557.0</td>\n",
|
||
" <td>25.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>175.0</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>32.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>34</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>24</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>144823</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>144824</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>120</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>144868</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>907</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>144877</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>150595</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>279 rows × 10 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"3 0.0 0.0 0.0 0.0 \n",
|
||
"15 2233.0 66.0 25703.0 2.0 \n",
|
||
"24 0.0 0.0 0.0 0.0 \n",
|
||
"28 557.0 25.0 0.0 2.0 \n",
|
||
"34 0.0 0.0 0.0 0.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"144823 0.0 0.0 0.0 0.0 \n",
|
||
"144824 0.0 0.0 0.0 0.0 \n",
|
||
"144868 0.0 0.0 0.0 0.0 \n",
|
||
"144877 0.0 0.0 0.0 0.0 \n",
|
||
"150595 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" nb_tickets_internet fidelity nb_campaigns nb_campaigns_opened \\\n",
|
||
"3 0.0 173 2.0 0.0 \n",
|
||
"15 2.0 94 130.0 60.0 \n",
|
||
"24 0.0 224 16.0 0.0 \n",
|
||
"28 175.0 34 32.0 15.0 \n",
|
||
"34 0.0 24 0.0 0.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"144823 0.0 9 0.0 0.0 \n",
|
||
"144824 0.0 120 0.0 0.0 \n",
|
||
"144868 0.0 907 0.0 0.0 \n",
|
||
"144877 0.0 8 0.0 0.0 \n",
|
||
"150595 0.0 6 0.0 0.0 \n",
|
||
"\n",
|
||
" opt_in vente_internet_max \n",
|
||
"3 True 0.0 \n",
|
||
"15 True 1.0 \n",
|
||
"24 True 0.0 \n",
|
||
"28 True 1.0 \n",
|
||
"34 True 0.0 \n",
|
||
"... ... ... \n",
|
||
"144823 True 0.0 \n",
|
||
"144824 True 0.0 \n",
|
||
"144868 True 0.0 \n",
|
||
"144877 True 0.0 \n",
|
||
"150595 True 0.0 \n",
|
||
"\n",
|
||
"[279 rows x 10 columns]"
|
||
]
|
||
},
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train[X_train[\"fidelity\"]>5]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"id": "fc17957e-b684-41cd-880f-049a4ffcc7dc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>125.0</td>\n",
|
||
" <td>71.0</td>\n",
|
||
" <td>1 days 04:13:20.492957746</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>27.0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>5 days 18:07:22.615384615</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152554</th>\n",
|
||
" <td>1256102</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152555</th>\n",
|
||
" <td>1256103</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152556</th>\n",
|
||
" <td>1256104</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152557</th>\n",
|
||
" <td>1256105</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152558</th>\n",
|
||
" <td>1256106</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>122016 rows × 42 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
|
||
"3 2 NaN 0.0 0.0 0.0 \n",
|
||
"4 3 NaN 0.0 0.0 0.0 \n",
|
||
"6 5 NaN 0.0 0.0 0.0 \n",
|
||
"7 6 NaN 0.0 0.0 0.0 \n",
|
||
"8 7 NaN 0.0 0.0 0.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"152554 1256102 NaN 0.0 0.0 0.0 \n",
|
||
"152555 1256103 NaN 0.0 0.0 0.0 \n",
|
||
"152556 1256104 NaN 0.0 0.0 0.0 \n",
|
||
"152557 1256105 NaN 0.0 0.0 0.0 \n",
|
||
"152558 1256106 NaN 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" nb_suppliers vente_internet_max purchase_date_min \\\n",
|
||
"3 0.0 0.0 NaN \n",
|
||
"4 0.0 0.0 NaN \n",
|
||
"6 0.0 0.0 NaN \n",
|
||
"7 0.0 0.0 NaN \n",
|
||
"8 0.0 0.0 NaN \n",
|
||
"... ... ... ... \n",
|
||
"152554 0.0 0.0 NaN \n",
|
||
"152555 0.0 0.0 NaN \n",
|
||
"152556 0.0 0.0 NaN \n",
|
||
"152557 0.0 0.0 NaN \n",
|
||
"152558 0.0 0.0 NaN \n",
|
||
"\n",
|
||
" purchase_date_max time_between_purchase ... tenant_id gender_label \\\n",
|
||
"3 NaN NaN ... 1311 male \n",
|
||
"4 NaN NaN ... 1311 male \n",
|
||
"6 NaN NaN ... 1311 male \n",
|
||
"7 NaN NaN ... 1311 male \n",
|
||
"8 NaN NaN ... 1311 female \n",
|
||
"... ... ... ... ... ... \n",
|
||
"152554 NaN NaN ... 1311 female \n",
|
||
"152555 NaN NaN ... 1311 other \n",
|
||
"152556 NaN NaN ... 1311 other \n",
|
||
"152557 NaN NaN ... 1311 other \n",
|
||
"152558 NaN NaN ... 1311 other \n",
|
||
"\n",
|
||
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
|
||
"3 0 1 0 1.0 2.0 \n",
|
||
"4 0 1 0 1.0 125.0 \n",
|
||
"6 0 1 0 1.0 2.0 \n",
|
||
"7 0 1 0 1.0 17.0 \n",
|
||
"8 1 0 0 1.0 27.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"152554 1 0 0 1.0 0.0 \n",
|
||
"152555 0 0 1 NaN 0.0 \n",
|
||
"152556 0 0 1 NaN 0.0 \n",
|
||
"152557 0 0 1 NaN 0.0 \n",
|
||
"152558 0 0 1 NaN 0.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened time_to_open y_has_purchased \n",
|
||
"3 0.0 NaN 0.0 \n",
|
||
"4 71.0 1 days 04:13:20.492957746 0.0 \n",
|
||
"6 0.0 NaN 0.0 \n",
|
||
"7 0.0 NaN 0.0 \n",
|
||
"8 13.0 5 days 18:07:22.615384615 0.0 \n",
|
||
"... ... ... ... \n",
|
||
"152554 0.0 NaN 0.0 \n",
|
||
"152555 0.0 NaN 0.0 \n",
|
||
"152556 0.0 NaN 0.0 \n",
|
||
"152557 0.0 NaN 0.0 \n",
|
||
"152558 0.0 NaN 0.0 \n",
|
||
"\n",
|
||
"[122016 rows x 42 columns]"
|
||
]
|
||
},
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Turn the opt_in flag into a 0/1 integer indicator, then display the table.\n",
"opt_in_indicator = dataset_train[\"opt_in\"].astype(int)\n",
"dataset_train[\"opt_in\"] = opt_in_indicator\n",
"dataset_train"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "8ad69b5d-e2e2-4d70-b8f0-ea0d37f7fe0c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Feature set used by the model.\n",
"numeric_features = [\n",
"    'nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers',\n",
"    'nb_tickets_internet', 'fidelity', 'nb_campaigns', 'nb_campaigns_opened',\n",
"]\n",
"# Indicator-style columns (opt_in was previously listed as a categorical feature).\n",
"encoded_features = [\"opt_in\", \"vente_internet_max\"]\n",
"features = [*numeric_features, *encoded_features]\n",
"\n",
"# Targets first, then the design matrices, for the train and test splits.\n",
"y_train = dataset_train['y_has_purchased']\n",
"y_test = dataset_test['y_has_purchased']\n",
"X_train = dataset_train[features]\n",
"X_test = dataset_test[features]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "3ed647a6-db9a-4737-b819-57cb81691ea2",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Autre ajout : travail de preprocessing des données - étude des outliers"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"id": "3771eeb1-5221-44e5-a5cd-15475fbe4858",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 128198.000000\n",
|
||
"mean 0.582536\n",
|
||
"std 181.774597\n",
|
||
"min 0.000000\n",
|
||
"25% 0.000000\n",
|
||
"50% 0.000000\n",
|
||
"75% 0.000000\n",
|
||
"max 65082.000000\n",
|
||
"Name: nb_purchases, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 60,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 1. Number of purchases: summary statistics.\n",
"X_train.loc[:, \"nb_purchases\"].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"id": "63c44b80-88cd-4339-91b9-3764e2690316",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>122983.0</td>\n",
|
||
" <td>65082.0</td>\n",
|
||
" <td>878762.5</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>330831</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>2773.0</td>\n",
|
||
" <td>81.0</td>\n",
|
||
" <td>32338.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>94</td>\n",
|
||
" <td>126.0</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>28</th>\n",
|
||
" <td>282.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>53.0</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>32.0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>29</th>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>24.0</td>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>31</th>\n",
|
||
" <td>52.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>22.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>147155</th>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>44.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>147242</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>147414</th>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>132.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>147636</th>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>165.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>147950</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>29.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>747 rows × 10 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"2 122983.0 65082.0 878762.5 5.0 \n",
|
||
"15 2773.0 81.0 32338.0 2.0 \n",
|
||
"28 282.0 15.0 0.0 2.0 \n",
|
||
"29 40.0 2.0 0.0 1.0 \n",
|
||
"31 52.0 2.0 0.0 1.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"147155 4.0 2.0 44.0 1.0 \n",
|
||
"147242 3.0 2.0 40.0 1.0 \n",
|
||
"147414 12.0 2.0 132.0 1.0 \n",
|
||
"147636 15.0 2.0 165.0 1.0 \n",
|
||
"147950 2.0 2.0 29.0 1.0 \n",
|
||
"\n",
|
||
" nb_tickets_internet fidelity nb_campaigns nb_campaigns_opened \\\n",
|
||
"2 9.0 330831 0.0 0.0 \n",
|
||
"15 2.0 94 126.0 50.0 \n",
|
||
"28 53.0 34 32.0 13.0 \n",
|
||
"29 0.0 4 24.0 17.0 \n",
|
||
"31 0.0 5 22.0 6.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"147155 4.0 2 0.0 0.0 \n",
|
||
"147242 3.0 2 0.0 0.0 \n",
|
||
"147414 12.0 2 0.0 0.0 \n",
|
||
"147636 15.0 2 0.0 0.0 \n",
|
||
"147950 2.0 2 0.0 0.0 \n",
|
||
"\n",
|
||
" opt_in vente_internet_max \n",
|
||
"2 0 1.0 \n",
|
||
"15 1 1.0 \n",
|
||
"28 1 1.0 \n",
|
||
"29 1 0.0 \n",
|
||
"31 1 0.0 \n",
|
||
"... ... ... \n",
|
||
"147155 0 1.0 \n",
|
||
"147242 0 1.0 \n",
|
||
"147414 0 1.0 \n",
|
||
"147636 0 1.0 \n",
|
||
"147950 0 1.0 \n",
|
||
"\n",
|
||
"[747 rows x 10 columns]"
|
||
]
|
||
},
|
||
"execution_count": 84,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Customers with more than one purchase.\n",
"X_train.loc[X_train[\"nb_purchases\"] > 1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"id": "032fbc5a-9044-41bd-b992-78077a6c8432",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"1.0"
|
||
]
|
||
},
|
||
"execution_count": 65,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 99th percentile of the number of purchases.\n",
"np.quantile(X_train[\"nb_purchases\"], q=0.99)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 70,
|
||
"id": "cad9f7cb-8b71-49a6-874b-e15cb9d7a204",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"count 128198.000000\n",
|
||
"mean 1.946941\n",
|
||
"std 343.940117\n",
|
||
"min 0.000000\n",
|
||
"25% 0.000000\n",
|
||
"50% 0.000000\n",
|
||
"75% 0.000000\n",
|
||
"max 122983.000000\n",
|
||
"Name: nb_tickets, dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"23.0"
|
||
]
|
||
},
|
||
"execution_count": 70,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 2. Number of tickets: summary statistics, then the 99th percentile.\n",
"tickets_col = X_train[\"nb_tickets\"]\n",
"print(tickets_col.describe())\n",
"np.quantile(tickets_col, 0.99)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"id": "6bb0c86d-eb61-473d-a29b-c59e7e5af489",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"count 128198.000000\n",
|
||
"mean 10.496193\n",
|
||
"std 2457.094272\n",
|
||
"min 0.000000\n",
|
||
"25% 0.000000\n",
|
||
"50% 0.000000\n",
|
||
"75% 0.000000\n",
|
||
"max 878762.500000\n",
|
||
"Name: total_amount, dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"44.0"
|
||
]
|
||
},
|
||
"execution_count": 73,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 3. Total amount: summary statistics, then the 99th percentile.\n",
"amount_col = X_train[\"total_amount\"]\n",
"print(amount_col.describe())\n",
"np.quantile(amount_col, 0.99)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"id": "ab6fded3-d8a5-4bb4-8f2d-472ea0e5e755",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"count 128198.000000\n",
|
||
"mean 2.924687\n",
|
||
"std 923.990506\n",
|
||
"min 0.000000\n",
|
||
"25% 0.000000\n",
|
||
"50% 0.000000\n",
|
||
"75% 1.000000\n",
|
||
"max 330831.000000\n",
|
||
"Name: fidelity, dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"2.0"
|
||
]
|
||
},
|
||
"execution_count": 76,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 4. Fidelity: summary statistics, then the 99th percentile.\n",
"fidelity_col = X_train[\"fidelity\"]\n",
"print(fidelity_col.describe())\n",
"np.quantile(fidelity_col, 0.99)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"id": "c1f0ac75-71a4-43fb-844b-e006acf5927b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"count 128198.000000\n",
|
||
"mean 24.276463\n",
|
||
"std 37.899868\n",
|
||
"min 0.000000\n",
|
||
"25% 1.000000\n",
|
||
"50% 4.000000\n",
|
||
"75% 28.000000\n",
|
||
"max 299.000000\n",
|
||
"Name: nb_campaigns, dtype: float64\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"133.0"
|
||
]
|
||
},
|
||
"execution_count": 79,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 5. Number of campaigns: does not look aberrant despite the high variance.\n",
"campaigns_col = X_train[\"nb_campaigns\"]\n",
"print(campaigns_col.describe())\n",
"np.quantile(campaigns_col, 0.99)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"id": "8bb01064-1c23-4100-ace8-56f155e0b4ab",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 99th-percentile thresholds used to trim outliers on nb_purchases, nb_tickets,\n",
"# total_amount and fidelity.\n",
"p99_nb_purchases, p99_nb_tickets, p99_total_amount, p99_fidelity = (\n",
"    np.quantile(X_train[col], 0.99)\n",
"    for col in (\"nb_purchases\", \"nb_tickets\", \"total_amount\", \"fidelity\")\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 98,
|
||
"id": "b2b43ab6-16aa-41bc-9a62-47ab769c5bf2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Drop rows above the 99th percentile on any of the four heavy-tailed variables\n",
"# (the original note estimates that about 2% of the rows are removed in total).\n",
"within_p99 = (\n",
"    (X_train[\"nb_purchases\"] <= p99_nb_purchases)\n",
"    & (X_train[\"nb_tickets\"] <= p99_nb_tickets)\n",
"    & (X_train[\"total_amount\"] <= p99_total_amount)\n",
"    & (X_train[\"fidelity\"] <= p99_fidelity)\n",
")\n",
"\n",
"# Filter the target with the same mask: X_train and y_train must stay row-aligned,\n",
"# otherwise fitting an estimator on them fails with inconsistent sample counts.\n",
"X_train = X_train.loc[within_p99]\n",
"y_train = y_train.loc[within_p99]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 99,
|
||
"id": "b254a671-9e57-4123-ae65-55c852eb64cd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>24.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers nb_tickets_internet \\\n",
|
||
"6 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"7 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"8 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"9 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"10 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" fidelity nb_campaigns nb_campaigns_opened opt_in vente_internet_max \n",
|
||
"6 1 2.0 0.0 1 0.0 \n",
|
||
"7 1 12.0 0.0 1 0.0 \n",
|
||
"8 1 24.0 10.0 1 0.0 \n",
|
||
"9 1 14.0 7.0 1 0.0 \n",
|
||
"10 1 23.0 11.0 1 0.0 "
|
||
]
|
||
},
|
||
"execution_count": 99,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# First five rows of the filtered feature matrix.\n",
"X_train.head(5)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 101,
|
||
"id": "86d90380-6ad2-4c6b-a103-53e4c1fa59e0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>customer_id</th>\n",
|
||
" <th>event_type_id</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>tenant_id</th>\n",
|
||
" <th>gender_label</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>country_fr</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" <th>time_to_open</th>\n",
|
||
" <th>y_has_purchased</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>male</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>24.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>5 days 11:58:52</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>0 days 13:29:25.714285714</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>0 days 17:17:44.090909090</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152645</th>\n",
|
||
" <td>1256102</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>female</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152646</th>\n",
|
||
" <td>1256103</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152647</th>\n",
|
||
" <td>1256104</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152648</th>\n",
|
||
" <td>1256105</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>152649</th>\n",
|
||
" <td>1256106</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1311</td>\n",
|
||
" <td>other</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>125792 rows × 42 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" customer_id event_type_id nb_tickets nb_purchases total_amount \\\n",
|
||
"6 5 NaN 0.0 0.0 0.0 \n",
|
||
"7 6 NaN 0.0 0.0 0.0 \n",
|
||
"8 7 NaN 0.0 0.0 0.0 \n",
|
||
"9 8 NaN 0.0 0.0 0.0 \n",
|
||
"10 9 NaN 0.0 0.0 0.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"152645 1256102 NaN 0.0 0.0 0.0 \n",
|
||
"152646 1256103 NaN 0.0 0.0 0.0 \n",
|
||
"152647 1256104 NaN 0.0 0.0 0.0 \n",
|
||
"152648 1256105 NaN 0.0 0.0 0.0 \n",
|
||
"152649 1256106 NaN 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" nb_suppliers vente_internet_max purchase_date_min \\\n",
|
||
"6 0.0 0.0 NaN \n",
|
||
"7 0.0 0.0 NaN \n",
|
||
"8 0.0 0.0 NaN \n",
|
||
"9 0.0 0.0 NaN \n",
|
||
"10 0.0 0.0 NaN \n",
|
||
"... ... ... ... \n",
|
||
"152645 0.0 0.0 NaN \n",
|
||
"152646 0.0 0.0 NaN \n",
|
||
"152647 0.0 0.0 NaN \n",
|
||
"152648 0.0 0.0 NaN \n",
|
||
"152649 0.0 0.0 NaN \n",
|
||
"\n",
|
||
" purchase_date_max time_between_purchase ... tenant_id gender_label \\\n",
|
||
"6 NaN NaN ... 1311 male \n",
|
||
"7 NaN NaN ... 1311 male \n",
|
||
"8 NaN NaN ... 1311 female \n",
|
||
"9 NaN NaN ... 1311 female \n",
|
||
"10 NaN NaN ... 1311 female \n",
|
||
"... ... ... ... ... ... \n",
|
||
"152645 NaN NaN ... 1311 female \n",
|
||
"152646 NaN NaN ... 1311 other \n",
|
||
"152647 NaN NaN ... 1311 other \n",
|
||
"152648 NaN NaN ... 1311 other \n",
|
||
"152649 NaN NaN ... 1311 other \n",
|
||
"\n",
|
||
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
|
||
"6 0 1 0 1.0 2.0 \n",
|
||
"7 0 1 0 1.0 12.0 \n",
|
||
"8 1 0 0 1.0 24.0 \n",
|
||
"9 1 0 0 1.0 14.0 \n",
|
||
"10 1 0 0 1.0 23.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"152645 1 0 0 1.0 0.0 \n",
|
||
"152646 0 0 1 NaN 0.0 \n",
|
||
"152647 0 0 1 NaN 0.0 \n",
|
||
"152648 0 0 1 NaN 0.0 \n",
|
||
"152649 0 0 1 NaN 0.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened time_to_open y_has_purchased \n",
|
||
"6 0.0 NaN 0.0 \n",
|
||
"7 0.0 NaN 0.0 \n",
|
||
"8 10.0 5 days 11:58:52 0.0 \n",
|
||
"9 7.0 0 days 13:29:25.714285714 0.0 \n",
|
||
"10 11.0 0 days 17:17:44.090909090 0.0 \n",
|
||
"... ... ... ... \n",
|
||
"152645 0.0 NaN 0.0 \n",
|
||
"152646 0.0 NaN 0.0 \n",
|
||
"152647 0.0 NaN 0.0 \n",
|
||
"152648 0.0 NaN 0.0 \n",
|
||
"152649 0.0 NaN 0.0 \n",
|
||
"\n",
|
||
"[125792 rows x 42 columns]"
|
||
]
|
||
},
|
||
"execution_count": 101,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Apply the same 99th-percentile filter to the full training table.\n",
"keep_rows = (\n",
"    (dataset_train[\"nb_purchases\"] <= p99_nb_purchases)\n",
"    & (dataset_train[\"nb_tickets\"] <= p99_nb_tickets)\n",
"    & (dataset_train[\"total_amount\"] <= p99_total_amount)\n",
"    & (dataset_train[\"fidelity\"] <= p99_fidelity)\n",
")\n",
"dataset_train = dataset_train.loc[keep_rows]\n",
"\n",
"dataset_train"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "f9487c48-b973-4d9e-abb9-902800ab778f",
|
||
"metadata": {},
|
||
"source": [
|
||
"En enlevant les outliers, on supprime la plupart des clients ayant acheté à nouveau (on passe de 161 à 19 acheteurs dans le jeu d'entraînement). Il faut donc trouver une autre méthode de preprocessing qui ne dégrade pas le dataset."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 102,
|
||
"id": "9fe7513b-f23b-4bee-957d-f98919d6eb30",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"19.0"
|
||
]
|
||
},
|
||
"execution_count": 102,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Problem: after the filter the number of purchasing customers drops from 161 to 19.\n",
"dataset_train.loc[:, \"y_has_purchased\"].sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b531aebb-3b2f-4c62-ae01-84bdf8e45f49",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Construction de la pipeline pour le modèle de régression logistique et résultats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"id": "1476da0d-cbb5-46ac-9f97-10855eec0108",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# importations pr créer la pipeline\n",
|
||
"\n",
|
||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||
"from sklearn.pipeline import Pipeline\n",
|
||
"from sklearn.compose import ColumnTransformer\n",
|
||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||
"from sklearn.impute import SimpleImputer\n",
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.model_selection import GridSearchCV\n",
|
||
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
|
||
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"id": "f905cb6f-b0be-4a47-ac8d-7b3e16ff1dce",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Start of the pipeline: standardise the numeric features.\n",
"# No imputer step: per the original note, rows with missing values were removed upstream.\n",
"numeric_transformer = Pipeline(steps=[(\"scaler\", StandardScaler())])\n",
"\n",
"# ColumnTransformer drops every column that is not listed (remainder='drop' by default),\n",
"# so the indicator columns in encoded_features are passed through explicitly;\n",
"# otherwise they are selected into X_train but never reach the model.\n",
"preproc = ColumnTransformer(transformers=[\n",
"    (\"num\", numeric_transformer, numeric_features),\n",
"    (\"enc\", \"passthrough\", encoded_features),\n",
"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "d322fb8f-1e97-4a44-96ca-c0f5d7ebd383",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Returned hyperparameter: {'logreg__C': 0.0009765625, 'logreg__class_weight': 'balanced'}\n",
|
||
"Best classification accuracy in train is: 0.25403118665289387\n",
|
||
"Classification accuracy on test is: 0.0495079950799508\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Accuracy is misleading on such an imbalanced target, so use class-sensitive metrics.\n",
"balanced_scorer = make_scorer(balanced_accuracy_score)\n",
"f1_scorer = make_scorer(f1_score)\n",
"\n",
"# Candidate values for C: the 17 powers of two from 2**-10 to 2**6.\n",
"parameter_space = np.logspace(-10, 6, 17, base=2)\n",
"\n",
"# max_iter=500 for now; the original note suggests raising it to 5000.\n",
"pipe = Pipeline([('preprocessor', preproc), ('logreg', LogisticRegression(max_iter=500))])\n",
"# class_weight='balanced' puts more weight on the rare positive class.\n",
"parameters4 = {'logreg__C': parameter_space, 'logreg__class_weight': ['balanced']}\n",
"clf4 = GridSearchCV(pipe, parameters4, cv=3, scoring=f1_scorer)\n",
"clf4.fit(X_train, y_train)\n",
"\n",
"# The search is scored with F1, so best_score_ and score() are F1 values, not accuracies.\n",
"print('Returned hyperparameter: {}'.format(clf4.best_params_))\n",
"print('Best cross-validated F1 score on train is: {}'.format(clf4.best_score_))\n",
"print('F1 score on test is: {}'.format(clf4.score(X_test, y_test)))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "b32bb668-c816-4055-b786-e548eb71f318",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Accuracy: 0.9517777188411676\n",
|
||
"Confusion Matrix:\n",
|
||
" [[121855 6182]\n",
|
||
" [ 0 161]]\n",
|
||
"Classification Report:\n",
|
||
" precision recall f1-score support\n",
|
||
"\n",
|
||
" 0.0 1.00 0.95 0.98 128037\n",
|
||
" 1.0 0.03 1.00 0.05 161\n",
|
||
"\n",
|
||
" accuracy 0.95 128198\n",
|
||
" macro avg 0.51 0.98 0.51 128198\n",
|
||
"weighted avg 1.00 0.95 0.97 128198\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Evaluate the fitted logistic-regression grid search (clf4) on the test set.\n",
|
||
"y_pred = clf4.predict(X_test)\n",
|
||
"\n",
|
||
"accuracy = accuracy_score(y_test, y_pred)\n",
|
||
"conf_matrix = confusion_matrix(y_test, y_pred)\n",
|
||
"class_report = classification_report(y_test, y_pred)\n",
|
||
"\n",
|
||
"# Print each metric right after its label, one print call per metric.\n",
|
||
"for metric_label, metric_value in (\n",
|
||
"    (\"Accuracy:\", accuracy),\n",
|
||
"    (\"Confusion Matrix:\\n\", conf_matrix),\n",
|
||
"    (\"Classification Report:\\n\", class_report),\n",
|
||
"):\n",
|
||
"    print(metric_label, metric_value)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"id": "faebbecb-3f85-4181-8005-2f52180fa37e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 2 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Confusion matrix as an annotated heatmap (y axis: actual class, x axis: predicted class).\n",
|
||
"cm_tick_labels = ['Class 0', 'Class 1']\n",
|
||
"cm_ax = sns.heatmap(\n",
|
||
"    conf_matrix,\n",
|
||
"    annot=True,\n",
|
||
"    fmt='d',\n",
|
||
"    cmap='Blues',\n",
|
||
"    xticklabels=cm_tick_labels,\n",
|
||
"    yticklabels=cm_tick_labels,\n",
|
||
")\n",
|
||
"cm_ax.set_xlabel('Predicted')\n",
|
||
"cm_ax.set_ylabel('Actual')\n",
|
||
"cm_ax.set_title('Confusion Matrix')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"id": "dc66d09e-3f7b-4f6d-a60f-c21a3a057c6d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 800x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# ROC curve of the logistic model on the test set.\n",
|
||
"\n",
|
||
"# Column 1 of predict_proba, i.e. the predicted probability of the second class.\n",
|
||
"y_pred_prob = clf4.predict_proba(X_test)[:, 1]\n",
|
||
"\n",
|
||
"# False-positive / true-positive rates per threshold, then the area under that curve.\n",
|
||
"fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)\n",
|
||
"roc_auc = auc(fpr, tpr)\n",
|
||
"\n",
|
||
"roc_fig, roc_ax = plt.subplots(figsize=(8, 6))\n",
|
||
"roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'AUC = {roc_auc:.2f}')\n",
|
||
"# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.\n",
|
||
"roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')\n",
|
||
"roc_ax.set_xlabel('Taux de faux positifs (FPR)')\n",
|
||
"roc_ax.set_ylabel('Taux de vrais positifs (TPR)')\n",
|
||
"roc_ax.set_title('Courbe ROC : modèle logistique')\n",
|
||
"roc_ax.legend(loc='lower right')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "b36a11db-5d7a-487a-9b22-f02339e6d413",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 800x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Precision-recall curve of the logistic model, computed from y_pred_prob.\n",
|
||
"# NOTE: this assignment rebinds the notebook-level name `thresholds`.\n",
|
||
"precision, recall, thresholds = precision_recall_curve(y_test, y_pred_prob)\n",
|
||
"\n",
|
||
"# Average precision, shown in the title as the AUC-PR summary.\n",
|
||
"average_precision = average_precision_score(y_test, y_pred_prob)\n",
|
||
"\n",
|
||
"pr_fig, pr_ax = plt.subplots(figsize=(8, 6))\n",
|
||
"pr_ax.step(recall, precision, color='b', alpha=0.2, where='post')\n",
|
||
"pr_ax.fill_between(recall, precision, step='post', alpha=0.2, color='b')\n",
|
||
"pr_ax.set_xlabel('Rappel')\n",
|
||
"pr_ax.set_ylabel('Précision')\n",
|
||
"pr_ax.set_ylim([0.0, 1.05])\n",
|
||
"pr_ax.set_xlim([0.0, 1.0])\n",
|
||
"pr_ax.set_title(f'Courbe PR (AUC-PR = {average_precision:.2f})')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"id": "7fb157b6-4e4e-4c7d-8a37-c3ac99323795",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Lift curve: a metric better suited to marketing models.\n",
|
||
"\n",
|
||
"# Rank the test observations from highest to lowest predicted probability.\n",
|
||
"sorted_indices = np.argsort(y_pred_prob)[::-1]\n",
|
||
"y_pred_prob_sorted = y_pred_prob[sorted_indices]\n",
|
||
"y_test_sorted = y_test.iloc[sorted_indices]\n",
|
||
"\n",
|
||
"# Cumulative gain: share of all positives found among the k highest-ranked observations.\n",
|
||
"cumulative_gain = np.cumsum(y_test_sorted) / np.sum(y_test_sorted)\n",
|
||
"\n",
|
||
"# Share of the population covered by the k highest-ranked observations, k = 1..n.\n",
|
||
"# It starts at 1/n, so the lift ratio never divides by zero at the first point.\n",
|
||
"n_obs = len(cumulative_gain)\n",
|
||
"population_share = np.arange(1, n_obs + 1) / n_obs\n",
|
||
"lift = np.asarray(cumulative_gain) / population_share\n",
|
||
"\n",
|
||
"# Restrict the plot to the 10000 highest-ranked observations.\n",
|
||
"n_plotted = 10000\n",
|
||
"plt.plot(population_share[:n_plotted], lift[:n_plotted], label='Courbe de lift')\n",
|
||
"plt.xlabel('Pourcentage des données')\n",
|
||
"# The plotted quantity is the lift (gain divided by population share), not the gain itself.\n",
|
||
"plt.ylabel('Lift')\n",
|
||
"plt.title('Courbe de Lift')\n",
|
||
"plt.legend()\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "98b93d38-a5d7-4480-91e6-e79be5de18e7",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Random forest"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"id": "771bee72-8b12-4ffb-b3ce-82f7e2ba6a8d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Fitting 3 folds for each of 9 candidates, totalling 27 fits\n",
|
||
"Best parameters: {'max_depth': 20, 'n_estimators': 100, 'random_state': 20}\n",
|
||
"Best classification accuracy in train is: 0.3224906065485776\n",
|
||
"Classification accuracy on test is: 0.31906614785992216\n",
|
||
"------\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Hyper-parameter grid for the random forest: 3 x 3 = 9 candidates.\n",
|
||
"# random_state has a single value, so it only fixes the seed used by every candidate.\n",
|
||
"params = {\n",
|
||
"    'n_estimators': [100, 150, 200],\n",
|
||
"    'max_depth': [5, 20, 30],\n",
|
||
"    'random_state': [20],\n",
|
||
"}\n",
|
||
"\n",
|
||
"# Grid search on the bare classifier (no preprocessing step, no pipeline), scored with f1_scorer.\n",
|
||
"clf = GridSearchCV(RandomForestClassifier(), params, cv=3, scoring=f1_scorer, verbose=True)\n",
|
||
"clf.fit(X_train, y_train)\n",
|
||
"\n",
|
||
"# The search is scored with f1_scorer, so best_score_ and score() below are F1 values,\n",
|
||
"# not accuracies; best_score_ is the mean over the 3 cross-validation folds.\n",
|
||
"print(f\"Best parameters: {clf.best_params_}\")\n",
|
||
"print('Best mean cross-validated F1 score is: {}'.format(clf.best_score_))\n",
|
||
"print('F1 score on test is: {}'.format(clf.score(X_test, y_test)))\n",
|
||
"print(\"------\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"id": "bf44a84d-607e-48c3-b8c6-28a07d1b1c14",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Accuracy: 0.99863492410178\n",
|
||
"Confusion Matrix:\n",
|
||
" [[127982 55]\n",
|
||
" [ 120 41]]\n",
|
||
"Classification Report:\n",
|
||
" precision recall f1-score support\n",
|
||
"\n",
|
||
" 0.0 1.00 1.00 1.00 128037\n",
|
||
" 1.0 0.43 0.25 0.32 161\n",
|
||
"\n",
|
||
" accuracy 1.00 128198\n",
|
||
" macro avg 0.71 0.63 0.66 128198\n",
|
||
"weighted avg 1.00 1.00 1.00 128198\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Evaluate the fitted random-forest grid search (clf) on the test set.\n",
|
||
"y_pred = clf.predict(X_test)\n",
|
||
"\n",
|
||
"accuracy = accuracy_score(y_test, y_pred)\n",
|
||
"conf_matrix = confusion_matrix(y_test, y_pred)\n",
|
||
"class_report = classification_report(y_test, y_pred)\n",
|
||
"\n",
|
||
"# Print each metric right after its label, one print call per metric.\n",
|
||
"for metric_label, metric_value in (\n",
|
||
"    (\"Accuracy:\", accuracy),\n",
|
||
"    (\"Confusion Matrix:\\n\", conf_matrix),\n",
|
||
"    (\"Classification Report:\\n\", class_report),\n",
|
||
"):\n",
|
||
"    print(metric_label, metric_value)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"id": "0fa2189c-5c0a-405b-b686-b9df3958c85c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAHFCAYAAADhWLMfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUH0lEQVR4nO3deVxVdf7H8dcV4YooNxQBMc2NSNJJw1K00nJXJGdq1CjSMjLXSFyicmsBt9HKvU0by2gxGytlNC0bU5RUyq3MRMkE0URURCA4vz/8eWeuYILdRa/v5zzO4+E953PP+dzrEB8/3+/3HJNhGAYiIiIibqCKqxMQERERsRcVNiIiIuI2VNiIiIiI21BhIyIiIm5DhY2IiIi4DRU2IiIi4jZU2IiIiIjbUGEjIiIibkOFjYiIiLgNFTbi1r7//nseeeQRGjVqRLVq1ahRowa33nor06ZN4/jx4w699vbt2+nQoQMWiwWTycTLL79s92uYTCYmTZpk9/NeyuLFizGZTJhMJr766qsyxw3DoGnTpphMJjp27HhZ15g3bx6LFy+u1Hu++uqri+YkIteGqq5OQMRRXn/9dYYOHUpoaChjxowhLCyM4uJivv32WxYsWMCmTZtYvny5w67/6KOPkp+fT3JyMn5+fjRs2NDu19i0aRPXX3+93c9bUTVr1uTNN98sU7ysX7+en3/+mZo1a172uefNm4e/vz8DBw6s8HtuvfVWNm3aRFhY2GVfV0SubipsxC1t2rSJIUOG0KVLFz755BPMZrP1WJcuXYiPjyclJcWhOezcuZPY2Fh69OjhsGu0bdvWYeeuiH79+vHuu+8yd+5cfH19rfvffPNNIiIiOHnypFPyKC4uxmQy4evr6/LvRERcS0NR4pYSExMxmUy89tprNkXNeV5eXkRFRVlfl5aWMm3aNG666SbMZjMBAQE8/PDDHDp0yOZ9HTt2pHnz5qSlpXHnnXdSvXp1GjduzJQpUygtLQX+O0zz+++/M3/+fOuQDcCkSZOsf/5f599z4MAB675169bRsWNHateujbe3Nw0aNOC+++7jzJkz1pjyhqJ27tzJvffei5+fH9WqVaNly5a8/fbbNjHnh2zee+89nn32WYKDg/H19aVz5878+OOPFfuSgQceeACA9957z7ovLy+PZcuW8eijj5b7nsmTJ9OmTRtq1aqFr68vt956K2+++Sb/+zzehg0bsmvXLtavX2/9/s53vM7nvmTJEuLj46lXrx5ms5l9+/aVGYo6duwY9evXp127dhQXF1vPv3v3bnx8fIiJianwZxWRq4MKG3E7JSUlrFu3jvDwcOrXr1+h9wwZMoRx48bRpUsXVqxYwQsvvEBKSgrt2rXj2LFjNrHZ2dk8+OCDPPTQQ6xYsYIePXqQkJDAO++8A0CvXr3YtGkTAPfffz+bNm2yvq6oAwcO0KtXL7y8vHjrrbdISUlhypQp+Pj4UFRUdNH3/fjjj7Rr145du3bx6quv8vHHHxMWFsbAgQOZNm1amfhnnnmGgwcP8sYbb/Daa6/x008/0bt3b0pKSiqUp6+vL/fffz9vvfWWdd97771HlSpV6Nev30U/2+DBg/nggw/4+OOP+dvf/saIESN44YUXrDHLly+ncePGtGrVyvr9XThsmJCQQGZmJgsWLODTTz8lICCgzLX8/f1JTk4mLS2NcePGAXDmzBn+/ve/06BBAxYsWFChzykiVxFDxM1kZ2cbgNG/f/8Kxe/Zs8cAjKFDh9rs37x5swEYzzzzjHVfhw4dDMDYvHmzTWxYWJjRrVs3m32AMWzYMJt9EydONMr7sVu0aJEBGBkZGYZhGMZHH31kAEZ6evof5g4YEydOtL7u37+/YTabjczMTJu4Hj16GNWrVzdOnDhhGIZhfPnllwZg9OzZ0ybugw8+MABj06ZNf3jd8/mmpaVZz7Vz507DMAzjtttuMwYOHGgYhmHcfPPNRocOHS56npKSEqO4uN
h4/vnnjdq1axulpaXWYxd77/nr3XXXXRc99uWXX9rsnzp1qgEYy5cvNwYMGGB4e3sb33///R9+RhG5OqljI9e8L7/8EqDMJNXbb7+dZs2asXbtWpv9QUFB3H777Tb7/vKXv3Dw4EG75dSyZUu8vLx4/PHHefvtt9m/f3+F3rdu3To6depUplM1cOBAzpw5U6Zz9L/DcXDucwCV+iwdOnSgSZMmvPXWW+zYsYO0tLSLDkOdz7Fz585YLBY8PDzw9PRkwoQJ/Pbbb+Tk5FT4uvfdd1+FY8eMGUOvXr144IEHePvtt5k9ezYtWrSo8PtF5Oqhwkbcjr+/P9WrVycjI6NC8b/99hsAdevWLXMsODjYevy82rVrl4kzm80UFBRcRrbla9KkCV988QUBAQEMGzaMJk2a0KRJE1555ZU/fN9vv/120c9x/vj/uvCznJ+PVJnPYjKZeOSRR3jnnXdYsGABN954I3feeWe5sVu2bKFr167AuVVr33zzDWlpaTz77LOVvm55n/OPchw4cCBnz54lKChIc2tE3JgKG3E7Hh4edOrUia1bt5aZ/Fue87/cs7Kyyhw7fPgw/v7+dsutWrVqABQWFtrsv3AeD8Cdd97Jp59+Sl5eHqmpqURERBAXF0dycvJFz1+7du2Lfg7Arp/lfw0cOJBjx46xYMECHnnkkYvGJScn4+npyWeffUbfvn1p164drVu3vqxrljcJ+2KysrIYNmwYLVu25LfffmP06NGXdU0RufKpsBG3lJCQgGEYxMbGljvZtri4mE8//RSAe+65B8A6+fe8tLQ09uzZQ6dOneyW1/mVPd9//73N/vO5lMfDw4M2bdowd+5cALZt23bR2E6dOrFu3TprIXPeP//5T6pXr+6wpdD16tVjzJgx9O7dmwEDBlw0zmQyUbVqVTw8PKz7CgoKWLJkSZlYe3XBSkpKeOCBBzCZTKxatYqkpCRmz57Nxx9//KfPLSJXHt3HRtxSREQE8+fPZ+jQoYSHhzNkyBBuvvlmiouL2b59O6+99hrNmzend+/ehIaG8vjjjzN79myqVKlCjx49OHDgAOPHj6d+/fo89dRTdsurZ8+e1KpVi0GDBvH8889TtWpVFi9ezC+//GITt2DBAtatW0evXr1o0KABZ8+eta486ty580XPP3HiRD777DPuvvtuJkyYQK1atXj33Xf5/PPPmTZtGhaLxW6f5UJTpky5ZEyvXr2YOXMm0dHRPP744/z222/MmDGj3CX5LVq0IDk5mffff5/GjRtTrVq1y5oXM3HiRP7zn/+wevVqgoKCiI+PZ/369QwaNIhWrVrRqFGjSp9TRK5cKmzEbcXGxnL77bcza9Yspk6dSnZ2Np6entx4441ER0czfPhwa+z8+fNp0qQJb775JnPnzsVisdC9e3eSkpLKnVNzuXx9fUlJSSEuLo6HHnqI6667jscee4wePXrw2GOPWeNatmzJ6tWrmThxItnZ2dSoUYPmzZuzYsUK6xyV8oSGhrJx40aeeeYZhg0bRkFBAc2aNWPRokWVuoOvo9xzzz289dZbTJ06ld69e1OvXj1iY2MJCAhg0KBBNrGTJ08mKyuL2NhYTp06xQ033GBzn5+KWLNmDUlJSYwfP96m87Z48WJatWpFv3792LBhA15eXvb4eCJyBTAZxv/cFUtERETkKqY5NiIiIuI2VNiIiIiI21BhIyIiIm5DhY2IiIi4DRU2IiIi4jZU2IiIiIjbUGEjIiIibsMtb9Dn3Wr4pYNErkG5aXNcnYLIFaeaE34T2uv3UsF2/Qxfijo2IiIi4jbcsmMjIiJyRTGpj+AsKmxEREQczWRydQbXDBU2IiIijqaOjdPomxYRERG3oY6NiIiIo2koymlU2IiIiDiahqKcRt+0iIiIuA11bERERBxNQ1FOo8JGRETE0TQU5TT6pkVERMRtqGMjIiLiaBqKchoVNiIiIo6moSin0TctIiIibkMdGxEREUfTUJTTqLARERFxNA
1FOY0KGxEREUdTx8ZpVEKKiIiI21BhIyIi4mimKvbZKunrr7+md+/eBAcHYzKZ+OSTT6zHiouLGTduHC1atMDHx4fg4GAefvhhDh8+bHOOwsJCRowYgb+/Pz4+PkRFRXHo0CGbmNzcXGJiYrBYLFgsFmJiYjhx4oRNTGZmJr1798bHxwd/f39GjhxJUVGRTcyOHTvo0KED3t7e1KtXj+effx7DMCr1mVXYiIiIOJqLCpv8/HxuueUW5syZU+bYmTNn2LZtG+PHj2fbtm18/PHH7N27l6ioKJu4uLg4li9fTnJyMhs2bOD06dNERkZSUlJijYmOjiY9PZ2UlBRSUlJIT08nJibGerykpIRevXqRn5/Phg0bSE5OZtmyZcTHx1tjTp48SZcuXQgODiYtLY3Zs2czY8YMZs6cWanPbDIqWwpdBbxbDXd1CiJXpNy0sv9xE7nWVXPCbFPvDs/b5TwF6ydc9ntNJhPLly+nT58+F41JS0vj9ttv5+DBgzRo0IC8vDzq1KnDkiVL6NevHwCHDx+mfv36rFy5km7durFnzx7CwsJITU2lTZs2AKSmphIREcEPP/xAaGgoq1atIjIykl9++YXg4GAAkpOTGThwIDk5Ofj6+jJ//nwSEhI4cuQIZrMZgClTpjB79mwOHTqEqYLzlNSxERERcbQqJrtshYWFnDx50mYrLCy0W5p5eXmYTCauu+46ALZu3UpxcTFdu3a1xgQHB9O8eXM2btwIwKZNm7BYLNaiBqBt27ZYLBabmObNm1uLGoBu3bpRWFjI1q1brTEdOnSwFjXnYw4fPsyBAwcq/BlU2IiIiDianYaikpKSrPNYzm9JSUl2SfHs2bM8/fTTREdH4+vrC0B2djZeXl74+fnZxAYGBpKdnW2NCQgIKHO+gIAAm5jAwECb435+fnh5ef1hzPnX52MqQsu9RURErhIJCQmMGjXKZt//djguV3FxMf3796e0tJR58+ZdMt4wDJuhofKGiewRc362TEWHoUAdGxEREcczmeyymc1mfH19bbY/W9gUFxfTt29fMjIyWLNmjbVbAxAUFERRURG5ubk278nJybF2U4KCgjhy5EiZ8x49etQm5sKuS25uLsXFxX8Yk5OTA1Cmk/NHVNiIiIg4motWRV3K+aLmp59+4osvvqB27do2x8PDw/H09GTNmjXWfVlZWezcuZN27doBEBERQV5eHlu2bLHGbN68mby8PJuYnTt3kpWVZY1ZvXo1ZrOZ8PBwa8zXX39tswR89erVBAcH07Bhwwp/JhU2IiIibur06dOkp6eTnp4OQEZGBunp6WRmZvL7779z//338+233/Luu+9SUlJCdnY22dnZ1uLCYrEwaNAg4uPjWbt2Ldu3b+ehhx6iRYsWdO7cGYBmzZrRvXt3YmNjSU1NJTU1ldjYWCIjIwkNDQWga9euhIWFERMTw/bt21m7di2jR48mNjbW2iGKjo7GbDYzcOBAdu7cyfLly0lMTGTUqFGVGorScm+Ra4iWe4uU5ZTl3l2m2uU8BWvGVSr+q6++4u677y6zf8CAAUyaNIlGjRqV+74vv/ySjh07AucmFY8ZM4alS5dSUFBAp06dmDdvHvXr17fGHz9+nJEjR7JixQoAoqKimDNnjnV1FZy7Qd/QoUNZt24d3t7eREdHM2PGDJuhtB07djBs2DC2bNmCn58fTzzxBBMmTFBho8JGpHwqbETKckph03W6Xc5TsHqMXc7jzrQqSkRExNH0EEyn0RwbERERcRvq2IiIiDiaA1Y0SflU2IiIiDiahqKcRiWkiIiIuA11bERERBxNQ1FOo8JGRETE0TQU5TQqIUVERMRtqGMjIiLiaBqKchoVNiIiIo6mwsZp9E2LiIiI21DHRkRExNE0edhpVNiIiIg4moainEaFjYiIiKOpY+M0KiFFRETEbahjIyIi4mgainIaFTYiIiKOpqEop1EJKSIiIm5DHRsREREHM6lj4zQqbERERBxMhY3zaC
hKRERE3IY6NiIiIo6mho3TqLARERFxMA1FOY+GokRERMRtqGMjIiLiYOrYOI8KGxEREQdTYeM8KmxEREQcTIWN82iOjYiIiLgNdWxEREQcTQ0bp1FhIyIi4mAainIeDUWJiIiI21DHRkRExMHUsXEeFTYiIiIOpsLGeTQUJSIiIm5DHRsREREHU8fGeVTYiIiIOJrqGqfRUJSIiIi4DXVsREREHExDUc6jwkZERMTBVNg4jwobERERB1Nh4zwuLWzy8/NZunQpGzduJDs7G5PJRGBgIO3bt+eBBx7Ax8fHlemJiIjIVcZlk4d3797NjTfeyNixY8nNzaVBgwZcf/315ObmMmbMGEJDQ9m9e7er0hMREbEfk502uSSXdWyGDRvGXXfdxdtvv42Xl5fNsaKiIgYOHMiwYcP48ssvXZShiIiIfWgoynlcVths3ryZb7/9tkxRA+Dl5cUzzzzD7bff7oLMRERE5GrlsqEoPz8/fvrpp4se37dvH35+fk7MSERExDFMJpNdNrk0l3VsYmNjGTBgAM899xxdunQhMDAQk8lEdnY2a9asITExkbi4OFelJyIiYjcqSpzHZYXNpEmT8Pb2ZubMmYwdO9b6l24YBkFBQTz99NOMHTvWVemJiIjIVcilj1QYN24chw8f5ueff2bDhg1s2LCBn3/+mcOHD6uoERERt+Gqoaivv/6a3r17ExwcjMlk4pNPPrE5bhgGkyZNIjg4GG9vbzp27MiuXbtsYgoLCxkxYgT+/v74+PgQFRXFoUOHbGJyc3OJiYnBYrFgsViIiYnhxIkTNjGZmZn07t0bHx8f/P39GTlyJEVFRTYxO3bsoEOHDnh7e1OvXj2ef/55DMOo1Ge+Ip4V1ahRIyIiIoiIiKBRo0auTkdERMS+XLTcOz8/n1tuuYU5c+aUe3zatGnMnDmTOXPmkJaWRlBQEF26dOHUqVPWmLi4OJYvX05ycjIbNmzg9OnTREZGUlJSYo2Jjo4mPT2dlJQUUlJSSE9PJyYmxnq8pKSEXr16kZ+fz4YNG0hOTmbZsmXEx8dbY06ePEmXLl0IDg4mLS2N2bNnM2PGDGbOnFmpz2wyKlsKXQW8Ww13dQoiV6TctPL/4yZyLavmhEkZwU98bJfzHF7wt8t+r8lkYvny5fTp0wc4160JDg4mLi6OcePGAee6M4GBgUydOpXBgweTl5dHnTp1WLJkCf369TuXw+HD1K9fn5UrV9KtWzf27NlDWFgYqamptGnTBoDU1FQiIiL44YcfCA0NZdWqVURGRvLLL78QHBwMQHJyMgMHDiQnJwdfX1/mz59PQkICR44cwWw2AzBlyhRmz57NoUOHKtyxuiI6NiIiIu7MXkNRhYWFnDx50mYrLCy8rJwyMjLIzs6ma9eu1n1ms5kOHTqwceNGALZu3UpxcbFNTHBwMM2bN7fGbNq0CYvFYi1qANq2bYvFYrGJad68ubWoAejWrRuFhYVs3brVGtOhQwdrUXM+5vDhwxw4cKDCn0uFjYiIiIPZq7BJSkqyzmM5vyUlJV1WTtnZ2QAEBgba7A8MDLQey87OxsvLq8ztVy6MCQgIKHP+gIAAm5gLr+Pn54eXl9cfxpx/fT6mIvQQTBEREQez13LvhIQERo0aZbPvfzscl+PC3AzDuGS+F8aUF2+PmPOzZSrz/bm8Y5OSksKGDRusr+fOnUvLli2Jjo4mNzfXhZmJiIhcWcxmM76+vjbb5RY2QUFBQNluSE5OjrVTEhQURFFRUZnfxxfGHDlypMz5jx49ahNz4XVyc3MpLi7+w5icnBygbFfpj7i8sBkzZgwnT54Ezi3zio+Pp2fPnuzfv79MVSoiInJVugIfgtmoUSOCgoJYs2aNdV9RURHr16+nXbt2AISHh+Pp6WkTk5WVxc6dO60xERER5OXlsWXLFmvM5s2bycvLs4nZuXMnWVlZ1pjVq1djNpsJDw+3xnz99dc2S8BXr15NcHAwDRs2rPDncvlQVEZGBm
FhYQAsW7aMyMhIEhMT2bZtGz179nRxdiIiIn+eq+48fPr0afbt22d9nZGRQXp6OrVq1aJBgwbExcWRmJhISEgIISEhJCYmUr16daKjowGwWCwMGjSI+Ph4ateuTa1atRg9ejQtWrSgc+fOADRr1ozu3bsTGxvLwoULAXj88ceJjIwkNDQUgK5duxIWFkZMTAzTp0/n+PHjjB49mtjYWHx9fYFzS8YnT57MwIEDeeaZZ/jpp59ITExkwoQJlfr+XF7YeHl5cebMGQC++OILHn74YQBq1apl7eSIiIhI5X377bfcfffd1tfnR0IGDBjA4sWLGTt2LAUFBQwdOpTc3FzatGnD6tWrqVmzpvU9s2bNomrVqvTt25eCggI6derE4sWL8fDwsMa8++67jBw50rp6KioqyubeOR4eHnz++ecMHTqU9u3b4+3tTXR0NDNmzLDGWCwW1qxZw7Bhw2jdujV+fn6MGjWq0qM3Lr+PTVRUFEVFRbRv354XXniBjIwM6tWrx+rVqxk+fDh79+6t9Dl1H5uKaX9rE556uDO3hjWgbh0LfZ96jU+/+h6AqlWrMGlob7rdcTONrq/NydNnWbf5B8a/uoKso3kANKhbix9XPl/uuR8c8yYff7EdgJY3Xc+LT/Yh/OYGlJQYfLI2nXH/WEZ+wX/bjeFhDXhh5L20CquPYcDWXQd59uVP+H7vrwDcGR7CiIfupvXNN+Bboxr7Mo/y8ttfkLzqW0d+RW5H97FxnvlzZ7Ngnu33Xbu2P+u+/gaA8c88zYp/Lbc53uIvt/DOex84LUc5xxn3sblh5Kd2Oc/BV3vb5TzuzOUdmzlz5jB06FA++ugj5s+fT7169QBYtWoV3bt3d3F27s3H28yOvb+yZEUqyf+ItTlWvZoXLZvVZ8rrq/h+76/4+VZn+uj7+PDlwdzx4DQADh3JpWHnBJv3PXpfe0YN6MK/vzl3S+66dSx8vmAEH63exlNTPsDXpxrTx9zH68/HED3mTQBqVDezYt4wPvtqB08mvU9VjyqMH9KLFfOG0bT7c/z+eyltb2nEzp9+ZebiNRz57RQ97ryZN154mJP5Z1n59U4nfFsildekaQivvbHI+rrK//wLF6D9HXfy/Iv/Xarr6enptNzEufQQTOdxeWHToEEDPvvsszL7Z82a5YJsri2rv9nN6m92l3vs5OmzRA6x/dfmqKkfsuHdsdQP8uOX7FxKSw2O/HbKJibq7lv4aPVWazemx53NKf69hLikD6zL9uKSPmDz+wk0ru/P/l+OcWPDQGpZfHhh/mccOnICgJcWruLbD5+hflAtMg4dY/pbq22uM++99XSOaEbU3beosJErVlUPD/zr1LnocS8vrz88LiKV5/JVUdu2bWPHjh3W1//617/o06cPzzzzTJmHY4lr+db0prS0lBOnCso93qpZfVreVJ+3P9lk3Wf2qkpxcYnNQ8wKCosBaNeyCQB7DxzhaO4pBvRph2dVD6qZPRnYJ4Jd+w6TmXX8ovlYaniTe/KMPT6aiEMczDxI54530KPrPYwd/RSHfvnF5vi3aVvoeGcEvXt2Y/KE5/jtt99clKk4mqsegnktcnlhM3jwYOs8mv3799O/f3+qV6/Ohx9+qCd8X0HMXlV5YeS9vL/qW07lny03ZkCfCPbszyL1uwzrvq+2/EhgbV+eergTnlU9uK6mN8+PiAIgqI4FgNNnCun22Cs80PM2clNnceybf9A5ohl/HTGfkpLScq/1184tCb+5Af/816Zyj4u4Wou//IWXEqcy/7U3mTj5RX47doyHH+zPiRPn7gfS/s67SJw6g9ffepv4MePYtXMHsY8O0D/o3NUVuNzbXbm8sNm7dy8tW7YE4MMPP+Suu+5i6dKlLF68mGXLll3y/eU9N8MoLbnk+6TiqlatwpIpj1DFZOLJpPInNlYze9KvR2ubbg3Anv3ZxE5YwsiYThzfNJMDXySScegY2cdOUvr/RUs1sycLJz3Epu/20+HhGd
zzyEz27M9i+ewhVDOXnXNwZ3gIr02OYegL77Fnf8Vvsy3iTHfc2YHOXbsRcmMobSPaMXveuWWwKz75BIDuPXpyV4eOhITcSMe772Huwtc5eOAAX6//ynVJi7gBl8+xMQyD0tJzv+C++OILIiMjAahfvz7Hjh275PuTkpKYPHmyzT6PwNvwrHu7/ZO9BlWtWoV3pw7ihnq16fH47It2a/7auSXVq3nx7mdbyhx7P+Vb3k/5loBaNckvKMQwYORD93Dg13Nt9349WtMguBYdBvzDOmQ1IGExWV9Po3fHv/Dhv7daz3VHeFOWvTKYcf/4mKXlXEvkSlW9enVCbryRzMwD5R6vUyeA4OBgMg+Wf1yubhpGch6Xd2xat27Niy++yJIlS1i/fj29evUCzt1EqCK3UE5ISCAvL89mqxoY7ui0rwnni5omDerQ64k5HM/Lv2jswD7t+Hz9Do7lnr5oTM7xU+QXFHF/t1s5W1TM2tQfgHMrsEpLDZt5OKWGgWFAlf/5j8Gd4SEsf3UI419dwVsff2OHTyjiPEVFRezf/zP+/uVPFj5xIpfs7Czq1Cn7MEG5+mmOjfO4vGPz8ssv8+CDD/LJJ5/w7LPP0rRpUwA++ugj662Y/4jZbC7znAxTFY+LRMv/8vH2okn9//5HtmG92vzlxnrknjzD4aN5LJ3+GK1uqs/fnlyARxUTgbXP3bDpeN4Zin//73Bf4/r+3HFrE/qMmF/udZ7odxep3+3n9JkiOrW9icS4Poyf/S/yTp+bhLw29QcS4/rwckJf5ievp4rJxOhHuvJ7SQnrvz03/+rO8BCWz36CuUu/4pO12625FBWXaAKxXJH+MX0qHTreTVDduhw/fpzXF8wn//Rpovr8lTP5+cyfN4fOXbriX6cOh3/9ldmvzOI6Pz/u+f+7uYp7UU3iPC6/Qd/FnD17Fg8Pj8u6r4Nu0Fcxd4aHsPqNJ8vsX7IilRcXrLzozfe6PvYK/9n6k/X15OG9ie51Ozf2nEB5/3d644UYut/RnBrVvfjxwBFe/uda3vs8zSbmnjY38ezgHoQ1rUtpqcF3Pxxi0txP2bLjAACvTX6ImKi2Zc799bc/0S32lcp87GuabtDnPGNHP8W2b9PIzT2BXy0//vKXlgwb8SRNmjbl7NmzxI0Yxg8/7ObUyVPUqVOH225vw7ARTxJUt66rU7/mOOMGfU1Hr7LLefbN6GGX87izK7aw+TNU2IiUT4WNSFnOKGxCxqTY5Tw/TdeNay/F5UNRJSUlzJo1iw8++IDMzMwySx2PH7/4fUxERESuBhqKch6XTx6ePHkyM2fOpG/fvuTl5TFq1Cj+9re/UaVKFSZNmuTq9EREROQq4vLC5t133+X1119n9OjRVK1alQceeIA33niDCRMmkJqa6ur0RERE/jStinIelxc22dnZtGjRAoAaNWqQl3fuydGRkZF8/vnnrkxNRETELkwm+2xyaS4vbK6//nqysrIAaNq0KatXn3vYYVpaWpll3CIiIiJ/xOWFzV//+lfWrl0LwJNPPsn48eMJCQnh4Ycf5tFHH3VxdiIiIn9elSomu2xyaS5fFTVlyhTrn++//36uv/56Nm7cSNOmTYmKinJhZiIiIvahYSTncXlhc6G2bdvStm3ZG7GJiIiIXIpLCpsVK1ZUOFZdGxERudppRZPzuKSw6dOnT4XiTCYTJSUllw4UERG5gqmucR6XFDalpaWuuKyIiIhLqGPjPC5fFSUiIiJiLy4rbNatW0dYWBgnT54scywvL4+bb76Zr7/+2gWZiYiI2JfuPOw8LitsXn75ZWJjY/H19S1zzGKxMHjwYGbNmuWCzEREROxLdx52HpcVNt999x3du1/88etdu3Zl69atTsxIRERErnYuu4/NkSNH8PT0vOjxqlWrcvToUSdmJCIi4hgaRnIel3Vs6tWrx44dOy56/Pvvv6du3bpOzEhERMQxNBTlPC4rbHr27MmECRM4e/ZsmWMFBQVMnDiRyM
hIF2QmIiIiVyuXDUU999xzfPzxx9x4440MHz6c0NBQTCYTe/bsYe7cuZSUlPDss8+6Kj0RERG70VCU87issAkMDGTjxo0MGTKEhIQEDMMAzv3ld+vWjXnz5hEYGOiq9EREROxGdY3zuPQhmDfccAMrV64kNzeXffv2YRgGISEh+Pn5uTItERERuUpdEU/39vPz47bbbnN1GiIiIg6hoSjnuSIKGxEREXemusZ5VNiIiIg4mDo2zqOHYIqIiIjbUMdGRETEwdSwcR4VNiIiIg6moSjn0VCUiIiIuA11bERERBxMDRvnUWEjIiLiYBqKch4NRYmIiIjbUMdGRETEwdSwcR4VNiIiIg6moSjn0VCUiIiIuA11bERERBxMHRvnUWEjIiLiYKprnEdDUSIiIg5mMpnsslXG77//znPPPUejRo3w9vamcePGPP/885SWllpjDMNg0qRJBAcH4+3tTceOHdm1a5fNeQoLCxkxYgT+/v74+PgQFRXFoUOHbGJyc3OJiYnBYrFgsViIiYnhxIkTNjGZmZn07t0bHx8f/P39GTlyJEVFRZX7IitAhY2IiIgbmjp1KgsWLGDOnDns2bOHadOmMX36dGbPnm2NmTZtGjNnzmTOnDmkpaURFBREly5dOHXqlDUmLi6O5cuXk5yczIYNGzh9+jSRkZGUlJRYY6Kjo0lPTyclJYWUlBTS09OJiYmxHi8pKaFXr17k5+ezYcMGkpOTWbZsGfHx8Xb/3CbDMAy7n9XFvFsNd3UKIlek3LQ5rk5B5IpTzQmTMu5+ZaNdzvPlk+0qHBsZGUlgYCBvvvmmdd99991H9erVWbJkCYZhEBwcTFxcHOPGjQPOdWcCAwOZOnUqgwcPJi8vjzp16rBkyRL69esHwOHDh6lfvz4rV66kW7du7Nmzh7CwMFJTU2nTpg0AqampRERE8MMPPxAaGsqqVauIjIzkl19+ITg4GIDk5GQGDhxITk4Ovr6+dvl+QB0bERERh3PFUNQdd9zB2rVr2bt3LwDfffcdGzZsoGfPngBkZGSQnZ1N165dre8xm8106NCBjRvPFWJbt26luLjYJiY4OJjmzZtbYzZt2oTFYrEWNQBt27bFYrHYxDRv3txa1AB069aNwsJCtm7dWqnPdSmaPCwiInKVKCwspLCw0Gaf2WzGbDaXiR03bhx5eXncdNNNeHh4UFJSwksvvcQDDzwAQHZ2NgCBgYE27wsMDOTgwYPWGC8vL/z8/MrEnH9/dnY2AQEBZa4fEBBgE3Phdfz8/PDy8rLG2Is6NiIiIg5mMtlnS0pKsk7QPb8lJSWVe83333+fd955h6VLl7Jt2zbefvttZsyYwdtvv31BbradIMMwLtkdujCmvPjLibEHdWxEREQcrIqdfnknJCQwatQom33ldWsAxowZw9NPP03//v0BaNGiBQcPHiQpKYkBAwYQFBQEnOum1K1b1/q+nJwca3clKCiIoqIicnNzbbo2OTk5tGvXzhpz5MiRMtc/evSozXk2b95sczw3N5fi4uIynZw/Sx0bERGRq4TZbMbX19dmu1hhc+bMGapUsf017+HhYV3u3ahRI4KCglizZo31eFFREevXr7cWLeHh4Xh6etrEZGVlsXPnTmtMREQEeXl5bNmyxRqzefNm8vLybGJ27txJVlaWNWb16tWYzWbCw8P/zFdShjo2IiIiDuaKG/T17t2bl156iQYNGnDzzTezfft2Zs6cyaOPPvr/OZmIi4sjMTGRkJAQQkJCSExMpHr16kRHRwNgsVgYNGgQ8fHx1K5dm1q1ajF69GhatGhB586dAWjWrBndu3cnNjaWhQsXAvD4448TGRlJaGgoAF27diUsLIyYmBimT5/O8ePHGT16NLGxsXZdEQUqbERERBzOFY9UmD17NuPHj2fo0KHk5OQQHBzM4MGDmTBhgjVm7NixFBQUMHToUHJzc2nTpg2rV6+mZs2a1phZs2ZRtWpV+vbtS0FBAZ06dWLx4sV4eH
hYY959911GjhxpXT0VFRXFnDn/vb2Eh4cHn3/+OUOHDqV9+/Z4e3sTHR3NjBkz7P65dR8bkWuI7mMjUpYz7mPTY/7mSwdVwKohbS4ddI3THBsRERFxGxqKEhERcTA93dt5VNiIiIg4mOoa59FQlIiIiLgNdWxEREQczIRaNs6iwkZERMTBqqiucRoNRYmIiIjbUMdGRETEwbQqynlU2IiIiDiY6hrn0VCUiIiIuA11bERERBysilo2TqPCRkRExMFU1ziPChsREREH0+Rh59EcGxEREXEb6tiIiIg4mBo2zqPCRkRExME0edh5NBQlIiIibkMdGxEREQdTv8Z5VNiIiIg4mFZFOY+GokRERMRtqGMjIiLiYFXUsHGaChU2K1asqPAJo6KiLjsZERERd6ShKOepUGHTp0+fCp3MZDJRUlLyZ/IRERERuWwVKmxKS0sdnYeIiIjbUsPGeTTHRkRExME0FOU8l1XY5Ofns379ejIzMykqKrI5NnLkSLskJiIi4i40edh5Kl3YbN++nZ49e3LmzBny8/OpVasWx44do3r16gQEBKiwEREREZep9H1snnrqKXr37s3x48fx9vYmNTWVgwcPEh4ezowZMxyRo4iIyFXNZDLZZZNLq3Rhk56eTnx8PB4eHnh4eFBYWEj9+vWZNm0azzzzjCNyFBERuaqZ7LTJpVW6sPH09LRWjYGBgWRmZgJgsVisfxYRERFxhUrPsWnVqhXffvstN954I3fffTcTJkzg2LFjLFmyhBYtWjgiRxERkataFQ0jOU2lOzaJiYnUrVsXgBdeeIHatWszZMgQcnJyeO211+yeoIiIyNXOZLLPJpdW6Y5N69atrX+uU6cOK1eutGtCIiIiIpdLN+gTERFxMK1ocp5KFzaNGjX6w7+g/fv3/6mERERE3I3qGuepdGETFxdn87q4uJjt27eTkpLCmDFj7JWXiIiISKVVurB58skny90/d+5cvv322z+dkIiIiLvRqijnqfSqqIvp0aMHy5Yts9fpRERE3IZWRTmP3SYPf/TRR9SqVctepxMREXEbmjzsPJd1g77//QsyDIPs7GyOHj3KvHnz7JqciIiISGVUurC59957bQqbKlWqUKdOHTp27MhNN91k1+QuV27aHFenICIiYmW3eR9ySZUubCZNmuSANERERNyXhqKcp9JFpIeHBzk5OWX2//bbb3h4eNglKREREZHLUemOjWEY5e4vLCzEy8vrTyckIiLibqqoYeM0FS5sXn31VeBcO+2NN96gRo0a1mMlJSV8/fXXV8wcGxERkSuJChvnqXBhM2vWLOBcx2bBggU2w05eXl40bNiQBQsW2D9DERERkQqqcGGTkZEBwN13383HH3+Mn5+fw5ISERFxJ5o87DyVnjz85ZdfqqgRERGphCom+2yV9euvv/LQQw9Ru3ZtqlevTsuWLdm6dav1uGEYTJo0ieDgYLy9venYsSO7du2yOUdhYSEjRozA398fHx8foqKiOHTokE1Mbm4uMTExWCwWLBYLMTExnDhxwiYmMzOT3r174+Pjg7+/PyNHjqSoqKjyH+oSKl3Y3H///UyZMqXM/unTp/P3v//dLkmJiIjIn5Obm0v79u3x9PRk1apV7N69m3/84x9cd9111php06Yxc+ZM5syZQ1paGkFBQXTp0oVTp05ZY+Li4li+fDnJycls2LCB06dPExkZSUlJiTUmOjqa9PR0UlJSSElJIT09nZiYGOvxkpISevXqRX5+Phs2bCA5OZlly5YRHx9v989tMi62zOki6tSpw7p162jRooXN/h07dtC5c2eOHDli1wQvx9nfXZ2BiIhcLarZ7eFCFzf28x/tcp5pvUIrHPv000/zzTff8J///Kfc44ZhEBwcTFxcHOPGjQPOdWcCAwOZOnUqgwcPJi8vjzp16rBkyRL69esHwOHDh6lfvz4rV66kW7du7Nmzh7CwMFJTU2nTpg0AqampRERE8MMPPxAaGsqqVauIjI
zkl19+ITg4GIDk5GQGDhxITk4Ovr6+f+ZrsVHpjs3p06fLXdbt6enJyZMn7ZKUiIiIO6liMtllq4wVK1bQunVr/v73vxMQEECrVq14/fXXrcczMjLIzs6ma9eu1n1ms5kOHTqwceNGALZu3UpxcbFNTHBwMM2bN7fGbNq0CYvFYi1qANq2bYvFYrGJad68ubWoAejWrRuFhYU2Q2P2UOnCpnnz5rz//vtl9icnJxMWFmaXpERERNxJFTtthYWFnDx50mYrLCws95r79+9n/vz5hISE8O9//5snnniCkSNH8s9//hOA7OxsAAIDA23eFxgYaD2WnZ2Nl5dXmbm1F8YEBASUuX5AQIBNzIXX8fPzw8vLyxpjL5VuwI0fP5777ruPn3/+mXvuuQeAtWvXsnTpUj766CO7JiciIiL/lZSUxOTJk232TZw4sdzHHZWWltK6dWsSExOBcw+x3rVrF/Pnz+fhhx+2xl24YsswjEuu4rowprz4y4mxh0p3bKKiovjkk0/Yt28fQ4cOJT4+nl9//ZV169bRsGFDuyYnIiLiDkwm+2wJCQnk5eXZbAkJCeVes27dumVGUpo1a0ZmZiYAQUFBAGU6Jjk5OdbuSlBQEEVFReTm5v5hTHnza48ePWoTc+F1cnNzKS4uLtPJ+bMu64GjvXr14ptvviE/P599+/bxt7/9jbi4OMLDw+2anIiIiDuw1xwbs9mMr6+vzWY2m8u9Zvv27fnxR9tJy3v37uWGG24AoFGjRgQFBbFmzRrr8aKiItavX0+7du0ACA8Px9PT0yYmKyuLnTt3WmMiIiLIy8tjy5Yt1pjNmzeTl5dnE7Nz506ysrKsMatXr8ZsNtu9drjsueDr1q3jrbfe4uOPP+aGG27gvvvu480337RnbiIiInKZnnrqKdq1a0diYiJ9+/Zly5YtvPbaa7z22mvAuaGhuLg4EhMTCQkJISQkhMTERKpXr050dDQAFouFQYMGER8fT+3atalVqxajR4+mRYsWdO7cGTjXBerevTuxsbEsXLgQgMcff5zIyEhCQ8+t4uratSthYWHExMQwffp0jh8/zujRo4mNjbXriiioZGFz6NAhFi9ezFtvvUV+fj59+/aluLiYZcuWaeKwiIjIRbjixsO33XYby5cvJyEhgeeff55GjRrx8ssv8+CDD1pjxo4dS0FBAUOHDiU3N5c2bdqwevVqatasaY2ZNWsWVatWpW/fvhQUFNCpUycWL15s82ild999l5EjR1pXT0VFRTFnzhzrcQ8PDz7//HOGDh1K+/bt8fb2Jjo6mhkzZtj9c1f4PjY9e/Zkw4YNREZG8uCDD9K9e3c8PDzw9PTku+++u6IKG93HRkREKsoZ97GZtPon+5yna4hdzuPOKvzXuXr1akaOHMmQIUMICdEXKyIiIleeCk8e/s9//sOpU6do3bo1bdq0Yc6cORw9etSRuYmIiLgFV9yg71pV4cImIiKC119/naysLAYPHkxycjL16tWjtLSUNWvW2DxXQkRERP7LXsu95dIqvdy7evXqPProo2zYsIEdO3YQHx/PlClTCAgIICoqyhE5ioiIiFTIZd3H5rzQ0FCmTZvGoUOHeO+99+yVk4iIiFupYrLPJpdW6ad7Xw20KkpERCrKGauiEtf+bJfzPNOpiV3O486c8NcpIiJybVO3xXn+1FCUiIiIyJVEHRsREREHU8fGeVTYiIiIOJhJa7WdRkNRIiIi4jbUsREREXEwDUU5jwobERERB9NIlPNoKEpERETchjo2IiIiDqYHWDqPChsREREH0xwb59FQlIiIiLgNdWxEREQcTCNRzqPCRkRExMGqoMrGWVTYiIiIOJg6Ns6jOTYiIiLiNtSxERERcTCtinIeFTYiIiIOpvvYOI+GokRERMRtqGMjIiLiYGrYOI8KGxEREQfTUJTzaChKRERE3IY6NiIiIg6mho3zqLARERFxMA2POI++axEREXEb6tiIiIg4mEljUU6jwkZERM
TBVNY4jwobERERB9Nyb+fRHBsRERFxG+rYiIiIOJj6Nc6jwkZERMTBNBLlPBqKEhEREbehjo2IiIiDabm386iwERERcTANjziPvmsRERFxG+rYiIiIOJiGopxHhY2IiIiDqaxxHg1FiYiIiNtQx0ZERMTBNBTlPCpsREREHEzDI86jwkZERMTB1LFxHhWRIiIi4jZU2IiIiDiYyU7bn5GUlITJZCIuLs66zzAMJk2aRHBwMN7e3nTs2JFdu3bZvK+wsJARI0bg7++Pj48PUVFRHDp0yCYmNzeXmJgYLBYLFouFmJgYTpw4YROTmZlJ79698fHxwd/fn5EjR1JUVPQnP1VZKmxEREQczGSyz3a50tLSeO211/jLX/5is3/atGnMnDmTOXPmkJaWRlBQEF26dOHUqVPWmLi4OJYvX05ycjIbNmzg9OnTREZGUlJSYo2Jjo4mPT2dlJQUUlJSSE9PJyYmxnq8pKSEXr16kZ+fz4YNG0hOTmbZsmXEx8df/oe6CJNhGIbdz+piZ393dQYiInK1qOaE2ab/2pFtl/Pc2yKo0u85ffo0t956K/PmzePFF1+kZcuWvPzyyxiGQXBwMHFxcYwbNw44150JDAxk6tSpDB48mLy8POrUqcOSJUvo168fAIcPH6Z+/fqsXLmSbt26sWfPHsLCwkhNTaVNmzYApKamEhERwQ8//EBoaCirVq0iMjKSX375heDgYACSk5MZOHAgOTk5+Pr62uX7AXVsREREHK4KJrtshYWFnDx50mYrLCz8w2sPGzaMXr160blzZ5v9GRkZZGdn07VrV+s+s9lMhw4d2LhxIwBbt26luLjYJiY4OJjmzZtbYzZt2oTFYrEWNQBt27bFYrHYxDRv3txa1AB069aNwsJCtm7depnfavmu2MLmyJEjPP/8865OQ0RE5E+z11BUUlKSdR7L+S0pKemi101OTmbbtm3lxmRnn+siBQYG2uwPDAy0HsvOzsbLyws/P78/jAkICChz/oCAAJuYC6/j5+eHl5eXNcZertjCJjs7m8mTJ7s6DRERkStGQkICeXl5NltCQkK5sb/88gtPPvkk77zzDtWqVbvoOS9cim4YxiWXp18YU1785cTYg8vuY/P999//4fEff/zRSZmIiIg4lslOT4sym82YzeYKxW7dupWcnBzCw8Ot+0pKSvj666+ZM2eO9fdsdnY2devWtcbk5ORYuytBQUEUFRWRm5tr07XJycmhXbt21pgjR46Uuf7Ro0dtzrN582ab47m5uRQXF5fp5PxZLitsWrZsiclkory5y+f364ZGIiLiDlzx66xTp07s2LHDZt8jjzzCTTfdxLhx42jcuDFBQUGsWbOGVq1aAVBUVMT69euZOnUqAOHh4Xh6erJmzRr69u0LQFZWFjt37mTatGkAREREkJeXx5YtW7j99tsB2Lx5M3l5edbiJyIigpdeeomsrCxrEbV69WrMZrNN4WUPLitsateuzdSpU+nUqVO5x3ft2kXv3r2dnJWIiIh7qFmzJs2bN7fZ5+PjQ+3ata374+LiSExMJCQkhJCQEBITE6levTrR0dEAWCwWBg0aRHx8PLVr16ZWrVqMHj2aFi1aWCcjN2vWjO7duxMbG8vChQsBePzxx4mMjCQ0NBSArl27EhYWRkxMDNOnT+f48eOMHj2a2NhYu66IAhcWNuHh4Rw+fJgbbrih3OMnTpwot5sjIiJytalip6Eoexs7diwFBQUMHTqU3Nxc2rRpw+rVq6lZs6Y1ZtasWVStWpW+fftSUFBAp06dWLx4MR4eHtaYd999l5EjR1pXT0VFRTFnzhzrcQ8PDz7//HOGDh1K+/bt8fb2Jjo6mhkzZtj9M7nsPjbLly8nPz+fhx56qNzjubm5rFixggEDBlT63LqPjYiIVJQz7mPz791H7XKebmF17HIed6Yb9ImIyDXNGYXN6j32KWy6NlNhcylX7HJvERERkcpy2RwbERGRa4W9lnvLpamwERERcb
AqqmucRkNRIiIi4jbUsREREXEwDUU5j8s7NikpKWzYsMH6eu7cubRs2ZLo6Ghyc3NdmJmIiIh92OshmHJpLi9sxowZw8mTJwHYsWMH8fHx9OzZk/379zNq1CgXZyciIiJXE5cPRWVkZBAWFgbAsmXLiIyMJDExkW3bttGzZ08XZyciIvLnaSjKeVzesfHy8uLMmTMAfPHFF9bbMdeqVcvayREREbmaVTHZZ5NLc3nH5o477mDUqFG0b9+eLVu28P777wOwd+9err/+ehdnJyIiIlcTl3ds5syZQ9WqVfnoo4+YP38+9erVA2DVqlV0797dxdlJebZ+m8aIoU/QueMd3HJzKOvWfmE9VlxczKx/TOe+Pr1p07olnTvewbMJY8nJOWJzjqKiIpJeeoEO7dvQpnVLRg57giPZ2c7+KCJO8+brC7nl5lCmJb1k3ffFmtU8ETuIDu3bcMvNofywZ48LMxRHMtnpf3JpLi9sGjRowGeffcZ3333HoEGDrPtnzZrFq6++6sLM5GIKCs4QGhrK089OKHPs7Nmz/LBnN48/MYT3P/yYma/M4eCBAzw5fIhN3LQpL7Fu7RqmzpjF4iVLOXPmDCOGDqakpMRZH0PEaXbu+J6PPnyfG28MtdlfUHCGlq1a8eRTo12UmTiLVkU5j8uHorZt24anpyctWrQA4F//+heLFi0iLCyMSZMm4eXl5eIM5UJ33NmBO+7sUO6xmjVrsvCNRTb7nn7mOR7s/3eyDh+mbnAwp06dYvmyZbw0ZRptI9oBkDh1Ot06dSR100ba33Gnwz+DiLOcyc8nYdwYJk5+kdcXzrc51juqDwC//nrIBZmJM6kmcR6Xd2wGDx7M3r17Adi/fz/9+/enevXqfPjhh4wdO9bF2Yk9nD59GpPJRE1fXwB279rJ778X065de2tMQEAgTZuG8F36dlelKeIQiS8+z113dbAW8SLiWC7v2Ozdu5eWLVsC8OGHH3LXXXexdOlSvvnmG/r378/LL7/8h+8vLCyksLDQZp/hYcZsNjsoY6mMwsJCXpk1gx69IqlRowYAvx07hqenJ74Wi01sLX9/jh075oo0RRxi1crP2bNnN0vf/8jVqYiLVdE4ktO4vGNjGAalpaXAueXe5+9dU79+/Qr9kktKSsJisdhs06cmOTRnqZji4mLGjX6K0lKDZ8dPuvQbDENjyOI2srOymDblJRKnTNc/tASTnTa5NJd3bFq3bs2LL75I586dWb9+PfPnnxuDzsjIIDAw8JLvT0hIKHOHYsND/xFxteLiYsbEx/HroUO8vuhta7cGoLa/P8XFxZzMy7Pp2hz/7TduadnKFemK2N3u3bs4/ttvPND3b9Z9JSUlbP02jeT33iVt+w48PDxcmKGIe3J5YfPyyy/z4IMP8sknn/Dss8/StGlTAD766CPatbv0mLTZXHbY6ezvDklVKuh8UZN58CBvLPon113nZ3M87ObmVK3qyaZN39Ct+7kO3dGjOezb9xNx8WNckbKI3bVp25aPPvnUZt/EZxNo2LgxjwyKVVFzrVG7xWlcXtj85S9/YceOHWX2T58+XT/4V6gz+flkZmZaX/966BA/7NmDxWKhTkAAo58ayZ49u5k9dyGlJSUcO3oUAIvFgqeXFzVr1uSv993HP6ZP5brr/PC1WJg5fSohITdqgqW4DR+fGoSE3Gizz7t6da6zXGfdn3fiBFlZWRw9mgPAgQMZAPj7++Nfp45zExaH0j1onMflhc3FVKtWzdUpyEXs2rWTxx552Pp6xrRzc5qi7v0rTwwbzldfrgOg73332rzvjUX/5Lbb2wAwZtwzeHhUZcyoOAoLz3J7mwhemDtFxaxcU776ch0Tnkuwvh43+ikAnhg6nCHDRrgqLZGrmskwDMOVCZSUlDBr1iw++OADMjMzKSoqsjl+/PjxSp9TQ1EiIlJR1ZzwT/wt+/Pscp7bG1suHXSNc/mqqMmTJzNz5kz69u1LXl4eo0aN4m9/+xtVql
Rh0qRJrk5PRETkT9OqKOdxecemSZMmvPrqq/Tq1YuaNWuSnp5u3ZeamsrSpUsrfU51bEREpKKc0bFJs1PH5jZ1bC7J5R2b7Oxs6+MUatSoQV7eub/8yMhIPv/8c1emJiIiYh9q2TiNywub66+/nqysLACaNm3K6tWrAUhLS9NNrURExC3o6d7O4/LC5q9//Str164F4Mknn2T8+PGEhITw8MMP8+ijj7o4OxERkT9PT/d2HpfPsblQamoqGzdupGnTpkRFRV3WOTTHRkREKsoZc2y2Hjhpl/OEN/S1y3nc2RVX2NiDChsREakoZxQ22+xU2NyqwuaSXHKDvhUrVlQ49nK7NiIiIlcMDSM5jUs6NlWqVGxqj8lkoqSkpNLnV8dGREQqyikdm4N26tjcoI7NpbikY1NaWuqKy4qIiLiEVjQ5zxX7rCgRERF3oRVNzuOy5d7r1q0jLCyMkyfLtufy8vK4+eab+frrr12QmYiIiFytXFbYvPzyy8TGxuLrW3a80GKxMHjwYGbNmuWCzEREROxLNx52HpcVNt999x3du3e/6PGuXbuydetWJ2YkIiLiIKpsnMZlhc2RI0fw9PS86PGqVaty9OhRJ2YkIiIiVzuXFTb16tVjx44dFz3+/fffU7duXSdmJCIi4hh6VpTzuKyw6dmzJxMmTODs2bNljhUUFDBx4kQiIyNdkJmIiIh96VlRzuOyRyocOXKEW2+9FQ8PD4YPH05oaCgmk4k9e/Ywd+5cSkpK2LZtG4GBgZU+t27QJyIiFeWMG/TtPHTaLudpfn0Nu5zHnbn0WVEHDx5kyJAh/Pvf/+Z8GiaTiW7dujFv3jwaNmx4WedVYSMiIhWlwsa9XBEPwczNzWXfvn0YhkFISAh+fn5/6nwqbEREpKKcUtj8aqfCpp4Km0u5Igobe1NhIyIiFeWMwmbXr/l2Oc/N9Xzsch535rLJwyIiIiL2pmdFiYiIOJhWNDmPChsREREHU13jPBqKEhEREbehwkZERMTRXPCsqKSkJG677TZq1qxJQEAAffr04ccff7SJMQyDSZMmERwcjLe3Nx07dmTXrl02MYWFhYwYMQJ/f398fHyIiori0KFDNjG5ubnExMRgsViwWCzExMRw4sQJm5jMzEx69+6Nj48P/v7+jBw5kqKiosp9qApQYSMiIuJgrnikwvr16xk2bBipqamsWbOG33//na5du5Kf/98VWtOmTWPmzJnMmTOHtLQ0goKC6NKlC6dOnbLGxMXFsXz5cpKTk9mwYQOnT58mMjKSkpISa0x0dDTp6emkpKSQkpJCeno6MTEx1uMlJSX06tWL/Px8NmzYQHJyMsuWLSM+Pv5PfKvl03JvERG5pjljufcPWWfscp6b6la/7PcePXqUgIAA1q9fz1133YVhGAQHBxMXF8e4ceOAc92ZwMBApk6dyuDBg8nLy6NOnTosWbKEfv36AXD48GHq16/PypUr6datG3v27CEsLIzU1FTatGkDQGpqKhEREfzwww+EhoayatUqIiMj+eWXXwgODgYgOTmZgQMHkpOTg6+v75/8Zv5LHRsREREHs9ezogoLCzl58qTNVlhYWKEc8vLyAKhVqxYAGRkZZGdn07VrV2uM2WymQ4cObNy4EYCtW7dSXFxsExMcHEzz5s2tMZs2bcJisViLGoC2bdtisVhsYpo3b24tagC6detGYWEhW7duvZyv9KJU2IiIiDiYvabYJCUlWeexnN+SkpIueX3DMBg1ahR33HEHzZs3ByA7OxugzDMZAwMDrceys7Px8vIq80SAC2MCAgLKXDMgIMAm5sLr+Pn54eXlZY2xFy33FhERcTQ7rfdOSEhg1KhRNvvMZvMl3zd8+HC+//57NmzYUDa1C26yYxhGmX0XujCmvPjLibEHdWxERESuEmazGV9fX5vtUoXNiBEjWLFiBV9++SXXX3+9dX9QUBBAmY5JTk6OtbsSFBREUVERubm5fxhz5MiRMtc9evSoTcyF18
nNzaW4uLhMJ+fPUmEjIiLiYK5YFWUYBsOHD+fjjz9m3bp1NGrUyOZ4o0aNCAoKYs2aNdZ9RUVFrF+/nnbt2gEQHh6Op6enTUxWVhY7d+60xkRERJCXl8eWLVusMZs3byYvL88mZufOnWRlZVljVq9ejdlsJjw8vFKf61K0KkpERK5pzlgVtS+nwC7naRrgXeHYoUOHsnTpUv71r38RGhpq3W+xWPD2PneeqVOnkpSUxKJFiwgJCSExMZGvvvqKH3/8kZo1awIwZMgQPvvsMxYvXkytWrUYPXo0v/32G1u3bsXDwwOAHj16cPjwYRYuXAjA448/zg033MCnn34KnFvu3bJlSwIDA5k+fTrHjx9n4MCB9OnTh9mzZ9vluzlPhY2IiFzT3LWwudjclUWLFjFw4EDgXFdn8uTJLFy4kNzcXNq0acPcuXOtE4wBzp49y5gxY1i6dCkFBQV06tSJefPmUb9+fWvM8ePHGTlyJCtWrAAgKiqKOXPmcN1111ljMjMzGTp0KOvWrcPb25vo6GhmzJhRoTlClaHCRkRErmnOKGx+tlNh06QShc21SquiREREHE1PwXQaTR4WERERt6GOjYiIiINVdkWTXD4VNiIiIg5m53vQyR/QUJSIiIi4DXVsREREHEwNG+dRYSMiIuJoqmycRoWNiIiIg2nysPNojo2IiIi4DXVsREREHEyropxHhY2IiIiDqa5xHg1FiYiIiNtQx0ZERMTBNBTlPCpsREREHE6VjbNoKEpERETchjo2IiIiDqahKOdRYSMiIuJgqmucR0NRIiIi4jbUsREREXEwDUU5jwobERERB9OzopxHhY2IiIijqa5xGs2xEREREbehjo2IiIiDqWHjPCpsREREHEyTh51HQ1EiIiLiNtSxERERcTCtinIeFTYiIiKOprrGaTQUJSIiIm5DHRsREREHU8PGeVTYiIiIOJhWRTmPhqJERETEbahjIyIi4mBaFeU8KmxEREQcTENRzqOhKBEREXEbKmxERETEbWgoSkRExME0FOU8KmxEREQcTJOHnUdDUSIiIuI21LERERFxMA1FOY8KGxEREQdTXeM8GooSERERt6GOjYiIiKOpZeM0KmxEREQcTKuinEdDUSIiIuI21LERERFxMK2Kch4VNiIiIg6musZ5VNiIiIg4miobp9EcGxEREXEb6tiIiIg4mFZFOY8KGxEREQfT5GHn0VCUiIiIuA2TYRiGq5MQ91RYWEhSUhIJCQmYzWZXpyNyxdDPhojjqLARhzl58iQWi4W8vDx8fX1dnY7IFUM/GyKOo6EoERERcRsqbERERMRtqLARERERt6HCRhzGbDYzceJETY4UuYB+NkQcR5OHRURExG2oYyMiIiJuQ4WNiIiIuA0VNiIiIuI2VNhIhZlMJj755BNXpyFyRdHPhciVRYWNAJCdnc2IESNo3LgxZrOZ+vXr07t3b9auXevq1AAwDINJkyYRHByMt7c3HTt2ZNeuXa5OS9zclf5z8fHHH9OtWzf8/f0xmUykp6e7OiURl1NhIxw4cIDw8HDWrVvHtGnT2LFjBykpKdx9990MGzbM1ekBMG3aNGbOnMmcOXNIS0sjKCiILl26cOrUKVenJm7qavi5yM/Pp3379kyZMsXVqYhcOQy55vXo0cOoV6+ecfr06TLHcnNzrX8GjOXLl1tfjx071ggJCTG8vb2NRo0aGc8995xRVFRkPZ6enm507NjRqFGjhlGzZk3j1ltvNdLS0gzDMIwDBw4YkZGRxnXXXWdUr17dCAsLMz7//PNy8ystLTWCgoKMKVOmWPedPXvWsFgsxoIFC/7kpxcp35X+c/G/MjIyDMDYvn37ZX9eEXdR1cV1lbjY8ePHSUlJ4aWXXsLHx6fM8euuu+6i761ZsyaLFy8mODiYHTt2EBsbS82aNRk7diwADz74IK1atWL+/Pl4eHiQnp6Op6cnAMOGDaOoqIivv/4aHx8fdu/eTY0aNcq9TkZGBtnZ2X
Tt2tW6z2w206FDBzZu3MjgwYP/xDcgUtbV8HMhIuVTYXON27dvH4ZhcNNNN1X6vc8995z1zw0bNiQ+Pp7333/f+h/wzMxMxowZYz13SEiINT4zM5P77ruPFi1aANC4ceOLXic7OxuAwMBAm/2BgYEcPHiw0nmLXMrV8HMhIuXTHJtrnPH/N542mUyVfu9HH33EHXfcQVBQEDVq1GD8+PFkZmZaj48aNYrHHnuMzp07M2XKFH7++WfrsZEjR/Liiy/Svn17Jk6cyPfff3/J612Yo2EYl5W3yKVcTT8XImJLhc01LiQkBJPJxJ49eyr1vtTUVPr370+PHj347LPP2L59O88++yxFRUXWmEmTJrFr1y569erFunXrCAsLY/ny5QA89thj7N+/n5iYGHbs2EHr1q2ZPXt2udcKCgoC/tu5OS8nJ6dMF0fEHq6GnwsRuQiXzvCRK0L37t0rPUlyxowZRuPGjW1iBw0aZFgslotep3///kbv3r3LPfb0008bLVq0KPfY+cnDU6dOte4rLCzU5GFxqCv95+J/afKwyH+pYyPMmzePkpISbr/9dpYtW8ZPP/3Enj17ePXVV4mIiCj3PU2bNiUzM5Pk5GR+/vlnXn31Veu/OgEKCgoYPnw4X331FQcPHuSbb74hLS2NZs2aARAXF8e///1vMjIy2LZtG+vWrbMeu5DJZCIuLo7ExESWL1/Ozp07GThwINWrVyc6Otr+X4gIV/7PBZyb5Jyens7u3bsB+PHHH0lPTy/T3RS5pri6spIrw+HDh41hw4YZN9xwg+Hl5WXUq1fPiIqKMr788ktrDBcsax0zZoxRu3Zto0aNGka/fv2MWbNmWf9lWlhYaPTv39+oX7++4eXlZQQHBxvDhw83CgoKDMMwjOHDhxtNmjQxzGazUadOHSMmJsY4duzYRfMrLS01Jk6caAQFBRlms9m46667jB07djjiqxCxutJ/LhYtWmQAZbaJEyc64NsQuTqYDOP/Z8mJiIiIXOU0FCUiIiJuQ4WNiIiIuA0VNiIiIuI2VNiIiIiI21BhIyIiIm5DhY2IiIi4DRU2IiIi4jZU2Ii4oUmTJtGyZUvr64EDB9KnTx+n53HgwAFMJhPp6elOv7aIXJtU2Ig40cCBAzGZTJhMJjw9PWncuDGjR48mPz/fodd95ZVXWLx4cYViVYyIyNWsqqsTELnWdO/enUWLFlFcXMx//vMfHnvsMfLz85k/f75NXHFxMZ6enna5psVisct5RESudOrYiDiZ2WwmKCiI+vXrEx0dzYMPPsgnn3xiHT566623aNy4MWazGcMwyMvL4/HHHycgIABfX1/uuecevvvuO5tzTpkyhcDAQGrWrMmgQYM4e/aszfELh6JKS0uZOnUqTZs2xWw206BBA1566SUAGjVqBECrVq0wmUx07NjR+r5FixbRrFkzqlWrxk033cS8efNsrrNlyxZatWpFtWrVaN26Ndu3b7fjNycicmnq2Ii4mLe3N8XFxQDs27ePDz74gGXLluHh4QFAr169qFWrFitXrsRisbBw4UI6derE3r17qVWrFh988AETJ05k7ty53HnnnSxZsoRXX32Vxo0bX/SaCQkJvP7668yaNYs77riDrKwsfvjhB+BccXL77bfzxRdfcPPNN+Pl5QXA66+/zsSJE5kzZw6tWrVi+/btxMbG4uPjw4ABA8jPzycyMpJ77rmHd955h4yMDJ588kkHf3siIhdw8UM4Ra4pAwYMMO69917r682bNxu1a9c2+vbta0ycONHw9PQ0cnJyrMfXrl1r+Pr6GmfPnrU5T5MmTYyFCxcahmEYERERxhNPPGFzvE2bNsYtt9xS7nVPnjxpmM1m4/XXXy83x4yMDAMwtm/fbrO/fv36xtKlS232vfDCC0ZERIRhGIaxcOFCo1atWkZ+fr71+Pz588s9l4iIo2goSsTJPvvsM2rUqEG1atWIiIjgrrvuYvbs2QDccMMN1KlTxxq7detWTp8+Te
3atalRo4Z1y8jI4OeffwZgz549RERE2Fzjwtf/a8+ePRQWFtKpU6cK53z06FF++eUXBg0aZJPHiy++aJPHLbfcQvXq1SuUh4iII2goSsTJ7r77bubPn4+npyfBwcE2E4R9fHxsYktLS6lbty5fffVVmfNcd911l3V9b2/vSr+ntLQUODcc1aZNG5tj54fMDMO4rHxEROxJhY2Ik/n4+NC0adMKxd56661kZ2dTtWpVGjZsWG5Ms2bNSE1N5eGHH7buS01Nveg5Q0JC8Pb2Zu3atTz22GNljp+fU1NSUmLdFxgYSL169di/fz8PPvhguecNCwtjyZIlFBQUWIunP8pDRMQRNBQlcgXr3LkzERER9OnTh3//+98cOHCAjRs38txzz/Htt98C8OSTT/LWW2/x1ltvsXfvXiZOnMiuXbsues5q1aoxbtw4xo4dyz//+U9+/vlnUlNTefPNNwEICAjA29ublJQUjhw5Ql5eHnDupn9JSUm88sor7N27lx07drBo0SJmzpwJQHR0NFWqVGHQoEHs3r2blStXMmPGDAd/QyIitlTYiFzBTCYTK1eu5K677uLRRx/lxhtvpH///hw4cIDAwEAA+vXrx4QJExg3bhzh4eEcPHiQIUOG/OF5x48fT3x8PBMmTKBZs2b069ePnJwcAKpWrcqrr77KwoULCQ4O5t577wXgscce44033mDx4sW0aNGCDh06sHjxYuvy8Bo1avDpp5+ye/duWrVqxbPPPsvUqVMd+O2IiJRlMjQwLiIiIm5CHRsRERFxGypsRERExG2osBERERG3ocJGRERE3IYKGxEREXEbKmxERETEbaiwEREREbehwkZERETchgobERERcRsqbERERMRtqLARERERt6HCRkRERNzG/wEwexooEpDSGQAAAABJRU5ErkJggg==",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 2 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# matrice de confusion\n",
|
||
"\n",
|
||
"sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Class 0', 'Class 1'], yticklabels=['Class 0', 'Class 1'])\n",
|
||
"plt.xlabel('Predicted')\n",
|
||
"plt.ylabel('Actual')\n",
|
||
"plt.title('Confusion Matrix')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"id": "311f0208-b79e-4e80-8016-075a98708f6e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 800x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# on trace la courbe ROC\n",
|
||
"\n",
|
||
"# Prédictions sur l'ensemble de test\n",
|
||
"y_pred_prob = clf.predict_proba(X_test)[:, 1]\n",
|
||
"\n",
|
||
"# Calcul des taux de faux positifs (FPR) et de vrais positifs (TPR)\n",
|
||
"fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)\n",
|
||
"\n",
|
||
"# Calcul de l'aire sous la courbe ROC (AUC)\n",
|
||
"roc_auc = auc(fpr, tpr)\n",
|
||
"\n",
|
||
"# Tracé de la courbe ROC\n",
|
||
"plt.figure(figsize=(8, 6))\n",
|
||
"plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'AUC = {roc_auc:.2f}')\n",
|
||
"plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')\n",
|
||
"plt.xlabel('Taux de faux positifs (FPR)')\n",
|
||
"plt.ylabel('Taux de vrais positifs (TPR)')\n",
|
||
"plt.title('Courbe ROC : random forest')\n",
|
||
"plt.legend(loc='lower right')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"id": "e20e9ac2-7232-4418-87f0-c7299a6d7de3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 800x600 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Calcul des valeurs de précision et de rappel à différents seuils\n",
|
||
"precision, recall, thresholds = precision_recall_curve(y_test, y_pred_prob)\n",
|
||
"\n",
|
||
"# Calcul de l'aire sous la courbe PR (AUC-PR)\n",
|
||
"average_precision = average_precision_score(y_test, y_pred_prob)\n",
|
||
"\n",
|
||
"# Tracé de la courbe PR\n",
|
||
"plt.figure(figsize=(8, 6))\n",
|
||
"plt.step(recall, precision, color='b', alpha=0.2, where='post')\n",
|
||
"plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')\n",
|
||
"plt.xlabel('Rappel')\n",
|
||
"plt.ylabel('Précision')\n",
|
||
"plt.ylim([0.0, 1.05])\n",
|
||
"plt.xlim([0.0, 1.0])\n",
|
||
"plt.title(f'Courbe PR (AUC-PR = {average_precision:.2f})')\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"id": "0633df2d-686e-4f9d-823e-e54c23f983f8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# utilisation d'une métrique plus adaptée aux modèles de marketing : courbe de lift\n",
|
||
"\n",
|
||
"# Tri des prédictions de probabilités et des vraies valeurs\n",
|
||
"sorted_indices = np.argsort(y_pred_prob)[::-1]\n",
|
||
"y_pred_prob_sorted = y_pred_prob[sorted_indices]\n",
|
||
"y_test_sorted = y_test.iloc[sorted_indices]\n",
|
||
"\n",
|
||
"# Calcul du gain cumulatif\n",
|
||
"cumulative_gain = np.cumsum(y_test_sorted) / np.sum(y_test_sorted)\n",
|
||
"\n",
|
||
"# Tracé de la courbe de lift\n",
|
||
"plt.plot(np.linspace(0, 1, len(cumulative_gain))[:10000], (cumulative_gain/np.linspace(0, 1, len(cumulative_gain)))[:10000], label='Courbe de lift')\n",
|
||
"plt.xlabel('Pourcentage des données')\n",
|
||
"plt.ylabel('Gain cumulatif')\n",
|
||
"plt.title('Courbe de Lift')\n",
|
||
"plt.legend()\n",
|
||
"plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"id": "49dc4e25-a79e-44d7-a577-524468336b96",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"52512 0.000000\n",
|
||
"87081 0.000000\n",
|
||
"2695 0.000000\n",
|
||
"51486 0.006211\n",
|
||
"15 0.012422\n",
|
||
" ... \n",
|
||
"86959 1.000000\n",
|
||
"86960 1.000000\n",
|
||
"86961 1.000000\n",
|
||
"86962 1.000000\n",
|
||
"65836 1.000000\n",
|
||
"Name: y_has_purchased, Length: 128198, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"cumulative_gain"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "5fde953b-4cce-4879-bb5e-1852511e7054",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Sauvegarde des résultats (à reprendre)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"id": "7ac941bf-7994-4baf-8d9f-13b93eed73a9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# sauvegarde\n",
|
||
"\n",
|
||
"with open('test_logit.pkl', 'wb') as file:\n",
|
||
" pickle.dump(clf4, file)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"id": "3ac3def3-00f2-4b31-b6f7-2cae5038b766",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# pour recharger le modèle sauvegardé\n",
|
||
"\n",
|
||
"# Chargement du modèle à partir du fichier\n",
|
||
"with open('test_logit.pkl', 'rb') as file:\n",
|
||
" loaded_logit = pickle.load(file)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|