2024-03-08 14:48:38 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "3415114e-9577-4487-89eb-4931620ad9f0",
"metadata": {},
"source": [
"# Predict Sales"
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 1,
2024-03-08 14:48:38 +01:00
"id": "f271eb45-1470-4764-8c2e-31374efa1fe5",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier\n",
2024-03-10 12:30:57 +01:00
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
2024-03-10 11:09:53 +01:00
"from sklearn.utils import class_weight\n",
2024-03-08 14:48:38 +01:00
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
2024-03-10 11:09:53 +01:00
"from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
"\n",
2024-03-08 14:48:38 +01:00
"import pickle\n",
2024-03-10 11:09:53 +01:00
"import warnings\n",
2024-03-08 14:48:38 +01:00
"#import scikitplot as skplt"
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 2,
2024-03-10 11:09:53 +01:00
"id": "3fecb606-22e5-4dee-8efa-f8dff0832299",
"metadata": {},
"outputs": [],
"source": [
"# Mute only the two scikit-learn warning categories named below.\n",
"# The previous blanket filterwarnings('ignore') made these targeted filters\n",
"# redundant and also hid every unrelated warning (deprecations, numerical issues).\n",
"warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
"warnings.filterwarnings(\"ignore\", category=DataConversionWarning)"
]
},
2024-03-08 14:48:38 +01:00
{
"cell_type": "markdown",
"id": "ae591854-3003-4c75-a0c7-5abf04246e81",
"metadata": {},
"source": [
"### Load Data"
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 3,
2024-03-08 14:48:38 +01:00
"id": "59dd4694-a812-4923-b995-a2ee86c74f85",
"metadata": {},
"outputs": [],
"source": [
"# Build the S3 filesystem handle; the endpoint host comes from the environment\n",
"S3_ENDPOINT_URL = \"https://{}\".format(os.environ[\"AWS_S3_ENDPOINT\"])\n",
"fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))"
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 4,
2024-03-08 14:48:38 +01:00
"id": "017f7e9a-3ba0-40fa-bdc8-51b98cc1fdb3",
"metadata": {},
"outputs": [],
"source": [
"def load_train_test(bucket=\"projet-bdc2324-team1/Generalization/sport\"):\n",
"    \"\"\"Read the train and test CSV files from S3.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    bucket : str, optional\n",
"        S3 prefix holding ``Train_set.csv`` and ``Test_set.csv``. Defaults to\n",
"        the location that used to be hard-coded in the function body.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        ``(dataset_train, dataset_test)`` as returned by ``pd.read_csv``.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Uses the notebook-level ``fs`` filesystem object created in an earlier cell.\n",
"    The former commented-out ``fillna(0)`` on 'y_has_purchased' was removed:\n",
"    the ``isna().sum()`` output below reports 0 missing values for that column\n",
"    in the training set.\n",
"    \"\"\"\n",
"    def read_csv_from_s3(file_name):\n",
"        # One reader for both files so the open/read options cannot drift apart.\n",
"        with fs.open(bucket + \"/\" + file_name, mode=\"rb\") as file_in:\n",
"            return pd.read_csv(file_in, sep=\",\")\n",
"\n",
"    dataset_train = read_csv_from_s3(\"Train_set.csv\")\n",
"    dataset_test = read_csv_from_s3(\"Test_set.csv\")\n",
"\n",
"    return dataset_train, dataset_test"
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 5,
"id": "c479b230-b4bd-4cfb-b76b-d9faf6d95772",
"metadata": {},
"outputs": [],
"source": [
"dataset_train, dataset_test = load_train_test()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c24c446d-4e1c-4ac1-a048-f0b8d8559f36",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 0\n",
"purchase_date_max 0\n",
"time_between_purchase 0\n",
"nb_tickets_internet 0\n",
"street_id 0\n",
"structure_id 222825\n",
"mcp_contact_id 70874\n",
"fidelity 0\n",
"tenant_id 0\n",
"is_partner 0\n",
"deleted_at 224213\n",
"gender 0\n",
"is_email_true 0\n",
"opt_in 0\n",
"last_buying_date 66139\n",
"max_price 66139\n",
"ticket_sum 0\n",
"average_price 66023\n",
"average_purchase_delay 66139\n",
"average_price_basket 66139\n",
"average_ticket_basket 66139\n",
"total_price 116\n",
"purchase_count 0\n",
"first_buying_date 66139\n",
"country 23159\n",
"gender_label 0\n",
"gender_female 0\n",
"gender_male 0\n",
"gender_other 0\n",
"country_fr 23159\n",
"nb_campaigns 0\n",
"nb_campaigns_opened 0\n",
"time_to_open 123159\n",
"y_has_purchased 0\n",
"dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_train.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 7,
2024-03-08 14:48:38 +01:00
"id": "825d14a3-6967-4733-bfd4-64bf61c2bd43",
"metadata": {},
"outputs": [],
"source": [
"def features_target_split(dataset_train, dataset_test):\n",
"    \"\"\"Split the train and test datasets into features and target.\n",
"\n",
"    Returns ``X_train, X_test, y_train, y_test``. Each target is selected with a\n",
"    one-element column list, i.e. it is a one-column frame holding\n",
"    'y_has_purchased' (later cells index it by that column name).\n",
"    \"\"\"\n",
"    target_l = ['y_has_purchased']\n",
"    features_l = [\n",
"        'nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers',\n",
"        'vente_internet_max', 'purchase_date_min', 'purchase_date_max',\n",
"        'time_between_purchase', 'nb_tickets_internet', 'fidelity',\n",
"        'is_email_true', 'opt_in',  #'is_partner',\n",
"        'gender_female', 'gender_male', 'gender_other',\n",
"        'nb_campaigns', 'nb_campaigns_opened',\n",
"    ]\n",
"\n",
"    def split(dataset):\n",
"        # Features first, then target, for a single dataset\n",
"        return dataset[features_l], dataset[target_l]\n",
"\n",
"    X_train, y_train = split(dataset_train)\n",
"    X_test, y_test = split(dataset_test)\n",
"    return X_train, X_test, y_train, y_test"
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 8,
2024-03-10 11:09:53 +01:00
"id": "69eaec12-b30f-4d30-a461-ea520d5cbf77",
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)"
]
},
2024-03-10 12:30:57 +01:00
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 9,
2024-03-10 12:30:57 +01:00
"id": "d039f31d-0093-46c6-9743-ddec1381f758",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-11 09:36:25 +01:00
"Shape train : (224213, 17)\n",
"Shape test : (96096, 17)\n"
2024-03-10 12:30:57 +01:00
]
}
],
"source": [
"# Report the dimensions of both feature matrices\n",
"for label, matrix in ((\"Shape train : \", X_train), (\"Shape test : \", X_test)):\n",
"    print(label, matrix.shape)"
]
},
2024-03-08 14:48:38 +01:00
{
"cell_type": "markdown",
"id": "a1d6de94-4e11-481a-a0ce-412bf29f692c",
"metadata": {},
"source": [
"### Prepare preprocessing and Hyperparameters"
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 10,
2024-03-10 11:09:53 +01:00
"id": "b808da43-c444-4e94-995a-7ec6ccd01e2d",
"metadata": {},
2024-03-10 12:30:57 +01:00
"outputs": [
{
"data": {
"text/plain": [
2024-03-11 09:36:25 +01:00
"{0.0: 0.5837086520288036, 1.0: 3.486549107420539}"
2024-03-10 12:30:57 +01:00
]
},
2024-03-11 09:36:25 +01:00
"execution_count": 10,
2024-03-10 12:30:57 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-10 11:09:53 +01:00
"source": [
"# Compute Weights\n",
"target_train = y_train['y_has_purchased']\n",
"# Class labels are computed once instead of once per dictionary entry\n",
"target_classes = np.unique(target_train)\n",
"weights = class_weight.compute_class_weight(class_weight='balanced', classes=target_classes,\n",
"                                            y=target_train)\n",
"\n",
"# Map every class label to its 'balanced' weight (same order as target_classes)\n",
"weight_dict = dict(zip(target_classes, weights))\n",
"weight_dict"
]
},
2024-03-08 14:48:38 +01:00
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 11,
2024-03-08 14:48:38 +01:00
"id": "b32a79ea-907f-4dfc-9832-6c74bef3200c",
"metadata": {},
"outputs": [],
"source": [
2024-03-11 09:36:25 +01:00
"# Columns that are standardised.\n",
"# 'opt_in' is intentionally NOT listed here: it is one-hot encoded by the\n",
"# categorical branch below, and listing it in both branches fed the model the\n",
"# same information twice (a scaled copy plus its dummy columns).\n",
"numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',\n",
"                    'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', #'is_partner',\n",
"                    'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
"\n",
"# No imputation step: the isna().sum() output above shows no missing values in\n",
"# these columns for the training set -- TODO confirm the same holds for the test set.\n",
"numeric_transformer = Pipeline(steps=[\n",
"    (\"scaler\", StandardScaler())\n",
"])\n",
"\n",
"categorical_features = ['opt_in']\n",
"\n",
"# Transformer for the categorical features\n",
"categorical_transformer = Pipeline(steps=[\n",
"    (\"onehot\", OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n",
"])\n",
"\n",
"# Route each group of columns to its transformer; columns in neither list are\n",
"# dropped (ColumnTransformer's default remainder behaviour).\n",
"preproc = ColumnTransformer(\n",
"    transformers=[\n",
"        (\"num\", numeric_transformer, numeric_features),\n",
"        (\"cat\", categorical_transformer, categorical_features)\n",
"    ]\n",
")"
2024-03-08 14:48:38 +01:00
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 12,
2024-03-08 14:48:38 +01:00
"id": "9809a688-bfbc-4685-a77f-17a8b2b79ab3",
"metadata": {},
"outputs": [],
"source": [
"# Build the scoring callables (evaluation metrics, not training losses)\n",
"balanced_scorer, recall_scorer = (\n",
"    make_scorer(metric) for metric in (balanced_accuracy_score, recall_score)\n",
")"
2024-03-08 14:48:38 +01:00
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 13,
2024-03-10 13:30:10 +01:00
"id": "4f9b2bbf-5f8a-4ac1-8e6c-51bd0dd8ac85",
"metadata": {},
"outputs": [],
"source": [
"def draw_confusion_matrix(y_test, y_pred):\n",
"    \"\"\"Show the confusion matrix of ``y_pred`` against ``y_test`` as an annotated heatmap.\"\"\"\n",
"    class_labels = ['Class 0', 'Class 1']\n",
"    # heatmap draws on the current axes and hands them back for labelling\n",
"    ax = sns.heatmap(\n",
"        confusion_matrix(y_test, y_pred),\n",
"        annot=True, fmt='d', cmap='Blues',\n",
"        xticklabels=class_labels, yticklabels=class_labels,\n",
"    )\n",
"    ax.set_xlabel('Predicted')\n",
"    ax.set_ylabel('Actual')\n",
"    ax.set_title('Confusion Matrix')\n",
"    plt.show()\n",
"\n",
"\n",
"def draw_roc_curve(X_test, y_test, model=None):\n",
"    \"\"\"Plot the ROC curve of a fitted classifier together with the random baseline.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X_test : features passed to ``predict_proba``.\n",
"    y_test : true labels; label 1 is used as the positive class.\n",
"    model : fitted estimator exposing ``predict_proba``, optional\n",
"        When omitted, the notebook-level ``pipeline`` is used, which is what the\n",
"        function previously did implicitly.\n",
"    \"\"\"\n",
"    estimator = pipeline if model is None else model\n",
"    # Second column of predict_proba is taken as the positive-class score\n",
"    y_pred_prob = estimator.predict_proba(X_test)[:, 1]\n",
"\n",
"    # False-positive rate (FPR) and true-positive rate (TPR) at every threshold\n",
"    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob, pos_label=1)\n",
"\n",
"    # Area under the ROC curve (AUC)\n",
"    roc_auc = auc(fpr, tpr)\n",
"\n",
"    plt.figure(figsize = (14, 8))\n",
"    plt.plot(fpr, tpr, label=\"ROC curve(area = %0.3f)\" % roc_auc)\n",
"    plt.plot([0, 1], [0, 1], color=\"red\",label=\"Random Baseline\", linestyle=\"--\")\n",
"    plt.grid(color='gray', linestyle='--', linewidth=0.5)\n",
"    plt.xlabel('Taux de faux positifs (FPR)')\n",
"    plt.ylabel('Taux de vrais positifs (TPR)')\n",
"    plt.title('Courbe ROC : modèle logistique')\n",
"    plt.legend(loc=\"lower right\")\n",
"    plt.show()"
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 14,
"id": "cf400c70-0192-42cc-9919-f61bae8382b0",
"metadata": {},
"outputs": [],
"source": [
"def draw_features_importance(pipeline, model):\n",
"    \"\"\"Plot the coefficients of a fitted linear step of ``pipeline`` as horizontal bars.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    pipeline : fitted sklearn Pipeline whose inspected step exposes ``coef_``.\n",
"    model : name of the step to inspect. The argument used to be ignored; it is\n",
"        now honoured when it names a step of ``pipeline``, otherwise the\n",
"        historical 'logreg' step is used.\n",
"    \"\"\"\n",
"    steps = pipeline.named_steps\n",
"    step_name = model if isinstance(model, str) and model in steps else 'logreg'\n",
"    estimator = steps[step_name]\n",
"    coefficients = estimator.coef_[0]\n",
"\n",
"    if hasattr(estimator, 'feature_names_in_'):\n",
"        feature_names = estimator.feature_names_in_\n",
"    else:\n",
"        # feature_names_in_ only exists when the step was fitted on named columns;\n",
"        # after a preprocessing step that outputs a plain array it is missing, so\n",
"        # ask the preceding steps for their output names instead.\n",
"        # Assumes the inspected step is the last one of the pipeline.\n",
"        feature_names = pipeline[:-1].get_feature_names_out()\n",
"\n",
"    # Plot one bar per feature\n",
"    plt.figure(figsize=(10, 6))\n",
"    plt.barh(feature_names, coefficients, color='skyblue')\n",
"    plt.xlabel('Importance des caractéristiques')\n",
"    plt.ylabel('Caractéristiques')\n",
"    plt.title('Importance des caractéristiques dans le modèle de régression logistique')\n",
"    plt.grid(True)\n",
"    plt.show()\n",
"\n",
"def draw_prob_distribution(X_test, model=None):\n",
"    \"\"\"Plot a 10-bin histogram of the predicted probabilities for class 1.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X_test : features passed to ``predict_proba``.\n",
"    model : fitted estimator exposing ``predict_proba``, optional\n",
"        When omitted, the notebook-level ``pipeline`` is used, which is what the\n",
"        function previously did implicitly.\n",
"    \"\"\"\n",
"    estimator = pipeline if model is None else model\n",
"    y_pred_prob = estimator.predict_proba(X_test)[:, 1]\n",
"\n",
"    plt.figure(figsize=(8, 6))\n",
"    plt.hist(y_pred_prob, bins=10, range=(0, 1), color='blue', alpha=0.7)\n",
"\n",
"    # Probabilities live in [0, 1]; the upper y limit is left to matplotlib\n",
"    plt.xlim(0, 1)\n",
"    plt.ylim(0, None)\n",
"\n",
"    plt.title('Histogramme des probabilités pour la classe 1')\n",
"    plt.xlabel('Probabilité')\n",
"    plt.ylabel('Fréquence')\n",
"    plt.grid(True)\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
2024-03-08 14:48:38 +01:00
"id": "206d9a95-7c37-4506-949b-e77d225e42c5",
"metadata": {},
"outputs": [],
"source": [
2024-03-10 13:30:10 +01:00
"# Hyperparameter grid; keys target the 'logreg' step of the pipeline\n",
"param_grid = dict(\n",
"    # 17 values of C, from 2**-10 to 2**6\n",
"    logreg__C=np.logspace(-10, 6, 17, base=2),\n",
"    logreg__penalty=['l1', 'l2'],\n",
"    # weight_dict was itself computed with the 'balanced' heuristic on the full training set\n",
"    logreg__class_weight=['balanced', weight_dict],\n",
")"
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 16,
2024-03-10 13:30:10 +01:00
"id": "7ff2f7bd-efc1-4f7c-a3c9-caa916aa2f2b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
2024-03-11 09:36:25 +01:00
"<style>#sk-container-id-1 {\n",
2024-03-10 13:30:10 +01:00
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 {\n",
2024-03-10 13:30:10 +01:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 pre {\n",
2024-03-10 13:30:10 +01:00
" padding: 0;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 input.sk-hidden--visually {\n",
2024-03-10 13:30:10 +01:00
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
2024-03-10 13:30:10 +01:00
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-container {\n",
2024-03-10 13:30:10 +01:00
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
2024-03-10 13:30:10 +01:00
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-parallel-item::after {\n",
2024-03-10 13:30:10 +01:00
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-parallel {\n",
2024-03-10 13:30:10 +01:00
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-parallel-item {\n",
2024-03-10 13:30:10 +01:00
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
2024-03-10 13:30:10 +01:00
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
2024-03-10 13:30:10 +01:00
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
2024-03-10 13:30:10 +01:00
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-serial {\n",
2024-03-10 13:30:10 +01:00
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-toggleable {\n",
2024-03-10 13:30:10 +01:00
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 label.sk-toggleable__label {\n",
2024-03-10 13:30:10 +01:00
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
2024-03-10 13:30:10 +01:00
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
2024-03-10 13:30:10 +01:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-toggleable__content {\n",
2024-03-10 13:30:10 +01:00
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
2024-03-10 13:30:10 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
2024-03-10 13:30:10 +01:00
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
2024-03-10 13:30:10 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
2024-03-10 13:30:10 +01:00
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
2024-03-10 13:30:10 +01:00
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 13:30:10 +01:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 13:30:10 +01:00
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 13:30:10 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 13:30:10 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-1 div.sk-label label {\n",
2024-03-10 13:30:10 +01:00
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
2024-03-10 13:30:10 +01:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
2024-03-10 13:30:10 +01:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-label label {\n",
2024-03-10 13:30:10 +01:00
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-label-container {\n",
2024-03-10 13:30:10 +01:00
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-estimator {\n",
2024-03-10 13:30:10 +01:00
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-estimator.fitted {\n",
2024-03-10 13:30:10 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-estimator:hover {\n",
2024-03-10 13:30:10 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
2024-03-10 13:30:10 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 a.estimator_doc_link {\n",
2024-03-10 13:30:10 +01:00
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
2024-03-10 13:30:10 +01:00
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
2024-03-10 13:30:10 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
2024-03-10 13:30:10 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
2024-03-11 09:36:25 +01:00
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
2024-03-10 13:30:10 +01:00
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
2024-03-11 09:36:25 +01:00
" 'vente_internet_max',\n",
" 'purchase_date_min',\n",
" 'purchase_date_max',\n",
" 'time_between_purchase',\n",
2024-03-10 13:30:10 +01:00
" 'nb_tickets_internet',\n",
2024-03-11 09:36:25 +01:00
" 'fidelity', 'is_email_true',\n",
" 'opt_in', 'gender_female',\n",
" 'gender_male',\n",
" 'gender_other',\n",
2024-03-10 13:30:10 +01:00
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
2024-03-11 09:36:25 +01:00
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539},\n",
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> Pipeline<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[('preprocessor',\n",
2024-03-10 13:30:10 +01:00
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
2024-03-11 09:36:25 +01:00
" 'vente_internet_max',\n",
" 'purchase_date_min',\n",
" 'purchase_date_max',\n",
" 'time_between_purchase',\n",
2024-03-10 13:30:10 +01:00
" 'nb_tickets_internet',\n",
2024-03-11 09:36:25 +01:00
" 'fidelity', 'is_email_true',\n",
" 'opt_in', 'gender_female',\n",
" 'gender_male',\n",
" 'gender_other',\n",
2024-03-10 13:30:10 +01:00
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
2024-03-11 09:36:25 +01:00
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539},\n",
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('num',\n",
2024-03-10 13:30:10 +01:00
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
2024-03-11 09:36:25 +01:00
" 'nb_suppliers', 'vente_internet_max',\n",
" 'purchase_date_min', 'purchase_date_max',\n",
" 'time_between_purchase',\n",
" 'nb_tickets_internet', 'fidelity',\n",
" 'is_email_true', 'opt_in', 'gender_female',\n",
" 'gender_male', 'gender_other', 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
2024-03-10 13:30:10 +01:00
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
2024-03-11 09:36:25 +01:00
" ['opt_in'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">num</label><div class=\"sk-toggleable__content \"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> StandardScaler<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content \"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator 
sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> LogisticRegression<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content \"><pre>LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539},\n",
2024-03-10 13:30:10 +01:00
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
2024-03-11 09:36:25 +01:00
" 'vente_internet_max',\n",
" 'purchase_date_min',\n",
" 'purchase_date_max',\n",
" 'time_between_purchase',\n",
2024-03-10 13:30:10 +01:00
" 'nb_tickets_internet',\n",
2024-03-11 09:36:25 +01:00
" 'fidelity', 'is_email_true',\n",
" 'opt_in', 'gender_female',\n",
" 'gender_male',\n",
" 'gender_other',\n",
2024-03-10 13:30:10 +01:00
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
2024-03-11 09:36:25 +01:00
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539},\n",
2024-03-10 13:30:10 +01:00
" max_iter=5000, solver='saga'))])"
]
},
2024-03-11 09:36:25 +01:00
"execution_count": 16,
2024-03-10 13:30:10 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pipeline\n",
"pipeline = Pipeline(steps=[\n",
" ('preprocessor', preproc),\n",
" ('logreg', LogisticRegression(solver='saga', class_weight = weight_dict,\n",
" max_iter=5000)) \n",
"])\n",
"\n",
"pipeline.set_output(transform=\"pandas\")"
]
},
{
"cell_type": "markdown",
"id": "ed415f60-9663-4179-877b-233faf6e1645",
"metadata": {},
"source": [
"## Baseline"
2024-03-10 11:09:53 +01:00
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 17,
2024-03-10 13:30:10 +01:00
"id": "2b467511-2ae5-4a16-a502-397c3460471d",
2024-03-10 11:09:53 +01:00
"metadata": {},
2024-03-10 12:30:57 +01:00
"outputs": [
{
"data": {
"text/html": [
2024-03-11 09:36:25 +01:00
"<style>#sk-container-id-2 {\n",
2024-03-10 12:30:57 +01:00
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 pre {\n",
2024-03-10 12:30:57 +01:00
" padding: 0;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 input.sk-hidden--visually {\n",
2024-03-10 12:30:57 +01:00
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-dashed-wrapped {\n",
2024-03-10 12:30:57 +01:00
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-container {\n",
2024-03-10 12:30:57 +01:00
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-text-repr-fallback {\n",
2024-03-10 12:30:57 +01:00
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-parallel-item::after {\n",
2024-03-10 12:30:57 +01:00
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-parallel {\n",
2024-03-10 12:30:57 +01:00
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-parallel-item {\n",
2024-03-10 12:30:57 +01:00
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
2024-03-10 12:30:57 +01:00
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
2024-03-10 12:30:57 +01:00
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
2024-03-10 12:30:57 +01:00
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-serial {\n",
2024-03-10 12:30:57 +01:00
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-toggleable {\n",
2024-03-10 12:30:57 +01:00
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
2024-03-10 12:30:57 +01:00
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-toggleable__content {\n",
2024-03-10 12:30:57 +01:00
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-toggleable__content pre {\n",
2024-03-10 12:30:57 +01:00
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
2024-03-10 12:30:57 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
2024-03-10 12:30:57 +01:00
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
2024-03-10 12:30:57 +01:00
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-2 div.sk-label label {\n",
2024-03-10 12:30:57 +01:00
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-label label {\n",
2024-03-10 12:30:57 +01:00
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-label-container {\n",
2024-03-10 12:30:57 +01:00
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-estimator {\n",
2024-03-10 12:30:57 +01:00
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-estimator.fitted {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-estimator:hover {\n",
2024-03-10 12:30:57 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 a.estimator_doc_link {\n",
2024-03-10 12:30:57 +01:00
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 a.estimator_doc_link.fitted {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 a.estimator_doc_link:hover {\n",
2024-03-10 12:30:57 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
2024-03-11 09:36:25 +01:00
"#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
2024-03-11 09:36:25 +01:00
"</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
2024-03-10 12:30:57 +01:00
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
2024-03-11 09:36:25 +01:00
" 'vente_internet_max',\n",
" 'purchase_date_min',\n",
" 'purchase_date_max',\n",
" 'time_between_purchase',\n",
2024-03-10 12:30:57 +01:00
" 'nb_tickets_internet',\n",
2024-03-11 09:36:25 +01:00
" 'fidelity', 'is_email_true',\n",
" 'opt_in', 'gender_female',\n",
" 'gender_male',\n",
" 'gender_other',\n",
2024-03-10 12:30:57 +01:00
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
2024-03-11 09:36:25 +01:00
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539},\n",
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> Pipeline<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[('preprocessor',\n",
2024-03-10 12:30:57 +01:00
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
2024-03-11 09:36:25 +01:00
" 'vente_internet_max',\n",
" 'purchase_date_min',\n",
" 'purchase_date_max',\n",
" 'time_between_purchase',\n",
2024-03-10 12:30:57 +01:00
" 'nb_tickets_internet',\n",
2024-03-11 09:36:25 +01:00
" 'fidelity', 'is_email_true',\n",
" 'opt_in', 'gender_female',\n",
" 'gender_male',\n",
" 'gender_other',\n",
2024-03-10 12:30:57 +01:00
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
2024-03-11 09:36:25 +01:00
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539},\n",
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(transformers=[('num',\n",
2024-03-10 12:30:57 +01:00
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
2024-03-11 09:36:25 +01:00
" 'nb_suppliers', 'vente_internet_max',\n",
" 'purchase_date_min', 'purchase_date_max',\n",
" 'time_between_purchase',\n",
" 'nb_tickets_internet', 'fidelity',\n",
" 'is_email_true', 'opt_in', 'gender_female',\n",
" 'gender_male', 'gender_other', 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
2024-03-10 12:30:57 +01:00
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
2024-03-11 09:36:25 +01:00
" ['opt_in'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">num</label><div class=\"sk-toggleable__content fitted\"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> StandardScaler<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">cat</label><div class=\"sk-toggleable__content fitted\"><pre>['opt_in']</pre></div> </div></div><div class=\"sk-serial\"><div 
class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> OneHotEncoder<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" ><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539},\n",
2024-03-10 12:30:57 +01:00
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
2024-03-11 09:36:25 +01:00
" 'vente_internet_max',\n",
" 'purchase_date_min',\n",
" 'purchase_date_max',\n",
" 'time_between_purchase',\n",
2024-03-10 12:30:57 +01:00
" 'nb_tickets_internet',\n",
2024-03-11 09:36:25 +01:00
" 'fidelity', 'is_email_true',\n",
" 'opt_in', 'gender_female',\n",
" 'gender_male',\n",
" 'gender_other',\n",
2024-03-10 12:30:57 +01:00
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
2024-03-11 09:36:25 +01:00
" LogisticRegression(class_weight={0.0: 0.5837086520288036,\n",
" 1.0: 3.486549107420539},\n",
2024-03-10 12:30:57 +01:00
" max_iter=5000, solver='saga'))])"
]
},
2024-03-11 09:36:25 +01:00
"execution_count": 17,
2024-03-10 12:30:57 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-10 11:09:53 +01:00
"source": [
"pipeline.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 18,
2024-03-10 11:09:53 +01:00
"id": "6356e870-0dfc-4e60-9e48-e2de5e7f9f87",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-03-11 09:36:25 +01:00
"Accuracy Score: 0.7829358141858141\n",
"F1 Score: 0.5016842256145632\n",
"Recall Score: 0.7669831994156319\n"
2024-03-10 13:30:10 +01:00
]
}
],
2024-03-10 11:09:53 +01:00
"source": [
"y_pred = pipeline.predict(X_test)\n",
"\n",
"# Calculate the F1 score\n",
2024-03-10 12:30:57 +01:00
"acc = accuracy_score(y_test, y_pred)\n",
"print(f\"Accuracy Score: {acc}\")\n",
"\n",
2024-03-10 11:09:53 +01:00
"f1 = f1_score(y_test, y_pred)\n",
2024-03-10 12:30:57 +01:00
"print(f\"F1 Score: {f1}\")\n",
"\n",
"recall = recall_score(y_test, y_pred)\n",
"print(f\"Recall Score: {recall}\")"
2024-03-10 11:09:53 +01:00
]
},
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 19,
2024-03-10 11:09:53 +01:00
"id": "09387a09-0d53-4c54-baac-f3c2a57a629a",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"data": {
2024-03-11 09:36:25 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi0AAAHFCAYAAAA+FskAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABXn0lEQVR4nO3de3yP9f/H8cfHbB8z9mkH25qcrWVRRDEq5MxIJzStfNPydWxfc/iuAzrNMZRTkijR6ktKYV9KKTFGlmMkh+VrM4eZ02xrrt8ffj71sdGm6+Oz8bx3u243u67XdV3v65MPL6/34bIYhmEgIiIiUsKVcXUDRERERIpCSYuIiIiUCkpaREREpFRQ0iIiIiKlgpIWERERKRWUtIiIiEipoKRFRERESgUlLSIiIlIqKGkRERGRUkFJi1zXtmzZwj/+8Q9q1KhBuXLlqFChAnfddRfjxo3j+PHjTr335s2bad68OTabDYvFwuTJk02/h8ViYdSoUaZf96/MnTsXi8WCxWLh22+/LXDcMAxq166NxWKhRYsWV3WP6dOnM3fu3GKd8+233162TSJS+pV1dQNEnGXWrFn069eP0NBQhg4dSlhYGHl5eWzcuJG3336bdevWsXjxYqfd/+mnn+bMmTMkJCTg4+ND9erVTb/HunXruOWWW0y/blFVrFiR2bNnF0hMVq9eza+//krFihWv+trTp0/H39+fXr16Ffmcu+66i3Xr1hEWFnbV9xWRkktJi1yX1q1bR9++fWnTpg2fffYZVqvVfqxNmzbExsaSmJjo1DZs27aN6OhoOnTo4LR7NGnSxGnXLoru3bszf/58pk2bhre3t33/7NmzCQ8P5+TJk9ekHXl5eVgsFry9vV3+mYiI86h7SK5L8fHxWCwW3nnnHYeE5SIPDw+6dOli//n8+fOMGzeO2267DavVSkBAAE8++SQHDx50OK9FixbUrVuX5ORk7rvvPsqXL0/NmjUZM2YM58+fB/7oOvn999+ZMWOGvRsFYNSoUfZf/9nFc/bv32/ft2rVKlq0aIGfnx+enp5UrVqVRx55hLNnz9pjCuse2rZtGw8++CA+Pj6UK1eO+vXr8/777zvEXOxG+eijj3jhhRcIDg7G29ub1q1bs2vXrqJ9yMDjjz8OwEcffWTfl5WVxaJFi3j66acLPefll1+mcePG+Pr64u3tzV133cXs2bP587tbq1evzvbt21m9erX987tYqbrY9nnz5hEbG0vlypWxWq3s2bOnQPfQ0aNHqVKlCk2bNiUvL89+/R07duDl5UVUVFSRn1VEXE9Ji1x38vPzWbVqFQ0bNqRKlSpFOqdv374MHz6cNm3asGTJEl599VUSExNp2rQpR48edYhNT0+nZ8+ePPHEEyxZsoQOHToQFxfHhx9+CECnTp1Yt24dAI8++ijr1q2z/1xU+/fvp1OnTnh4ePDee++RmJjImDFj8PLyIjc397Ln7dq1i6ZNm7J9+3beeustPv30U8LCwujVqxfjxo0rEP/8889z4MAB3n33Xd555x1++eUXOnfuTH5+fpHa6e3tzaOPPsp7771n3/fRRx9RpkwZunfvftln69OnD5988gmffvopDz/8MAMHDuTVV1+1xyxevJiaNWvSoEED++d3aVdeXFwcqampvP3223zxxRcEBAQUuJe/vz8JCQkkJyczfPhwAM6ePctjjz1G1apVefvtt4v0nCJSQhgi15n09HQDMHr06FGk+J07dxqA0a9fP4f969evNwDj+eeft+9r3ry5ARjr1693iA0LCzPatWvnsA8w+vfv77Bv5MiRRmFfuzlz5hiAsW/fPsMwDGPhwoUGYKSkpFyx7YAxcuRI+889evQwrFarkZqa6hDXoUMHo3z58saJEycMwzCMb775xgCMjh07OsR98sknBmCsW7fuive92N7k5GT7tbZt22YYhmHcfffdRq9evQzDMIzbb7/daN68+WWvk5+fb+Tl5RmvvP
KK4efnZ5w/f95+7HLnXrzf/ffff9lj33zzjcP+sWPHGoCxePFi46mnnjI8PT2NLVu2XPEZRaTkUaVFbnjffPMNQIEBn/fccw916tTh66+/dtgfFBTEPffc47Dvjjvu4MCBA6a1qX79+nh4ePDss8/y/vvvs3fv3iKdt2rVKlq1alWgwtSrVy/Onj1boOLz5y4yuPAcQLGepXnz5tSqVYv33nuPrVu3kpycfNmuoYttbN26NTabDTc3N9zd3RkxYgTHjh0jIyOjyPd95JFHihw7dOhQOnXqxOOPP87777/PlClTqFevXpHPF5GSQUmLXHf8/f0pX748+/btK1L8sWPHALj55psLHAsODrYfv8jPz69AnNVqJTs7+ypaW7hatWrx1VdfERAQQP/+/alVqxa1atXizTffvOJ5x44du+xzXDz+Z5c+y8XxP8V5FovFwj/+8Q8+/PBD3n77bW699Vbuu+++QmM3bNhA27ZtgQuzu3744QeSk5N54YUXin3fwp7zSm3s1asX586dIygoSGNZREopJS1y3XFzc6NVq1Zs2rSpwEDawlz8izstLa3AsUOHDuHv729a28qVKwdATk6Ow/5Lx80A3HfffXzxxRdkZWWRlJREeHg4MTExJCQkXPb6fn5+l30OwNRn+bNevXpx9OhR3n77bf7xj39cNi4hIQF3d3e+/PJLunXrRtOmTWnUqNFV3bOwAc2Xk5aWRv/+/alfvz7Hjh1jyJAhV3VPEXEtJS1yXYqLi8MwDKKjowsduJqXl8cXX3wBwAMPPABgH0h7UXJyMjt37qRVq1amteviDJgtW7Y47L/YlsK4ubnRuHFjpk2bBsCPP/542dhWrVqxatUqe5Jy0QcffED58uWdNh24cuXKDB06lM6dO/PUU09dNs5isVC2bFnc3Nzs+7Kzs5k3b16BWLOqV/n5+Tz++ONYLBaWL1/O6NGjmTJlCp9++unfvraIXFtap0WuS+Hh4cyYMYN+/frRsGFD+vbty+23305eXh6bN2/mnXfeoW7dunTu3JnQ0FCeffZZpkyZQpkyZejQoQP79+/npZdeokqVKvzrX/8yrV0dO3bE19eX3r1788orr1C2bFnmzp3Lb7/95hD39ttvs2rVKjp16kTVqlU5d+6cfYZO69atL3v9kSNH8uWXX9KyZUtGjBiBr68v8+fPZ+nSpYwbNw6bzWbas1xqzJgxfxnTqVMnJk6cSGRkJM8++yzHjh1jwoQJhU5Lr1evHgkJCXz88cfUrFmTcuXKXdU4lJEjR/L999+zYsUKgoKCiI2NZfXq1fTu3ZsGDRpQo0aNYl9TRFxDSYtct6Kjo7nnnnuYNGkSY8eOJT09HXd3d2699VYiIyMZMGCAPXbGjBnUqlWL2bNnM23aNGw2G+3bt2f06NGFjmG5Wt7e3iQmJhITE8MTTzzBTTfdxDPPPEOHDh145pln7HH169dnxYoVjBw5kvT0dCpUqEDdunVZsmSJfUxIYUJDQ1m7di3PP/88/fv3Jzs7mzp16jBnzpxirSzrLA888ADvvfceY8eOpXPnzlSuXJno6GgCAgLo3bu3Q+zLL79MWloa0dHRnDp1imrVqjmsY1MUK1euZPTo0bz00ksOFbO5c+fSoEEDunfvzpo1a/Dw8DDj8UTEySyG8acVnURERERKKI1pERERkVJBSYuIiIiUCkpaREREpFRQ0iIiIiKlgpIWERERKRWUtIiIiEipoKRFRERESoXrcnE5zwYD/jpI5Aa0etHrrm6CSIlzT03nrRR9kVl/L2VvnmrKdUorVVpERESkVLguKy0iIiIlikU1AjMoaREREXE2i8XVLbguKGkRERFxNlVaTKFPUUREREoFVVpEREScTd1DplDSIiIi4mzqHjKFPkUREREpFVRpERERcTZ1D5lCSYuIiIizqXvIFPoURUREpFRQpUVERMTZ1D1kCiUtIiIizqbuIVPoUxQREZFSQZUWERERZ1P3kCmUtIiIiDibuo
dMoaRFRETE2VRpMYVSPxERESkVVGkRERFxNnUPmUJJi4iIiLMpaTGFPkUREREpFVRpERERcbYyGohrBiUtIiIizqb
2024-03-10 13:30:10 +01:00
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2024-03-10 11:09:53 +01:00
"source": [
2024-03-10 13:30:10 +01:00
"draw_confusion_matrix(y_test, y_pred)"
2024-03-10 11:09:53 +01:00
]
},
2024-03-10 12:30:57 +01:00
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 22,
2024-03-10 12:30:57 +01:00
"id": "580b58d7-596f-4207-8c99-4365aba2bc9f",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"data": {
2024-03-11 09:36:25 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gU5frG8e+m90CAUELvvQsC0osCKiggYsOCR8WfHrGg2LGhWLAdRT12j4gooAIiKCAiiDRBioBI70kgCenJzu+PgcVhAyaQZHYz9+e6uMi8O7t5Erh3wsM77+syDMNAREREREREREQcI8DuAkREREREREREpHSpISQiIiIiIiIi4jBqCImIiIiIiIiIOIwaQiIiIiIiIiIiDqOGkIiIiIiIiIiIw6ghJCIiIiIiIiLiMGoIiYiIiIiIiIg4jBpCIiIiIiIiIiIOo4aQiIiIiIiIiIjDqCEkIiIi4ufGjx9PhQoV2Llzp92liIiIiJ9QQ0hERKQErVu3jhtuuIE6deoQFhZGVFQUbdu2ZeLEiSQnJ9tS0wcffIDL5WLlypUl+nl27NiBy+Xy/AoICKB8+fL07t2befPmnfZ5c+fOZeDAgVSqVInQ0FBq1KjByJEj2bhx42mf89NPP3HFFVeQkJBASEgIsbGxdO7cmTfffJP09PSS+PJsceLPbseOHZbxxx57jMsuu4xhw4aRk5NT4HMff/xxXC5XsdWyaNEiXC4XixYtKrbXLEjt2rW5/vrri/ScpUuX8vjjj3P06FGvx3r06EGPHj2KpTYRERF/poaQiIhICXnnnXdo164dK1as4L777mPu3LnMmDGDYcOGMXnyZG666Sa7SywVd9xxB8uWLeOnn37ihRdeYOvWrQwYMIDFixd7nTt27Fj69++P2+3mjTfeYP78+Tz22GOsWLGCtm3bMn36dK/nPPbYY3Tr1o29e/fy5JNPMn/+fD777DN69+7N448/zsMPP1waX6btJk+eTKVKlRgzZozdpRSrGTNm8MgjjxTpOUuXLmX8+PEFNoTeeOMN3njjjWKqTkRExH8F2V2AiIhIWbRs2TJuu+02+vbty8yZMwkNDfU81rdvX+655x7mzp1bqjXl5uYW6wyRwqpZsybnn38+AF26dKFBgwZ0796dd999l27dunnOmzJlCs8//zy33Xab5R/s3bp1Y8SIEXTv3p1rr72W1q1bU7duXQCmTZvGE088wU033cQ777xj+fr69+/P2LFjWbZsWSl9pfYKCgpi9uzZdpdR7Nq0aVOsr9e0adNifT0RERF/pRlCIiIiJeCZZ57B5XLx9ttvW5pBJ4SEhHDppZd6jt1uNxMnTqRx48aEhoYSHx/Pddddx549eyzPO93tM6feBnPidp6PP/6Ye+65h4SEBEJDQ/nzzz895xw5coQbbriBuLg4IiMjueSSS/jrr7+8Xvv777+nd+/exMTEEBERQZcuXfjhhx/O4rtiat++PQAHDx60jD/99NOUL1+eF154wes5kZGRvPbaa2RkZDBp0iTP+BNPPEH58uV59dVXC2x2RUdH069fv7Ou9VQ9evSgefPmLFu2jM6dOxMeHk7t2rV5//33AZg9ezZt27YlIiKCFi1aFNj0W7JkCb179yY6OpqIiAg6d+5cYCPnl19+oUuXLoSFhVGtWjXGjRtHbm5ugXVNnTqVTp06ERkZSVRUFP369WPVqlWF+ppOfe6FF17ImjVrivBdsfr666/p1KkTERERREdH07dv3wKbcl999RUtW7YkNDSUunXr8sorrxR4W9upf+fdbjdPPfUUjRo1Ijw8nHLlytGyZUteeeUVwLw17r777gOgTp06nlsWT9zaVtAtY/v27eOKK64gOjqa2NhYhg8fzi+//ILL5eKDDz7wnHe6282uv/56ateubRnLycnhqaee8mS6UqVK3HDDDRw+fLhw30gREZESpoaQiIhIMcvPz2fBggW0a9eOGjVqFOo5t9
12G/fffz99+/bl66+/5sknn2Tu3Ll07tyZxMTEs65l3Lhx7Nq1i8mTJ/PNN98QHx/veeymm24iICCATz/9lJdffplff/2VHj16WG6z+eSTT+jXrx8xMTF8+OGHfP7558TFxXHhhReedVNo+/btADRs2NAztn//fjZs2EC/fv2IiIgo8HmdOnUiPj6e+fPne56zfv36Mz6nME40zx5//PFCnX/gwAFuuOEGRo0axVdffUWLFi248cYbeeKJJxg3bhxjx47lyy+/JCoqisGDB7Nv3z7Pc3/88Ud69epFSkoK7777LlOmTCE6OppLLrmEqVOnes7buHEjvXv35ujRo3zwwQdMnjyZNWvW8NRTT3nV88wzzzBixAiaNm3K559/zkcffURqaipdu3Zl/fr1Z/xaTn3uxx9/TFpaGl27dj3jmk2n8+mnnzJo0CBiYmKYMmUK7777LkeOHKFHjx4sWbLEc97cuXO5/PLLqVChAlOnTmXixIlMmTKFDz/88B8/x8SJE3n88ccZMWIEs2fPZurUqdx0002ev7ejRo3ijjvuAGD69OksW7aMZcuW0bZt2wJfLzMzkz59+jBv3jwmTJjAtGnTqFKlCsOHDy/y13+C2+1m0KBBPPvss1x11VXMnj2bZ599lvnz59OjRw8yMzPP+rVFRESKjSEiIiLF6sCBAwZgXHnllYU6f9OmTQZgjB492jK+fPlyAzAefPBBz1itWrWMkSNHer1G9+7dje7du3uOFy5caABGt27dvM59//33DcC47LLLLOM///yzARhPPfWUYRiGkZ6ebsTFxRmXXHKJ5bz8/HyjVatWRocOHc74dW3fvt0AjOeee87Izc01srKyjN9++83o1KmTUbVqVWP79u2ec3/55RcDMB544IEzvmbHjh2N8PDwIj3nnyxatMgIDAw0xo8f/4/ndu/e3QCMlStXesaSkpKMwMBAIzw83Ni7d69n/LfffjMA49VXX/WMnX/++UZ8fLyRlpbmGcvLyzOaN29uVK9e3XC73YZhGMbw4cON8PBw48CBA5bzGjdubACe792uXbuMoKAg4/bbb7fUmZqaasTHxxtDhw71jD322GPG33/0O/HcO+64w/LctLQ0o0qVKsYVV1xxxu/Fib9jCxcuNAzD/HtRrVo1o0WLFkZ+fr7l9eLj443OnTt7xs477zyjRo0aRnZ2tuW8ChUqGKf+eHrq3/mLL77YaN269Rlre/755y3fp787NStvvvmmARhfffWV5bybb77ZAIz333//tM89YeTIkUatWrU8x1OmTDEA48svv7Sct2LFCgMw3njjjTPWLyIiUho0Q0hERMRmCxcuBPC6FaxDhw40adLknG7PGjJkyGkfu/rqqy3HnTt3platWp56li5dSnJyMiNHjiQvL8/zy+12c9FFF7FixYpC7eB1//33ExwcTFhYGK1bt2b9+vV88803XrfYFIZhGMW+DlL37t3Jy8vj0UcfLdT5VatWpV27dp7juLg44uPjad26NdWqVfOMN2nSBMCzFXx6ejrLly9n6NChREVFec4LDAzk2muvZc+ePWzevBkw/0707t2bypUrW847ddbKd999R15eHjfeeKNlPDo6mp49e/Ljjz+e9us48dzrrrvO8ucbFhZG9+7di7x72ObNm9m3bx/XXnstAQEnf8SMiopiyJAh/PLLL2RkZJCens7KlSsZPHgwISEhlvMuueSSf/w8HTp0YO3atYwePZrvvvuO1NTUItV5qoULFxIdHW25hRPgqquuOuvXnDVrFuXKleOSSy6xfG9bt25NlSpVSnxnNhERkcLQotIiIiLFrGLFikRERHhujfonSUlJgNloOFW1atU8DYWzUdBrnlClSpUCx07Uc2KNn6FDh572NZKTk4mMjDxjDf/+97+55ppryM7O5pdffuHhhx9m0KBBrF27lgoVKgDmwtPAP37Pdu7c6bkNr7DPKW5xcXFeYyEhIV7jJ5odWVlZgLlmk2EYp/1zhp
N/F5KSkk775/N3J/6MOnfu7HXuiebd6Zx47nnnnVfg439v6hTGP/09drvdnu+BYRiWZtcJBY2daty4cURGRvLJJ58
2024-03-10 13:30:10 +01:00
"text/plain": [
"<Figure size 1400x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2024-03-10 12:30:57 +01:00
"source": [
2024-03-10 13:30:10 +01:00
"draw_roc_curve(X_test, y_test)"
2024-03-10 12:30:57 +01:00
]
},
2024-03-11 09:36:25 +01:00
{
"cell_type": "code",
"execution_count": 21,
"id": "ca5d0a55-adbb-47a0-a4c8-6af9ca75ca9d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABA4AAAIjCAYAAACDPFmSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD88klEQVR4nOzdd1gUV/s38O/SWTqIFAMigogFxSCKFQsW1IglxJIgtoSosaCixAaGKBYs0SfGJCrGaExTY8GCBWPsUVGfYCUi+ogSNYCK4sLO+wfvzo+VHVgQBOX7uS6vZGfOnHOf2ZuBPXvmjEwQBAFERERERERERBroVHUARERERERERFR9ceCAiIiIiIiIiCRx4ICIiIiIiIiIJHHggIiIiIiIiIgkceCAiIiIiIiIiCRx4ICIiIiIiIiIJHHggIiIiIiIiIgkceCAiIiIiIiIiCRx4ICIiIiIiIiIJHHggIiIiF7K3r17oa+vj23btlV1KESVRqFQoFu3bvDz84NCoajqcKrU0KFD4eHhgfv371d1KET0inDggIioAsTHx0Mmk+HPP/+s6lDKbdOmTVi2bFlVh1Hp/P394e/vX9VhVHspKSmIiopCWlpaieVu376N999/H8uXL0dQUJDW9UdFRUEmk5UpptzcXERFRSEpKanYPtXPYGnxvq7Kc75elbS0NMhkMsTHx1d1KBUmNDQULi4uatv09fXx66+/4tmzZ5gyZYrksa/qGqMpxlfhq6++woEDB7Bnzx7UqlXrlbdf0WQyGaKiol55u6/qmlWe/iUkJEge4+LigtDQ0JeOi14/HDggIiIANWfggLSTkpKC6OjoEv+ozc/Px3vvvYcPP/wQY8aMKVP9o0aNwvHjx8t0TG5uLqKjozUOHPTq1QvHjx+Hg4NDmeokKgszMzMkJCRg+/bt+Omnn6o6nFfu7NmzmDVrFhISElCvXr2qDqdCHD9+HKNGjarqMCpNefqXkJCA6Ohojfu2bt2KWbNmVURo9JrRq+oAiIioauXm5kIul1d1GPQSquo91NPTw9GjR8t0jCrWt956C2+99VaFxWJrawtbW9sKq49IioODA27cuFHVYbwSL15bWrRogX/++adS2nr69CmMjIxe+cya1q1bv9L2XrWK7p+3t3eF1kevD844ICKqJKGhoTA1NcXly5fRvXt3mJiYwMHBAbGxsQCAEydOoF27djAxMUGDBg2wfv16teNV0xgTExMxfPhwWFtbw8TEBH369MHff/9drL21a9eiWbNmMDIygrW1Nfr164dLly5pjOnixYvo1q0bzMzM0KVLF/j7+2PXrl24efMmZDKZ+E8lOjoarVq1grW1NczNzdGiRQusWbMGgiCo1e/i4oLevXtjz549aNGiBYyNjdGwYUOsXbu2WLz/+9//8OGHH8LJyQkGBgZwdHTEwIEDce/ePbFMTk4OpkyZgnr16sHAwAB16tTBxIkT8eTJk1LPvyAIWLhwIerWrQsjIyO0aNECu3fv1lhW23Z+/vlntGrVChYWFpDL5XB1dcWIESNKjUWpVGLFihVo3rw5jI2NYWlpidatW2P79u1imR9//BHdunWDg4MDjI2N4enpienTpxeLQeo9BIDExET07dsXb731FoyMjODm5oaPPvpI433Ily9fxuDBg2FnZwdDQ0M4OzsjJCQEeXl5iI+Px7vvvgsA6NSpk5gPRaei79+/H126dIG5uTnkcjnatm2LAwcOqLWhml5/9uxZDBw4EFZWVqhfv77avqIOHjwIf39/2NjYwNjYGM7OzhgwYAByc3ORlpYmDgxER0eLMammzGqa9iuVAy9OJZeaMpyUlASZTFZshoM2ff/nn3/E/DY0NIStrS3atm2L/fv3F3svXrRr1y40b94choaGqFevHhYvXqyx3H/+8x906NABtWvXhomJCZo2bYqFCxcWu//e398fTZo0wenTp9G+fXsxd2NjY6FUKsVySqUSMTEx8PDwEP
PUy8sLy5cvLzVmTa5du4YhQ4agdu3aMDQ0hKenJ/7zn/9odaxMJsO4ceOwbt06MR4fHx+cOHECgiBg0aJFqFevHkxNTdG5c2dcv369WB3aXBOBwvffw8NDjPG7777TGNPz588RExODhg0biu9paGio2jVLiqZjhw8frvWH8JeJUdt2Srq2aFtvXl4eJk+eDHt7e8jlcnTo0AFnzpwpNr1d9TO3b98+jBgxAra2tpDL5cjLywNQeD308/ODiYkJTE1N0b17d5w7d06trb///huDBg2Co6MjDA0NYWdnhy5duiA5OVksU9I1RUXTVP7//ve/6Nu3L6ysrGBkZITmzZsX+x2tuj788MMPmDFjBhwdHWFubo6uXbviypUrpZ5vKdrm7TfffIMGDRrA0NAQjRo1wqZNmzTevvJi/3Jzc8Xfd6o2fHx88MMPPwAozAPVz2nRvwdU10dNtypcvnwZPXr0gFwuR61atRAWFoYdO3YUu35K3eag6fael/n9T5WDMw6IiCqRQqFA//79ERYWhqlTp2LTpk2IjIxETk4Ofv31V0ybNg1vvfUWVqxYgdDQUDRp0gRvv/22Wh0jR45EQEAANm3ahFu3bmHmzJnw9/fHhQsXYGlpCQCYP38+Pv30UwwePBjz58/HgwcPEBUVBT8/P5w+fRru7u5ifc+fP8c777yDjz76CNOnT0d+fj7eeustfPjhh0hNTcXWrVuL9SMtLQ0fffQRnJ2dARQOenzyySf43//+h9mzZ6uVPX/+PCZPnozp06fDzs4O3377LUaOHAk3Nzd06NABQOGgQcuWLaFQKPDpp5/Cy8sLDx48wN69e/Hvv//Czs4Oubm56NixI27fvi2W+euvvzB79mxcvHgR+/fvL/GbqejoaERHR2PkyJEYOHAgbt26hdGjR6OgoAAeHh5iOW3bOX78ON577z289957iIqKgpGREW7evImDBw+WmgehoaH4/vvvMXLkSMydOxcGBgY4e/as2gfVa9euITAwEBMnToSJiQkuX76MBQsW4NSpU8Xa0PQeAkBqair8/PwwatQoWFhYIC0tDUuWLEG7du1w8eJF6Ovri+9Ru3btUKtWLcydOxfu7u7IyMjA9u3b8fz5c/Tq1Qvz5s3Dp59+iv/85z9o0aIFAIgf+r///nuEhISgb9++WL9+PfT19bF69Wp0794de/fuFT9sqPTv3x+DBg1CWFiY5B99aWlp6NWrF9q3b4+1a9fC0tIS//vf/7Bnzx48f/4cDg4O2LNnD3r06IGRI0eKU29LmmWgbQ6UhbZ9/+CDD3D27Fl8/vnnaNCgAbKysnD27Fk8ePCgxPoPHDiAvn37ws/PD5s3b0ZBQQEWLlyo8cNpamoqhgwZIv5hff78eXz++ee4fPlyscG6u3fvYujQoZg8eTLmzJmDrVu3IjIyEo6OjggJCQEALFy4EFFRUZg5cyY6dOgAhUKBy5cvIysrq8znKSUlBW3atIGzszPi4uJgb2+PvXv3Yvz48bh//z7mzJlTah07d+7EuXPnEBsbC5lMhmnTpqFXr14YNmwY/v77b6xcuRLZ2dkIDw/HgAEDkJycLF4TtL0mxsfHY/jw4ejbty/i4uKQnZ2NqKgo5OXlQUfn/75fUyqV6Nu3L44cOYKIiAi0adMGN2/exKxZs3Dy5EmcOXNGctaP1LFz5syBv78//vzzTxgbG0ueh5eNUdt2AM3XlrLUO3z4cPz444+IiIhA586dkZKSgn79+iEnJ0djeyNGjECvXr2wYcMGPHnyBPr6+pg3bx5mzpyJ4cOHY+bMmXj+/DkWLVqE9u3b49SpU2jUqBEAIDAwUPz5cHZ2xv3793Hs2DExX0u7pki9X1euXEGbNm1Qu3ZtfPHFF7CxscH3338vDhJFRESolf/000/Rtm1bfPvtt8jJycG0adPQp08fXLp0Cbq6uiWe7xdpm7dff/01PvroIwwYMABLly5FdnY2oqOjxYGXkoSHh2PDhg2IiYmBt7
c3njx5gv/+97/itWnWrFl48uQJfvnlF7XbyaRuA7t37x46duwIfX19fPnll7Czs8PGjRsxbty4MvW9qJf9/U+VRCA
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"draw_features_importance(pipeline, 'logreg')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "f3782ec2-9f2c-4c23-9691-79413c4e04be",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtUAAAIiCAYAAAAHJDTKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABaS0lEQVR4nO3deXwU9f3H8feabDYHYSHEXBAiKEQwgBwFAq2AkEAgUEQhGg1gOWrVIgI/K1oLaQWqiEfxohblFooCVaSBoIhguCEogojKWRPOJEDAsCTz+4Nmy5IASSYn+3o+HvuA+c53Zz6z353w5ruzE4thGIYAAAAAlNlNVV0AAAAAUNMRqgEAAACTCNUAAACASYRqAAAAwCRCNQAAAGASoRoAAAAwiVANAAAAmESoBgAAAEwiVAMAAAAmEaoBADeUlStXymq1atmyZVVdCgA3QqgGrmPWrFmyWCzaunVrsevj4+N1yy23uLTdcsstGjp0aKn2k5aWpokTJyo7O7tshaJcHDhwQBaLRbNmzarqUipE4fG99NJL5bbNzz//XBaLRR988MF1+06cOFEWi8WlrWvXruratatLm8Vi0cSJE53Lu3fv1sSJE3XgwIFrbv/IkSN66KGH9Nprr6l///4lPIIby5WvXWUYOnRokZ+DNck333yjRx99VNHR0fLz85PFYtHnn39e1WWhhiFUAxVg6dKleu6550r1nLS0NCUnJxOqcUMbPny4NmzYcN1+GzZs0PDhw53Lu3fvVnJy8jVD9cWLF5WQkKCRI0fq0UcfLY9y4Sa2bt2qZcuWKSAgQN27d6/qclBDeVZ1AcCNqHXr1lVdQqk5HA5ZLBZ5evJjoaY5d+6cfH19q7qMEmnQoIEaNGhw3X4dO3Ys9bY9PT315ZdflqWsGoVztfwlJSVpyJAhkqQPPvhAH3/8cRVXhJqImWqgAlx5+UdBQYGef/55RUZGysfHR3Xq1FHLli312muvSbr0kfj//d//SZIaNWoki8Xi8vFjQUGBXnzxRd1+++2y2WwKCgrS4MGDdeTIEZf9GoahyZMnKyIiQt7e3mrXrp1SU1OLfLxe+HH93LlzNXbsWNWvX182m03ff/+9jh8/rkcffVTNmzdXrVq1FBQUpLvvvlvr1q1z2VfhZQRTp07VCy+8oFtuuUU+Pj7q2rWrvvvuOzkcDj399NMKCwuT3W7XPffco2PHjhV5neLj47V8+XK1bt1aPj4+atasmZYvXy7p0qU3zZo1k5+fn9q3b1/sJThbt25Vv379FBAQIG9vb7Vu3Vr//Oc/SzROP/30kwYNGiR/f3/Z7XYlJCQoMzOz2L4l2c+5c+c0btw4NWrUSN7e3goICFC7du30/vvvX7OOwkuMUlNT9fDDDysgIEB+fn7q27evfvzxR5e+Xbt2VVRUlL744gt16tRJvr6++s1vfiNJOnTokB566CEFBQXJZrOpWbNmmjZtmgoKCorss6CgQJMmTVLDhg2d75VPP/3Upc/333+vhx9+WE2aNJGvr6/q16+vvn376uuvvy72OH7++WeNGTNGISEh8vHxUZcuXbRjxw6XPsVd/lGcyy9hmDVrlgYOHChJ6tatm/P8uPwSndWrV6t79+6qXbu2fH191blz5yLHc/z4cY0cOVLh4eGy2Wy6+eab1blzZ61evfqatRTWvGPHDg0YMEC1a9eW3W7XQw89pOPHj7v0Lem5erVLxEpzrpZUSc/pa1mwYIGio6NVq1Yt1apVS3feeadmzpx5zee88cYbuuuuuxQUFCQ/Pz+1aNFCL774ohwOh0u/HTt2KD4+3vm+DQsLU58+fVxes8WLF6tDhw6y2+3y9fVV48aNne/7QqdPn3aef15eXqpfv75Gjx6t3Nzc6x7fTTcRh2Ae/80FSig/P18XL14s0m4YxnWf++KLL2rixIn64x//qLvuuksOh0Pffvut81KP4cOH69SpU5o+fbqWLFmi0NBQSVLz5s0lSb
/73e/097//XY8//rji4+N14MABPffcc/r888+1fft2BQYGSpKeffZZTZkyRSNHjtSAAQN0+PBhDR8+XA6HQ02bNi1S1/jx4xUdHa23335bN910k4KCgpwhYcKECQoJCdHZs2e1dOlSde3aVZ9++mmRa1/feOMNtWzZUm+88Yays7M1duxY9e3bVx06dJDVatW7776rgwcPaty4cRo+fLg++ugjl+fv3LlT48eP17PPPiu73a7k5GQNGDBA48eP16effqrJkyfLYrHoD3/4g+Lj47V//375+PhIktasWaNevXqpQ4cOevvtt2W327Vw4UIlJCTo3Llz17yu/fz58+rRo4d++uknTZkyRU2bNtUnn3yihISEIn1Lup8xY8Zo7ty5ev7559W6dWvl5uZq165dOnny5LXfIP81bNgwxcTEaMGCBTp8+LD++Mc/qmvXrvrqq69Up04dZ7+MjAw99NBDeuqppzR58mTddNNNOn78uDp16qQLFy7oL3/5i2655RYtX75c48aN0w8//KA333zTZV+vv/66IiIi9OqrrzqDYFxcnNauXavo6GhJl/7TUa9ePf31r3/VzTffrFOnTmn27Nnq0KGDduzYocjISJdtPvPMM2rTpo3+8Y9/KCcnRxMnTlTXrl21Y8cONW7cuESvQXH69OmjyZMn65lnntEbb7yhNm3aSJJuvfVWSdK8efM0ePBg/frXv9bs2bNltVo1Y8YM9ezZUytXrnR+nJ+UlKTt27dr0qRJatq0qbKzs7V9+/YSj88999yjQYMG6ZFHHtE333yj5557Trt379amTZtktVollfxcLa3iztWSOnXqlKSSn9NX+tOf/qS//OUvGjBggMaOHSu73a5du3bp4MGD13zeDz/8oMTERGfI3blzpyZNmqRvv/1W7777riQpNzdXMTExatSokd544w0FBwcrMzNTa9as0ZkzZyRduhQoISFBCQkJmjhxory9vXXw4EF99tlnzn2dO3dOXbp00ZEjR/TMM8+oZcuW+uabb/SnP/1JX3/9tVavXl2i/8wBphgArum9994zJF3zERER4fKciIgIY8iQIc7l+Ph4484777zmfqZOnWpIMvbv3+/SvmfPHkOS8eijj7q0b9q0yZBkPPPMM4ZhGMapU6cMm81mJCQkuPTbsGGDIcno0qWLs23NmjWGJOOuu+667vFfvHjRcDgcRvfu3Y177rnH2b5//35DktGqVSsjPz/f2f7qq68akox+/fq5bGf06NGGJCMnJ8fZFhERYfj4+BhHjhxxtqWnpxuSjNDQUCM3N9fZvmzZMkOS8dFHHznbbr/9dqN169aGw+Fw2Vd8fLwRGhrqUteV3nrrLUOS8a9//culfcSIEYYk47333iv1fqKiooz+/ftfdZ9XU/geu/z1NQzD+PLLLw1JxvPPP+9s69KliyHJ+PTTT136Pv3004YkY9OmTS7tv/vd7wyLxWLs3bvXMIz/jVtYWJhx/vx5Z7/Tp08bAQEBRo8ePa5a58WLF40LFy4YTZo0MZ588klne+H7qU2bNkZBQYGz/cCBA4bVajWGDx/ubJswYYJx5T89Xbp0cXl/GoZhSDImTJjgXF68eLEhyVizZo1Lv9zcXCMgIMDo27evS3t+fr7RqlUro3379s62WrVqGaNHj77q8V1NYc2XH7NhGMb8+fMNSca8efMMwyj5uWoYRX9GFLrytSjNuVroytfuSlc7p4vz448/Gh4eHsaDDz54zX5Dhgwp8nPwcvn5+YbD4TDmzJljeHh4GKdOnTIMwzC2bt1qSDKWLVt21ee+9NJLhiQjOzv7qn2mTJli3HTTTcaWLVtc2j/44ANDkrFixYpr1n+5q73XgOvh8w6ghObMmaMtW7YUefzyl7+87nPbt2+vnTt36tFHH9XKlSt1+vTpEu93zZo1klRk1rV9+/Zq1qyZ8yPujRs3Ki8vT4MGDXLp17Fjx6t+K//ee+8ttv3tt99WmzZt5O3tLU9PT1mtVn
366afas2dPkb69e/d2+ei0WbNmki7NLl6usP3QoUMu7Xfeeafq169fpF/Xrl1drhMubC+cHfv+++/17bff6sEHH5R
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"draw_prob_distribution(X_test)"
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "markdown",
"id": "ae8e9bd3-0f6a-4f82-bb4c-470cbdc8d6bb",
2024-03-11 09:36:25 +01:00
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
2024-03-10 11:09:53 +01:00
"source": [
"## Cross Validation"
]
},
2024-03-10 12:30:57 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 40,
2024-03-10 12:30:57 +01:00
"id": "7f0535de-34f1-4e97-b993-b429ecf0a554",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the target column as a 1-D Series; the guard makes re-running this cell a no-op\n",
"# (assumes y_train is initially a DataFrame holding 'y_has_purchased' -- TODO confirm upstream)\n",
"if isinstance(y_train, pd.DataFrame):\n",
"    y_train = y_train['y_has_purchased']"
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 41,
2024-03-10 11:09:53 +01:00
"id": "f7fca463-d7d6-493b-8329-fdfa92457f78",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best parameters found: {'logreg__C': 0.0009765625, 'logreg__class_weight': 'balanced', 'logreg__penalty': 'l1'}\n",
"Best cross-validation score: 0.65\n",
"Test set score: 0.64\n"
]
}
],
2024-03-10 11:09:53 +01:00
"source": [
"# Cross validation\n",
2024-03-10 12:30:57 +01:00
"\n",
2024-03-10 13:30:10 +01:00
"grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring=recall_scorer, error_score='raise',\n",
2024-03-10 11:09:53 +01:00
" n_jobs=-1)\n",
"\n",
"grid_search.fit(X_train, y_train)\n",
"\n",
"# Print the best parameters and the best score\n",
"print(\"Best parameters found: \", grid_search.best_params_)\n",
"print(\"Best cross-validation score: {:.2f}\".format(grid_search.best_score_))\n",
"\n",
"# Evaluate the best model on the test set\n",
"test_score = grid_search.score(X_test, y_test)\n",
"print(\"Test set score: {:.2f}\".format(test_score))"
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 43,
2024-03-10 11:09:53 +01:00
"id": "56bd7828-4de1-4166-bea0-5d5e152b9d38",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi0AAAHFCAYAAAA+FskAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABQP0lEQVR4nO3de3yP9f/H8cdnp49tbMZsM6ecMxRRjEI5M/LtgFYrEXJsOaZy6mBOIWc60FdpiUilpVJKDGHOUU5LzBxmGLbZrt8ffj7fPjbZdH189uF57/a53ey6Xtd1va5PrV693u/3dVkMwzAQERERKeDcnJ2AiIiISF6oaBERERGXoKJFREREXIKKFhEREXEJKlpERETEJahoEREREZegokVERERcgooWERERcQkqWkRERMQlqGiRW9q2bdt49tlnKV++PIUKFaJw4cLcc889jB8/nlOnTjn02lu2bKFx48b4+/tjsViYMmWK6dewWCyMGjXK9PNez/z587FYLFgsFn788ccc+w3DoFKlSlgsFpo0aXJD15g5cybz58/P1zE//vjjNXMSEdfn4ewERBzlnXfeoXfv3lStWpXBgwcTFhZGZmYmv/76K7Nnz2bdunUsXbrUYdfv2rUraWlpxMbGEhAQwB133GH6NdatW0fp0qVNP29eFSlShPfeey9HYbJ69Wr27dtHkSJFbvjcM2fOJDAwkC5duuT5mHvuuYd169YRFhZ2w9cVkYJLRYvcktatW0evXr1o3rw5y5Ytw2q12vY1b96cgQMHEhcX59AcduzYQffu3WndurXDrlG/fn2HnTsvOnXqxEcffcSMGTPw8/OzbX/vvfcIDw/nzJkzNyWPzMxMLBYLfn5+Tv9ORMRxNDwkt6QxY8ZgsViYO3euXcFyhZeXF+3bt7f9nJ2dzfjx47nzzjuxWq0EBQXx9NNPc/jwYbvjmjRpQo0aNdi4cSMPPPAAPj4+VKhQgbFjx5KdnQ38b+jk0qVLzJo1yzaMAjBq1Cjbn//uyjEHDx60bVu1ahVNmjShePHieHt7U7ZsWR599FHOnz9vi8lteGjHjh08/PDDBAQEUKhQIWrVqsUHH3xgF3NlGOXjjz/mlVdeITQ0FD8/P5o1a8aePXvy9iUDTzzxBAAff/yxbVtqaipLliyha9euuR4zevRo6tWrR7FixfDz8+Oee+7hvffe4+/vbr3jjjvYuXMnq1evtn1/VzpVV3JfsGABAwcOpFSpUlitVv74448cw0MnTpygTJkyNGjQgMzMTNv5d+3aha+vL1FRUXm+VxFxPhUtcsvJyspi1apV1KlThzJlyuTpmF69ejF06FCaN2/O8uXLef3114mLi6NBgwacOHHCLjYpKYknn3ySp556iuXLl9O6dWuGDRvGhx9+CEDbtm1Zt24dAI899hjr1q2z/ZxXBw8epG3btnh5efH+++8TFxfH2LFj8fX1JSMj45rH7dmzhwYNGrBz506mTp3KZ599RlhYGF26dGH8+PE54l9++WUOHTrEu+++y9y5c/n9999p164dWVlZecrTz8+Pxx57jPfff9+27eOPP8bNzY1OnTpd89569uzJokWL+Oyzz3jkkUfo168fr7/+ui1m6dKlVKhQgdq1a9u+v6uH8oYNG0ZiYiKzZ8/miy++ICgoKMe1AgMDiY2NZePGjQwdOhSA8+fP8/jjj1O2bFlmz56dp/sUkQLCELnFJCUlGYDRuXPnPMXv3r3bAIzevXvbbV+/fr0BGC+//LJtW+PGjQ3AWL9+vV1sWFiY0bJlS7ttgNGnTx+7bSNHjjRy+7WbN2+eARgHDhwwDMMwFi9ebABGQkLCP+YOGCNHjrT93LlzZ8NqtRqJiYl2ca1btzZ8fHyM06dPG4ZhGD/88IMBGG3atLGLW7RokQEY69at+8frXsl348aNtnPt2LHDMAzDuPfee40uXboYhmEY1atXNxo3bnzN82RlZRmZmZnGa6+9ZhQvXt
zIzs627bvWsVeu16hRo2vu++GHH+y2jxs3zgCMpUuXGs8884zh7e1tbNu27R/vUUQKHnVa5Lb3ww8/AOSY8HnfffdRrVo1vv/+e7vtISEh3HfffXbb7rrrLg4dOmRaTrVq1cLLy4sePXrwwQcfsH///jwdt2rVKpo2bZqjw9SlSxfOnz+fo+Pz9yEyuHwfQL7upXHjxlSsWJH333+f7du3s3HjxmsODV3JsVmzZvj7++Pu7o6npycjRozg5MmTJCcn5/m6jz76aJ5jBw8eTNu2bXniiSf44IMPmDZtGjVr1szz8SJSMKhokVtOYGAgPj4+HDhwIE/xJ0+eBKBkyZI59oWGhtr2X1G8ePEccVarlQsXLtxAtrmrWLEi3333HUFBQfTp04eKFStSsWJF3n777X887uTJk9e8jyv7/+7qe7ky/yc/92KxWHj22Wf58MMPmT17NlWqVOGBBx7INXbDhg20aNECuLy665dffmHjxo288sor+b5ubvf5Tzl26dKFixcvEhISorksIi5KRYvcctzd3WnatCmbNm3KMZE2N1f+w3306NEc+44cOUJgYKBpuRUqVAiA9PR0u+1Xz5sBeOCBB/jiiy9ITU0lPj6e8PBwoqOjiY2Nveb5ixcvfs37AEy9l7/r0qULJ06cYPbs2Tz77LPXjIuNjcXT05Mvv/ySjh070qBBA+rWrXtD18xtQvO1HD16lD59+lCrVi1OnjzJoEGDbuiaIuJcKlrkljRs2DAMw6B79+65TlzNzMzkiy++AOChhx4CsE2kvWLjxo3s3r2bpk2bmpbXlRUw27Zts9t+JZfcuLu7U69ePWbMmAHA5s2brxnbtGlTVq1aZStSrvjvf/+Lj4+Pw5YDlypVisGDB9OuXTueeeaZa8ZZLBY8PDxwd3e3bbtw4QILFizIEWtW9yorK4snnngCi8XC119/TUxMDNOmTeOzzz771+cWkZtLz2mRW1J4eDizZs2id+/e1KlTh169elG9enUyMzPZsmULc+fOpUaNGrRr146qVavSo0cPpk2bhpubG61bt+bgwYMMHz6cMmXK8OKLL5qWV5s2bShWrBjdunXjtddew8PDg/nz5/Pnn3/axc2ePZtVq1bRtm1bypYty8WLF20rdJo1a3bN848cOZIvv/ySBx98kBEjRlCsWDE++ugjvvrqK8aPH4+/v79p93K1sWPHXjembdu2TJo0icjISHr06MHJkyeZOHFirsvSa9asSWxsLJ988gkVKlSgUKFCNzQPZeTIkfz888+sXLmSkJAQBg4cyOrVq+nWrRu1a9emfPny+T6niDiHiha5ZXXv3p377ruPyZMnM27cOJKSkvD09KRKlSpERkbSt29fW+ysWbOoWLEi7733HjNmzMDf359WrVoRExOT6xyWG+Xn50dcXBzR0dE89dRTFC1alOeee47WrVvz3HPP2eJq1arFypUrGTlyJElJSRQuXJgaNWqwfPly25yQ3FStWpW1a9fy8ssv06dPHy5cuEC1atWYN29evp4s6ygPPfQQ77//PuPGjaNdu3aUKlWK7t27ExQURLdu3exiR48ezdGjR+nevTtnz56lXLlyds+xyYtvv/2WmJgYhg8fbtcxmz9/PrVr16ZTp06sWbMGLy8vM25PRBzMYhh/e6KTiIiISAGlOS0iIiLiElS0iIiIiEtQ0SIiIiIuQUWLiIiIuAQVLSIiIuISVLSIiIiIS1DRIiIiIi7hlny4nHftvtcPErkNLV84ytkpiBQ4zas55p1cf2fWf5cubJluynlclTotIiIi4hJuyU6LiIhIgWJRj8AMKlpEREQczWJxdga3BBUtIiIijqZOiyn0LYqIiIhLUKdFRETE0TQ8ZAoVLSIiIo6m4SFT6FsUERERl6BOi4iIiKNpeMgUKlpEREQcTcNDptC3KCIiIi5BnRYRERFH0/CQKVS0iIiIOJqGh0yhb1FERERcgjotIiIijqbhIVOoaBEREXE0DQ+ZQkWLiI
iIo6nTYgqVfiIiIuIS1GkRERFxNA0PmUJFi4iIiKOpaDGFvkURERFxCeq0iIiIOJqbJuKaQUWLiIiIo2l4yBT6FkV
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"y_pred = grid_search.predict(X_test)\n",
"\n",
"draw_confusion_matrix(y_test, y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "319fe0eb-4d4a-492c-bd50-3f08ab483021",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUddrG8e+k904ooSahSheBJIggioINBUTWvuqqrLr2tfeKBdtrXde6IqKIq7IIFsQA0hEBBRJ6JwnpPXPePw4MHAcwgSRnJnN/rotL5pwzyQNy54SH33l+DsMwDERERERERERExGf42V2AiIiIiIiIiIg0LjWERERERERERER8jBpCIiIiIiIiIiI+Rg0hEREREREREREfo4aQiIiIiIiIiIiPUUNIRERERERERMTHqCEkIiIiIiIiIuJj1BASEREREREREfExagiJiIiIiIiIiPgYNYREREREvNzDDz9MfHw8mzdvtrsUERER8RJqCImIiDSglStXcuWVV9KhQwdCQkKIiIigb9++TJw4kby8PFtqevfdd3E4HCxZsqRBP8+mTZtwOByuH35+fsTGxjJs2DBmzZp1xPfNnDmTs846i2bNmhEcHEybNm24/PLLWbNmzRHf89NPP3HhhReSlJREUFAQ0dHRpKen89prr1FSUtIQvzxbHPh/t2nTJsvxBx98kPPPP5+xY8dSWVl52Pc+9NBDOByOeqtlzpw5OBwO5syZU28f83Dat2/PFVdcUaf3zJ8/n4ceeoj8/Hy3c0OGDGHIkCH1UpuIiIg3U0NIRESkgbz11luceOKJLF68mDvuuIOZM2fy+eefM3bsWF5//XWuuuoqu0tsFDfeeCMLFizgp59+4tlnn2X9+vWMHDmSuXPnul175513MmLECJxOJ6+++iqzZ8/mwQcfZPHixfTt25dp06a5vefBBx9k8ODBbN++nUcffZTZs2fz8ccfM2zYMB566CHuu+++xvhl2u7111+nWbNm3HLLLXaXUq8+//xz7r///jq9Z/78+Tz88MOHbQi9+uqrvPrqq/VUnYiIiPcKsLsAERGRpmjBggVcf/31nH766UyfPp3g4GDXudNPP53bbruNmTNnNmpNVVVV9bpCpLbatm3LwIEDAcjIyKBjx46ccsopvP322wwePNh13eTJk3nmmWe4/vrrLX9hHzx4MOPHj+eUU07h0ksvpXfv3iQnJwMwdepUHnnkEa666ireeusty69vxIgR3HnnnSxYsKCRfqX2CggI4Ouvv7a7jHrXp0+fev143bp1q9ePJyIi4q20QkhERKQBPPHEEzgcDt58801LM+iAoKAgzj33XNdrp9PJxIkT6dKlC8HBwSQmJnLZZZexbds2y/uO9PjMHx+DOfA4zwcffMBtt91GUlISwcHBZGVlua7Zt28fV155JXFxcYSHh3POOeewYcMGt4/97bffMmzYMKKioggLCyMjI4PvvvvuGH5XTP369QNg9+7dluOPP/44sbGxPPvss27vCQ8P5+WXX6a0tJRJkya5jj/yyCPExsby0ksvHbbZFRkZyfDhw4+51j8aMmQI3bt3Z8GCBaSnpxMaGkr79u155513APj666/p27cvYWFh9OjR47BNv8zMTIYNG0ZkZCRhYWGkp6cftpHz888/k5GRQUhICK1ateLuu++mqqrqsHVNmTKFtLQ0wsPDiYiIYPjw4SxdurRWv6Y/vveMM85g+fLldfhdsfrvf/9LWloaYWFhREZGcvrppx+2KffFF1/Qs2dPgoODSU5O5sUXXzzsY21//DPvdDp57LHH6Ny5M6GhocTExNCzZ09efPFFwHw07o477gCgQ4cOrkcWDzzadrhHxnbs2MGFF15IZGQk0dHRjBs3jp9//hmHw8G7777ruu5Ij5tdccUVtG/f3nKssrKSxx57zJXpZs2aceWVV7J3797a/UaKiIg0MDWERERE6llNTQ3ff/89J554Im3atK
nVe66//nr++c9/cvrpp/Pf//6XRx99lJkzZ5Kenk5OTs4x13L33XezZcsWXn/9db788ksSExNd56666ir8/Pz46KOPeOGFF1i0aBFDhgyxPGbz4YcfMnz4cKKionjvvff45JNPiIuL44wzzjjmptDGjRsB6NSpk+vYzp07Wb16NcOHDycsLOyw70tLSyMxMZHZs2e73rNq1aqjvqc2DjTPHnrooVpdv2vXLq688kquvvpqvvjiC3r06MFf//pXHnnkEe6++27uvPNOPvvsMyIiIhg1ahQ7duxwvffHH3/k1FNPpaCggLfffpvJkycTGRnJOeecw5QpU1zXrVmzhmHDhpGfn8+7777L66+/zvLly3nsscfc6nniiScYP3483bp145NPPuH999+nsLCQk08+mVWrVh311/LH937wwQcUFRVx8sknH3Vm05F89NFHnHfeeURFRTF58mTefvtt9u3bx5AhQ8jMzHRdN3PmTC644ALi4+OZMmUKEydOZPLkybz33nt/+jkmTpzIQw89xPjx4/n666+ZMmUKV111levP7dVXX82NN94IwLRp01iwYAELFiygb9++h/14ZWVlnHbaacyaNYsnn3ySqVOn0qJFC8aNG1fnX/8BTqeT8847j6eeeoq//OUvfP311zz11FPMnj2bIUOGUFZWdswfW0REpN4YIiIiUq927dplAMZFF11Uq+t/++03AzAmTJhgOb5w4UIDMO655x7XsXbt2hmXX36528c45ZRTjFNOOcX1+ocffjAAY/DgwW7XvvPOOwZgnH/++Zbj8+bNMwDjscceMwzDMEpKSoy4uDjjnHPOsVxXU1Nj9OrVy+jfv/9Rf10bN240AOPpp582qqqqjPLycmPFihVGWlqa0bJlS2Pjxo2ua3/++WcDMO66666jfswBAwYYoaGhdXrPn5kzZ47h7+9vPPzww3967SmnnGIAxpIlS1zHcnNzDX9/fyM0NNTYvn276/iKFSsMwHjppZdcxwYOHGgkJiYaRUVFrmPV1dVG9+7djdatWxtOp9MwDMMYN26cERoaauzatctyXZcuXQzA9Xu3ZcsWIyAgwPj73/9uqbOwsNBITEw0xowZ4zr24IMPGod+63fgvTfeeKPlvUVFRUaLFi2MCy+88Ki/Fwf+jP3www+GYZh/Llq1amX06NHDqKmpsXy8xMREIz093XXspJNOMtq0aWNUVFRYrouPjzf++O3pH//Mn3322Ubv3r2PWtszzzxj+X061B+z8tprrxmA8cUXX1iuu+aaawzAeOedd4743gMuv/xyo127dq7XkydPNgDjs88+s1y3ePFiAzBeffXVo9YvIiLSGLRCSERExGY//PADgNujYP3796dr167H9XjW6NGjj3ju4osvtrxOT0+nXbt2rnrmz59PXl4el19+OdXV1a4fTqeTM888k8WLF9dqB69//vOfBAYGEhISQu/evVm1ahVffvml2yM2tWEYRr3PQTrllFOorq7mgQceqNX1LVu25MQTT3S9jouLIzExkd69e9OqVSvX8a5duwK4toIvKSlh4cKFjBkzhoiICNd1/v7+XHrppWzbto21a9cC5p+JYcOG0bx5c8t1f1y18s0331BdXc1f//pXy/HIyEiGDh3Kjz/+eMRfx4H3XnbZZZb/vyEhIZxyyil13j1s7dq17Nixg0svvRQ/v4PfYkZERDB69Gh+/vlnSktLKSkpYcmSJYwaNYqgoCDLdeecc86ffp7+/fvzyy+/MGHCBL755hsKCwvrVOcf/fDDD0RGRloe4QT4y1/+cswf86uvviImJoZzzjnH8nvbu3dvWrRo0eA7s4mIiNSGhkqLiIjUs4SEBMLCwlyPRv2Z3NxcwGw0/FGrVq1cDYVjcbiPeUCLFi0Oe+xAPQdm/IwZM+aIHyMvL4/w8PCj1vCPf/yDSy65hIqKCn7++Wfuu+8+zjvvPH755Rfi4+MBc/A08Ke/Z5s3b3Y9hlfb99S3uLg4t2NBQU
Fuxw80O8rLywFzZpNhGEf8/wwH/yzk5uYe8f/PoQ78P0pPT3e79kDz7kgOvPekk0467PlDmzq18Wd/jp1Op+v3wDA
"text/plain": [
"<Figure size 1400x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"draw_roc_curve(X_test, y_test)"
]
},
2024-03-11 09:36:25 +01:00
{
"cell_type": "markdown",
"id": "ab122f66-1591-43ea-a364-2564f09b2bb3",
"metadata": {},
"source": [
"# Segmentation du score de prédiction"
]
},
2024-03-10 13:30:10 +01:00
{
"cell_type": "code",
2024-03-11 09:36:25 +01:00
"execution_count": 61,
"id": "279e18c7-29d8-4328-963a-18babd13c2c8",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABA4AAAIjCAYAAACDPFmSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD8YklEQVR4nOzdd1gUV/s38O8ufekgUgyICCIWFIMoVixYUCOWYEsQW0LUWFBRYgNDFAuW6BNjEhVjNKapsaCIBWPsUVGfYIlERB9RogZQUVzYef/w3fm5sgtLE2W/n+vySubMmTP3zD0M7NkzZySCIAggIiIiIiIiIlJDWt0BEBEREREREdHrix0HRERERERERKQROw6IiIiIiIiISCN2HBARERERERGRRuw4ICIiIiIiIiKN2HFARERERERERBqx44CIiIiIiIiINGLHARERERERERFpxI4DIiIiIiIiItKIHQdERERUIUlJSTAwMMD27durOxSiKiOXy9GtWzf4+/tDLpdXdzjVatiwYfD09MS9e/eqOxQiekXYcUBEVAkSEhIgkUjwxx9/VHco5bZ582YsX768usOocgEBAQgICKjuMF57aWlpiI6ORkZGRon1bt26hffeew8rVqxAcHCw1u1HR0dDIpGUKab8/HxER0cjJSWl2Drlz2Bp8b6pynO+XpWMjAxIJBIkJCRUdyiVJiwsDK6uriplBgYG+OWXX/D06VNMnTpV47av6h6jLsZX4csvv8SBAwewd+9e1KpV65Xvv7JJJBJER0e/8v2+qntWeY4vMTFR4zaurq4ICwurcFz05mHHARERAdCdjgPSTlpaGmJiYkr8o7awsBCDBg3CBx98gLFjx5ap/dGjR+P48eNl2iY/Px8xMTFqOw569eqF48ePw9HRsUxtEpWFubk5EhMTsWPHDvz444/VHc4rd/bsWcyePRuJiYmoV69edYdTKY4fP47Ro0dXdxhVpjzHl5iYiJiYGLXrtm3bhtmzZ1dGaPSG0a/uAIiIqHrl5+dDJpNVdxhUAdWVQ319fRw9erRM2yhjfeutt/DWW29VWix2dnaws7OrtPaINHF0dMT169erO4xX4uV7S4sWLfDPP/9Uyb6ePHkCY2PjVz6ypnXr1q90f69aZR+fj49PpbZHbw6OOCAiqiJhYWEwMzPD5cuX0b17d5iamsLR0RFxcXEAgBMnTqBdu3YwNTVFgwYNsGHDBpXtlcMYk5OTMWLECNjY2MDU1BR9+vTB33//XWx/69atQ7NmzWBsbAwbGxv069cPly5dUhvTxYsX0a1bN5ibm6NLly4ICAjA7t27cePGDUgkEvGfUkxMDFq1agUbGxtYWFigRYsWWLt2LQRBUGnf1dUVvXv3xt69e9GiRQuYmJigYcOGWLduXbF4//e//+GDDz6As7MzDA0N4eTkhIEDB+Lu3btinby8PEydOhX16tWDoaEh6tSpg0mTJuHx48elnn9BELBo0SLUrVsXxsbGaNGiBfbs2aO2rrb7+emnn9CqVStYWlpCJpPBzc0NI0eOLDUWhUKBlStXonnz5jAxMYGVlRVat26NHTt2iHV++OEHdOvWDY6OjjAxMYGXlxdmzJhRLAZNOQSA5ORk9O3bF2+99RaMjY3h7u6ODz/8UO1zyJcvX8aQIUNgb28PIyMjuLi4IDQ0FAUFBUhISMC7774LAOjUqZN4Pbw4FH3//v3o0qULLCwsIJPJ0LZtWxw4cEBlH8rh9WfPnsXAgQNhbW2N+vXrq6x70cGDBxEQEABbW1uYmJjAxcUFAwYMQH5+PjIyMsSOgZiYGDEm5ZBZdcN+NV0DLw8l1zRkOCUlBRKJpNgIB22O/Z9//hGvbyMjI9jZ2aFt27bYv39/sVy8bPfu3WjevDmMjIxQr149LFmyRG29//znP+jQoQNq164NU1NTNG3aFIsWLSr2/H1AQACaNGmC06dPo3379uK1GxcXB4VCIdZTKBSIjY2Fp6
eneJ16e3tjxYoVpcaszl9//YWhQ4eidu3aMDIygpeXF/7zn/9ota1EIsH48eOxfv16MR5fX1+cOHECgiBg8eLFqFevHszMzNC5c2dcu3atWBva3BOB5/n39PQUY/z222/VxvTs2TPExsaiYcOGYk7DwsJU7lmaqNt2xIgRWn8Ir0iM2u6npHuLtu0WFBRgypQpcHBwgEwmQ4cOHXDmzJliw9uVP3P79u3DyJEjYWdnB5lMhoKCAgDP74f+/v4wNTWFmZkZunfvjnPnzqns6++//8bgwYPh5OQEIyMj2Nvbo0uXLkhNTRXrlHRPUVI3lP+///0v+vbtC2traxgbG6N58+bFfkcr7w/ff/89Zs6cCScnJ1hYWKBr1664cuVKqedbE22v26+//hoNGjSAkZERGjVqhM2bN6t9fOXl48vPzxd/3yn34evri++//x7A8+tA+XP64t8DyvujukcVLl++jB49ekAmk6FWrVoIDw/Hzp07i90/NT3moO7xnor8/qeqwREHRERVSC6Xo3///ggPD8e0adOwefNmREVFIS8vD7/88gumT5+Ot956CytXrkRYWBiaNGmCt99+W6WNUaNGITAwEJs3b8bNmzcxa9YsBAQE4MKFC7CysgIALFiwAJ988gmGDBmCBQsW4P79+4iOjoa/vz9Onz4NDw8Psb1nz57hnXfewYcffogZM2agsLAQb731Fj744AOkp6dj27ZtxY4jIyMDH374IVxcXAA87/T4+OOP8b///Q9z5sxRqXv+/HlMmTIFM2bMgL29Pb755huMGjUK7u7u6NChA4DnnQYtW7aEXC7HJ598Am9vb9y/fx9JSUn4999/YW9vj/z8fHTs2BG3bt0S6/z555+YM2cOLl68iP3795f4zVRMTAxiYmIwatQoDBw4EDdv3sSYMWNQVFQET09PsZ62+zl+/DgGDRqEQYMGITo6GsbGxrhx4wYOHjxY6nUQFhaG7777DqNGjcK8efNgaGiIs2fPqnxQ/euvvxAUFIRJkybB1NQUly9fxsKFC3Hq1Kli+1CXQwBIT0+Hv78/Ro8eDUtLS2RkZGDp0qVo164dLl68CAMDAzFH7dq1Q61atTBv3jx4eHggKysLO3bswLNnz9CrVy/Mnz8fn3zyCf7zn/+gRYsWACB+6P/uu+8QGhqKvn37YsOGDTAwMMCaNWvQvXt3JCUliR82lPr374/BgwcjPDxc4x99GRkZ6NWrF9q3b49169bBysoK//vf/7B37148e/YMjo6O2Lt3L3r06IFRo0aJQ29LGmWg7TVQFtoe+/vvv4+zZ8/is88+Q4MGDZCTk4OzZ8/i/v37JbZ/4MAB9O3bF/7+/tiyZQuKioqwaNEitR9O09PTMXToUPEP6/Pnz+Ozzz7D5cuXi3XW3blzB8OGDcOUKVMwd+5cbNu2DVFRUXByckJoaCgAYNGiRYiOjsasWbPQoUMHyOVyXL58GTk5OWU+T2lpaWjTpg1cXFwQHx8PBwcHJCUlYcKECbh37x7mzp1bahu7du3CuXPnEBcXB4lEgunTp6NXr14YPnw4/v77b6xatQq5ubmIiIjAgAEDkJqaKt4TtL0nJiQkYMSIEejbty/i4+ORm5uL6OhoFBQUQCr9v+/XFAoF+vbtiyNHjiAyMhJt2rTBjRs3MHv2bJw8eRJnzpzROOpH07Zz585FQEAA/vjjD5iYmGg8DxWNUdv9AOrvLWVpd8SIEfjhhx8QGRmJzp07Iy0tDf369UNeXp7a/Y0cORK9evXCxo0b8fjxYxgYGGD+/PmYNWsWRowYgVmzZuHZs2dYvHgx2rdvj1OnTqFRo0YAgKCgIPHnw8XFBffu3cOxY8fE67W0e4qmfF25cgVt2rRB7dq18fnnn8PW1hbfffed2EkUGRmpUv+TTz5B27Zt8c033yAvLw/Tp09Hnz59cOnSJejp6ZV4vl+m7XX71Vdf4cMPP8SAAQOwbNky5ObmIiYmRux4KUlERAQ2btyI2N
hY+Pj44PHjx/jvf/8r3ptmz56Nx48f4+eff1Z5nEzTY2B3795Fx44dYWBggC+++AL29vbYtGkTxo8fX6Zjf1FFf/9
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"logreg_step = pipeline.named_steps['logreg']\n",
"coefficients = logreg_step.coef_[0]\n",
"feature_names = logreg_step.feature_names_in_\n",
"\n",
"# Horizontal bar chart: one bar per feature name, bar length = fitted coefficient\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"ax.barh(feature_names, coefficients, color='skyblue')\n",
"ax.set_xlabel('Importance des caractéristiques')\n",
"ax.set_ylabel('Caractéristiques')\n",
"ax.set_title('Importance des caractéristiques dans le modèle de régression logistique')\n",
"ax.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 70,
2024-03-10 13:30:10 +01:00
"id": "210b931c-6d46-4ebf-a9c7-d1ee05c3fadf",
"metadata": {},
2024-03-10 11:09:53 +01:00
"outputs": [],
2024-03-11 09:36:25 +01:00
"source": [
"# Build the segmentation frame (customer id + model features) as an explicit copy, so the\n",
"# column assignments below never write into a selection of dataset_test\n",
"# (avoids pandas' SettingWithCopyWarning / chained-assignment ambiguity)\n",
"dataset_for_segmentation = dataset_test[['customer_id'] + numeric_features + categorical_features].copy()\n",
"\n",
"# Second column of predict_proba; the histogram cell further down labels it as class 1.\n",
"# NOTE(review): assigned positionally, so X_test rows must be in the same order as dataset_test - confirm\n",
"y_predict_proba = pipeline.predict_proba(X_test)[:, 1]\n",
"dataset_for_segmentation['prediction_probability'] = y_predict_proba\n",
"\n",
"# Score bucket: probability * 10 truncated (not rounded) to an integer,\n",
"# i.e. 0-9 for p < 1 and 10 only when p == 1.0\n",
"dataset_for_segmentation['category'] = (dataset_for_segmentation['prediction_probability'] * 10).astype(int)\n",
"\n",
"# y_pred is not defined in this cell; it must already exist in the kernel\n",
"# (presumably the class predictions for X_test - verify)\n",
"dataset_for_segmentation['prediction'] = y_pred\n",
"\n",
"def premiere_partie(chaine):\n",
"    \"\"\"Return the part of ``chaine`` that precedes its first underscore.\n",
"\n",
"    Returns None when ``chaine`` is an empty string or is not a string at all\n",
"    (None, NaN, numbers, ...), so a missing identifier yields None instead of\n",
"    raising AttributeError on ``.split``.\n",
"    \"\"\"\n",
"    # NaN is truthy, so a plain truthiness test is not enough to guard .split\n",
"    if not isinstance(chaine, str) or not chaine:\n",
"        return None\n",
"    # Only the first piece is needed, so stop splitting after the first '_'\n",
"    return chaine.split('_', 1)[0]\n",
"\n",
"# company_number = the piece of customer_id before its first underscore\n",
"dataset_for_segmentation['company_number'] = dataset_for_segmentation['customer_id'].apply(premiere_partie)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "055e47dd-9ff3-4853-a46d-d5a5edc1f361",
"metadata": {},
"outputs": [],
2024-03-10 11:09:53 +01:00
"source": []
2024-03-11 09:36:25 +01:00
},
{
"cell_type": "code",
"execution_count": 73,
"id": "969f1f92-d715-4d74-85a7-437e72838cb5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>fidelity</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" </tr>\n",
" <tr>\n",
" <th>category</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.113637</td>\n",
" <td>0.006274</td>\n",
" <td>1.586366</td>\n",
" <td>0.005821</td>\n",
" <td>0.000647</td>\n",
" <td>548.790455</td>\n",
" <td>548.773103</td>\n",
" <td>-0.977118</td>\n",
" <td>0.001585</td>\n",
" <td>0.000776</td>\n",
" <td>0.000000</td>\n",
" <td>0.000032</td>\n",
" <td>0.999968</td>\n",
" <td>13.984219</td>\n",
" <td>1.302720</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.810841</td>\n",
" <td>0.128432</td>\n",
" <td>9.611292</td>\n",
" <td>0.125295</td>\n",
" <td>0.018186</td>\n",
" <td>525.437516</td>\n",
" <td>525.275222</td>\n",
" <td>-0.729328</td>\n",
" <td>0.054312</td>\n",
" <td>0.111832</td>\n",
" <td>0.245480</td>\n",
" <td>0.495929</td>\n",
" <td>0.258591</td>\n",
" <td>18.413562</td>\n",
" <td>3.718711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.159419</td>\n",
" <td>0.339253</td>\n",
" <td>15.182143</td>\n",
" <td>0.337577</td>\n",
" <td>0.323824</td>\n",
" <td>501.529129</td>\n",
" <td>501.415505</td>\n",
" <td>-0.554439</td>\n",
" <td>0.969939</td>\n",
" <td>0.304757</td>\n",
" <td>0.392570</td>\n",
" <td>0.297258</td>\n",
" <td>0.310173</td>\n",
" <td>17.395042</td>\n",
" <td>2.608084</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2.153080</td>\n",
" <td>0.744161</td>\n",
" <td>27.820044</td>\n",
" <td>0.734881</td>\n",
" <td>0.600982</td>\n",
" <td>287.051054</td>\n",
" <td>286.675385</td>\n",
" <td>0.105360</td>\n",
" <td>1.776035</td>\n",
" <td>0.659878</td>\n",
" <td>0.288813</td>\n",
" <td>0.253244</td>\n",
" <td>0.457943</td>\n",
" <td>16.790421</td>\n",
" <td>4.173954</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2.044749</td>\n",
" <td>0.777640</td>\n",
" <td>27.353145</td>\n",
" <td>0.754549</td>\n",
" <td>0.079213</td>\n",
" <td>297.179255</td>\n",
" <td>295.019902</td>\n",
" <td>1.898178</td>\n",
" <td>0.293760</td>\n",
" <td>0.894877</td>\n",
" <td>0.666980</td>\n",
" <td>0.301424</td>\n",
" <td>0.031596</td>\n",
" <td>16.954707</td>\n",
" <td>6.060621</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>3.237988</td>\n",
" <td>0.958520</td>\n",
" <td>46.637380</td>\n",
" <td>0.807655</td>\n",
" <td>0.484785</td>\n",
" <td>387.464785</td>\n",
" <td>380.145068</td>\n",
" <td>7.111357</td>\n",
" <td>2.080397</td>\n",
" <td>1.164958</td>\n",
" <td>0.497758</td>\n",
" <td>0.259769</td>\n",
" <td>0.242473</td>\n",
" <td>27.006406</td>\n",
" <td>12.457719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>3.592233</td>\n",
" <td>1.102881</td>\n",
" <td>49.989226</td>\n",
" <td>0.878014</td>\n",
" <td>0.599906</td>\n",
" <td>268.627019</td>\n",
" <td>250.949344</td>\n",
" <td>17.539247</td>\n",
" <td>2.525994</td>\n",
" <td>1.420921</td>\n",
" <td>0.534607</td>\n",
" <td>0.304259</td>\n",
" <td>0.161134</td>\n",
" <td>14.073285</td>\n",
" <td>4.604134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>3.747016</td>\n",
" <td>1.391266</td>\n",
" <td>40.710335</td>\n",
" <td>0.914702</td>\n",
" <td>0.160990</td>\n",
" <td>309.716173</td>\n",
" <td>274.795570</td>\n",
" <td>34.796876</td>\n",
" <td>0.844250</td>\n",
" <td>1.963028</td>\n",
" <td>0.650364</td>\n",
" <td>0.263464</td>\n",
" <td>0.086172</td>\n",
" <td>26.186317</td>\n",
" <td>8.891703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>5.698276</td>\n",
" <td>1.567006</td>\n",
" <td>63.033699</td>\n",
" <td>0.907915</td>\n",
" <td>0.334248</td>\n",
" <td>326.485952</td>\n",
" <td>257.940194</td>\n",
" <td>68.425460</td>\n",
" <td>2.794279</td>\n",
" <td>2.413009</td>\n",
" <td>0.606583</td>\n",
" <td>0.251567</td>\n",
" <td>0.141850</td>\n",
" <td>30.987461</td>\n",
" <td>11.676332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14.505956</td>\n",
" <td>3.211571</td>\n",
" <td>107.288514</td>\n",
" <td>1.011628</td>\n",
" <td>0.157119</td>\n",
" <td>369.696066</td>\n",
" <td>209.280306</td>\n",
" <td>160.348544</td>\n",
" <td>3.514464</td>\n",
" <td>5.394498</td>\n",
" <td>0.669314</td>\n",
" <td>0.223766</td>\n",
" <td>0.106920</td>\n",
" <td>45.928247</td>\n",
" <td>18.241634</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2262.859155</td>\n",
" <td>45.619718</td>\n",
" <td>11051.732394</td>\n",
" <td>1.464789</td>\n",
" <td>0.154930</td>\n",
" <td>467.111875</td>\n",
" <td>31.146796</td>\n",
" <td>435.950994</td>\n",
" <td>54.295775</td>\n",
" <td>64.704225</td>\n",
" <td>0.507042</td>\n",
" <td>0.295775</td>\n",
" <td>0.197183</td>\n",
" <td>53.352113</td>\n",
" <td>26.070423</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
" mean mean mean mean \n",
"category \n",
"0 0.113637 0.006274 1.586366 0.005821 \n",
"1 0.810841 0.128432 9.611292 0.125295 \n",
"2 1.159419 0.339253 15.182143 0.337577 \n",
"3 2.153080 0.744161 27.820044 0.734881 \n",
"4 2.044749 0.777640 27.353145 0.754549 \n",
"5 3.237988 0.958520 46.637380 0.807655 \n",
"6 3.592233 1.102881 49.989226 0.878014 \n",
"7 3.747016 1.391266 40.710335 0.914702 \n",
"8 5.698276 1.567006 63.033699 0.907915 \n",
"9 14.505956 3.211571 107.288514 1.011628 \n",
"10 2262.859155 45.619718 11051.732394 1.464789 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
" mean mean mean \n",
"category \n",
"0 0.000647 548.790455 548.773103 \n",
"1 0.018186 525.437516 525.275222 \n",
"2 0.323824 501.529129 501.415505 \n",
"3 0.600982 287.051054 286.675385 \n",
"4 0.079213 297.179255 295.019902 \n",
"5 0.484785 387.464785 380.145068 \n",
"6 0.599906 268.627019 250.949344 \n",
"7 0.160990 309.716173 274.795570 \n",
"8 0.334248 326.485952 257.940194 \n",
"9 0.157119 369.696066 209.280306 \n",
"10 0.154930 467.111875 31.146796 \n",
"\n",
" time_between_purchase nb_tickets_internet fidelity gender_female \\\n",
" mean mean mean mean \n",
"category \n",
"0 -0.977118 0.001585 0.000776 0.000000 \n",
"1 -0.729328 0.054312 0.111832 0.245480 \n",
"2 -0.554439 0.969939 0.304757 0.392570 \n",
"3 0.105360 1.776035 0.659878 0.288813 \n",
"4 1.898178 0.293760 0.894877 0.666980 \n",
"5 7.111357 2.080397 1.164958 0.497758 \n",
"6 17.539247 2.525994 1.420921 0.534607 \n",
"7 34.796876 0.844250 1.963028 0.650364 \n",
"8 68.425460 2.794279 2.413009 0.606583 \n",
"9 160.348544 3.514464 5.394498 0.669314 \n",
"10 435.950994 54.295775 64.704225 0.507042 \n",
"\n",
" gender_male gender_other nb_campaigns nb_campaigns_opened \n",
" mean mean mean mean \n",
"category \n",
"0 0.000032 0.999968 13.984219 1.302720 \n",
"1 0.495929 0.258591 18.413562 3.718711 \n",
"2 0.297258 0.310173 17.395042 2.608084 \n",
"3 0.253244 0.457943 16.790421 4.173954 \n",
"4 0.301424 0.031596 16.954707 6.060621 \n",
"5 0.259769 0.242473 27.006406 12.457719 \n",
"6 0.304259 0.161134 14.073285 4.604134 \n",
"7 0.263464 0.086172 26.186317 8.891703 \n",
"8 0.251567 0.141850 30.987461 11.676332 \n",
"9 0.223766 0.106920 45.928247 18.241634 \n",
"10 0.295775 0.197183 53.352113 26.070423 "
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean of every numeric feature within each score category.\n",
"# Aggregating with ['mean'] keeps the two-level (feature, 'mean') column header of the\n",
"# previous describe()-based version without computing count/std/quantiles that were discarded.\n",
"mean_stats = dataset_for_segmentation.groupby('category')[numeric_features].agg(['mean'])\n",
"\n",
"# Display the resulting table\n",
"mean_stats"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "14da601e-7b1b-469c-bab1-de8fad4047f2",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtUAAAIiCAYAAAAHJDTKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUOklEQVR4nO3de1iUdf7/8dcIw1GcBAJEkbSQNNRMV0I3DykewTUr3SjU1kOtlZm6bebuCpvppmm2Wua2luYhWyv9lhlBZabhWbE8ZCcT3UA8ICoajHD//nCZnyN4gJuDOM/HdXHVfO733Pf7ns/c+vKee24shmEYAgAAAFBhdWq6AQAAAKC2I1QDAAAAJhGqAQAAAJMI1QAAAIBJhGoAAADAJEI1AAAAYBKhGgAAADCJUA0AAACYRKgGAAAATCJUAwCuK5988omsVqtWrlxZ060AcCGEauAKFixYIIvFoq1bt5a5PC4uTjfddJPT2E033aShQ4eWazvp6elKSkrSiRMnKtYoKsXPP/8si8WiBQsW1HQrVaJk/1588cVKW+cXX3whi8Wid99994q1SUlJslgsTmNdunRRly5dnMYsFouSkpIcj/fs2aOkpCT9/PPPl13/oUOH9NBDD+nll19W//79r3IPri8Xv3bVYejQoaX+HKxNdu/erVGjRikmJka+vr6yWCz64osvarot1DKEaqAKrFixQn/961/L9Zz09HQlJycTqnFdGz58uDZs2HDFug0bNmj48OGOx3v27FFycvJlQ/W5c+c0aNAgjRw5UqNGjaqMduEitm7dqpUrV8rf31/dunWr6XZQS7nXdAPA9ahNmzY13UK52e12WSwWubvzx0Jtc+bMGfn4+NR0G1elUaNGatSo0RXr7rzzznKv293dXV999VVF2qpVOFYrX2JiooYMGSJJevfdd/Xhhx/WcEeojThTDVSBiy//KC4u1uTJkxUZGSlvb2/dcMMNatWqlV5++WVJ5z8S/9Of/iRJatKkiSwWi9PHj8XFxZo2bZpuvfVWeXp6KigoSIMHD9ahQ4ectmsYhqZMmaLw8HB5eXmpXbt2SktLK/XxesnH9YsWLdK4cePUsGFDeXp66ocfftCRI0c0atQotWjRQnXr1lVQUJDuvvturVu3zmlbJZcRTJ8+XS+88IJuuukmeXt7q0uXLvruu+9kt9v1zDPPKDQ0VDabTffcc49ycnJKvU5xcXFatWqV2rRpI29vbzVv3lyrVq2SdP7Sm+bNm8vX11ft27cv8xKcrVu3ql+/fvL395eXl5fatGmj//znP1c1T7/88osGDhwoPz8/2Ww2DRo0SNnZ2WXWXs12zpw5o/Hjx6tJkyby8vKSv7+/2rVrp7fffvuyfZRcYpSWlqaHH35Y/v7+8vX1VXx8vH766Sen2i5duigqKkpffvmlOnToIB8fH/3hD3+QJGVmZuqhhx5SUFCQPD091bx5c82YMUPFxcWltllcXKznn39ejRs3drxXPvvsM6eaH374QQ8//LAiIiLk4+Ojhg0bKj4+Xt98802Z+/Hrr79q7NixCgkJkbe3tzp37qwdO3Y41ZR1+UdZLryEYcGCBbr//vslSV27dnUcHxdeovPpp5+qW7duqlevnnx8fNSxY8dS+3PkyBGNHDlSYWFh8vT01I033qiOHTvq008/vWwvJT3v2LFDAwYMUL169WSz2fTQQw/pyJEjTrVXe6xe6hKx8hyrV+tqj+nLWbp0qWJiYlS3bl3VrVtXt99+u+bPn3/Z57zyyivq1KmTgoKC5Ovrq5YtW2ratGmy2+1OdTt27FBcXJzjfRsaGqq+ffs6vWbLly9XdHS0bDabfHx81LRpU8f7vsTJkycdx5+Hh4caNmyoMWPGKD8//4r7V6cOcQjm8c9c4CoVFRXp3LlzpcYNw7jic6dNm6akpCT95S9/UadOnWS32/Xtt986LvUYPny4jh8/rtmzZ+v9999XgwYNJEktWrSQJP
3xj3/Uv/71Lz3++OOKi4vTzz//rL/+9a/64osvtH37dgUGBkqSJk6cqKlTp2rkyJEaMGCADh48qOHDh8tut6tZs2al+powYYJiYmL02muvqU6dOgoKCnKEhEmTJikkJESnT5/WihUr1KVLF3322Welrn195ZVX1KpVK73yyis6ceKExo0bp/j4eEVHR8tqteqNN97QgQMHNH78eA0fPlwffPCB0/N37typCRMmaOLEibLZbEpOTtaAAQM0YcIEffbZZ5oyZYosFov+/Oc/Ky4uTvv375e3t7ckac2aNerVq5eio6P12muvyWazadmyZRo0aJDOnDlz2evaz549q+7du+uXX37R1KlT1axZM3300UcaNGhQqdqr3c7YsWO1aNEiTZ48WW3atFF+fr527dqlY8eOXf4N8j/Dhg1TbGysli5dqoMHD+ovf/mLunTpoq+//lo33HCDoy4rK0sPPfSQnn76aU2ZMkV16tTRkSNH1KFDBxUWFuq5557TTTfdpFWrVmn8+PH68ccf9eqrrzpta86cOQoPD9esWbMcQbB3795au3atYmJiJJ3/R0dAQID+8Y9/6MYbb9Tx48e1cOFCRUdHa8eOHYqMjHRa57PPPqs77rhD//73v5WXl6ekpCR16dJFO3bsUNOmTa/qNShL3759NWXKFD377LN65ZVXdMcdd0iSbr75ZknS4sWLNXjwYP3ud7/TwoULZbVaNW/ePPXs2VOffPKJ4+P8xMREbd++Xc8//7yaNWumEydOaPv27Vc9P/fcc48GDhyoRx99VLt379Zf//pX7dmzR5s2bZLVapV09cdqeZV1rF6t48ePS7r6Y/pif/vb3/Tcc89pwIABGjdunGw2m3bt2qUDBw5c9nk//vijEhISHCF3586dev755/Xtt9/qjTfekCTl5+crNjZWTZo00SuvvKLg4GBlZ2drzZo1OnXqlKTzlwINGjRIgwYNUlJSkry8vHTgwAF9/vnnjm2dOXNGnTt31qFDh/Tss8+qVatW2r17t/72t7/pm2++0aeffnpV/5gDTDEAXNabb75pSLrsT3h4uNNzwsPDjSFDhjgex8XFGbfffvtltzN9+nRDkrF//36n8b179xqSjFGjRjmNb9q0yZBkPPvss4ZhGMbx48cNT09PY9CgQU51GzZsMCQZnTt3doytWbPGkGR06tTpivt/7tw5w263G926dTPuuecex/j+/fsNSUbr1q2NoqIix/isWbMMSUa/fv2c1jNmzBhDkpGXl+cYCw8PN7y9vY1Dhw45xjIyMgxJRoMGDYz8/HzH+MqVKw1JxgcffOAYu/XWW402bdoYdrvdaVtxcXFGgwYNnPq62Ny5cw1Jxv/93/85jY8YMcKQZLz55pvl3k5UVJTRv3//S27zUkreYxe+voZhGF999ZUhyZg8ebJjrHPnzoYk47PPPnOqfeaZZwxJxqZNm5zG//jHPxoWi8XYt2+fYRj/f95CQ0ONs2fPOupOnjxp+Pv7G927d79kn+fOnTMKCwuNiIgI46mnnnKMl7yf7rjjDqO4uNgx/vPPPxtWq9UYPny4Y2zSpEnGxX/1dO7c2en9aRiGIcmYNGmS4/Hy5csNScaaNWuc6vLz8w1/f38jPj7eabyoqMho3bq10b59e8dY3bp1jTFjxlxy/y6lpOcL99kwDGPJkiWGJGPx4sWGYVz9sWoYpf+MKHHxa1GeY7XExa/dxS51TJflp59+Mtzc3IwHH3zwsnVDhgwp9efghYqKigy73W689dZbhpubm3H8+HHDMAxj69athiRj5cqVl3zuiy++aEgyTpw4ccmaqVOnGnXq1DG2bNniNP7uu+8akozVq1dftv8LXeq9BlwJn3cAV+mtt97Sli1bSv389re/veJz27dvr507d2rUqFH65JNPdPLkyave7po1aySp1FnX9u3bq3nz5o6PuDdu3KiCggINHDjQqe7OO++85Lfy77333jLHX3vtNd1xxx3y8vKSu7u7rF
arPvvsM+3du7dUbZ8+fZw+Om3evLmk82cXL1QynpmZ6TR+++23q2HDhqXqunTp4nSdcMl4ydmxH374Qd9++60efPB
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of the predicted probabilities: ten equal-width bins over [0, 1]\n",
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"ax.hist(y_predict_proba, bins=10, range=(0, 1), color='blue', alpha=0.7)\n",
"\n",
"# x axis pinned to [0, 1]; y axis starts at 0 and its upper bound is left to autoscale\n",
"ax.set_xlim(0, 1)\n",
"ax.set_ylim(0, None)\n",
"\n",
"ax.set_title('Histogramme des probabilités pour la classe 1')\n",
"ax.set_xlabel('Probabilité')\n",
"ax.set_ylabel('Fréquence')\n",
"ax.grid(True)\n",
"plt.show()\n"
]
2024-03-08 14:48:38 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}