BDC-team-1/Musee/2_Modelization_musee.ipynb

2071 lines
454 KiB
Plaintext
Raw Normal View History

2024-03-10 22:31:37 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "3415114e-9577-4487-89eb-4931620ad9f0",
"metadata": {},
"source": [
"# Predict Sales"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f271eb45-1470-4764-8c2e-31374efa1fe5",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
"from sklearn.utils import class_weight\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
"from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
"\n",
"import pickle\n",
"import warnings\n",
"#import scikitplot as skplt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3fecb606-22e5-4dee-8efa-f8dff0832299",
"metadata": {},
"outputs": [],
"source": [
"# Silence warnings for the whole session. The first filter is a catch-all\n",
"# (category Warning); the two category-specific filters that follow are already\n",
"# covered by it and only spell out which warnings the fits are expected to raise.\n",
"warnings.simplefilter(\"ignore\")\n",
"warnings.simplefilter(\"ignore\", category=ConvergenceWarning)\n",
"warnings.simplefilter(\"ignore\", category=DataConversionWarning)"
]
},
{
"cell_type": "markdown",
"id": "ae591854-3003-4c75-a0c7-5abf04246e81",
"metadata": {},
"source": [
"### Load Data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "59dd4694-a812-4923-b995-a2ee86c74f85",
"metadata": {},
"outputs": [],
"source": [
"# Build the S3 client from the endpoint host exposed in the environment\n",
"S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
"fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "017f7e9a-3ba0-40fa-bdc8-51b98cc1fdb3",
"metadata": {},
"outputs": [],
"source": [
"def load_train_test(bucket=\"projet-bdc2324-team1/Generalization/musee\"):\n",
"    \"\"\"Load the train and test sets stored as CSV files on S3.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    bucket : str, optional\n",
"        S3 prefix that contains ``Train_set.csv`` and ``Test_set.csv``.\n",
"        Defaults to the musee generalization folder.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple of pandas.DataFrame\n",
"        ``(dataset_train, dataset_test)``, in that order.\n",
"    \"\"\"\n",
"    def _read_csv(file_name):\n",
"        # Single reader for both splits so they are always parsed the same way\n",
"        with fs.open(f\"{bucket}/{file_name}\", mode=\"rb\") as file_in:\n",
"            return pd.read_csv(file_in, sep=\",\")\n",
"\n",
"    dataset_train = _read_csv(\"Train_set.csv\")\n",
"    dataset_test = _read_csv(\"Test_set.csv\")\n",
"    return dataset_train, dataset_test"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c479b230-b4bd-4cfb-b76b-d9faf6d95772",
"metadata": {},
"outputs": [],
"source": [
"dataset_train, dataset_test = load_train_test()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "c24c446d-4e1c-4ac1-a048-f0b8d8559f36",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"customer_id 0\n",
"nb_tickets 0\n",
"nb_purchases 0\n",
"total_amount 0\n",
"nb_suppliers 0\n",
"vente_internet_max 0\n",
"purchase_date_min 0\n",
"purchase_date_max 0\n",
"time_between_purchase 0\n",
"nb_tickets_internet 0\n",
"street_id 0\n",
"structure_id 389658\n",
"mcp_contact_id 150354\n",
"fidelity 0\n",
"tenant_id 0\n",
"is_partner 0\n",
"deleted_at 434278\n",
"gender 0\n",
"is_email_true 0\n",
"opt_in 0\n",
"last_buying_date 183987\n",
"max_price 183987\n",
"ticket_sum 0\n",
"average_price 94783\n",
"average_purchase_delay 183987\n",
"average_price_basket 183987\n",
"average_ticket_basket 183987\n",
"total_price 89204\n",
"purchase_count 0\n",
"first_buying_date 183987\n",
"country 141237\n",
"gender_label 0\n",
"gender_female 0\n",
"gender_male 0\n",
"gender_other 0\n",
"country_fr 141237\n",
"nb_campaigns 0\n",
"nb_campaigns_opened 0\n",
"time_to_open 258182\n",
"y_has_purchased 0\n",
"dtype: int64"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_train.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "825d14a3-6967-4733-bfd4-64bf61c2bd43",
"metadata": {},
"outputs": [],
"source": [
"def features_target_split(dataset_train, dataset_test):\n",
" features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
" 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n",
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
" X_train = dataset_train[features_l]\n",
" y_train = dataset_train[['y_has_purchased']]\n",
"\n",
" X_test = dataset_test[features_l]\n",
" y_test = dataset_test[['y_has_purchased']]\n",
" return X_train, X_test, y_train, y_test"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "69eaec12-b30f-4d30-a461-ea520d5cbf77",
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "d039f31d-0093-46c6-9743-ddec1381f758",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Shape train : (434278, 17)\n",
"Shape test : (186120, 17)\n"
]
}
],
"source": [
"print(\"Shape train : \", X_train.shape)\n",
"print(\"Shape test : \", X_test.shape)"
]
},
{
"cell_type": "markdown",
"id": "a1d6de94-4e11-481a-a0ce-412bf29f692c",
"metadata": {},
"source": [
"### Prepare preprocessing and Hyperparameters"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "b808da43-c444-4e94-995a-7ec6ccd01e2d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0.0: 0.5223906809346011, 1.0: 11.665359406898034}"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compute class weights with the 'balanced' heuristic on the training target\n",
"purchase_labels = y_train['y_has_purchased']\n",
"# Distinct labels are computed once and reused for both the weights and the dict keys\n",
"label_classes = np.unique(purchase_labels)\n",
"weights = class_weight.compute_class_weight(class_weight='balanced', classes=label_classes,\n",
"                                            y=purchase_labels)\n",
"\n",
"# Map each class label to its weight (same pairing order as label_classes)\n",
"weight_dict = dict(zip(label_classes, weights))\n",
"weight_dict"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "b32a79ea-907f-4dfc-9832-6c74bef3200c",
"metadata": {},
"outputs": [],
"source": [
"# Feature handled by the one-hot branch of the preprocessor\n",
"categorical_features = ['opt_in']\n",
"\n",
"# Features that are standardised. 'opt_in' is deliberately absent: listing it in\n",
"# both branches fed the same variable to the model twice (scaled and one-hot).\n",
"numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max',\n",
"                    'purchase_date_min', 'purchase_date_max', 'time_between_purchase',\n",
"                    'nb_tickets_internet', 'fidelity', 'is_email_true',  # 'is_partner' is currently disabled\n",
"                    'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
"\n",
"# Transformer for the numeric features\n",
"numeric_transformer = Pipeline(steps=[\n",
"    (\"scaler\", StandardScaler())\n",
"])\n",
"\n",
"# Transformer for the categorical features; categories unseen at fit time are ignored\n",
"categorical_transformer = Pipeline(steps=[\n",
"    (\"onehot\", OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n",
"])\n",
"\n",
"# Route each group of columns to its transformer\n",
"preproc = ColumnTransformer(\n",
"    transformers=[\n",
"        (\"num\", numeric_transformer, numeric_features),\n",
"        (\"cat\", categorical_transformer, categorical_features)\n",
"    ]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "9809a688-bfbc-4685-a77f-17a8b2b79ab3",
"metadata": {},
"outputs": [],
"source": [
"# Wrap the evaluation metrics as scikit-learn scorer objects\n",
"balanced_scorer, recall_scorer = (\n",
"    make_scorer(metric) for metric in (balanced_accuracy_score, recall_score)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "4f9b2bbf-5f8a-4ac1-8e6c-51bd0dd8ac85",
"metadata": {},
"outputs": [],
"source": [
"def draw_confusion_matrix(y_test, y_pred):\n",
"    \"\"\"Show the confusion matrix of ``y_pred`` against ``y_test`` as an annotated heatmap.\"\"\"\n",
"    class_labels = ['Class 0', 'Class 1']\n",
"    sns.heatmap(\n",
"        confusion_matrix(y_test, y_pred),\n",
"        annot=True,\n",
"        fmt='d',  # integer counts in each cell\n",
"        cmap='Blues',\n",
"        xticklabels=class_labels,\n",
"        yticklabels=class_labels,\n",
"    )\n",
"    plt.xlabel('Predicted')\n",
"    plt.ylabel('Actual')\n",
"    plt.title('Confusion Matrix')\n",
"    plt.show()\n",
"\n",
"\n",
"def draw_roc_curve(X_test, y_test, model=None):\n",
"    \"\"\"Plot the ROC curve of a fitted classifier on the given evaluation data.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X_test : features passed to ``predict_proba``.\n",
"    y_test : true labels; label ``1`` is treated as the positive class.\n",
"    model : fitted estimator exposing ``predict_proba``, optional\n",
"        Defaults to the notebook-level ``pipeline`` so that existing\n",
"        two-argument calls keep working.\n",
"    \"\"\"\n",
"    estimator = pipeline if model is None else model\n",
"    # Score of the class stored at index 1 of the estimator's classes\n",
"    y_pred_prob = estimator.predict_proba(X_test)[:, 1]\n",
"\n",
"    # False-positive (FPR) and true-positive (TPR) rates over the thresholds\n",
"    fpr, tpr, _ = roc_curve(y_test, y_pred_prob, pos_label=1)\n",
"\n",
"    # Area under the ROC curve (AUC)\n",
"    roc_auc = auc(fpr, tpr)\n",
"\n",
"    plt.figure(figsize = (14, 8))\n",
"    plt.plot(fpr, tpr, label=\"ROC curve(area = %0.3f)\" % roc_auc)\n",
"    # Diagonal = performance of a random classifier\n",
"    plt.plot([0, 1], [0, 1], color=\"red\",label=\"Random Baseline\", linestyle=\"--\")\n",
"    plt.grid(color='gray', linestyle='--', linewidth=0.5)\n",
"    plt.xlabel('Taux de faux positifs (FPR)')\n",
"    plt.ylabel('Taux de vrais positifs (TPR)')\n",
"    plt.title('Courbe ROC : modèle logistique')\n",
"    plt.legend(loc=\"lower right\")\n",
"    plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "206d9a95-7c37-4506-949b-e77d225e42c5",
"metadata": {},
"outputs": [],
"source": [
"# Hyperparameter search space for the 'logreg' step of the pipeline\n",
"param_grid = dict(\n",
"    logreg__C=np.logspace(-10, 6, 17, base=2),  # 2**-10 ... 2**6\n",
"    logreg__penalty=['l1', 'l2'],\n",
"    logreg__class_weight=['balanced', weight_dict],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "7ff2f7bd-efc1-4f7c-a3c9-caa916aa2f2b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-4 {\n",
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
"#sk-container-id-4 {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"#sk-container-id-4 pre {\n",
" padding: 0;\n",
"}\n",
"\n",
"#sk-container-id-4 input.sk-hidden--visually {\n",
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-dashed-wrapped {\n",
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-container {\n",
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-text-repr-fallback {\n",
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item::after {\n",
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel {\n",
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item {\n",
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
"#sk-container-id-4 div.sk-serial {\n",
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
"#sk-container-id-4 div.sk-toggleable {\n",
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
"#sk-container-id-4 label.sk-toggleable__label {\n",
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
"#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
"#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
"#sk-container-id-4 div.sk-toggleable__content {\n",
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-toggleable__content pre {\n",
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
"#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
"#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
"#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-4 div.sk-label label {\n",
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
"#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
"#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
"#sk-container-id-4 div.sk-label label {\n",
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-label-container {\n",
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
"#sk-container-id-4 div.sk-estimator {\n",
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-estimator.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
"#sk-container-id-4 div.sk-estimator:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
"#sk-container-id-4 a.estimator_doc_link {\n",
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
"#sk-container-id-4 a.estimator_doc_link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"#sk-container-id-4 a.estimator_doc_link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;,\n",
" &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;,\n",
" &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;,\n",
" &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;,\n",
" &#x27;fidelity&#x27;, &#x27;is_email_true&#x27;,\n",
" &#x27;opt_in&#x27;, &#x27;gender_female&#x27;,\n",
" &#x27;gender_male&#x27;,\n",
" &#x27;gender_other&#x27;,\n",
" &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_campaigns_opened&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
" sparse_output=False))]),\n",
" [&#x27;opt_in&#x27;])])),\n",
" (&#x27;logreg&#x27;,\n",
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
" 1.0: 11.665359406898034},\n",
" max_iter=5000, solver=&#x27;saga&#x27;))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-22\" type=\"checkbox\" ><label for=\"sk-estimator-id-22\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">&nbsp;&nbsp;Pipeline<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;,\n",
" &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;,\n",
" &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;,\n",
" &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;,\n",
" &#x27;fidelity&#x27;, &#x27;is_email_true&#x27;,\n",
" &#x27;opt_in&#x27;, &#x27;gender_female&#x27;,\n",
" &#x27;gender_male&#x27;,\n",
" &#x27;gender_other&#x27;,\n",
" &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_campaigns_opened&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
" sparse_output=False))]),\n",
" [&#x27;opt_in&#x27;])])),\n",
" (&#x27;logreg&#x27;,\n",
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
" 1.0: 11.665359406898034},\n",
" max_iter=5000, solver=&#x27;saga&#x27;))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-23\" type=\"checkbox\" ><label for=\"sk-estimator-id-23\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">&nbsp;preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;, StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;, &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;, &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;, &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;, &#x27;fidelity&#x27;,\n",
" &#x27;is_email_true&#x27;, &#x27;opt_in&#x27;, &#x27;gender_female&#x27;,\n",
" &#x27;gender_male&#x27;, &#x27;gender_other&#x27;, &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_campaigns_opened&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
" sparse_output=False))]),\n",
" [&#x27;opt_in&#x27;])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-24\" type=\"checkbox\" ><label for=\"sk-estimator-id-24\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">num</label><div class=\"sk-toggleable__content \"><pre>[&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;, &#x27;total_amount&#x27;, &#x27;nb_suppliers&#x27;, &#x27;vente_internet_max&#x27;, &#x27;purchase_date_min&#x27;, &#x27;purchase_date_max&#x27;, &#x27;time_between_purchase&#x27;, &#x27;nb_tickets_internet&#x27;, &#x27;fidelity&#x27;, &#x27;is_email_true&#x27;, &#x27;opt_in&#x27;, &#x27;gender_female&#x27;, &#x27;gender_male&#x27;, &#x27;gender_other&#x27;, &#x27;nb_campaigns&#x27;, &#x27;nb_campaigns_opened&#x27;]</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-25\" type=\"checkbox\" ><label for=\"sk-estimator-id-25\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">&nbsp;StandardScaler<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content \"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-26\" type=\"checkbox\" ><label for=\"sk-estimator-id-26\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div 
class=\"sk-toggleable__content \"><pre>[&#x27;opt_in&#x27;]</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-27\" type=\"checkbox\" ><label for=\"sk-estimator-id-27\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">&nbsp;OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;, sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-28\" type=\"checkbox\" ><label for=\"sk-estimator-id-28\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content \"><pre>LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
" 1.0: 11.665359406898034},\n",
" max_iter=5000, solver=&#x27;saga&#x27;)</pre></div> </div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
" 'vente_internet_max',\n",
" 'purchase_date_min',\n",
" 'purchase_date_max',\n",
" 'time_between_purchase',\n",
" 'nb_tickets_internet',\n",
" 'fidelity', 'is_email_true',\n",
" 'opt_in', 'gender_female',\n",
" 'gender_male',\n",
" 'gender_other',\n",
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
" 1.0: 11.665359406898034},\n",
" max_iter=5000, solver='saga'))])"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pipeline\n",
"pipeline = Pipeline(steps=[\n",
" ('preprocessor', preproc),\n",
" ('logreg', LogisticRegression(solver='saga', class_weight = weight_dict,\n",
" max_iter=5000)) \n",
"])\n",
"\n",
"pipeline.set_output(transform=\"pandas\")"
]
},
{
"cell_type": "markdown",
"id": "ed415f60-9663-4179-877b-233faf6e1645",
"metadata": {},
"source": [
"## Baseline"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "2b467511-2ae5-4a16-a502-397c3460471d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-5 {\n",
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
"#sk-container-id-5 {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"#sk-container-id-5 pre {\n",
" padding: 0;\n",
"}\n",
"\n",
"#sk-container-id-5 input.sk-hidden--visually {\n",
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-dashed-wrapped {\n",
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-container {\n",
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-text-repr-fallback {\n",
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
"#sk-container-id-5 div.sk-parallel-item::after {\n",
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-parallel {\n",
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-parallel-item {\n",
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-parallel-item:first-child::after {\n",
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-parallel-item:last-child::after {\n",
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-parallel-item:only-child::after {\n",
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
"#sk-container-id-5 div.sk-serial {\n",
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
"#sk-container-id-5 div.sk-toggleable {\n",
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
"#sk-container-id-5 label.sk-toggleable__label {\n",
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
"#sk-container-id-5 label.sk-toggleable__label-arrow:before {\n",
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
"#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
"#sk-container-id-5 div.sk-toggleable__content {\n",
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-toggleable__content.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-toggleable__content pre {\n",
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-toggleable__content.fitted pre {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
"#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
"#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
"#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-5 div.sk-label label {\n",
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
"#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
"#sk-container-id-5 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
"#sk-container-id-5 div.sk-label label {\n",
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-label-container {\n",
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
"#sk-container-id-5 div.sk-estimator {\n",
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-estimator.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
"#sk-container-id-5 div.sk-estimator:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-5 div.sk-estimator.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
"#sk-container-id-5 a.estimator_doc_link {\n",
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
"#sk-container-id-5 a.estimator_doc_link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"#sk-container-id-5 a.estimator_doc_link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"#sk-container-id-5 a.estimator_doc_link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;,\n",
" &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;,\n",
" &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;,\n",
" &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;,\n",
" &#x27;fidelity&#x27;, &#x27;is_email_true&#x27;,\n",
" &#x27;opt_in&#x27;, &#x27;gender_female&#x27;,\n",
" &#x27;gender_male&#x27;,\n",
" &#x27;gender_other&#x27;,\n",
" &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_campaigns_opened&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
" sparse_output=False))]),\n",
" [&#x27;opt_in&#x27;])])),\n",
" (&#x27;logreg&#x27;,\n",
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
" 1.0: 11.665359406898034},\n",
" max_iter=5000, solver=&#x27;saga&#x27;))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-29\" type=\"checkbox\" ><label for=\"sk-estimator-id-29\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;Pipeline<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[(&#x27;preprocessor&#x27;,\n",
" ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;,\n",
" StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;,\n",
" &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;,\n",
" &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;,\n",
" &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;,\n",
" &#x27;fidelity&#x27;, &#x27;is_email_true&#x27;,\n",
" &#x27;opt_in&#x27;, &#x27;gender_female&#x27;,\n",
" &#x27;gender_male&#x27;,\n",
" &#x27;gender_other&#x27;,\n",
" &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_campaigns_opened&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
" sparse_output=False))]),\n",
" [&#x27;opt_in&#x27;])])),\n",
" (&#x27;logreg&#x27;,\n",
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
" 1.0: 11.665359406898034},\n",
" max_iter=5000, solver=&#x27;saga&#x27;))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-30\" type=\"checkbox\" ><label for=\"sk-estimator-id-30\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
" Pipeline(steps=[(&#x27;scaler&#x27;, StandardScaler())]),\n",
" [&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;, &#x27;total_amount&#x27;,\n",
" &#x27;nb_suppliers&#x27;, &#x27;vente_internet_max&#x27;,\n",
" &#x27;purchase_date_min&#x27;, &#x27;purchase_date_max&#x27;,\n",
" &#x27;time_between_purchase&#x27;,\n",
" &#x27;nb_tickets_internet&#x27;, &#x27;fidelity&#x27;,\n",
" &#x27;is_email_true&#x27;, &#x27;opt_in&#x27;, &#x27;gender_female&#x27;,\n",
" &#x27;gender_male&#x27;, &#x27;gender_other&#x27;, &#x27;nb_campaigns&#x27;,\n",
" &#x27;nb_campaigns_opened&#x27;]),\n",
" (&#x27;cat&#x27;,\n",
" Pipeline(steps=[(&#x27;onehot&#x27;,\n",
" OneHotEncoder(handle_unknown=&#x27;ignore&#x27;,\n",
" sparse_output=False))]),\n",
" [&#x27;opt_in&#x27;])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-31\" type=\"checkbox\" ><label for=\"sk-estimator-id-31\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">num</label><div class=\"sk-toggleable__content fitted\"><pre>[&#x27;nb_tickets&#x27;, &#x27;nb_purchases&#x27;, &#x27;total_amount&#x27;, &#x27;nb_suppliers&#x27;, &#x27;vente_internet_max&#x27;, &#x27;purchase_date_min&#x27;, &#x27;purchase_date_max&#x27;, &#x27;time_between_purchase&#x27;, &#x27;nb_tickets_internet&#x27;, &#x27;fidelity&#x27;, &#x27;is_email_true&#x27;, &#x27;opt_in&#x27;, &#x27;gender_female&#x27;, &#x27;gender_male&#x27;, &#x27;gender_other&#x27;, &#x27;nb_campaigns&#x27;, &#x27;nb_campaigns_opened&#x27;]</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-32\" type=\"checkbox\" ><label for=\"sk-estimator-id-32\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;StandardScaler<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-33\" type=\"checkbox\" ><label for=\"sk-estimator-id-33\" 
class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">cat</label><div class=\"sk-toggleable__content fitted\"><pre>[&#x27;opt_in&#x27;]</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-34\" type=\"checkbox\" ><label for=\"sk-estimator-id-34\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;OneHotEncoder<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>OneHotEncoder(handle_unknown=&#x27;ignore&#x27;, sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-35\" type=\"checkbox\" ><label for=\"sk-estimator-id-35\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
" 1.0: 11.665359406898034},\n",
" max_iter=5000, solver=&#x27;saga&#x27;)</pre></div> </div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
" 'vente_internet_max',\n",
" 'purchase_date_min',\n",
" 'purchase_date_max',\n",
" 'time_between_purchase',\n",
" 'nb_tickets_internet',\n",
" 'fidelity', 'is_email_true',\n",
" 'opt_in', 'gender_female',\n",
" 'gender_male',\n",
" 'gender_other',\n",
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
" 1.0: 11.665359406898034},\n",
" max_iter=5000, solver='saga'))])"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "6356e870-0dfc-4e60-9e48-e2de5e7f9f87",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy Score: 0.9083440790887599\n",
"F1 Score: 0.4349266289045679\n",
"Recall Score: 0.8231974921630094\n"
]
}
],
"source": [
"y_pred = pipeline.predict(X_test)\n",
"\n",
"# Calculate the F1 score\n",
"acc = accuracy_score(y_test, y_pred)\n",
"print(f\"Accuracy Score: {acc}\")\n",
"\n",
"f1 = f1_score(y_test, y_pred)\n",
"print(f\"F1 Score: {f1}\")\n",
"\n",
"recall = recall_score(y_test, y_pred)\n",
"print(f\"Recall Score: {recall}\")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "09387a09-0d53-4c54-baac-f3c2a57a629a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAHFCAYAAADhWLMfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABi10lEQVR4nO3deVxU9f7H8deIMCLKhCLguKSWkoilYSlqqbngguatrgtFUoZetbwkblRuLeKWVu7ZYtfsUjfTq2WEZWmmuJCkqGnlgiaIC+GSAuH8/vDn3EZQwWbR8f3scR4P55zP+Z7vGRv58Pl+v2cMFovFgoiIiIgbKOfqDoiIiIjYixIbERERcRtKbERERMRtKLERERERt6HERkRERNyGEhsRERFxG0psRERExG0osRERERG3ocRGRERE3IYSG3Fr27Zt44knnqBu3bpUqFCBSpUqcffddzNlyhROnDjh0Gtv3bqVNm3aYDKZMBgMvPbaa3a/hsFgYPz48XZv92oWLlyIwWDAYDDwzTffFDtusVi4/fbbMRgMtG3b9pquMWfOHBYuXFimc7755pvL9klEbg7lXd0BEUdZsGABgwcPJjg4mBEjRhASEkJhYSFbtmxh3rx5bNiwgaVLlzrs+k8++SRnzpwhKSkJPz8/6tSpY/drbNiwgZo1a9q93dKqXLkyb7/9drHkZc2aNfzyyy9Urlz5mtueM2cO/v7+xMTElPqcu+++mw0bNhASEnLN1xWRG5sSG3FLGzZsYNCgQXTs2JFly5ZhNBqtxzp27Eh8fDzJyckO7UNGRgaxsbF06dLFYddo0aKFw9oujd69e7N48WJmz56Nr6+vdf/bb79NeHg4J0+edEo/CgsLMRgM+Pr6uvw9ERHX0lCUuKWJEydiMBh48803bZKai7y8vOjRo4f19fnz55kyZQp33HEHRqORgIAAHn/8cQ4dOmRzXtu2bQkNDWXz5s3cd999VKxYkXr16jFp0iTOnz8P/G+Y5o8//mDu3LnWIRuA8ePHW//8ZxfP2b9/v3Xf6tWradu2LVWrVsXb25vatWvz8MMP8/vvv1tjShqKysjI4MEHH8TPz48KFSrQpEkT3nvvPZuYi0M2//73v3n++ecxm834+vrSoUMHdu/eXbo3Gejbty8A//73v6378vLyWLJkCU8++WSJ50yYMIHmzZtTpUoVfH19ufvuu3n77bf58/fx1qlThx07drBmzRrr+3ex4nWx74sWLSI+Pp4aNWpgNBr5+eefiw1FHTt2jFq1atGyZUsKCwut7e/cuRMfHx+io6NLfa8icmNQYiNup6ioiNWrVxMWFkatWrVKdc6gQYMYNWoUHTt2ZPny5bz00kskJyfTsmVLjh07ZhObnZ3No48+ymOPPcby5cvp0qULCQkJvP/++wB069aNDRs2APDII4+wYcMG6+vS2r9/P926dcPLy4t33nmH5ORkJk2ahI+PDwUFBZc9b/fu3bRs2ZIdO3bwxhtv8MknnxASEkJMTAxTpkwpFv/cc89x4MAB3nrrLd58801++uknunfvTlFRUan66evryyOPPMI777xj3ffvf/+bcuXK0bt378ve28CBA/noo4/45JNPeOihh3jmmWd46aWXrDFLly6lXr16NG3a1Pr+XTpsmJCQQGZmJvPmzWPFihUEBAQUu5a/vz9JSUls3ryZUaNGAfD777/z97//ndq1azNv3rxS3aeI3EAsIm4mOzvbAlj69OlTqvhdu3ZZAMvgwYNt9m/cuNECWJ577jnrvjZt2lgAy8aNG21iQ0JCLBERETb7AMuQIUNs9o0bN85S0sfu3XfftQCWffv2WSwWi+Xjjz+2AJb09PQr9h2wjBs3zvq6T58+FqPRaMnMzLSJ69Kli6VixYqW3377zWKxWCxff/21BbB07drVJu6jjz6yAJYNGzZc8boX+7t582ZrWxkZGRaLxWK55557LDExMRaLxWJp1KiRpU2bNpdtp6
ioyFJYWGh58cUXLVWrVrWcP3/eeuxy51683v3333/ZY19//bXN/smTJ1sAy9KlSy39+vWzeHt7W7Zt23bFexSRG5MqNnLT+/rrrwGKTVK99957adiwIV999ZXN/qCgIO69916bfXfeeScHDhywW5+aNGmCl5cXAwYM4L333mPv3r2lOm/16tW0b9++WKUqJiaG33//vVjl6M/DcXDhPoAy3UubNm247bbbeOedd9i+fTubN2++7DDUxT526NABk8mEh4cHnp6ejB07luPHj5OTk1Pq6z788MOljh0xYgTdunWjb9++vPfee8ycOZPGjRuX+nwRuXEosRG34+/vT8WKFdm3b1+p4o8fPw5A9erVix0zm83W4xdVrVq1WJzRaOTs2bPX0NuS3XbbbXz55ZcEBAQwZMgQbrvtNm677TZef/31K553/Pjxy97HxeN/dum9XJyPVJZ7MRgMPPHEE7z//vvMmzePBg0acN9995UYu2nTJjp16gRcWLX23XffsXnzZp5//vkyX7ek+7xSH2NiYjh37hxBQUGaWyPixpTYiNvx8PCgffv2pKWlFZv8W5KLP9yzsrKKHTt8+DD+/v5261uFChUAyM/Pt9l/6TwegPvuu48VK1aQl5dHamoq4eHhxMXFkZSUdNn2q1atetn7AOx6L38WExPDsWPHmDdvHk888cRl45KSkvD09OTTTz+lV69etGzZkmbNml3TNUuahH05WVlZDBkyhCZNmnD8+HGGDx9+TdcUkeufEhtxSwkJCVgsFmJjY0ucbFtYWMiKFSsAeOCBBwCsk38v2rx5M7t27aJ9+/Z269fFlT3btm2z2X+xLyXx8PCgefPmzJ49G4Dvv//+srHt27dn9erV1kTmon/9619UrFjRYUuha9SowYgRI+jevTv9+vW7bJzBYKB8+fJ4eHhY9509e5ZFixYVi7VXFayoqIi+fftiMBj4/PPPSUxMZObMmXzyySd/uW0Ruf7oOTbilsLDw5k7dy6DBw8mLCyMQYMG0ahRIwoLC9m6dStvvvkmoaGhdO/eneDgYAYMGMDMmTMpV64cXbp0Yf/+/YwZM4ZatWrx7LPP2q1fXbt2pUqVKvTv358XX3yR8uXLs3DhQg4ePGgTN2/ePFavXk23bt2oXbs2586ds6486tChw2XbHzduHJ9++int2rVj7NixVKlShcWLF/PZZ58xZcoUTCaT3e7lUpMmTbpqTLdu3Zg+fTpRUVEMGDCA48ePM23atBKX5Ddu3JikpCQ+/PBD6tWrR4UKFa5pXsy4ceP49ttvSUlJISgoiPj4eNasWUP//v1p2rQpdevWLXObInL9UmIjbis2NpZ7772XGTNmMHnyZLKzs/H09KRBgwZERUXx9NNPW2Pnzp3Lbbfdxttvv83s2bMxmUx07tyZxMTEEufUXCtfX1+Sk5OJi4vjscce45ZbbuGpp56iS5cuPPXUU9a4Jk2akJKSwrhx48jOzqZSpUqEhoayfPly6xyVkgQHB7N+/Xqee+45hgwZwtmzZ2nYsCHvvvtumZ7g6ygPPPAA77zzDpMnT6Z79+7UqFGD2NhYAgIC6N+/v03shAkTyMrKIjY2llOnTnHrrbfaPOenNFatWkViYiJjxoyxqbwtXLiQpk2b0rt3b9atW4eXl5c9bk9ErgMGi+VPT8USERERuYFpjo2IiIi4DSU2IiIi4jaU2IiIiIjbUGIjIiIibkOJjYiIiLgNJTYiIiLiNpTYiIiIiNtwywf0eTd9+upBIjehvd9Md3UXRK471U2Of0CjvX4und06yy7tuDNVbERERMRtKLERERFxNEM5+2xltHbtWrp3747ZbMZgMLBs2bJiMbt27aJHjx6YTCYqV65MixYtyMzMtB7Pz8/nmWeewd/fHx8fH3r06MGhQ4ds2sjNzSU6OhqTyYTJZCI6OprffvvNJiYzM5Pu3bvj4+ODv78/Q4cOLfYlxdu3b6dNmzZ4e3tTo0YNXnzxRcr6BQlKbE
RERBzNYLDPVkZnzpzhrrvuYtaskoewfvnlF1q3bs0dd9zBN998ww8//MCYMWOoUKGCNSYuLo6lS5eSlJTEunXrOH3
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"draw_confusion_matrix(y_test, y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "580b58d7-596f-4207-8c99-4365aba2bc9f",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUddrG8e9MeieF0Lv0XgQBpQpIUVBAZO0ruyq7umJbsXcUC7ZVd33trogo4gqIoIiIoIKACIggvQVIIL1NOe8fBwKHCZhAkjOTuT/XlYuZM2dmniTcM8mTX3EYhmEgIiIiIiIiIiJBw2l3ASIiIiIiIiIiUrXUEBIRERERERERCTJqCImIiIiIiIiIBBk1hEREREREREREgowaQiIiIiIiIiIiQUYNIRERERERERGRIKOGkIiIiIiIiIhIkFFDSEREREREREQkyKghJCIiIiIiIiISZNQQEhEREQlwDz30EMnJyezYscPuUkRERCRAqCEkIiJSidauXcu1115LkyZNiIyMJDY2li5dujB16lQOHTpkS01vvfUWDoeDlStXVurzbN++HYfDUfLhdDpJTExk4MCBLFiw4KT3mz9/PsOHD6dmzZpERETQoEEDrr76ajZs2HDS+3z77bdceuml1KtXj/DwcBISEujVqxevvPIKeXl5lfHp2eLo92779u2W4w888AAXX3wxY8eOpbi4uNT7PvjggzgcjgqrZfHixTgcDhYvXlxhj1maxo0bc80115TrPsuWLePBBx8kMzPT57Z+/frRr1+/CqlNREQkkKkhJCIiUklee+01unbtyooVK7jjjjuYP38+n3zyCWPHjuXVV1/luuuus7vEKnHTTTexfPlyvv32W55++mk2b97MsGHDWLJkic+5d955J0OHDsXr9fLyyy+zcOFCHnjgAVasWEGXLl2YNWuWz30eeOAB+vTpw549e3jkkUdYuHAhH3zwAQMHDuTBBx/k3nvvrYpP03avvvoqNWvWZNKkSXaXUqE++eQT7rvvvnLdZ9myZTz00EOlNoRefvllXn755QqqTkREJHCF2l2AiIhIdbR8+XJuvPFGBg0axOzZs4mIiCi5bdCgQdx2223Mnz+/SmtyuVwVOkKkrBo2bMg555wDQO/evWnevDl9+/bl9ddfp0+fPiXnTZ8+naeeeoobb7zR8gt7nz59GD9+PH379uXKK6+kU6dONG3aFICZM2fy8MMPc9111/Haa69ZPr+hQ4dy5513snz58ir6TO0VGhrK3Llz7S6jwnXu3LlCH69NmzYV+ngiIiKBSiOEREREKsHjjz+Ow+HgP//5j6UZdFR4eDgXXXRRyXWv18vUqVNp1aoVERERpKamctVVV7F7927L/U42febEaTBHp/O8++673HbbbdSrV4+IiAh+//33knMOHz7MtddeS1JSEjExMVx44YVs3brV57G//PJLBg4cSHx8PNHR0fTu3ZuvvvrqNL4qpm7dugGwf/9+y/HHHnuMxMREnn76aZ/7xMTE8OKLL5Kfn8+0adNKjj/88MMkJibywgsvlNrsiouLY/Dgwadd64n69etHu3btWL58Ob169SIqKorGjRvz5ptvAjB37ly6dOlCdHQ07du3L7Xpt3TpUgYOHEhcXBzR0dH06tWr1EbO999/T+/evYmMjKRu3bpMnjwZl8tVal0zZsygZ8+exMTEEBsby+DBg/npp5/K9DmdeN8hQ4awevXqcnxVrP73v//Rs2dPoqOjiYuLY9CgQaU25T799FM6dOhAREQETZs25fnnny91WtuJ/+e9Xi+PPvooLVu2JCoqiho1atChQweef/55wJwad8cddwDQpEmTkimLR6e2lTZlbO/evVx66aXExcWRkJDAuHHj+P7773E4HLz11lsl551sutk111xD48aNLceKi4t59NFHSzJds2ZNrr32Wg4ePFi2L6SIiEglU0NIRESkgnk8HhYtWkTXrl1p0KBBme
5z44038s9//pNBgwbxv//9j0ceeYT58+fTq1cv0tPTT7uWyZMns3PnTl599VU+++wzUlNTS2677rrrcDqdvP/++zz33HP8+OOP9OvXzzLN5r333mPw4MHEx8fz9ttv8+GHH5KUlMSQIUNOuym0bds2AFq0aFFybN++faxfv57BgwcTHR1d6v169uxJamoqCxcuLLnPunXrTnmfsjjaPHvwwQfLdH5aWhrXXnstEyZM4NNPP6V9+/b8+c9/5uGHH2by5MnceeedfPzxx8TGxjJq1Cj27t1bct9vvvmGAQMGkJWVxeuvv8706dOJi4vjwgsvZMaMGSXnbdiwgYEDB5KZmclbb73Fq6++yurVq3n00Ud96nn88ccZP348bdq04cMPP+Sdd94hOzub8847j3Xr1p3ycznxvu+++y45OTmcd955p1yz6WTef/99Ro4cSXx8PNOnT+f111/n8OHD9OvXj6VLl5acN3/+fC655BKSk5OZMWMGU6dOZfr06bz99tt/+BxTp07lwQcfZPz48cydO5cZM2Zw3XXXlfy/nTBhAjfddBMAs2bNYvny5SxfvpwuXbqU+ngFBQWcf/75LFiwgClTpjBz5kxq167NuHHjyv35H+X1ehk5ciRPPPEEf/rTn5g7dy5PPPEECxcupF+/fhQUFJz2Y4uIiFQYQ0RERCpUWlqaARiXXXZZmc7/9ddfDcCYOHGi5fgPP/xgAMbdd99dcqxRo0bG1Vdf7fMYffv2Nfr27Vty/euvvzYAo0+fPj7nvvnmmwZgXHzxxZbj3333nQEYjz76qGEYhpGXl2ckJSUZF154oeU8j8djdOzY0ejevfspP69t27YZgPHkk08aLpfLKCwsNNasWWP07NnTqFOnjrFt27aSc7///nsDMO66665TPmaPHj2MqKioct3njyxevNgICQkxHnrooT88t2/fvgZgrFy5suRYRkaGERISYkRFRRl79uwpOb5mzRoDMF544YWSY+ecc46Rmppq5OTklBxzu91Gu3btjPr16xter9cwDMMYN26cERUVZaSlpVnOa9WqlQGUfO127txphIaGGn/7298sdWZnZxupqanGmDFjSo498MADxvE/+h2970033WS5b05OjlG7dm3j0ksvPeXX4uj/sa+//towDPP/Rd26dY327dsbHo/H8nipqalGr169So6dffbZRoMGDYyioiLLecnJycaJP56e+H9+xIgRRqdOnU5Z21NPPWX5Oh3vxKy88sorBmB8+umnlvP+8pe/GIDx5ptvnvS+R1199dVGo0aNSq5Pnz7dAIyPP/7Yct6KFSsMwHj55ZdPWb+IiEhV0AghERERm3399dcAPlPBunfvTuvWrc9oetbo0aNPetvll19uud6rVy8aNWpUUs+yZcs4dOgQV199NW63u+TD6/VywQUXsGLFijLt4PXPf/6TsLAwIiMj6dSpE+vWreOzzz7zmWJTFoZhVPg6SH379sXtdnP//feX6fw6derQtWvXkutJSUmkpqbSqVMn6tatW3K8devWACVbwefl5fHDDz8wZswYYmNjS84LCQnhyiuvZPfu3fz222+A+X9i4MCB1KpVy3LeiaNWvvjiC9xuN3/+858tx+Pi4ujfvz/ffPPNST+Po/e96qqrLN/fyMhI+vbtW+7dw3777Tf27t3LlVdeidN57EfM2NhYRo8ezffff09+fj55eXmsXLmSUaNGER4ebjnvwgsv/MPn6d69Oz///DMTJ07kiy++IDs7u1x1nujrr78mLi7OMoUT4E9/+tNpP+acOXOoUaMGF154oeVr26lTJ2rXrl3pO7OJiIiUhRaVFhERqWApKSlER0eXTI36IxkZGYDZaDhR3bp1SxoKp6O0xzyqdu3apR47Ws/RNX7GjBlz0sc4dOgQMTExp6zhH//4B1dccQVFRUV8//333HvvvYwcOZKff/6Z5ORkwFx4GvjDr9mOHTtKpuGV9T4VLSkpyedYeHi4z/GjzY7Cwk
LAXLPJMIyTfp/h2P+FjIyMk35/jnf0e9SrVy+fc482707m6H3PPvvsUm8/vqlTFn/0/9jr9ZZ8DQzDsDS7jirt2Ik
"text/plain": [
"<Figure size 1400x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"draw_roc_curve(X_test, y_test)"
]
},
{
"cell_type": "markdown",
"id": "ae8e9bd3-0f6a-4f82-bb4c-470cbdc8d6bb",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"## Cross Validation"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "7f0535de-34f1-4e97-b993-b429ecf0a554",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the target column, as a 1-D Series, for model fitting.\n",
"# (presumably y_train is loaded as a one-column DataFrame - TODO confirm)\n",
"# The type guard makes this cell idempotent: once y_train is already a Series,\n",
"# re-running the cell leaves it untouched instead of failing on the label lookup.\n",
"if not isinstance(y_train, pd.Series):\n",
"    y_train = y_train['y_has_purchased']"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "f7fca463-d7d6-493b-8329-fdfa92457f78",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best parameters found: {'logreg__C': 0.0009765625, 'logreg__class_weight': 'balanced', 'logreg__penalty': 'l1'}\n",
"Best cross-validation score: 0.65\n",
"Test set score: 0.64\n"
]
}
],
"source": [
"# Hyper-parameter search: 3-fold cross-validation over param_grid, scored with recall_scorer\n",
"grid_search = GridSearchCV(\n",
"    estimator=pipeline,\n",
"    param_grid=param_grid,\n",
"    scoring=recall_scorer,\n",
"    cv=3,\n",
"    n_jobs=-1,            # run the fits on all available cores\n",
"    error_score='raise',  # a failing fit raises instead of being scored\n",
")\n",
"grid_search.fit(X_train, y_train)\n",
"\n",
"# Report the selected parameter combination and its cross-validated score\n",
"print(\"Best parameters found: \", grid_search.best_params_)\n",
"print(\"Best cross-validation score: {:.2f}\".format(grid_search.best_score_))\n",
"\n",
"# Score the refit best estimator on the held-out test set (same scorer as the search)\n",
"test_score = grid_search.score(X_test, y_test)\n",
"print(\"Test set score: {:.2f}\".format(test_score))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "56bd7828-4de1-4166-bea0-5d5e152b9d38",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi0AAAHFCAYAAAA+FskAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABQP0lEQVR4nO3de3yP9f/H8cdnp49tbMZsM6ecMxRRjEI5M/LtgFYrEXJsOaZy6mBOIWc60FdpiUilpVJKDGHOUU5LzBxmGLbZrt8ffj7fPjbZdH189uF57/a53ey6Xtd1va5PrV693u/3dVkMwzAQERERKeDcnJ2AiIiISF6oaBERERGXoKJFREREXIKKFhEREXEJKlpERETEJahoEREREZegokVERERcgooWERERcQkqWkRERMQlqGiRW9q2bdt49tlnKV++PIUKFaJw4cLcc889jB8/nlOnTjn02lu2bKFx48b4+/tjsViYMmWK6dewWCyMGjXK9PNez/z587FYLFgsFn788ccc+w3DoFKlSlgsFpo0aXJD15g5cybz58/P1zE//vjjNXMSEdfn4ewERBzlnXfeoXfv3lStWpXBgwcTFhZGZmYmv/76K7Nnz2bdunUsXbrUYdfv2rUraWlpxMbGEhAQwB133GH6NdatW0fp0qVNP29eFSlShPfeey9HYbJ69Wr27dtHkSJFbvjcM2fOJDAwkC5duuT5mHvuuYd169YRFhZ2w9cVkYJLRYvcktatW0evXr1o3rw5y5Ytw2q12vY1b96cgQMHEhcX59AcduzYQffu3WndurXDrlG/fn2HnTsvOnXqxEcffcSMGTPw8/OzbX/vvfcIDw/nzJkzNyWPzMxMLBYLfn5+Tv9ORMRxNDwkt6QxY8ZgsViYO3euXcFyhZeXF+3bt7f9nJ2dzfjx47nzzjuxWq0EBQXx9NNPc/jwYbvjmjRpQo0aNdi4cSMPPPAAPj4+VKhQgbFjx5KdnQ38b+jk0qVLzJo1yzaMAjBq1Cjbn//uyjEHDx60bVu1ahVNmjShePHieHt7U7ZsWR599FHOnz9vi8lteGjHjh08/PDDBAQEUKhQIWrVqsUHH3xgF3NlGOXjjz/mlVdeITQ0FD8/P5o1a8aePXvy9iUDTzzxBAAff/yxbVtqaipLliyha9euuR4zevRo6tWrR7FixfDz8+Oee+7hvffe4+/vbr3jjjvYuXMnq1evtn1/VzpVV3JfsGABAwcOpFSpUlitVv74448cw0MnTpygTJkyNGjQgMzMTNv5d+3aha+vL1FRUXm+VxFxPhUtcsvJyspi1apV1KlThzJlyuTpmF69ejF06FCaN2/O8uXLef3114mLi6NBgwacOHHCLjYpKYknn3ySp556iuXLl9O6dWuGDRvGhx9+CEDbtm1Zt24dAI899hjr1q2z/ZxXBw8epG3btnh5efH+++8TFxfH2LFj8fX1JSMj45rH7dmzhwYNGrBz506mTp3KZ599RlhYGF26dGH8+PE54l9++WUOHTrEu+++y9y5c/n9999p164dWVlZecrTz8+Pxx57jPfff9+27eOPP8bNzY1OnTpd89569uzJokWL+Oyzz3jkkUfo168fr7/+ui1m6dKlVKhQgdq1a9u+v6uH8oYNG0ZiYiKzZ8/miy++ICgoKMe1AgMDiY2NZePGjQwdOhSA8+fP8/jjj1O2bFlmz56dp/sUkQLCELnFJCUlGYDRuXPnPMXv3r3bAIzevXvbbV+/fr0BGC+//LJtW+PGjQ3AWL9+vV1sWFiY0bJlS7ttgNGnTx+7bSNHjjRy+7WbN2+eARgHDhwwDMMwFi9ebABGQkLCP+YOGCNHjrT93LlzZ8NqtRqJiYl2ca1btzZ8fHyM06dPG4ZhGD/88IMBGG3atLGLW7RokQEY69at+8frXsl348aNtnPt2LHDMAzDuPfee40uXboYhmEY1atXNxo3bnzN82RlZRmZmZnGa6+9ZhQvXt
zIzs627bvWsVeu16hRo2vu++GHH+y2jxs3zgCMpUuXGs8884zh7e1tbNu27R/vUUQKHnVa5Lb3ww8/AOSY8HnfffdRrVo1vv/+e7vtISEh3HfffXbb7rrrLg4dOmRaTrVq1cLLy4sePXrwwQcfsH///jwdt2rVKpo2bZqjw9SlSxfOnz+fo+Pz9yEyuHwfQL7upXHjxlSsWJH333+f7du3s3HjxmsODV3JsVmzZvj7++Pu7o6npycjRozg5MmTJCcn5/m6jz76aJ5jBw8eTNu2bXniiSf44IMPmDZtGjVr1szz8SJSMKhokVtOYGAgPj4+HDhwIE/xJ0+eBKBkyZI59oWGhtr2X1G8ePEccVarlQsXLtxAtrmrWLEi3333HUFBQfTp04eKFStSsWJF3n777X887uTJk9e8jyv7/+7qe7ky/yc/92KxWHj22Wf58MMPmT17NlWqVOGBBx7INXbDhg20aNECuLy665dffmHjxo288sor+b5ubvf5Tzl26dKFixcvEhISorksIi5KRYvcctzd3WnatCmbNm3KMZE2N1f+w3306NEc+44cOUJgYKBpuRUqVAiA9PR0u+1Xz5sBeOCBB/jiiy9ITU0lPj6e8PBwoqOjiY2Nveb5ixcvfs37AEy9l7/r0qULJ06cYPbs2Tz77LPXjIuNjcXT05Mvv/ySjh070qBBA+rWrXtD18xtQvO1HD16lD59+lCrVi1OnjzJoEGDbuiaIuJcKlrkljRs2DAMw6B79+65TlzNzMzkiy++AOChhx4CsE2kvWLjxo3s3r2bpk2bmpbXlRUw27Zts9t+JZfcuLu7U69ePWbMmAHA5s2brxnbtGlTVq1aZStSrvjvf/+Lj4+Pw5YDlypVisGDB9OuXTueeeaZa8ZZLBY8PDxwd3e3bbtw4QILFizIEWtW9yorK4snnngCi8XC119/TUxMDNOmTeOzzz771+cWkZtLz2mRW1J4eDizZs2id+/e1KlTh169elG9enUyMzPZsmULc+fOpUaNGrRr146qVavSo0cPpk2bhpubG61bt+bgwYMMHz6cMmXK8OKLL5qWV5s2bShWrBjdunXjtddew8PDg/nz5/Pnn3/axc2ePZtVq1bRtm1bypYty8WLF20rdJo1a3bN848cOZIvv/ySBx98kBEjRlCsWDE++ugjvvrqK8aPH4+/v79p93K1sWPHXjembdu2TJo0icjISHr06MHJkyeZOHFirsvSa9asSWxsLJ988gkVKlSgUKFCNzQPZeTIkfz888+sXLmSkJAQBg4cyOrVq+nWrRu1a9emfPny+T6niDiHiha5ZXXv3p377ruPyZMnM27cOJKSkvD09KRKlSpERkbSt29fW+ysWbOoWLEi7733HjNmzMDf359WrVoRExOT6xyWG+Xn50dcXBzR0dE89dRTFC1alOeee47WrVvz3HPP2eJq1arFypUrGTlyJElJSRQuXJgaNWqwfPly25yQ3FStWpW1a9fy8ssv06dPHy5cuEC1atWYN29evp4s6ygPPfQQ77//PuPGjaNdu3aUKlWK7t27ExQURLdu3exiR48ezdGjR+nevTtnz56lXLlyds+xyYtvv/2WmJgYhg8fbtcxmz9/PrVr16ZTp06sWbMGLy8vM25PRBzMYhh/e6KTiIiISAGlOS0iIiLiElS0iIiIiEtQ0SIiIiIuQUWLiIiIuAQVLSIiIuISVLSIiIiIS1DRIiIiIi7hlny4nHftvtcPErkNLV84ytkpiBQ4zas55p1cf2fWf5cubJluynlclTotIiIi4hJuyU6LiIhIgWJRj8AMKlpEREQczWJxdga3BBUtIiIijqZOiyn0LYqIiIhLUKdFRETE0TQ8ZAoVLSIiIo6m4SFT6FsUERERl6BOi4iIiKNpeMgUKlpEREQcTcNDptC3KCIiIi5BnRYRERFH0/CQKVS0iIiIOJqGh0yhb1FERERcgjotIiIijqbhIVOoaBEREXE0DQ+ZQkWLiI
iIo6nTYgqVfiIiIuIS1GkRERFxNA0PmUJFi4iIiKOpaDGFvkURERFxCeq0iIiIOJqbJuKaQUWLiIiIo2l4yBT6FkV
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"y_pred = grid_search.predict(X_test)\n",
"\n",
"draw_confusion_matrix(y_test, y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "319fe0eb-4d4a-492c-bd50-3f08ab483021",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUddrG8e+k904ooSahSheBJIggioINBUTWvuqqrLr2tfeKBdtrXde6IqKIq7IIFsQA0hEBBRJ6JwnpPXPePw4MHAcwgSRnJnN/rotL5pwzyQNy54SH33l+DsMwDERERERERERExGf42V2AiIiIiIiIiIg0LjWERERERERERER8jBpCIiIiIiIiIiI+Rg0hEREREREREREfo4aQiIiIiIiIiIiPUUNIRERERERERMTHqCEkIiIiIiIiIuJj1BASEREREREREfExagiJiIiIiIiIiPgYNYREREREvNzDDz9MfHw8mzdvtrsUERER8RJqCImIiDSglStXcuWVV9KhQwdCQkKIiIigb9++TJw4kby8PFtqevfdd3E4HCxZsqRBP8+mTZtwOByuH35+fsTGxjJs2DBmzZp1xPfNnDmTs846i2bNmhEcHEybNm24/PLLWbNmzRHf89NPP3HhhReSlJREUFAQ0dHRpKen89prr1FSUtIQvzxbHPh/t2nTJsvxBx98kPPPP5+xY8dSWVl52Pc+9NBDOByOeqtlzpw5OBwO5syZU28f83Dat2/PFVdcUaf3zJ8/n4ceeoj8/Hy3c0OGDGHIkCH1UpuIiIg3U0NIRESkgbz11luceOKJLF68mDvuuIOZM2fy+eefM3bsWF5//XWuuuoqu0tsFDfeeCMLFizgp59+4tlnn2X9+vWMHDmSuXPnul175513MmLECJxOJ6+++iqzZ8/mwQcfZPHixfTt25dp06a5vefBBx9k8ODBbN++nUcffZTZs2fz8ccfM2zYMB566CHuu+++xvhl2u7111+nWbNm3HLLLXaXUq8+//xz7r///jq9Z/78+Tz88MOHbQi9+uqrvPrqq/VUnYiIiPcKsLsAERGRpmjBggVcf/31nH766UyfPp3g4GDXudNPP53bbruNmTNnNmpNVVVV9bpCpLbatm3LwIEDAcjIyKBjx46ccsopvP322wwePNh13eTJk3nmmWe4/vrrLX9hHzx4MOPHj+eUU07h0ksvpXfv3iQnJwMwdepUHnnkEa666ireeusty69vxIgR3HnnnSxYsKCRfqX2CggI4Ouvv7a7jHrXp0+fev143bp1q9ePJyIi4q20QkhERKQBPPHEEzgcDt58801LM+iAoKAgzj33XNdrp9PJxIkT6dKlC8HBwSQmJnLZZZexbds2y/uO9PjMHx+DOfA4zwcffMBtt91GUlISwcHBZGVlua7Zt28fV155JXFxcYSHh3POOeewYcMGt4/97bffMmzYMKKioggLCyMjI4PvvvvuGH5XTP369QNg9+7dluOPP/44sbGxPPvss27vCQ8P5+WXX6a0tJRJkya5jj/yyCPExsby0ksvHbbZFRkZyfDhw4+51j8aMmQI3bt3Z8GCBaSnpxMaGkr79u155513APj666/p27cvYWFh9OjR47BNv8zMTIYNG0ZkZCRhYWGkp6cftpHz888/k5GRQUhICK1ateLuu++mqqrqsHVNmTKFtLQ0wsPDiYiIYPjw4SxdurRWv6Y/vveMM85g+fLldfhdsfrvf/9LWloaYWFhREZGcvrppx+2KffFF1/Qs2dPgoODSU5O5sUXXzzsY21//DPvdDp57LHH6Ny5M6GhocTExNCzZ09efPFFwHw07o477gCgQ4cOrkcWDzzadrhHxnbs2MGFF15IZGQk0dHRjBs3jp9//hmHw8G7777ruu5Ij5tdccUVtG/f3nKssrKSxx57zJXpZs2aceWVV7J3797a/UaKiIg0MDWERERE6llNTQ3ff/89J554Im3atK
nVe66//nr++c9/cvrpp/Pf//6XRx99lJkzZ5Kenk5OTs4x13L33XezZcsWXn/9db788ksSExNd56666ir8/Pz46KOPeOGFF1i0aBFDhgyxPGbz4YcfMnz4cKKionjvvff45JNPiIuL44wzzjjmptDGjRsB6NSpk+vYzp07Wb16NcOHDycsLOyw70tLSyMxMZHZs2e73rNq1aqjvqc2DjTPHnrooVpdv2vXLq688kquvvpqvvjiC3r06MFf//pXHnnkEe6++27uvPNOPvvsMyIiIhg1ahQ7duxwvffHH3/k1FNPpaCggLfffpvJkycTGRnJOeecw5QpU1zXrVmzhmHDhpGfn8+7777L66+/zvLly3nsscfc6nniiScYP3483bp145NPPuH999+nsLCQk08+mVWrVh311/LH937wwQcUFRVx8sknH3Vm05F89NFHnHfeeURFRTF58mTefvtt9u3bx5AhQ8jMzHRdN3PmTC644ALi4+OZMmUKEydOZPLkybz33nt/+jkmTpzIQw89xPjx4/n666+ZMmUKV111levP7dVXX82NN94IwLRp01iwYAELFiygb9++h/14ZWVlnHbaacyaNYsnn3ySqVOn0qJFC8aNG1fnX/8BTqeT8847j6eeeoq//OUvfP311zz11FPMnj2bIUOGUFZWdswfW0REpN4YIiIiUq927dplAMZFF11Uq+t/++03AzAmTJhgOb5w4UIDMO655x7XsXbt2hmXX36528c45ZRTjFNOOcX1+ocffjAAY/DgwW7XvvPOOwZgnH/++Zbj8+bNMwDjscceMwzDMEpKSoy4uDjjnHPOsVxXU1Nj9OrVy+jfv/9Rf10bN240AOPpp582qqqqjPLycmPFihVGWlqa0bJlS2Pjxo2ua3/++WcDMO66666jfswBAwYYoaGhdXrPn5kzZ47h7+9vPPzww3967SmnnGIAxpIlS1zHcnNzDX9/fyM0NNTYvn276/iKFSsMwHjppZdcxwYOHGgkJiYaRUVFrmPV1dVG9+7djdatWxtOp9MwDMMYN26cERoaauzatctyXZcuXQzA9Xu3ZcsWIyAgwPj73/9uqbOwsNBITEw0xowZ4zr24IMPGod+63fgvTfeeKPlvUVFRUaLFi2MCy+88Ki/Fwf+jP3www+GYZh/Llq1amX06NHDqKmpsXy8xMREIz093XXspJNOMtq0aWNUVFRYrouPjzf++O3pH//Mn3322Ubv3r2PWtszzzxj+X061B+z8tprrxmA8cUXX1iuu+aaawzAeOedd4743gMuv/xyo127dq7XkydPNgDjs88+s1y3ePFiAzBeffXVo9YvIiLSGLRCSERExGY//PADgNujYP3796dr167H9XjW6NGjj3ju4osvtrxOT0+nXbt2rnrmz59PXl4el19+OdXV1a4fTqeTM888k8WLF9dqB69//vOfBAYGEhISQu/evVm1ahVffvml2yM2tWEYRr3PQTrllFOorq7mgQceqNX1LVu25MQTT3S9jouLIzExkd69e9OqVSvX8a5duwK4toIvKSlh4cKFjBkzhoiICNd1/v7+XHrppWzbto21a9cC5p+JYcOG0bx5c8t1f1y18s0331BdXc1f//pXy/HIyEiGDh3Kjz/+eMRfx4H3XnbZZZb/vyEhIZxyyil13j1s7dq17Nixg0svvRQ/v4PfYkZERDB69Gh+/vlnSktLKSkpYcmSJYwaNYqgoCDLdeecc86ffp7+/fvzyy+/MGHCBL755hsKCwvrVOcf/fDDD0RGRloe4QT4y1/+cswf86uvviImJoZzzjnH8nvbu3dvWrRo0eA7s4mIiNSGhkqLiIjUs4SEBMLCwlyPRv2Z3NxcwGw0/FGrVq1cDYVjcbiPeUCLFi0Oe+xAPQdm/IwZM+aIHyMvL4/w8PCj1vCPf/yDSy65hIqKCn7++Wfuu+8+zjvvPH755Rfi4+MBc/A08Ke/Z5s3b3Y9hlfb99S3uLg4t2NBQU
Fuxw80O8rLywFzZpNhGEf8/wwH/yzk5uYe8f/PoQ78P0pPT3e79kDz7kgOvPekk0467PlDmzq18Wd/jp1Op+v3wDA
"text/plain": [
"<Figure size 1400x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"draw_roc_curve(X_test, y_test)"
]
},
{
"cell_type": "markdown",
"id": "ab122f66-1591-43ea-a364-2564f09b2bb3",
"metadata": {},
"source": [
"# Segmentation du score de prédiction"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "279e18c7-29d8-4328-963a-18babd13c2c8",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABA4AAAIjCAYAAACDPFmSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD8YklEQVR4nOzdd1gUV/s38O8ufekgUgyICCIWFIMoVixYUCOWYEsQW0LUWFBRYgNDFAuW6BNjEhVjNKapsaCIBWPsUVGfYIlERB9RogZQUVzYef/w3fm5sgtLE2W/n+vySubMmTP3zD0M7NkzZySCIAggIiIiIiIiIlJDWt0BEBEREREREdHrix0HRERERERERKQROw6IiIiIiIiISCN2HBARERERERGRRuw4ICIiIiIiIiKN2HFARERERERERBqx44CIiIiIiIiINGLHARERERERERFpxI4DIiIiIiIiItKIHQdERERUIUlJSTAwMMD27durOxSiKiOXy9GtWzf4+/tDLpdXdzjVatiwYfD09MS9e/eqOxQiekXYcUBEVAkSEhIgkUjwxx9/VHco5bZ582YsX768usOocgEBAQgICKjuMF57aWlpiI6ORkZGRon1bt26hffeew8rVqxAcHCw1u1HR0dDIpGUKab8/HxER0cjJSWl2Drlz2Bp8b6pynO+XpWMjAxIJBIkJCRUdyiVJiwsDK6uriplBgYG+OWXX/D06VNMnTpV47av6h6jLsZX4csvv8SBAwewd+9e1KpV65Xvv7JJJBJER0e/8v2+qntWeY4vMTFR4zaurq4ICwurcFz05mHHARERAdCdjgPSTlpaGmJiYkr8o7awsBCDBg3CBx98gLFjx5ap/dGjR+P48eNl2iY/Px8xMTFqOw569eqF48ePw9HRsUxtEpWFubk5EhMTsWPHDvz444/VHc4rd/bsWcyePRuJiYmoV69edYdTKY4fP47Ro0dXdxhVpjzHl5iYiJiYGLXrtm3bhtmzZ1dGaPSG0a/uAIiIqHrl5+dDJpNVdxhUAdWVQ319fRw9erRM2yhjfeutt/DWW29VWix2dnaws7OrtPaINHF0dMT169erO4xX4uV7S4sWLfDPP/9Uyb6ePHkCY2PjVz6ypnXr1q90f69aZR+fj49PpbZHbw6OOCAiqiJhYWEwMzPD5cuX0b17d5iamsLR0RFxcXEAgBMnTqBdu3YwNTVFgwYNsGHDBpXtlcMYk5OTMWLECNjY2MDU1BR9+vTB33//XWx/69atQ7NmzWBsbAwbGxv069cPly5dUhvTxYsX0a1bN5ibm6NLly4ICAjA7t27cePGDUgkEvGfUkxMDFq1agUbGxtYWFigRYsWWLt2LQRBUGnf1dUVvXv3xt69e9GiRQuYmJigYcOGWLduXbF4//e//+GDDz6As7MzDA0N4eTkhIEDB+Lu3btinby8PEydOhX16tWDoaEh6tSpg0mTJuHx48elnn9BELBo0SLUrVsXxsbGaNGiBfbs2aO2rrb7+emnn9CqVStYWlpCJpPBzc0NI0eOLDUWhUKBlStXonnz5jAxMYGVlRVat26NHTt2iHV++OEHdOvWDY6OjjAxMYGXlxdmzJhRLAZNOQSA5ORk9O3bF2+99RaMjY3h7u6ODz/8UO1zyJcvX8aQIUNgb28PIyMjuLi4IDQ0FAUFBUhISMC7774LAOjUqZN4Pbw4FH3//v3o0qULLCwsIJPJ0LZtWxw4cEBlH8rh9WfPnsXAgQNhbW2N+vXrq6x70cGDBxEQEABbW1uYmJjAxcUFAwYMQH5+PjIyMsSOgZiYGDEm5ZBZdcN+NV0DLw8l1zRkOCUlBRKJpNgIB22O/Z9//hGvbyMjI9jZ2aFt27bYv39/sVy8bPfu3WjevDmMjIxQr149LFmyRG29//znP+jQoQNq164NU1NTNG3aFIsWLSr2/H1AQACaNGmC06dPo3379uK1GxcXB4VCIdZTKBSIjY2Fp6
eneJ16e3tjxYoVpcaszl9//YWhQ4eidu3aMDIygpeXF/7zn/9ota1EIsH48eOxfv16MR5fX1+cOHECgiBg8eLFqFevHszMzNC5c2dcu3atWBva3BOB5/n39PQUY/z222/VxvTs2TPExsaiYcOGYk7DwsJU7lmaqNt2xIgRWn8Ir0iM2u6npHuLtu0WFBRgypQpcHBwgEwmQ4cOHXDmzJliw9uVP3P79u3DyJEjYWdnB5lMhoKCAgDP74f+/v4wNTWFmZkZunfvjnPnzqns6++//8bgwYPh5OQEIyMj2Nvbo0uXLkhNTRXrlHRPUVI3lP+///0v+vbtC2traxgbG6N58+bFfkcr7w/ff/89Zs6cCScnJ1hYWKBr1664cuVKqedbE22v26+//hoNGjSAkZERGjVqhM2bN6t9fOXl48vPzxd/3yn34evri++//x7A8+tA+XP64t8DyvujukcVLl++jB49ekAmk6FWrVoIDw/Hzp07i90/NT3moO7xnor8/qeqwREHRERVSC6Xo3///ggPD8e0adOwefNmREVFIS8vD7/88gumT5+Ot956CytXrkRYWBiaNGmCt99+W6WNUaNGITAwEJs3b8bNmzcxa9YsBAQE4MKFC7CysgIALFiwAJ988gmGDBmCBQsW4P79+4iOjoa/vz9Onz4NDw8Psb1nz57hnXfewYcffogZM2agsLAQb731Fj744AOkp6dj27ZtxY4jIyMDH374IVxcXAA87/T4+OOP8b///Q9z5sxRqXv+/HlMmTIFM2bMgL29Pb755huMGjUK7u7u6NChA4DnnQYtW7aEXC7HJ598Am9vb9y/fx9JSUn4999/YW9vj/z8fHTs2BG3bt0S6/z555+YM2cOLl68iP3795f4zVRMTAxiYmIwatQoDBw4EDdv3sSYMWNQVFQET09PsZ62+zl+/DgGDRqEQYMGITo6GsbGxrhx4wYOHjxY6nUQFhaG7777DqNGjcK8efNgaGiIs2fPqnxQ/euvvxAUFIRJkybB1NQUly9fxsKFC3Hq1Kli+1CXQwBIT0+Hv78/Ro8eDUtLS2RkZGDp0qVo164dLl68CAMDAzFH7dq1Q61atTBv3jx4eHggKysLO3bswLNnz9CrVy/Mnz8fn3zyCf7zn/+gRYsWACB+6P/uu+8QGhqKvn37YsOGDTAwMMCaNWvQvXt3JCUliR82lPr374/BgwcjPDxc4x99GRkZ6NWrF9q3b49169bBysoK//vf/7B37148e/YMjo6O2Lt3L3r06IFRo0aJQ29LGmWg7TVQFtoe+/vvv4+zZ8/is88+Q4MGDZCTk4OzZ8/i/v37JbZ/4MAB9O3bF/7+/tiyZQuKioqwaNEitR9O09PTMXToUPEP6/Pnz+Ozzz7D5cuXi3XW3blzB8OGDcOUKVMwd+5cbNu2DVFRUXByckJoaCgAYNGiRYiOjsasWbPQoUMHyOVyXL58GTk5OWU+T2lpaWjTpg1cXFwQHx8PBwcHJCUlYcKECbh37x7mzp1bahu7du3CuXPnEBcXB4lEgunTp6NXr14YPnw4/v77b6xatQq5ubmIiIjAgAEDkJqaKt4TtL0nJiQkYMSIEejbty/i4+ORm5uL6OhoFBQUQCr9v+/XFAoF+vbtiyNHjiAyMhJt2rTBjRs3MHv2bJw8eRJnzpzROOpH07Zz585FQEAA/vjjD5iYmGg8DxWNUdv9AOrvLWVpd8SIEfjhhx8QGRmJzp07Iy0tDf369UNeXp7a/Y0cORK9evXCxo0b8fjxYxgYGGD+/PmYNWsWRowYgVmzZuHZs2dYvHgx2rdvj1OnTqFRo0YAgKCgIPHnw8XFBffu3cOxY8fE67W0e4qmfF25cgVt2rRB7dq18fnnn8PW1hbfffed2EkUGRmpUv+TTz5B27Zt8c033yAvLw/Tp09Hnz59cOnSJejp6ZV4vl+m7XX71Vdf4cMPP8SAAQOwbNky5ObmIiYmRux4KUlERAQ2btyI2N
hY+Pj44PHjx/jvf/8r3ptmz56Nx48f4+eff1Z5nEzTY2B3795Fx44dYWBggC+++AL29vbYtGkTxo8fX6Zjf1FFf/9
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Fetch the fitted classifier step from the pipeline once\n",
"logreg_step = pipeline.named_steps['logreg']\n",
"coefficients = logreg_step.coef_[0]            # first row of the coefficient matrix\n",
"feature_names = logreg_step.feature_names_in_  # feature names recorded by the classifier at fit time\n",
"\n",
"# Horizontal bar chart: one bar per feature, bar length = its coefficient\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"ax.barh(feature_names, coefficients, color='skyblue')\n",
"ax.set_xlabel('Importance des caractéristiques')\n",
"ax.set_ylabel('Caractéristiques')\n",
"ax.set_title('Importance des caractéristiques dans le modèle de régression logistique')\n",
"ax.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "210b931c-6d46-4ebf-a9c7-d1ee05c3fadf",
"metadata": {},
"outputs": [],
"source": [
"# Build a dataframe holding the customer id, the model features and the score.\n",
"# .copy() makes it an independent frame, so the column assignments below never\n",
"# write into (or warn about) a slice of dataset_test.\n",
"dataset_for_segmentation = dataset_test[['customer_id'] + numeric_features + categorical_features].copy()\n",
"\n",
"# Predicted probability of the positive class (second column of predict_proba)\n",
"y_predict_proba = pipeline.predict_proba(X_test)[:, 1]\n",
"\n",
"dataset_for_segmentation['prediction_probability'] = y_predict_proba\n",
"\n",
"# Bucket each probability by its first decimal: p * 10 truncated (not rounded) to an integer,\n",
"# e.g. 0.37 -> 3. A probability of exactly 1.0 falls into an extra bucket, 10.\n",
"dataset_for_segmentation['category'] = (dataset_for_segmentation['prediction_probability'] * 10).astype(int)\n",
"\n",
"# NOTE(review): y_pred was last assigned from grid_search.predict(X_test) in the cross-validation\n",
"# section, whereas the probabilities above come from `pipeline` - confirm that both columns are\n",
"# meant to come from the same model.\n",
"dataset_for_segmentation['prediction'] = y_pred\n",
"\n",
"def premiere_partie(chaine):\n",
"    \"\"\"Return the part of `chaine` before its first underscore.\n",
"\n",
"    Returns None when `chaine` is falsy (empty string or None).\n",
"    \"\"\"\n",
"    if chaine:\n",
"        return chaine.split('_')[0]\n",
"    else:\n",
"        return None\n",
"\n",
"# Prefix of customer_id before the first '_'\n",
"# (presumably ids look like '<company>_<customer>' - TODO confirm)\n",
"dataset_for_segmentation['company_number'] = dataset_for_segmentation['customer_id'].apply(premiere_partie)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "055e47dd-9ff3-4853-a46d-d5a5edc1f361",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 73,
"id": "969f1f92-d715-4d74-85a7-437e72838cb5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>fidelity</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" </tr>\n",
" <tr>\n",
" <th>category</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.113637</td>\n",
" <td>0.006274</td>\n",
" <td>1.586366</td>\n",
" <td>0.005821</td>\n",
" <td>0.000647</td>\n",
" <td>548.790455</td>\n",
" <td>548.773103</td>\n",
" <td>-0.977118</td>\n",
" <td>0.001585</td>\n",
" <td>0.000776</td>\n",
" <td>0.000000</td>\n",
" <td>0.000032</td>\n",
" <td>0.999968</td>\n",
" <td>13.984219</td>\n",
" <td>1.302720</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.810841</td>\n",
" <td>0.128432</td>\n",
" <td>9.611292</td>\n",
" <td>0.125295</td>\n",
" <td>0.018186</td>\n",
" <td>525.437516</td>\n",
" <td>525.275222</td>\n",
" <td>-0.729328</td>\n",
" <td>0.054312</td>\n",
" <td>0.111832</td>\n",
" <td>0.245480</td>\n",
" <td>0.495929</td>\n",
" <td>0.258591</td>\n",
" <td>18.413562</td>\n",
" <td>3.718711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.159419</td>\n",
" <td>0.339253</td>\n",
" <td>15.182143</td>\n",
" <td>0.337577</td>\n",
" <td>0.323824</td>\n",
" <td>501.529129</td>\n",
" <td>501.415505</td>\n",
" <td>-0.554439</td>\n",
" <td>0.969939</td>\n",
" <td>0.304757</td>\n",
" <td>0.392570</td>\n",
" <td>0.297258</td>\n",
" <td>0.310173</td>\n",
" <td>17.395042</td>\n",
" <td>2.608084</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2.153080</td>\n",
" <td>0.744161</td>\n",
" <td>27.820044</td>\n",
" <td>0.734881</td>\n",
" <td>0.600982</td>\n",
" <td>287.051054</td>\n",
" <td>286.675385</td>\n",
" <td>0.105360</td>\n",
" <td>1.776035</td>\n",
" <td>0.659878</td>\n",
" <td>0.288813</td>\n",
" <td>0.253244</td>\n",
" <td>0.457943</td>\n",
" <td>16.790421</td>\n",
" <td>4.173954</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2.044749</td>\n",
" <td>0.777640</td>\n",
" <td>27.353145</td>\n",
" <td>0.754549</td>\n",
" <td>0.079213</td>\n",
" <td>297.179255</td>\n",
" <td>295.019902</td>\n",
" <td>1.898178</td>\n",
" <td>0.293760</td>\n",
" <td>0.894877</td>\n",
" <td>0.666980</td>\n",
" <td>0.301424</td>\n",
" <td>0.031596</td>\n",
" <td>16.954707</td>\n",
" <td>6.060621</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>3.237988</td>\n",
" <td>0.958520</td>\n",
" <td>46.637380</td>\n",
" <td>0.807655</td>\n",
" <td>0.484785</td>\n",
" <td>387.464785</td>\n",
" <td>380.145068</td>\n",
" <td>7.111357</td>\n",
" <td>2.080397</td>\n",
" <td>1.164958</td>\n",
" <td>0.497758</td>\n",
" <td>0.259769</td>\n",
" <td>0.242473</td>\n",
" <td>27.006406</td>\n",
" <td>12.457719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>3.592233</td>\n",
" <td>1.102881</td>\n",
" <td>49.989226</td>\n",
" <td>0.878014</td>\n",
" <td>0.599906</td>\n",
" <td>268.627019</td>\n",
" <td>250.949344</td>\n",
" <td>17.539247</td>\n",
" <td>2.525994</td>\n",
" <td>1.420921</td>\n",
" <td>0.534607</td>\n",
" <td>0.304259</td>\n",
" <td>0.161134</td>\n",
" <td>14.073285</td>\n",
" <td>4.604134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>3.747016</td>\n",
" <td>1.391266</td>\n",
" <td>40.710335</td>\n",
" <td>0.914702</td>\n",
" <td>0.160990</td>\n",
" <td>309.716173</td>\n",
" <td>274.795570</td>\n",
" <td>34.796876</td>\n",
" <td>0.844250</td>\n",
" <td>1.963028</td>\n",
" <td>0.650364</td>\n",
" <td>0.263464</td>\n",
" <td>0.086172</td>\n",
" <td>26.186317</td>\n",
" <td>8.891703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>5.698276</td>\n",
" <td>1.567006</td>\n",
" <td>63.033699</td>\n",
" <td>0.907915</td>\n",
" <td>0.334248</td>\n",
" <td>326.485952</td>\n",
" <td>257.940194</td>\n",
" <td>68.425460</td>\n",
" <td>2.794279</td>\n",
" <td>2.413009</td>\n",
" <td>0.606583</td>\n",
" <td>0.251567</td>\n",
" <td>0.141850</td>\n",
" <td>30.987461</td>\n",
" <td>11.676332</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>14.505956</td>\n",
" <td>3.211571</td>\n",
" <td>107.288514</td>\n",
" <td>1.011628</td>\n",
" <td>0.157119</td>\n",
" <td>369.696066</td>\n",
" <td>209.280306</td>\n",
" <td>160.348544</td>\n",
" <td>3.514464</td>\n",
" <td>5.394498</td>\n",
" <td>0.669314</td>\n",
" <td>0.223766</td>\n",
" <td>0.106920</td>\n",
" <td>45.928247</td>\n",
" <td>18.241634</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2262.859155</td>\n",
" <td>45.619718</td>\n",
" <td>11051.732394</td>\n",
" <td>1.464789</td>\n",
" <td>0.154930</td>\n",
" <td>467.111875</td>\n",
" <td>31.146796</td>\n",
" <td>435.950994</td>\n",
" <td>54.295775</td>\n",
" <td>64.704225</td>\n",
" <td>0.507042</td>\n",
" <td>0.295775</td>\n",
" <td>0.197183</td>\n",
" <td>53.352113</td>\n",
" <td>26.070423</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
" mean mean mean mean \n",
"category \n",
"0 0.113637 0.006274 1.586366 0.005821 \n",
"1 0.810841 0.128432 9.611292 0.125295 \n",
"2 1.159419 0.339253 15.182143 0.337577 \n",
"3 2.153080 0.744161 27.820044 0.734881 \n",
"4 2.044749 0.777640 27.353145 0.754549 \n",
"5 3.237988 0.958520 46.637380 0.807655 \n",
"6 3.592233 1.102881 49.989226 0.878014 \n",
"7 3.747016 1.391266 40.710335 0.914702 \n",
"8 5.698276 1.567006 63.033699 0.907915 \n",
"9 14.505956 3.211571 107.288514 1.011628 \n",
"10 2262.859155 45.619718 11051.732394 1.464789 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
" mean mean mean \n",
"category \n",
"0 0.000647 548.790455 548.773103 \n",
"1 0.018186 525.437516 525.275222 \n",
"2 0.323824 501.529129 501.415505 \n",
"3 0.600982 287.051054 286.675385 \n",
"4 0.079213 297.179255 295.019902 \n",
"5 0.484785 387.464785 380.145068 \n",
"6 0.599906 268.627019 250.949344 \n",
"7 0.160990 309.716173 274.795570 \n",
"8 0.334248 326.485952 257.940194 \n",
"9 0.157119 369.696066 209.280306 \n",
"10 0.154930 467.111875 31.146796 \n",
"\n",
" time_between_purchase nb_tickets_internet fidelity gender_female \\\n",
" mean mean mean mean \n",
"category \n",
"0 -0.977118 0.001585 0.000776 0.000000 \n",
"1 -0.729328 0.054312 0.111832 0.245480 \n",
"2 -0.554439 0.969939 0.304757 0.392570 \n",
"3 0.105360 1.776035 0.659878 0.288813 \n",
"4 1.898178 0.293760 0.894877 0.666980 \n",
"5 7.111357 2.080397 1.164958 0.497758 \n",
"6 17.539247 2.525994 1.420921 0.534607 \n",
"7 34.796876 0.844250 1.963028 0.650364 \n",
"8 68.425460 2.794279 2.413009 0.606583 \n",
"9 160.348544 3.514464 5.394498 0.669314 \n",
"10 435.950994 54.295775 64.704225 0.507042 \n",
"\n",
" gender_male gender_other nb_campaigns nb_campaigns_opened \n",
" mean mean mean mean \n",
"category \n",
"0 0.000032 0.999968 13.984219 1.302720 \n",
"1 0.495929 0.258591 18.413562 3.718711 \n",
"2 0.297258 0.310173 17.395042 2.608084 \n",
"3 0.253244 0.457943 16.790421 4.173954 \n",
"4 0.301424 0.031596 16.954707 6.060621 \n",
"5 0.259769 0.242473 27.006406 12.457719 \n",
"6 0.304259 0.161134 14.073285 4.604134 \n",
"7 0.263464 0.086172 26.186317 8.891703 \n",
"8 0.251567 0.141850 30.987461 11.676332 \n",
"9 0.223766 0.106920 45.928247 18.241634 \n",
"10 0.295775 0.197183 53.352113 26.070423 "
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean of every numeric feature within each score bucket ('category').\n",
"# agg(['mean']) computes only the mean and yields a two-level (feature, 'mean') column header.\n",
"mean_stats = dataset_for_segmentation.groupby('category')[numeric_features].agg(['mean'])\n",
"\n",
"# Display the resulting DataFrame\n",
"mean_stats"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "14da601e-7b1b-469c-bab1-de8fad4047f2",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtUAAAIiCAYAAAAHJDTKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUOklEQVR4nO3de1iUdf7/8dcIw1GcBAJEkbSQNNRMV0I3DykewTUr3SjU1kOtlZm6bebuCpvppmm2Wua2luYhWyv9lhlBZabhWbE8ZCcT3UA8ICoajHD//nCZnyN4gJuDOM/HdXHVfO733Pf7ns/c+vKee24shmEYAgAAAFBhdWq6AQAAAKC2I1QDAAAAJhGqAQAAAJMI1QAAAIBJhGoAAADAJEI1AAAAYBKhGgAAADCJUA0AAACYRKgGAAAATCJUAwCuK5988omsVqtWrlxZ060AcCGEauAKFixYIIvFoq1bt5a5PC4uTjfddJPT2E033aShQ4eWazvp6elKSkrSiRMnKtYoKsXPP/8si8WiBQsW1HQrVaJk/1588cVKW+cXX3whi8Wid99994q1SUlJslgsTmNdunRRly5dnMYsFouSkpIcj/fs2aOkpCT9/PPPl13/oUOH9NBDD+nll19W//79r3IPri8Xv3bVYejQoaX+HKxNdu/erVGjRikmJka+vr6yWCz64osvarot1DKEaqAKrFixQn/961/L9Zz09HQlJycTqnFdGz58uDZs2HDFug0bNmj48OGOx3v27FFycvJlQ/W5c+c0aNAgjRw5UqNGjaqMduEitm7dqpUrV8rf31/dunWr6XZQS7nXdAPA9ahNmzY13UK52e12WSwWubvzx0Jtc+bMGfn4+NR0G1elUaNGatSo0RXr7rzzznKv293dXV999VVF2qpVOFYrX2JiooYMGSJJevfdd/Xhhx/WcEeojThTDVSBiy//KC4u1uTJkxUZGSlvb2/dcMMNatWqlV5++WVJ5z8S/9Of/iRJatKkiSwWi9PHj8XFxZo2bZpuvfVWeXp6KigoSIMHD9ahQ4ectmsYhqZMmaLw8HB5eXmpXbt2SktLK/XxesnH9YsWLdK4cePUsGFDeXp66ocfftCRI0c0atQotWjRQnXr1lVQUJDuvvturVu3zmlbJZcRTJ8+XS+88IJuuukmeXt7q0uXLvruu+9kt9v1zDPPKDQ0VDabTffcc49ycnJKvU5xcXFatWqV2rRpI29vbzVv3lyrVq2SdP7Sm+bNm8vX11ft27cv8xKcrVu3ql+/fvL395eXl5fatGmj//znP1c1T7/88osGDhwoPz8/2Ww2DRo0SNnZ2WXWXs12zpw5o/Hjx6tJkyby8vKSv7+/2rVrp7fffvuyfZRcYpSWlqaHH35Y/v7+8vX1VXx8vH766Sen2i5duigqKkpffvmlOnToIB8fH/3hD3+QJGVmZuqhhx5SUFCQPD091bx5c82YMUPFxcWltllcXKznn39ejRs3drxXPvvsM6eaH374QQ8//LAiIiLk4+Ojhg0bKj4+Xt98802Z+/Hrr79q7NixCgkJkbe3tzp37qwdO3Y41ZR1+UdZLryEYcGCBbr//vslSV27dnUcHxdeovPpp5+qW7duqlevnnx8fNSxY8dS+3PkyBGNHDlSYWFh8vT01I033qiOHTvq008/vWwvJT3v2LFDAwYMUL169WSz2fTQQw/pyJEjTrVXe6xe6hKx8hyrV+tqj+nLWbp0qWJiYlS3bl3VrVtXt99+u+bPn3/Z57zyyivq1KmTgoKC5Ovrq5YtW2ratGmy2+1OdTt27FBcXJzjfRsaGqq+ffs6vWbLly9XdHS0bDabfHx81LRpU8f7vsTJkycdx5+Hh4caNmyoMWPGKD8//4r7V6cOcQjm8c9c4CoVFRXp3LlzpcYNw7jic6dNm6akpCT95S9/UadOnWS32/Xtt986LvUYPny4jh8/rtmzZ+v9999XgwYNJEktWrSQJP
3xj3/Uv/71Lz3++OOKi4vTzz//rL/+9a/64osvtH37dgUGBkqSJk6cqKlTp2rkyJEaMGCADh48qOHDh8tut6tZs2al+powYYJiYmL02muvqU6dOgoKCnKEhEmTJikkJESnT5/WihUr1KVLF3322Welrn195ZVX1KpVK73yyis6ceKExo0bp/j4eEVHR8tqteqNN97QgQMHNH78eA0fPlwffPCB0/N37typCRMmaOLEibLZbEpOTtaAAQM0YcIEffbZZ5oyZYosFov+/Oc/Ky4uTvv375e3t7ckac2aNerVq5eio6P12muvyWazadmyZRo0aJDOnDlz2evaz549q+7du+uXX37R1KlT1axZM3300UcaNGhQqdqr3c7YsWO1aNEiTZ48WW3atFF+fr527dqlY8eOXf4N8j/Dhg1TbGysli5dqoMHD+ovf/mLunTpoq+//lo33HCDoy4rK0sPPfSQnn76aU2ZMkV16tTRkSNH1KFDBxUWFuq5557TTTfdpFWrVmn8+PH68ccf9eqrrzpta86cOQoPD9esWbMcQbB3795au3atYmJiJJ3/R0dAQID+8Y9/6MYbb9Tx48e1cOFCRUdHa8eOHYqMjHRa57PPPqs77rhD//73v5WXl6ekpCR16dJFO3bsUNOmTa/qNShL3759NWXKFD377LN65ZVXdMcdd0iSbr75ZknS4sWLNXjwYP3ud7/TwoULZbVaNW/ePPXs2VOffPKJ4+P8xMREbd++Xc8//7yaNWumEydOaPv27Vc9P/fcc48GDhyoRx99VLt379Zf//pX7dmzR5s2bZLVapV09cdqeZV1rF6t48ePS7r6Y/pif/vb3/Tcc89pwIABGjdunGw2m3bt2qUDBw5c9nk//vijEhISHCF3586dev755/Xtt9/qjTfekCTl5+crNjZWTZo00SuvvKLg4GBlZ2drzZo1OnXqlKTzlwINGjRIgwYNUlJSkry8vHTgwAF9/vnnjm2dOXNGnTt31qFDh/Tss8+qVatW2r17t/72t7/pm2++0aeffnpV/5gDTDEAXNabb75pSLrsT3h4uNNzwsPDjSFDhjgex8XFGbfffvtltzN9+nRDkrF//36n8b179xqSjFGjRjmNb9q0yZBkPPvss4ZhGMbx48cNT09PY9CgQU51GzZsMCQZnTt3doytWbPGkGR06tTpivt/7tw5w263G926dTPuuecex/j+/fsNSUbr1q2NoqIix/isWbMMSUa/fv2c1jNmzBhDkpGXl+cYCw8PN7y9vY1Dhw45xjIyMgxJRoMGDYz8/HzH+MqVKw1JxgcffOAYu/XWW402bdoYdrvdaVtxcXFGgwYNnPq62Ny5cw1Jxv/93/85jY8YMcKQZLz55pvl3k5UVJTRv3//S27zUkreYxe+voZhGF999ZUhyZg8ebJjrHPnzoYk47PPPnOqfeaZZwxJxqZNm5zG//jHPxoWi8XYt2+fYRj/f95CQ0ONs2fPOupOnjxp+Pv7G927d79kn+fOnTMKCwuNiIgI46mnnnKMl7yf7rjjDqO4uNgx/vPPPxtWq9UYPny4Y2zSpEnGxX/1dO7c2en9aRiGIcmYNGmS4/Hy5csNScaaNWuc6vLz8w1/f38jPj7eabyoqMho3bq10b59e8dY3bp1jTFjxlxy/y6lpOcL99kwDGPJkiWGJGPx4sWGYVz9sWoYpf+MKHHxa1GeY7XExa/dxS51TJflp59+Mtzc3IwHH3zwsnVDhgwp9efghYqKigy73W689dZbhpubm3H8+HHDMAxj69athiRj5cqVl3zuiy++aEgyTpw4ccmaqVOnGnXq1DG2bNniNP7uu+8akozVq1dftv8LXeq9BlwJn3cAV+mtt97Sli1bSv389re/veJz27dvr507d2rUqFH65JNPdPLkyave7po1aySp1FnX9u3bq3nz5o6PuDdu3KiCggINHDjQqe7OO++85Lfy77333jLHX3vtNd1xxx3y8vKSu7u7rF
arPvvsM+3du7dUbZ8+fZw+Om3evLmk82cXL1QynpmZ6TR+++23q2HDhqXqunTp4nSdcMl4ydmxH374Qd9++60efPB
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of the predicted class-1 probabilities, in ten equal-width bins over [0, 1]\n",
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"ax.hist(y_predict_proba, bins=10, range=(0, 1), color='blue', alpha=0.7)\n",
"\n",
"# Pin the x axis to [0, 1]; fix only the lower bound of the y axis\n",
"ax.set_xlim(0, 1)\n",
"ax.set_ylim(0, None)  # upper limit is left to adjust automatically\n",
"\n",
"ax.set_title('Histogramme des probabilités pour la classe 1')\n",
"ax.set_xlabel('Probabilité')\n",
"ax.set_ylabel('Fréquence')\n",
"ax.grid(True)\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}