2024-03-08 14:48:38 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "3415114e-9577-4487-89eb-4931620ad9f0",
"metadata": {},
"source": [
"# Predict Sales"
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 22,
2024-03-08 14:48:38 +01:00
"id": "f271eb45-1470-4764-8c2e-31374efa1fe5",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import s3fs\n",
"import re\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier\n",
2024-03-10 12:30:57 +01:00
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
2024-03-10 11:09:53 +01:00
"from sklearn.utils import class_weight\n",
2024-03-08 14:48:38 +01:00
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
2024-03-10 11:09:53 +01:00
"from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
"\n",
2024-03-08 14:48:38 +01:00
"import pickle\n",
2024-03-10 11:09:53 +01:00
"import warnings\n",
2024-03-08 14:48:38 +01:00
"#import scikitplot as skplt"
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 23,
2024-03-10 11:09:53 +01:00
"id": "3fecb606-22e5-4dee-8efa-f8dff0832299",
"metadata": {},
"outputs": [],
"source": [
"warnings.filterwarnings('ignore')\n",
"warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
"warnings.filterwarnings(\"ignore\", category=DataConversionWarning)"
]
},
2024-03-08 14:48:38 +01:00
{
"cell_type": "markdown",
"id": "ae591854-3003-4c75-a0c7-5abf04246e81",
"metadata": {},
"source": [
"### Load Data"
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 24,
2024-03-08 14:48:38 +01:00
"id": "59dd4694-a812-4923-b995-a2ee86c74f85",
"metadata": {},
"outputs": [],
"source": [
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 25,
2024-03-08 14:48:38 +01:00
"id": "017f7e9a-3ba0-40fa-bdc8-51b98cc1fdb3",
"metadata": {},
"outputs": [],
"source": [
"def load_train_test():\n",
2024-03-10 11:09:53 +01:00
" BUCKET = \"projet-bdc2324-team1/Generalization/sport\"\n",
2024-03-10 12:30:57 +01:00
" File_path_train = BUCKET + \"/Train_set/\" + \"dataset_train5.csv\"\n",
" File_path_test = BUCKET + \"/Test_set/\" + \"dataset_test5.csv\"\n",
2024-03-08 14:48:38 +01:00
" \n",
2024-03-10 11:09:53 +01:00
" with fs.open( File_path_train, mode=\"rb\") as file_in:\n",
2024-03-08 14:48:38 +01:00
" dataset_train = pd.read_csv(file_in, sep=\",\")\n",
" dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n",
"\n",
2024-03-10 11:09:53 +01:00
" with fs.open(File_path_test, mode=\"rb\") as file_in:\n",
2024-03-08 14:48:38 +01:00
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
" dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n",
" \n",
" return dataset_train, dataset_test"
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 26,
2024-03-08 14:48:38 +01:00
"id": "825d14a3-6967-4733-bfd4-64bf61c2bd43",
"metadata": {},
"outputs": [],
"source": [
"def features_target_split(dataset_train, dataset_test):\n",
2024-03-10 11:09:53 +01:00
" features_l = ['nb_tickets', 'nb_purchases', 'total_amount',\n",
" 'nb_suppliers', 'nb_tickets_internet',\n",
" 'opt_in',\n",
" 'nb_campaigns', 'nb_campaigns_opened']\n",
" X_train = dataset_train[features_l]\n",
" y_train = dataset_train[['y_has_purchased']]\n",
2024-03-08 14:48:38 +01:00
"\n",
2024-03-10 11:09:53 +01:00
" X_test = dataset_test[features_l]\n",
" y_test = dataset_test[['y_has_purchased']]\n",
2024-03-08 14:48:38 +01:00
" return X_train, X_test, y_train, y_test"
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 27,
2024-03-10 11:09:53 +01:00
"id": "c479b230-b4bd-4cfb-b76b-d9faf6d95772",
"metadata": {},
"outputs": [],
"source": [
"dataset_train, dataset_test = load_train_test()"
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 28,
2024-03-10 11:09:53 +01:00
"id": "69eaec12-b30f-4d30-a461-ea520d5cbf77",
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)"
]
},
2024-03-10 12:30:57 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 29,
2024-03-10 12:30:57 +01:00
"id": "d039f31d-0093-46c6-9743-ddec1381f758",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Shape train : (330117, 8)\n",
"Shape test : (141480, 8)\n"
]
}
],
"source": [
"print(\"Shape train : \", X_train.shape)\n",
"print(\"Shape test : \", X_test.shape)"
]
},
2024-03-08 14:48:38 +01:00
{
"cell_type": "markdown",
"id": "a1d6de94-4e11-481a-a0ce-412bf29f692c",
"metadata": {},
"source": [
"### Prepare preprocessing and Hyperparameters"
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 30,
2024-03-10 11:09:53 +01:00
"id": "b808da43-c444-4e94-995a-7ec6ccd01e2d",
"metadata": {},
2024-03-10 12:30:57 +01:00
"outputs": [
{
"data": {
"text/plain": [
"{0.0: 0.5381774965030861, 1.0: 7.048360235716116}"
]
},
2024-03-10 13:30:10 +01:00
"execution_count": 30,
2024-03-10 12:30:57 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-10 11:09:53 +01:00
"source": [
"# Compute Weights\n",
"weights = class_weight.compute_class_weight(class_weight = 'balanced', classes = np.unique(y_train['y_has_purchased']),\n",
" y = y_train['y_has_purchased'])\n",
"\n",
"weight_dict = {np.unique(y_train['y_has_purchased'])[i]: weights[i] for i in range(len(np.unique(y_train['y_has_purchased'])))}\n",
"weight_dict"
]
},
2024-03-08 14:48:38 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 31,
2024-03-08 14:48:38 +01:00
"id": "b32a79ea-907f-4dfc-9832-6c74bef3200c",
"metadata": {},
"outputs": [],
"source": [
"numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers',\n",
2024-03-10 11:09:53 +01:00
" 'nb_tickets_internet', 'nb_campaigns', 'nb_campaigns_opened']\n",
2024-03-08 14:48:38 +01:00
"\n",
"numeric_transformer = Pipeline(steps=[\n",
2024-03-10 11:09:53 +01:00
" #(\"imputer\", SimpleImputer(strategy=\"mean\")), \n",
" (\"scaler\", StandardScaler()) \n",
"])\n",
"\n",
"categorical_features = ['opt_in'] \n",
2024-03-08 14:48:38 +01:00
"\n",
2024-03-10 11:09:53 +01:00
"# Transformer for the categorical features\n",
"categorical_transformer = Pipeline(steps=[\n",
" #(\"imputer\", SimpleImputer(strategy=\"most_frequent\")), # Impute missing values with the most frequent\n",
" (\"onehot\", OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n",
"])\n",
"\n",
"preproc = ColumnTransformer(\n",
" transformers=[\n",
" (\"num\", numeric_transformer, numeric_features),\n",
" (\"cat\", categorical_transformer, categorical_features)\n",
" ]\n",
")"
2024-03-08 14:48:38 +01:00
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 32,
2024-03-08 14:48:38 +01:00
"id": "9809a688-bfbc-4685-a77f-17a8b2b79ab3",
"metadata": {},
"outputs": [],
"source": [
"# Set loss\n",
"\n",
"balanced_scorer = make_scorer(balanced_accuracy_score)\n",
2024-03-10 13:30:10 +01:00
"recall_scorer = make_scorer(recall_score)"
2024-03-08 14:48:38 +01:00
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 33,
"id": "4f9b2bbf-5f8a-4ac1-8e6c-51bd0dd8ac85",
"metadata": {},
"outputs": [],
"source": [
"def draw_confusion_matrix(y_test, y_pred):\n",
" conf_matrix = confusion_matrix(y_test, y_pred)\n",
" sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Class 0', 'Class 1'], yticklabels=['Class 0', 'Class 1'])\n",
" plt.xlabel('Predicted')\n",
" plt.ylabel('Actual')\n",
" plt.title('Confusion Matrix')\n",
" plt.show()\n",
"\n",
"\n",
"def draw_roc_curve(X_test, y_test):\n",
" y_pred_prob = pipeline.predict_proba(X_test)[:, 1]\n",
"\n",
" # Calcul des taux de faux positifs (FPR) et de vrais positifs (TPR)\n",
" fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob, pos_label=1)\n",
" \n",
" # Calcul de l'aire sous la courbe ROC (AUC)\n",
" roc_auc = auc(fpr, tpr)\n",
" \n",
" plt.figure(figsize = (14, 8))\n",
" plt.plot(fpr, tpr, label=\"ROC curve(area = %0.3f)\" % roc_auc)\n",
" plt.plot([0, 1], [0, 1], color=\"red\",label=\"Random Baseline\", linestyle=\"--\")\n",
" plt.grid(color='gray', linestyle='--', linewidth=0.5)\n",
" plt.xlabel('Taux de faux positifs (FPR)')\n",
" plt.ylabel('Taux de vrais positifs (TPR)')\n",
" plt.title('Courbe ROC : modèle logistique')\n",
" plt.legend(loc=\"lower right\")\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 34,
2024-03-08 14:48:38 +01:00
"id": "206d9a95-7c37-4506-949b-e77d225e42c5",
"metadata": {},
"outputs": [],
"source": [
2024-03-10 13:30:10 +01:00
"# Hyperparameter\n",
"\n",
"param_grid = {'logreg__C': np.logspace(-10, 6, 17, base=2),\n",
" 'logreg__penalty': ['l1', 'l2'],\n",
" 'logreg__class_weight': ['balanced', weight_dict]} "
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "7ff2f7bd-efc1-4f7c-a3c9-caa916aa2f2b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-3 {\n",
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
"#sk-container-id-3 {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"#sk-container-id-3 pre {\n",
" padding: 0;\n",
"}\n",
"\n",
"#sk-container-id-3 input.sk-hidden--visually {\n",
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-dashed-wrapped {\n",
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-container {\n",
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-text-repr-fallback {\n",
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
"#sk-container-id-3 div.sk-parallel-item::after {\n",
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-parallel {\n",
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-parallel-item {\n",
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-parallel-item:first-child::after {\n",
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-parallel-item:last-child::after {\n",
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-parallel-item:only-child::after {\n",
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
"#sk-container-id-3 div.sk-serial {\n",
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
"#sk-container-id-3 div.sk-toggleable {\n",
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
"#sk-container-id-3 label.sk-toggleable__label {\n",
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
"#sk-container-id-3 label.sk-toggleable__label-arrow:before {\n",
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
"#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
"#sk-container-id-3 div.sk-toggleable__content {\n",
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-toggleable__content.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-toggleable__content pre {\n",
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-toggleable__content.fitted pre {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
"#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
"#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
"#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-3 div.sk-label label {\n",
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
"#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
"#sk-container-id-3 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
"#sk-container-id-3 div.sk-label label {\n",
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-label-container {\n",
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
"#sk-container-id-3 div.sk-estimator {\n",
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-estimator.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
"#sk-container-id-3 div.sk-estimator:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-3 div.sk-estimator.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
"#sk-container-id-3 a.estimator_doc_link {\n",
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
"#sk-container-id-3 a.estimator_doc_link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"#sk-container-id-3 a.estimator_doc_link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"#sk-container-id-3 a.estimator_doc_link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
" 'nb_tickets_internet',\n",
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
" LogisticRegression(class_weight={0.0: 0.5381774965030861,\n",
" 1.0: 7.048360235716116},\n",
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" ><label for=\"sk-estimator-id-15\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> Pipeline<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
" 'nb_tickets_internet',\n",
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
" LogisticRegression(class_weight={0.0: 0.5381774965030861,\n",
" 1.0: 7.048360235716116},\n",
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" ><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
" 'nb_suppliers', 'nb_tickets_internet',\n",
" 'nb_campaigns', 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-17\" type=\"checkbox\" ><label for=\"sk-estimator-id-17\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">num</label><div class=\"sk-toggleable__content \"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'nb_tickets_internet', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-18\" type=\"checkbox\" ><label for=\"sk-estimator-id-18\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> StandardScaler<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content \"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-19\" type=\"checkbox\" ><label for=\"sk-estimator-id-19\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-20\" type=\"checkbox\" ><label for=\"sk-estimator-id-20\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-21\" type=\"checkbox\" ><label for=\"sk-estimator-id-21\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> LogisticRegression<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content \"><pre>LogisticRegression(class_weight={0.0: 0.5381774965030861,\n",
" 1.0: 7.048360235716116},\n",
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
" 'nb_tickets_internet',\n",
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
" LogisticRegression(class_weight={0.0: 0.5381774965030861,\n",
" 1.0: 7.048360235716116},\n",
" max_iter=5000, solver='saga'))])"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pipeline\n",
2024-03-08 14:48:38 +01:00
"\n",
2024-03-10 13:30:10 +01:00
"pipeline = Pipeline(steps=[\n",
" ('preprocessor', preproc),\n",
" ('logreg', LogisticRegression(solver='saga', class_weight = weight_dict,\n",
" max_iter=5000)) \n",
"])\n",
"\n",
"pipeline.set_output(transform=\"pandas\")"
]
},
{
"cell_type": "markdown",
"id": "ed415f60-9663-4179-877b-233faf6e1645",
"metadata": {},
"source": [
"## Baseline"
2024-03-10 11:09:53 +01:00
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 36,
"id": "2b467511-2ae5-4a16-a502-397c3460471d",
2024-03-10 11:09:53 +01:00
"metadata": {},
2024-03-10 12:30:57 +01:00
"outputs": [
{
"data": {
"text/html": [
2024-03-10 13:30:10 +01:00
"<style>#sk-container-id-4 {\n",
2024-03-10 12:30:57 +01:00
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 pre {\n",
2024-03-10 12:30:57 +01:00
" padding: 0;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 input.sk-hidden--visually {\n",
2024-03-10 12:30:57 +01:00
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-dashed-wrapped {\n",
2024-03-10 12:30:57 +01:00
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-container {\n",
2024-03-10 12:30:57 +01:00
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-text-repr-fallback {\n",
2024-03-10 12:30:57 +01:00
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-parallel-item::after {\n",
2024-03-10 12:30:57 +01:00
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-parallel {\n",
2024-03-10 12:30:57 +01:00
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-parallel-item {\n",
2024-03-10 12:30:57 +01:00
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
2024-03-10 12:30:57 +01:00
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
2024-03-10 12:30:57 +01:00
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
2024-03-10 12:30:57 +01:00
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-serial {\n",
2024-03-10 12:30:57 +01:00
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-toggleable {\n",
2024-03-10 12:30:57 +01:00
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
2024-03-10 12:30:57 +01:00
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-toggleable__content {\n",
2024-03-10 12:30:57 +01:00
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-toggleable__content pre {\n",
2024-03-10 12:30:57 +01:00
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
2024-03-10 12:30:57 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
2024-03-10 12:30:57 +01:00
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
2024-03-10 12:30:57 +01:00
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-4 div.sk-label label {\n",
2024-03-10 12:30:57 +01:00
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
2024-03-10 12:30:57 +01:00
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-label label {\n",
2024-03-10 12:30:57 +01:00
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-label-container {\n",
2024-03-10 12:30:57 +01:00
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-estimator {\n",
2024-03-10 12:30:57 +01:00
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-estimator.fitted {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-estimator:hover {\n",
2024-03-10 12:30:57 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 a.estimator_doc_link {\n",
2024-03-10 12:30:57 +01:00
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 a.estimator_doc_link.fitted {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 a.estimator_doc_link:hover {\n",
2024-03-10 12:30:57 +01:00
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
2024-03-10 13:30:10 +01:00
"#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
2024-03-10 12:30:57 +01:00
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
2024-03-10 13:30:10 +01:00
"</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
2024-03-10 12:30:57 +01:00
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
" 'nb_tickets_internet',\n",
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
" LogisticRegression(class_weight={0.0: 0.5381774965030861,\n",
" 1.0: 7.048360235716116},\n",
2024-03-10 13:30:10 +01:00
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-22\" type=\"checkbox\" ><label for=\"sk-estimator-id-22\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> Pipeline<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[('preprocessor',\n",
2024-03-10 12:30:57 +01:00
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
" 'nb_tickets_internet',\n",
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
" LogisticRegression(class_weight={0.0: 0.5381774965030861,\n",
" 1.0: 7.048360235716116},\n",
2024-03-10 13:30:10 +01:00
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-23\" type=\"checkbox\" ><label for=\"sk-estimator-id-23\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(transformers=[('num',\n",
2024-03-10 12:30:57 +01:00
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
" 'nb_suppliers', 'nb_tickets_internet',\n",
" 'nb_campaigns', 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
2024-03-10 13:30:10 +01:00
" ['opt_in'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-24\" type=\"checkbox\" ><label for=\"sk-estimator-id-24\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">num</label><div class=\"sk-toggleable__content fitted\"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'nb_tickets_internet', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-25\" type=\"checkbox\" ><label for=\"sk-estimator-id-25\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> StandardScaler<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-26\" type=\"checkbox\" ><label for=\"sk-estimator-id-26\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">cat</label><div class=\"sk-toggleable__content fitted\"><pre>['opt_in']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-27\" type=\"checkbox\" ><label for=\"sk-estimator-id-27\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> OneHotEncoder<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-28\" type=\"checkbox\" ><label for=\"sk-estimator-id-28\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(class_weight={0.0: 0.5381774965030861,\n",
2024-03-10 12:30:57 +01:00
" 1.0: 7.048360235716116},\n",
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
],
"text/plain": [
"Pipeline(steps=[('preprocessor',\n",
" ColumnTransformer(transformers=[('num',\n",
" Pipeline(steps=[('scaler',\n",
" StandardScaler())]),\n",
" ['nb_tickets', 'nb_purchases',\n",
" 'total_amount',\n",
" 'nb_suppliers',\n",
" 'nb_tickets_internet',\n",
" 'nb_campaigns',\n",
" 'nb_campaigns_opened']),\n",
" ('cat',\n",
" Pipeline(steps=[('onehot',\n",
" OneHotEncoder(handle_unknown='ignore',\n",
" sparse_output=False))]),\n",
" ['opt_in'])])),\n",
" ('logreg',\n",
" LogisticRegression(class_weight={0.0: 0.5381774965030861,\n",
" 1.0: 7.048360235716116},\n",
" max_iter=5000, solver='saga'))])"
]
},
2024-03-10 13:30:10 +01:00
"execution_count": 36,
2024-03-10 12:30:57 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-10 11:09:53 +01:00
"source": [
"pipeline.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 37,
2024-03-10 11:09:53 +01:00
"id": "6356e870-0dfc-4e60-9e48-e2de5e7f9f87",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy Score: 0.7563189143341815\n",
"F1 Score: 0.2737001769613213\n",
"Recall Score: 0.6419606680502026\n"
]
}
],
2024-03-10 11:09:53 +01:00
"source": [
"y_pred = pipeline.predict(X_test)\n",
"\n",
"# Calculate the F1 score\n",
2024-03-10 12:30:57 +01:00
"acc = accuracy_score(y_test, y_pred)\n",
"print(f\"Accuracy Score: {acc}\")\n",
"\n",
2024-03-10 11:09:53 +01:00
"f1 = f1_score(y_test, y_pred)\n",
2024-03-10 12:30:57 +01:00
"print(f\"F1 Score: {f1}\")\n",
"\n",
"recall = recall_score(y_test, y_pred)\n",
"print(f\"Recall Score: {recall}\")"
2024-03-10 11:09:53 +01:00
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 38,
2024-03-10 11:09:53 +01:00
"id": "09387a09-0d53-4c54-baac-f3c2a57a629a",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAHFCAYAAADhWLMfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABWoElEQVR4nO3de3yP9f/H8cdnRzP2aQ7bzPmcoZzCKJQzI53QtOxLI8eWY1KobxnyRc5SUaJVpBQWUSTGkjmXchpfZmKGmW1t1+8PP59vH5uYPgc+nvdu1+3mc12v6329r4/Gy+v9vt6XyTAMAxEREREX4ObsDoiIiIjYihIbERERcRlKbERERMRlKLERERERl6HERkRERFyGEhsRERFxGUpsRERExGUosRERERGXocRGREREXIYSG3Fpu3bt4l//+hcVK1akUKFCFClShHr16jFp0iTOnj1r12vv2LGD5s2bYzabMZlMTJs2zebXMJlMjBs3zubt3sjChQsxmUyYTCa+//77PMcNw6BKlSqYTCZatGhxS9eYPXs2CxcuLNA533///XX7JCJ3Bw9nd0DEXubPn0///v2pXr06w4cPJyQkhOzsbH766Sfmzp3Lli1bWL58ud2u36tXL9LT04mNjcXf358KFSrY/BpbtmyhTJkyNm/3ZhUtWpT33nsvT/KyYcMGDh48SNGiRW+57dmzZ1OiRAkiIyNv+px69eqxZcsWQkJCbvm6InJnU2IjLmnLli3069eP1q1b88UXX+Dt7W051rp1a4YOHUpcXJxd+7Bnzx6ioqJo37693a7RuHFju7V9M7p168bixYuZNWsWfn5+lv3vvfceoaGhnD9/3iH9yM7OxmQy4efn5/TvREScS0NR4pLGjx+PyWTinXfesUpqrvLy8qJz586Wz7m5uUyaNIl7770Xb29vAgICePbZZzl+/LjVeS1atKBWrVokJCTw0EMPUbhwYSpVqsSECRPIzc0F/jdM8+effzJnzhzLkA3AuHHjLL/+q6vnHDlyxLJv/fr1tGjRguLFi+Pj40O5cuV44oknuHTpkiUmv6GoPXv28Oijj+Lv70+hQoWoU6cOH3zwgVXM1SGbjz/+mNGjRxMcHIyfnx+tWrXi119/vbkvGXj66acB+Pjjjy370tLSWLZsGb169cr3nNdee41GjRpRrFgx/Pz8qFevHu+99x5/fR9vhQoV2Lt3Lxs2bLB8f1crXlf7vmjRIoYOHUrp0qXx9vbm999/zzMU9ccff1C2bFmaNGlCdna2pf19+/bh6+tLRETETd+riNwZlNiIy8nJyWH9+vXUr1+fsmXL3tQ5/fr1Y+TIkbRu3ZoVK1bw73//m7i4OJo0acIff/xhFZucnEyPHj145plnWLFiBe3bt2fUqFF89NFHAHTs2JEtW7YA8OSTT7JlyxbL55t15MgROnbsiJeXF++//z5xcXFMmDABX19fsrKyrnver7/+SpMmTdi7dy/Tp0/n888/JyQkhMjISCZNmpQn/uWXX+bo0aO8++67vPPOO/z222906tSJnJycm+qnn58fTz75JO+//75l38cff4ybmxvdunW77r317duXTz/9lM8//5zHH3+cQYMG8e9//9sSs3z5cipVqkTdunUt39+1w4ajRo0iKSmJuXPn8tVXXxEQEJDnWiVKlCA2NpaEhARGjhwJwKVLl3jqqacoV64cc+fOvan7FJE7iCHiYpKTkw3A6N69+03F79+/3wCM/v37W+3funWrARgvv/yyZV/z5s0NwNi6datVbEhIiNG2bVurfYAxYMAAq31jx4418vuxW7BggQEYhw8fNgzDMJYuXWoARmJi4t/2HTDGjh1r+dy9e3fD29vbSEpKsopr3769UbhwYePcuXOGYRjGd999ZwBGhw4drOI+/fRTAzC2bNnyt9e92t+EhARLW3v27DEMwzAeeOABIzIy0jAMw6hZs6bRvHnz67aTk5NjZGdnG6+//rpRvHhxIzc313LseudevV6zZs2ue+y7776z2j9x4kQDMJYvX2707NnT8PHxMXbt2vW39ygidyZVbOSu99133wHkmaTasGFDatSowbp166z2BwUF0bBhQ6t99913H0ePHrVZn+rUqYOXlxd9+vThgw8+4NChQzd13vr162nZsmWeSlVkZCSXLl3KUzn663AcXLkPoED30rx5cypXrsz777/P7t27SUhIuO4w1NU+tmrVCrPZjLu7O56enowZM4YzZ86QkpJy09d94oknbjp2+PDhdOzYkaeffpoPPviAGTNmULt27Zs+X0TuHEpsxOWUKFGCwoULc/jw4ZuKP3PmDAClSpXKcyw4ONhy/KrixYvnifP29iYjI+MWepu/ypUr8+233xIQEMCAAQOoXLkylStX5u233/7b886cOXPd+7h6/K+uvZer85EKci8mk4l//etffPTRR8ydO5dq1arx0EMP5Ru7bds22rRpA1x5au3HH38kISGB0aNHF/i6+d3n3/UxMjKSy5cvExQUpLk1Ii5MiY24HHd3d1q2bMn27dvzTP7Nz9W/3E+ePJnn2IkTJyhRooTN+laoUCEAMjMzrfZfO48H4KGHHuKrr74iLS2N+Ph4QkNDiY6OJjY29rrtFy9e/Lr3Adj0Xv4qMjKSP/74g7lz5/Kvf/3runGxsbF4enry9ddf07VrV5o0aUKDBg1u6Zr5TcK+npMnTzJgwADq1KnDmTNnGDZs2C1dU0Ruf0psxCWNGjUKwzCIiorKd7JtdnY2X331FQCPPPIIgGXy71UJCQns37+fli1b2qxfV5/s2bVrl9X+q33Jj7u7O40aNWLWrFkA/Pzzz9eNbdmyJevXr7ckMld9+OGHFC5c2G6PQpcuXZrhw4fTqVMnevbsed04k8mEh4cH7u7uln0ZGRksWrQoT6ytqmA5OTk8/fTTmEwmVq9eTUxMDDNmzODzzz//x22LyO1H69iISwoNDWXOnDn079+f+vXr069fP2rWrEl2djY7duzgnXfeoVatWnTq1Inq1avTp08fZsyYgZubG+3bt+fIkSO8+uqrlC1blhdffNFm/erQoQPFihWjd+/evP7663h4eLBw4UKOHTtmFTd37lzWr19Px44dKVeuHJcvX7Y8edSqVavrtj927Fi+/vprHn74YcaMGUOxYsVYvHgxK1euZNKkSZjNZpvdy7UmTJhww5iOHTsyZcoUwsPD6dOnD2fOnGHy5Mn5PpJfu3ZtYmNj+eSTT6hUqRKFChW6pXkxY8eO5YcffmDNmjUEBQUxdOhQNmzYQO/evalbty4VK1YscJsicvtSYiMuKyoqioYNGzJ16lQmTpxIcnIynp6eVKtWjfDwcAYOHGiJnTNnDpUrV+a9995j1qxZmM1m2rVrR0xMTL5zam6Vn58fcXFxREdH88wzz3DPPffw3HPP0b59e5577jlLXJ06dVizZg1jx44lOTmZIkWKUKtWLVasWGGZo5Kf6tWrs3nzZl5++WUGDBhARkYGNWrUYMGCBQVawddeHnnkEd5//30mTpxIp06dKF26NFFRUQQEBNC7d2+r2Ndee42TJ08SFRXFhQsXKF++vNU6Pzdj7dq1xMTE8Oqrr1pV3hYuXEjdunXp1q0bmzZtwsvLyxa3JyK3AZNh/GVVLBEREZE7mObYiIiIiMtQYiMiIiIuQ4mNiIiIuAwlNiIiIuIylNiIiIiIy1BiIyIi4qI2btxIp06dCA4OxmQy8cUXX1gdNwyDcePGERwcjI+PDy1atGDv3r1WMZmZmQwaNIgSJUrg6+tL586d86zqnpqaSkREBGazGbPZTEREBOfOnbOKSUpKolOnTvj6+lKiRAkGDx6cZwHV3bt307x5c3x8fChdujSvv/46BX14W4mNiIiIi0pPT+f+++9n5syZ+R6fNGkSU6ZMYebMmSQkJBAUFETr1q25cOGCJSY6Oprly5cTGxvLpk2buHjxImFhYeTk5FhiwsPDSUxMJC4ujri4OBITE63eyZaTk0PHjh1JT09n06ZNxMbGsmzZMoYOHWqJOX/+PK1btyY4OJiEhARmzJjB5MmTmTJlSoHu2SXXsfGpO/D
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2024-03-10 11:09:53 +01:00
"source": [
2024-03-10 13:30:10 +01:00
"draw_confusion_matrix(y_test, y_pred)"
2024-03-10 11:09:53 +01:00
]
},
2024-03-10 12:30:57 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 39,
2024-03-10 12:30:57 +01:00
"id": "580b58d7-596f-4207-8c99-4365aba2bc9f",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUddrG8e+k904ooSahSheBJIggioINBUTWvuqqrLr2tfeKBdtrXde6IqKIq7IIFsQA0hEBBRJ6JwnpPXPePw4MHAcwgSRnJnN/rotL5pwzyQNy54SH33l+DsMwDERERERERERExGf42V2AiIiIiIiIiIg0LjWERERERERERER8jBpCIiIiIiIiIiI+Rg0hEREREREREREfo4aQiIiIiIiIiIiPUUNIRERERERERMTHqCEkIiIiIiIiIuJj1BASEREREREREfExagiJiIiIiIiIiPgYNYREREREvNzDDz9MfHw8mzdvtrsUERER8RJqCImIiDSglStXcuWVV9KhQwdCQkKIiIigb9++TJw4kby8PFtqevfdd3E4HCxZsqRBP8+mTZtwOByuH35+fsTGxjJs2DBmzZp1xPfNnDmTs846i2bNmhEcHEybNm24/PLLWbNmzRHf89NPP3HhhReSlJREUFAQ0dHRpKen89prr1FSUtIQvzxbHPh/t2nTJsvxBx98kPPPP5+xY8dSWVl52Pc+9NBDOByOeqtlzpw5OBwO5syZU28f83Dat2/PFVdcUaf3zJ8/n4ceeoj8/Hy3c0OGDGHIkCH1UpuIiIg3U0NIRESkgbz11luceOKJLF68mDvuuIOZM2fy+eefM3bsWF5//XWuuuoqu0tsFDfeeCMLFizgp59+4tlnn2X9+vWMHDmSuXPnul175513MmLECJxOJ6+++iqzZ8/mwQcfZPHixfTt25dp06a5vefBBx9k8ODBbN++nUcffZTZs2fz8ccfM2zYMB566CHuu+++xvhl2u7111+nWbNm3HLLLXaXUq8+//xz7r///jq9Z/78+Tz88MOHbQi9+uqrvPrqq/VUnYiIiPcKsLsAERGRpmjBggVcf/31nH766UyfPp3g4GDXudNPP53bbruNmTNnNmpNVVVV9bpCpLbatm3LwIEDAcjIyKBjx46ccsopvP322wwePNh13eTJk3nmmWe4/vrrLX9hHzx4MOPHj+eUU07h0ksvpXfv3iQnJwMwdepUHnnkEa666ireeusty69vxIgR3HnnnSxYsKCRfqX2CggI4Ouvv7a7jHrXp0+fev143bp1q9ePJyIi4q20QkhERKQBPPHEEzgcDt58801LM+iAoKAgzj33XNdrp9PJxIkT6dKlC8HBwSQmJnLZZZexbds2y/uO9PjMHx+DOfA4zwcffMBtt91GUlISwcHBZGVlua7Zt28fV155JXFxcYSHh3POOeewYcMGt4/97bffMmzYMKKioggLCyMjI4PvvvvuGH5XTP369QNg9+7dluOPP/44sbGxPPvss27vCQ8P5+WXX6a0tJRJkya5jj/yyCPExsby0ksvHbbZFRkZyfDhw4+51j8aMmQI3bt3Z8GCBaSnpxMaGkr79u155513APj666/p27cvYWFh9OjR47BNv8zMTIYNG0ZkZCRhYWGkp6cftpHz888/k5GRQUhICK1ateLuu++mqqrqsHVNmTKFtLQ0wsPDiYiIYPjw4SxdurRWv6Y/vveMM85g+fLldfhdsfrvf/9LWloaYWFhREZGcvrppx+2KffFF1/Qs2dPgoODSU5O5sUXXzzsY21//DPvdDp57LHH6Ny5M6GhocTExNCzZ09efPFFwHw07o477gCgQ4cOrkcWDzzadrhHxnbs2MGFF15IZGQk0dHRjBs3jp9//hmHw8G7777ruu5Ij5tdccUVtG/f3nKssrKSxx57zJXpZs2aceWVV7J3797a/UaKiIg0MDWERERE6llNTQ3ff/89J554Im3atKnVe66//nr++c9/cvrpp/Pf//6XRx99lJkzZ5Kenk5OTs4x13L33XezZcsWXn/9db788ksSExNd56666ir8/Pz46KOPeOGFF1i0aBFDhgyxPGbz4YcfMnz4cKKionjvvff45JNPiIuL44wzzjjmptDGjRsB6NSpk+vYzp07Wb16NcOHDycsLOyw70tLSyMxMZHZs2e73rNq1aqjvqc2DjTPHnrooVpdv2vXLq688kquvvpqvvjiC3r06MFf//pXHnnkEe6++27uvPNOPvvsMyIiIhg1ahQ7duxwvffHH3/k1FNPpaCggLfffpvJkycTGRnJOeecw5QpU1zXrVmzhmHDhpGfn8+7777L66+/zvLly3nsscfc6nniiScYP3483bp145NPPuH999+nsLCQk08+mVWrVh311/LH937wwQcUFRVx8sknH3Vm05F89NFHnHfeeURFRTF58mTefvtt9u3bx5AhQ8jMzHRdN3PmTC644ALi4+OZMmUKEydOZPLkybz33nt/+jkmTpzIQw89xPjx4/n666+ZMmUKV111levP7dVXX82NN94IwLRp01iwYAELFiygb9++h/14ZWVlnHbaacyaNYsnn3ySqVOn0qJFC8aNG1fnX/8BTqeT8847j6eeeoq//OUvfP311zz11FPMnj2bIUOGUFZWdswfW0REpN4YIiIiUq927dplAMZFF11Uq+t/++03AzAmTJhgOb5w4UIDMO655x7XsXbt2hmXX36528c45ZRTjFNOOcX1+ocffjAAY/DgwW7XvvPOOwZgnH/++Zbj8+bNMwDjscceMwzDMEpKSoy4uDjjnHPOsVxXU1Nj9OrVy+jfv/9Rf10bN240AOPpp582qqqqjPLycmPFihVGWlqa0bJlS2Pjxo2ua3/++WcDMO66666jfswBAwYYoaGhdXrPn5kzZ47h7+9vPPzww3967SmnnGIAxpIlS1zHcnNzDX9/fyM0NNTYvn276/iKFSsMwHjppZdcxwYOHGgkJiYaRUVFrmPV1dVG9+7djdatWxtOp9MwDMMYN26cERoaauzatctyXZcuXQzA9Xu3ZcsWIyAgwPj73/9uqbOwsNBITEw0xowZ4zr24IMPGod+63fgvTfeeKPlvUVFRUaLFi2MCy+88Ki/Fwf+jP3www+GYZh/Llq1amX06NHDqKmpsXy8xMREIz093XXspJNOMtq0aWNUVFRYrouPjzf++O3pH//Mn3322Ubv3r2PWtszzzxj+X061B+z8tprrxmA8cUXX1iuu+aaawzAeOedd4743gMuv/xyo127dq7XkydPNgDjs88+s1y3ePFiAzBeffXVo9YvIiLSGLRCSERExGY//PADgNujYP3796dr167H9XjW6NGjj3ju4osvtrxOT0+nXbt2rnrmz59PXl4el19+OdXV1a4fTqeTM888k8WLF9dqB69//vOfBAYGEhISQu/evVm1ahVffvml2yM2tWEYRr3PQTrllFOorq7mgQceqNX1LVu25MQTT3S9jouLIzExkd69e9OqVSvX8a5duwK4toIvKSlh4cKFjBkzhoiICNd1/v7+XHrppWzbto21a9cC5p+JYcOG0bx5c8t1f1y18s0331BdXc1f//pXy/HIyEiGDh3Kjz/+eMRfx4H3XnbZZZb/vyEhIZxyyil13j1s7dq17Nixg0svvRQ/v4PfYkZERDB69Gh+/vlnSktLKSkpYcmSJYwaNYqgoCDLdeecc86ffp7+/fvzyy+/MGHCBL755hsKCwvrVOcf/fDDD0RGRloe4QT4y1/+cswf86uvviImJoZzzjnH8nvbu3dvWrRo0eA7s4mIiNSGhkqLiIjUs4SEBMLCwlyPRv2Z3NxcwGw0/FGrVq1cDYVjcbiPeUCLFi0Oe+xAPQdm/IwZM+aIHyMvL4/w8PCj1vCPf/yDSy65hIqKCn7++Wfuu+8+zjvvPH755Rfi4+MBc/A08Ke/Z5s3b3Y9hlfb99S3uLg4t2NBQUFuxw80O8rLywFzZpNhGEf8/wwH/yzk5uYe8f/PoQ78P0pPT3e79kDz7kgOvPekk0467PlDmzq18Wd/jp1Op+v3wDA
"text/plain": [
"<Figure size 1400x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2024-03-10 12:30:57 +01:00
"source": [
2024-03-10 13:30:10 +01:00
"draw_roc_curve(X_test, y_test)"
2024-03-10 12:30:57 +01:00
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "markdown",
"id": "ae8e9bd3-0f6a-4f82-bb4c-470cbdc8d6bb",
"metadata": {},
"source": [
"## Cross Validation"
]
},
2024-03-10 12:30:57 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 40,
2024-03-10 12:30:57 +01:00
"id": "7f0535de-34f1-4e97-b993-b429ecf0a554",
"metadata": {},
"outputs": [],
"source": [
"y_train = y_train['y_has_purchased']"
]
},
2024-03-10 11:09:53 +01:00
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 41,
2024-03-10 11:09:53 +01:00
"id": "f7fca463-d7d6-493b-8329-fdfa92457f78",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best parameters found: {'logreg__C': 0.0009765625, 'logreg__class_weight': 'balanced', 'logreg__penalty': 'l1'}\n",
"Best cross-validation score: 0.65\n",
"Test set score: 0.64\n"
]
}
],
2024-03-10 11:09:53 +01:00
"source": [
"# Cross validation\n",
2024-03-10 12:30:57 +01:00
"\n",
2024-03-10 13:30:10 +01:00
"grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring=recall_scorer, error_score='raise',\n",
2024-03-10 11:09:53 +01:00
" n_jobs=-1)\n",
"\n",
"grid_search.fit(X_train, y_train)\n",
"\n",
"# Print the best parameters and the best score\n",
"print(\"Best parameters found: \", grid_search.best_params_)\n",
"print(\"Best cross-validation score: {:.2f}\".format(grid_search.best_score_))\n",
"\n",
"# Evaluate the best model on the test set\n",
"test_score = grid_search.score(X_test, y_test)\n",
"print(\"Test set score: {:.2f}\".format(test_score))"
]
},
{
"cell_type": "code",
2024-03-10 13:30:10 +01:00
"execution_count": 43,
2024-03-10 11:09:53 +01:00
"id": "56bd7828-4de1-4166-bea0-5d5e152b9d38",
"metadata": {},
2024-03-10 13:30:10 +01:00
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi0AAAHFCAYAAAA+FskAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABQP0lEQVR4nO3de3yP9f/H8cdnp49tbMZsM6ecMxRRjEI5M/LtgFYrEXJsOaZy6mBOIWc60FdpiUilpVJKDGHOUU5LzBxmGLbZrt8ffj7fPjbZdH189uF57/a53ey6Xtd1va5PrV693u/3dVkMwzAQERERKeDcnJ2AiIiISF6oaBERERGXoKJFREREXIKKFhEREXEJKlpERETEJahoEREREZegokVERERcgooWERERcQkqWkRERMQlqGiRW9q2bdt49tlnKV++PIUKFaJw4cLcc889jB8/nlOnTjn02lu2bKFx48b4+/tjsViYMmWK6dewWCyMGjXK9PNez/z587FYLFgsFn788ccc+w3DoFKlSlgsFpo0aXJD15g5cybz58/P1zE//vjjNXMSEdfn4ewERBzlnXfeoXfv3lStWpXBgwcTFhZGZmYmv/76K7Nnz2bdunUsXbrUYdfv2rUraWlpxMbGEhAQwB133GH6NdatW0fp0qVNP29eFSlShPfeey9HYbJ69Wr27dtHkSJFbvjcM2fOJDAwkC5duuT5mHvuuYd169YRFhZ2w9cVkYJLRYvcktatW0evXr1o3rw5y5Ytw2q12vY1b96cgQMHEhcX59AcduzYQffu3WndurXDrlG/fn2HnTsvOnXqxEcffcSMGTPw8/OzbX/vvfcIDw/nzJkzNyWPzMxMLBYLfn5+Tv9ORMRxNDwkt6QxY8ZgsViYO3euXcFyhZeXF+3bt7f9nJ2dzfjx47nzzjuxWq0EBQXx9NNPc/jwYbvjmjRpQo0aNdi4cSMPPPAAPj4+VKhQgbFjx5KdnQ38b+jk0qVLzJo1yzaMAjBq1Cjbn//uyjEHDx60bVu1ahVNmjShePHieHt7U7ZsWR599FHOnz9vi8lteGjHjh08/PDDBAQEUKhQIWrVqsUHH3xgF3NlGOXjjz/mlVdeITQ0FD8/P5o1a8aePXvy9iUDTzzxBAAff/yxbVtqaipLliyha9euuR4zevRo6tWrR7FixfDz8+Oee+7hvffe4+/vbr3jjjvYuXMnq1evtn1/VzpVV3JfsGABAwcOpFSpUlitVv74448cw0MnTpygTJkyNGjQgMzMTNv5d+3aha+vL1FRUXm+VxFxPhUtcsvJyspi1apV1KlThzJlyuTpmF69ejF06FCaN2/O8uXLef3114mLi6NBgwacOHHCLjYpKYknn3ySp556iuXLl9O6dWuGDRvGhx9+CEDbtm1Zt24dAI899hjr1q2z/ZxXBw8epG3btnh5efH+++8TFxfH2LFj8fX1JSMj45rH7dmzhwYNGrBz506mTp3KZ599RlhYGF26dGH8+PE54l9++WUOHTrEu+++y9y5c/n9999p164dWVlZecrTz8+Pxx57jPfff9+27eOPP8bNzY1OnTpd89569uzJokWL+Oyzz3jkkUfo168fr7/+ui1m6dKlVKhQgdq1a9u+v6uH8oYNG0ZiYiKzZ8/miy++ICgoKMe1AgMDiY2NZePGjQwdOhSA8+fP8/jjj1O2bFlmz56dp/sUkQLCELnFJCUlGYDRuXPnPMXv3r3bAIzevXvbbV+/fr0BGC+//LJtW+PGjQ3AWL9+vV1sWFiY0bJlS7ttgNGnTx+7bSNHjjRy+7WbN2+eARgHDhwwDMMwFi9ebABGQkLCP+YOGCNHjrT93LlzZ8NqtRqJiYl2ca1btzZ8fHyM06dPG4ZhGD/88IMBGG3atLGLW7RokQEY69at+8frXsl348aNtnPt2LHDMAzDuPfee40uXboYhmEY1atXNxo3bnzN82RlZRmZmZnGa6+9ZhQvXtzIzs627bvWsVeu16hRo2vu++GHH+y2jxs3zgCMpUuXGs8884zh7e1tbNu27R/vUUQKHnVa5Lb3ww8/AOSY8HnfffdRrVo1vv/+e7vtISEh3HfffXbb7rrrLg4dOmRaTrVq1cLLy4sePXrwwQcfsH///jwdt2rVKpo2bZqjw9SlSxfOnz+fo+Pz9yEyuHwfQL7upXHjxlSsWJH333+f7du3s3HjxmsODV3JsVmzZvj7++Pu7o6npycjRozg5MmTJCcn5/m6jz76aJ5jBw8eTNu2bXniiSf44IMPmDZtGjVr1szz8SJSMKhokVtOYGAgPj4+HDhwIE/xJ0+eBKBkyZI59oWGhtr2X1G8ePEccVarlQsXLtxAtrmrWLEi3333HUFBQfTp04eKFStSsWJF3n777X887uTJk9e8jyv7/+7qe7ky/yc/92KxWHj22Wf58MMPmT17NlWqVOGBBx7INXbDhg20aNECuLy665dffmHjxo288sor+b5ubvf5Tzl26dKFixcvEhISorksIi5KRYvcctzd3WnatCmbNm3KMZE2N1f+w3306NEc+44cOUJgYKBpuRUqVAiA9PR0u+1Xz5sBeOCBB/jiiy9ITU0lPj6e8PBwoqOjiY2Nveb5ixcvfs37AEy9l7/r0qULJ06cYPbs2Tz77LPXjIuNjcXT05Mvv/ySjh070qBBA+rWrXtD18xtQvO1HD16lD59+lCrVi1OnjzJoEGDbuiaIuJcKlrkljRs2DAMw6B79+65TlzNzMzkiy++AOChhx4CsE2kvWLjxo3s3r2bpk2bmpbXlRUw27Zts9t+JZfcuLu7U69ePWbMmAHA5s2brxnbtGlTVq1aZStSrvjvf/+Lj4+Pw5YDlypVisGDB9OuXTueeeaZa8ZZLBY8PDxwd3e3bbtw4QILFizIEWtW9yorK4snnngCi8XC119/TUxMDNOmTeOzzz771+cWkZtLz2mRW1J4eDizZs2id+/e1KlTh169elG9enUyMzPZsmULc+fOpUaNGrRr146qVavSo0cPpk2bhpubG61bt+bgwYMMHz6cMmXK8OKLL5qWV5s2bShWrBjdunXjtddew8PDg/nz5/Pnn3/axc2ePZtVq1bRtm1bypYty8WLF20rdJo1a3bN848cOZIvv/ySBx98kBEjRlCsWDE++ugjvvrqK8aPH4+/v79p93K1sWPHXjembdu2TJo0icjISHr06MHJkyeZOHFirsvSa9asSWxsLJ988gkVKlSgUKFCNzQPZeTIkfz888+sXLmSkJAQBg4cyOrVq+nWrRu1a9emfPny+T6niDiHiha5ZXXv3p377ruPyZMnM27cOJKSkvD09KRKlSpERkbSt29fW+ysWbOoWLEi7733HjNmzMDf359WrVoRExOT6xyWG+Xn50dcXBzR0dE89dRTFC1alOeee47WrVvz3HPP2eJq1arFypUrGTlyJElJSRQuXJgaNWqwfPly25yQ3FStWpW1a9fy8ssv06dPHy5cuEC1atWYN29evp4s6ygPPfQQ77//PuPGjaNdu3aUKlWK7t27ExQURLdu3exiR48ezdGjR+nevTtnz56lXLlyds+xyYtvv/2WmJgYhg8fbtcxmz9/PrVr16ZTp06sWbMGLy8vM25PRBzMYhh/e6KTiIiISAGlOS0iIiLiElS0iIiIiEtQ0SIiIiIuQUWLiIiIuAQVLSIiIuISVLSIiIiIS1DRIiIiIi7hlny4nHftvtcPErkNLV84ytkpiBQ4zas55p1cf2fWf5cubJluynlclTotIiIi4hJuyU6LiIhIgWJRj8AMKlpEREQczWJxdga3BBUtIiIijqZOiyn0LYqIiIhLUKdFRETE0TQ8ZAoVLSIiIo6m4SFT6FsUERERl6BOi4iIiKNpeMgUKlpEREQcTcNDptC3KCIiIi5BnRYRERFH0/CQKVS0iIiIOJqGh0yhb1FERERcgjotIiIijqbhIVOoaBEREXE0DQ+ZQkWLiIiIo6nTYgqVfiIiIuIS1GkRERFxNA0PmUJFi4iIiKOpaDGFvkURERFxCeq0iIiIOJqbJuKaQUWLiIiIo2l4yBT6FkV
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"y_pred = grid_search.predict(X_test)\n",
"\n",
"draw_confusion_matrix(y_test, y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "319fe0eb-4d4a-492c-bd50-3f08ab483021",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUddrG8e+k904ooSahSheBJIggioINBUTWvuqqrLr2tfeKBdtrXde6IqKIq7IIFsQA0hEBBRJ6JwnpPXPePw4MHAcwgSRnJnN/rotL5pwzyQNy54SH33l+DsMwDERERERERERExGf42V2AiIiIiIiIiIg0LjWERERERERERER8jBpCIiIiIiIiIiI+Rg0hEREREREREREfo4aQiIiIiIiIiIiPUUNIRERERERERMTHqCEkIiIiIiIiIuJj1BASEREREREREfExagiJiIiIiIiIiPgYNYREREREvNzDDz9MfHw8mzdvtrsUERER8RJqCImIiDSglStXcuWVV9KhQwdCQkKIiIigb9++TJw4kby8PFtqevfdd3E4HCxZsqRBP8+mTZtwOByuH35+fsTGxjJs2DBmzZp1xPfNnDmTs846i2bNmhEcHEybNm24/PLLWbNmzRHf89NPP3HhhReSlJREUFAQ0dHRpKen89prr1FSUtIQvzxbHPh/t2nTJsvxBx98kPPPP5+xY8dSWVl52Pc+9NBDOByOeqtlzpw5OBwO5syZU28f83Dat2/PFVdcUaf3zJ8/n4ceeoj8/Hy3c0OGDGHIkCH1UpuIiIg3U0NIRESkgbz11luceOKJLF68mDvuuIOZM2fy+eefM3bsWF5//XWuuuoqu0tsFDfeeCMLFizgp59+4tlnn2X9+vWMHDmSuXPnul175513MmLECJxOJ6+++iqzZ8/mwQcfZPHixfTt25dp06a5vefBBx9k8ODBbN++nUcffZTZs2fz8ccfM2zYMB566CHuu+++xvhl2u7111+nWbNm3HLLLXaXUq8+//xz7r///jq9Z/78+Tz88MOHbQi9+uqrvPrqq/VUnYiIiPcKsLsAERGRpmjBggVcf/31nH766UyfPp3g4GDXudNPP53bbruNmTNnNmpNVVVV9bpCpLbatm3LwIEDAcjIyKBjx46ccsopvP322wwePNh13eTJk3nmmWe4/vrrLX9hHzx4MOPHj+eUU07h0ksvpXfv3iQnJwMwdepUHnnkEa666ireeusty69vxIgR3HnnnSxYsKCRfqX2CggI4Ouvv7a7jHrXp0+fev143bp1q9ePJyIi4q20QkhERKQBPPHEEzgcDt58801LM+iAoKAgzj33XNdrp9PJxIkT6dKlC8HBwSQmJnLZZZexbds2y/uO9PjMHx+DOfA4zwcffMBtt91GUlISwcHBZGVlua7Zt28fV155JXFxcYSHh3POOeewYcMGt4/97bffMmzYMKKioggLCyMjI4PvvvvuGH5XTP369QNg9+7dluOPP/44sbGxPPvss27vCQ8P5+WXX6a0tJRJkya5jj/yyCPExsby0ksvHbbZFRkZyfDhw4+51j8aMmQI3bt3Z8GCBaSnpxMaGkr79u155513APj666/p27cvYWFh9OjR47BNv8zMTIYNG0ZkZCRhYWGkp6cftpHz888/k5GRQUhICK1ateLuu++mqqrqsHVNmTKFtLQ0wsPDiYiIYPjw4SxdurRWv6Y/vveMM85g+fLldfhdsfrvf/9LWloaYWFhREZGcvrppx+2KffFF1/Qs2dPgoODSU5O5sUXXzzsY21//DPvdDp57LHH6Ny5M6GhocTExNCzZ09efPFFwHw07o477gCgQ4cOrkcWDzzadrhHxnbs2MGFF15IZGQk0dHRjBs3jp9//hmHw8G7777ruu5Ij5tdccUVtG/f3nKssrKSxx57zJXpZs2aceWVV7J3797a/UaKiIg0MDWERERE6llNTQ3ff/89J554Im3atKnVe66//nr++c9/cvrpp/Pf//6XRx99lJkzZ5Kenk5OTs4x13L33XezZcsWXn/9db788ksSExNd56666ir8/Pz46KOPeOGFF1i0aBFDhgyxPGbz4YcfMnz4cKKionjvvff45JNPiIuL44wzzjjmptDGjRsB6NSpk+vYzp07Wb16NcOHDycsLOyw70tLSyMxMZHZs2e73rNq1aqjvqc2DjTPHnrooVpdv2vXLq688kquvvpqvvjiC3r06MFf//pXHnnkEe6++27uvPNOPvvsMyIiIhg1ahQ7duxwvffHH3/k1FNPpaCggLfffpvJkycTGRnJOeecw5QpU1zXrVmzhmHDhpGfn8+7777L66+/zvLly3nsscfc6nniiScYP3483bp145NPPuH999+nsLCQk08+mVWrVh311/LH937wwQcUFRVx8sknH3Vm05F89NFHnHfeeURFRTF58mTefvtt9u3bx5AhQ8jMzHRdN3PmTC644ALi4+OZMmUKEydOZPLkybz33nt/+jkmTpzIQw89xPjx4/n666+ZMmUKV111levP7dVXX82NN94IwLRp01iwYAELFiygb9++h/14ZWVlnHbaacyaNYsnn3ySqVOn0qJFC8aNG1fnX/8BTqeT8847j6eeeoq//OUvfP311zz11FPMnj2bIUOGUFZWdswfW0REpN4YIiIiUq927dplAMZFF11Uq+t/++03AzAmTJhgOb5w4UIDMO655x7XsXbt2hmXX36528c45ZRTjFNOOcX1+ocffjAAY/DgwW7XvvPOOwZgnH/++Zbj8+bNMwDjscceMwzDMEpKSoy4uDjjnHPOsVxXU1Nj9OrVy+jfv/9Rf10bN240AOPpp582qqqqjPLycmPFihVGWlqa0bJlS2Pjxo2ua3/++WcDMO66666jfswBAwYYoaGhdXrPn5kzZ47h7+9vPPzww3967SmnnGIAxpIlS1zHcnNzDX9/fyM0NNTYvn276/iKFSsMwHjppZdcxwYOHGgkJiYaRUVFrmPV1dVG9+7djdatWxtOp9MwDMMYN26cERoaauzatctyXZcuXQzA9Xu3ZcsWIyAgwPj73/9uqbOwsNBITEw0xowZ4zr24IMPGod+63fgvTfeeKPlvUVFRUaLFi2MCy+88Ki/Fwf+jP3www+GYZh/Llq1amX06NHDqKmpsXy8xMREIz093XXspJNOMtq0aWNUVFRYrouPjzf++O3pH//Mn3322Ubv3r2PWtszzzxj+X061B+z8tprrxmA8cUXX1iuu+aaawzAeOedd4743gMuv/xyo127dq7XkydPNgDjs88+s1y3ePFiAzBeffXVo9YvIiLSGLRCSERExGY//PADgNujYP3796dr167H9XjW6NGjj3ju4osvtrxOT0+nXbt2rnrmz59PXl4el19+OdXV1a4fTqeTM888k8WLF9dqB69//vOfBAYGEhISQu/evVm1ahVffvml2yM2tWEYRr3PQTrllFOorq7mgQceqNX1LVu25MQTT3S9jouLIzExkd69e9OqVSvX8a5duwK4toIvKSlh4cKFjBkzhoiICNd1/v7+XHrppWzbto21a9cC5p+JYcOG0bx5c8t1f1y18s0331BdXc1f//pXy/HIyEiGDh3Kjz/+eMRfx4H3XnbZZZb/vyEhIZxyyil13j1s7dq17Nixg0svvRQ/v4PfYkZERDB69Gh+/vlnSktLKSkpYcmSJYwaNYqgoCDLdeecc86ffp7+/fvzyy+/MGHCBL755hsKCwvrVOcf/fDDD0RGRloe4QT4y1/+cswf86uvviImJoZzzjnH8nvbu3dvWrRo0eA7s4mIiNSGhkqLiIjUs4SEBMLCwlyPRv2Z3NxcwGw0/FGrVq1cDYVjcbiPeUCLFi0Oe+xAPQdm/IwZM+aIHyMvL4/w8PCj1vCPf/yDSy65hIqKCn7++Wfuu+8+zjvvPH755Rfi4+MBc/A08Ke/Z5s3b3Y9hlfb99S3uLg4t2NBQUFuxw80O8rLywFzZpNhGEf8/wwH/yzk5uYe8f/PoQ78P0pPT3e79kDz7kgOvPekk0467PlDmzq18Wd/jp1Op+v3wDA
"text/plain": [
"<Figure size 1400x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"draw_roc_curve(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "210b931c-6d46-4ebf-a9c7-d1ee05c3fadf",
"metadata": {},
2024-03-10 11:09:53 +01:00
"outputs": [],
"source": []
2024-03-08 14:48:38 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}