2076 lines
456 KiB
Plaintext
2076 lines
456 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "3415114e-9577-4487-89eb-4931620ad9f0",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Predict Sales"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "f271eb45-1470-4764-8c2e-31374efa1fe5",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"import numpy as np\n",
|
||
|
"import os\n",
|
||
|
"import s3fs\n",
|
||
|
"import re\n",
|
||
|
"from sklearn.linear_model import LogisticRegression\n",
|
||
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
||
|
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
|
||
|
"from sklearn.utils import class_weight\n",
|
||
|
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||
|
"from sklearn.pipeline import Pipeline\n",
|
||
|
"from sklearn.compose import ColumnTransformer\n",
|
||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
||
|
"from sklearn.impute import SimpleImputer\n",
|
||
|
"from sklearn.model_selection import GridSearchCV\n",
|
||
|
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
|
||
|
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
|
||
|
"from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
|
||
|
"\n",
|
||
|
"import pickle\n",
|
||
|
"import warnings\n",
|
||
|
"#import scikitplot as skplt"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"id": "3fecb606-22e5-4dee-8efa-f8dff0832299",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"# Silence all warnings for the rest of the session.\n",
"warnings.filterwarnings('ignore')\n",
"# The blanket filter above already matches these two scikit-learn categories;\n",
"# they are still registered one by one so the intent stays explicit.\n",
"for sklearn_warning in (ConvergenceWarning, DataConversionWarning):\n",
"    warnings.filterwarnings(\"ignore\", category=sklearn_warning)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "ae591854-3003-4c75-a0c7-5abf04246e81",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Load Data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"id": "59dd4694-a812-4923-b995-a2ee86c74f85",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"# Build the endpoint URL from the AWS_S3_ENDPOINT environment variable\n",
"# (a KeyError is raised if it is not set) and create the S3 filesystem object.\n",
"S3_ENDPOINT_URL = f\"https://{os.environ['AWS_S3_ENDPOINT']}\"\n",
"fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"id": "017f7e9a-3ba0-40fa-bdc8-51b98cc1fdb3",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"def load_train_test(bucket=\"projet-bdc2324-team1/Generalization/musique\"):\n",
"    \"\"\"Load the train and test sets from S3.\n",
"\n",
"    Reads ``Train_set.csv`` and ``Test_set.csv`` located under ``bucket``\n",
"    through the notebook-level ``fs`` filesystem object.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    bucket : str, optional\n",
"        S3 prefix that contains both CSV files. The default is the path that\n",
"        was previously hard-coded, so ``load_train_test()`` behaves as before.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple of pandas.DataFrame\n",
"        ``(dataset_train, dataset_test)``.\n",
"    \"\"\"\n",
"    def _read_csv(file_name):\n",
"        # Open the object in binary mode and let pandas parse the comma-separated file.\n",
"        with fs.open(bucket + \"/\" + file_name, mode=\"rb\") as file_in:\n",
"            return pd.read_csv(file_in, sep=\",\")\n",
"\n",
"    dataset_train = _read_csv(\"Train_set.csv\")\n",
"    dataset_test = _read_csv(\"Test_set.csv\")\n",
"\n",
"    return dataset_train, dataset_test"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "c479b230-b4bd-4cfb-b76b-d9faf6d95772",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"dataset_train, dataset_test = load_train_test()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"id": "c24c446d-4e1c-4ac1-a048-f0b8d8559f36",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"customer_id 0\n",
|
||
|
"nb_tickets 0\n",
|
||
|
"nb_purchases 0\n",
|
||
|
"total_amount 0\n",
|
||
|
"nb_suppliers 0\n",
|
||
|
"vente_internet_max 0\n",
|
||
|
"purchase_date_min 0\n",
|
||
|
"purchase_date_max 0\n",
|
||
|
"time_between_purchase 0\n",
|
||
|
"nb_tickets_internet 0\n",
|
||
|
"street_id 0\n",
|
||
|
"structure_id 327067\n",
|
||
|
"mcp_contact_id 135224\n",
|
||
|
"fidelity 0\n",
|
||
|
"tenant_id 0\n",
|
||
|
"is_partner 0\n",
|
||
|
"deleted_at 354365\n",
|
||
|
"gender 0\n",
|
||
|
"is_email_true 0\n",
|
||
|
"opt_in 0\n",
|
||
|
"last_buying_date 119201\n",
|
||
|
"max_price 119201\n",
|
||
|
"ticket_sum 0\n",
|
||
|
"average_price 115193\n",
|
||
|
"average_purchase_delay 119203\n",
|
||
|
"average_price_basket 119203\n",
|
||
|
"average_ticket_basket 119203\n",
|
||
|
"total_price 4008\n",
|
||
|
"purchase_count 0\n",
|
||
|
"first_buying_date 119201\n",
|
||
|
"country 56856\n",
|
||
|
"gender_label 0\n",
|
||
|
"gender_female 0\n",
|
||
|
"gender_male 0\n",
|
||
|
"gender_other 0\n",
|
||
|
"country_fr 56856\n",
|
||
|
"nb_campaigns 0\n",
|
||
|
"nb_campaigns_opened 0\n",
|
||
|
"time_to_open 224310\n",
|
||
|
"y_has_purchased 0\n",
|
||
|
"dtype: int64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"dataset_train.isna().sum()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"id": "825d14a3-6967-4733-bfd4-64bf61c2bd43",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"def features_target_split(dataset_train, dataset_test):\n",
"    \"\"\"Split the train and test frames into model inputs and target.\n",
"\n",
"    Returns ``(X_train, X_test, y_train, y_test)``. The targets are selected\n",
"    with a one-element column list, so they are single-column DataFrames.\n",
"    \"\"\"\n",
"    target_columns = ['y_has_purchased']\n",
"    # 'is_partner' is currently left out of the feature set.\n",
"    feature_columns = [\n",
"        'nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers',\n",
"        'vente_internet_max', 'purchase_date_min', 'purchase_date_max',\n",
"        'time_between_purchase', 'nb_tickets_internet', 'fidelity',\n",
"        'is_email_true', 'opt_in',\n",
"        'gender_female', 'gender_male', 'gender_other',\n",
"        'nb_campaigns', 'nb_campaigns_opened',\n",
"    ]\n",
"\n",
"    # Same column selection applied to each dataset, train first.\n",
"    (X_train, y_train), (X_test, y_test) = (\n",
"        (frame[feature_columns], frame[target_columns])\n",
"        for frame in (dataset_train, dataset_test)\n",
"    )\n",
"    return X_train, X_test, y_train, y_test"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"id": "69eaec12-b30f-4d30-a461-ea520d5cbf77",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 9,
|
||
|
"id": "d039f31d-0093-46c6-9743-ddec1381f758",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Shape train : (354365, 17)\n",
|
||
|
"Shape test : (151874, 17)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(\"Shape train : \", X_train.shape)\n",
|
||
|
"print(\"Shape test : \", X_test.shape)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "a1d6de94-4e11-481a-a0ce-412bf29f692c",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Prepare preprocessing and Hyperparameters"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"id": "b808da43-c444-4e94-995a-7ec6ccd01e2d",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"{0.0: 0.5481283836040216, 1.0: 5.694439980716696}"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
"# Balanced class weights computed from the training target, keyed by class label.\n",
"# The label array is computed once and reused instead of calling np.unique repeatedly.\n",
"y_train_target = y_train['y_has_purchased']\n",
"target_classes = np.unique(y_train_target)\n",
"weights = class_weight.compute_class_weight(class_weight='balanced', classes=target_classes, y=y_train_target)\n",
"\n",
"weight_dict = dict(zip(target_classes, weights))\n",
"weight_dict"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 11,
|
||
|
"id": "b32a79ea-907f-4dfc-9832-6c74bef3200c",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"# Columns that go through the scaler. 'opt_in' is handled by the categorical\n",
"# branch below and is therefore not listed here: it used to appear in both\n",
"# lists, which fed the model a scaled copy and a one-hot copy of the same flag.\n",
"# 'is_partner' is currently left out.\n",
"numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max',\n",
"                    'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
"                    'fidelity', 'is_email_true',\n",
"                    'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
"\n",
"# Numeric branch: standardise each column. No imputation step is applied; the\n",
"# missing-value count shown earlier in the notebook is zero for every selected column.\n",
"numeric_transformer = Pipeline(steps=[\n",
"    (\"scaler\", StandardScaler()),\n",
"])\n",
"\n",
"categorical_features = ['opt_in']\n",
"\n",
"# Categorical branch: dense one-hot encoding; categories not seen during fit\n",
"# are ignored instead of raising an error.\n",
"categorical_transformer = Pipeline(steps=[\n",
"    (\"onehot\", OneHotEncoder(handle_unknown='ignore', sparse_output=False)),\n",
"])\n",
"\n",
"preproc = ColumnTransformer(\n",
"    transformers=[\n",
"        (\"num\", numeric_transformer, numeric_features),\n",
"        (\"cat\", categorical_transformer, categorical_features),\n",
"    ]\n",
")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 12,
|
||
|
"id": "9809a688-bfbc-4685-a77f-17a8b2b79ab3",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"# Scorers built from balanced accuracy and recall (metrics to maximise, not losses)\n",
|
||
|
"balanced_scorer = make_scorer(balanced_accuracy_score)\n",
|
||
|
"recall_scorer = make_scorer(recall_score)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 13,
|
||
|
"id": "4f9b2bbf-5f8a-4ac1-8e6c-51bd0dd8ac85",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"def draw_confusion_matrix(y_test, y_pred):\n",
"    \"\"\"Show the confusion matrix of ``y_pred`` against ``y_test`` as an annotated heatmap.\n",
"\n",
"    The y axis is labelled 'Actual' and the x axis 'Predicted'; both axes use\n",
"    the fixed tick labels 'Class 0' and 'Class 1'.\n",
"    \"\"\"\n",
"    class_labels = ['Class 0', 'Class 1']\n",
"    sns.heatmap(\n",
"        confusion_matrix(y_test, y_pred),\n",
"        annot=True,\n",
"        fmt='d',\n",
"        cmap='Blues',\n",
"        xticklabels=class_labels,\n",
"        yticklabels=class_labels,\n",
"    )\n",
"    plt.xlabel('Predicted')\n",
"    plt.ylabel('Actual')\n",
"    plt.title('Confusion Matrix')\n",
"    plt.show()\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
"def draw_roc_curve(X_test, y_test, model=None):\n",
"    \"\"\"Plot the ROC curve of a fitted classifier on the given data.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X_test : features passed to ``predict_proba``.\n",
"    y_test : true labels; label ``1`` is used as the positive class.\n",
"    model : fitted estimator exposing ``predict_proba``, optional\n",
"        When omitted, the notebook-level ``pipeline`` variable is used, which\n",
"        is what this function always relied on before the parameter existed.\n",
"    \"\"\"\n",
"    if model is None:\n",
"        # Backward-compatible fallback on the global fitted pipeline.\n",
"        model = pipeline\n",
"\n",
"    # Scores taken from the second column of predict_proba.\n",
"    y_pred_prob = model.predict_proba(X_test)[:, 1]\n",
"\n",
"    # False positive rate (FPR) and true positive rate (TPR)\n",
"    fpr, tpr, _ = roc_curve(y_test, y_pred_prob, pos_label=1)\n",
"\n",
"    # Area under the ROC curve (AUC)\n",
"    roc_auc = auc(fpr, tpr)\n",
"\n",
"    plt.figure(figsize = (14, 8))\n",
"    plt.plot(fpr, tpr, label=\"ROC curve(area = %0.3f)\" % roc_auc)\n",
"    plt.plot([0, 1], [0, 1], color=\"red\",label=\"Random Baseline\", linestyle=\"--\")\n",
"    plt.grid(color='gray', linestyle='--', linewidth=0.5)\n",
"    plt.xlabel('Taux de faux positifs (FPR)')\n",
"    plt.ylabel('Taux de vrais positifs (TPR)')\n",
"    plt.title('Courbe ROC : modèle logistique')\n",
"    plt.legend(loc=\"lower right\")\n",
"    plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 14,
|
||
|
"id": "cf400c70-0192-42cc-9919-f61bae8382b0",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"def draw_features_importance(pipeline, model):\n",
"    \"\"\"Plot the coefficients of the pipeline's 'logreg' step as a horizontal bar chart.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    pipeline : fitted Pipeline containing a step named ``'logreg'``.\n",
"        The fallback below assumes that step is the last one - TODO confirm\n",
"        for every pipeline passed in.\n",
"    model : not used by this function; kept so existing calls keep working.\n",
"    \"\"\"\n",
"    classifier = pipeline.named_steps['logreg']\n",
"    coefficients = classifier.coef_[0]\n",
"\n",
"    # The classifier only exposes `feature_names_in_` when it was fitted on\n",
"    # input that carries column names. When the preprocessing steps hand it a\n",
"    # plain array the attribute is missing, so fall back to the names produced\n",
"    # by everything that precedes the classifier.\n",
"    feature_names = getattr(classifier, 'feature_names_in_', None)\n",
"    if feature_names is None:\n",
"        feature_names = pipeline[:-1].get_feature_names_out()\n",
"\n",
"    # Plot one bar per feature\n",
"    plt.figure(figsize=(10, 6))\n",
"    plt.barh(feature_names, coefficients, color='skyblue')\n",
"    plt.xlabel('Importance des caractéristiques')\n",
"    plt.ylabel('Caractéristiques')\n",
"    plt.title('Importance des caractéristiques dans le modèle de régression logistique')\n",
"    plt.grid(True)\n",
"    plt.show()\n",
|
||
|
"\n",
|
||
"def draw_prob_distribution(X_test, model=None):\n",
"    \"\"\"Plot a 10-bin histogram of the predicted probabilities for class 1.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X_test : features passed to ``predict_proba``.\n",
"    model : fitted estimator exposing ``predict_proba``, optional\n",
"        When omitted, the notebook-level ``pipeline`` variable is used, which\n",
"        is what this function always relied on before the parameter existed.\n",
"    \"\"\"\n",
"    if model is None:\n",
"        # Backward-compatible fallback on the global fitted pipeline.\n",
"        model = pipeline\n",
"\n",
"    # Scores taken from the second column of predict_proba.\n",
"    y_pred_prob = model.predict_proba(X_test)[:, 1]\n",
"\n",
"    plt.figure(figsize=(8, 6))\n",
"    plt.hist(y_pred_prob, bins=10, range=(0, 1), color='blue', alpha=0.7)\n",
"\n",
"    # Keep the full [0, 1] probability range visible on the x axis.\n",
"    plt.xlim(0, 1)\n",
"    plt.ylim(0, None)\n",
"\n",
"    plt.title('Histogramme des probabilités pour la classe 1')\n",
"    plt.xlabel('Probabilité')\n",
"    plt.ylabel('Fréquence')\n",
"    plt.grid(True)\n",
"    plt.show()\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 15,
|
||
|
"id": "206d9a95-7c37-4506-949b-e77d225e42c5",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
"# Hyperparameter grid for the 'logreg' pipeline step.\n",
"# C takes the 17 powers of two from 2**-10 to 2**6.\n",
"# NOTE(review): weight_dict was itself computed with class_weight='balanced' on\n",
"# the training target, so the two class_weight candidates look nearly\n",
"# equivalent - confirm both are needed.\n",
"param_grid = dict(\n",
"    logreg__C=np.logspace(-10, 6, 17, base=2),\n",
"    logreg__penalty=['l1', 'l2'],\n",
"    logreg__class_weight=['balanced', weight_dict],\n",
")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 16,
|
||
|
"id": "7ff2f7bd-efc1-4f7c-a3c9-caa916aa2f2b",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<style>#sk-container-id-1 {\n",
|
||
|
" /* Definition of color scheme common for light and dark mode */\n",
|
||
|
" --sklearn-color-text: black;\n",
|
||
|
" --sklearn-color-line: gray;\n",
|
||
|
" /* Definition of color scheme for unfitted estimators */\n",
|
||
|
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
|
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
|
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
|
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
|
" /* Definition of color scheme for fitted estimators */\n",
|
||
|
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
|
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
|
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
|
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
|
"\n",
|
||
|
" /* Specific color for light theme */\n",
|
||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
|
" --sklearn-color-icon: #696969;\n",
|
||
|
"\n",
|
||
|
" @media (prefers-color-scheme: dark) {\n",
|
||
|
" /* Redefinition of color scheme for dark theme */\n",
|
||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
|
" --sklearn-color-icon: #878787;\n",
|
||
|
" }\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 pre {\n",
|
||
|
" padding: 0;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 input.sk-hidden--visually {\n",
|
||
|
" border: 0;\n",
|
||
|
" clip: rect(1px 1px 1px 1px);\n",
|
||
|
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
|
" height: 1px;\n",
|
||
|
" margin: -1px;\n",
|
||
|
" overflow: hidden;\n",
|
||
|
" padding: 0;\n",
|
||
|
" position: absolute;\n",
|
||
|
" width: 1px;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
|
||
|
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
|
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" padding-bottom: 0.4em;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-container {\n",
|
||
|
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
|
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
|
" so we also need the `!important` here to be able to override the\n",
|
||
|
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
|
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
|
" display: inline-block !important;\n",
|
||
|
" position: relative;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
|
||
|
" display: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"div.sk-parallel-item,\n",
|
||
|
"div.sk-serial,\n",
|
||
|
"div.sk-item {\n",
|
||
|
" /* draw centered vertical line to link estimators */\n",
|
||
|
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
|
" background-size: 2px 100%;\n",
|
||
|
" background-repeat: no-repeat;\n",
|
||
|
" background-position: center center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Parallel-specific style estimator block */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-parallel-item::after {\n",
|
||
|
" content: \"\";\n",
|
||
|
" width: 100%;\n",
|
||
|
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
|
" flex-grow: 1;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-parallel {\n",
|
||
|
" display: flex;\n",
|
||
|
" align-items: stretch;\n",
|
||
|
" justify-content: center;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" position: relative;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-parallel-item {\n",
|
||
|
" display: flex;\n",
|
||
|
" flex-direction: column;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
|
||
|
" align-self: flex-end;\n",
|
||
|
" width: 50%;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
|
||
|
" align-self: flex-start;\n",
|
||
|
" width: 50%;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
|
||
|
" width: 0;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Serial-specific style estimator block */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-serial {\n",
|
||
|
" display: flex;\n",
|
||
|
" flex-direction: column;\n",
|
||
|
" align-items: center;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" padding-right: 1em;\n",
|
||
|
" padding-left: 1em;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
|
"clickable and can be expanded/collapsed.\n",
|
||
|
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
|
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
|
"*/\n",
|
||
|
"\n",
|
||
|
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-toggleable {\n",
|
||
|
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
|
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Toggleable label */\n",
|
||
|
"#sk-container-id-1 label.sk-toggleable__label {\n",
|
||
|
" cursor: pointer;\n",
|
||
|
" display: block;\n",
|
||
|
" width: 100%;\n",
|
||
|
" margin-bottom: 0;\n",
|
||
|
" padding: 0.5em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" text-align: center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
|
||
|
" /* Arrow on the left of the label */\n",
|
||
|
" content: \"▸\";\n",
|
||
|
" float: left;\n",
|
||
|
" margin-right: 0.25em;\n",
|
||
|
" color: var(--sklearn-color-icon);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Toggleable content - dropdown */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-toggleable__content {\n",
|
||
|
" max-height: 0;\n",
|
||
|
" max-width: 0;\n",
|
||
|
" overflow: hidden;\n",
|
||
|
" text-align: left;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
|
||
|
" margin: 0.2em;\n",
|
||
|
" border-radius: 0.25em;\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
|
" /* Expand drop-down */\n",
|
||
|
" max-height: 200px;\n",
|
||
|
" max-width: 100%;\n",
|
||
|
" overflow: auto;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
|
" content: \"▾\";\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator-specific style */\n",
|
||
|
"\n",
|
||
|
"/* Colorize estimator box */\n",
|
||
|
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
|
||
|
"#sk-container-id-1 div.sk-label label {\n",
|
||
|
" /* The background is the default theme color */\n",
|
||
|
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover, darken the color of the background */\n",
|
||
|
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Label box, darken color on hover, fitted */\n",
|
||
|
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator label */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-label label {\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" font-weight: bold;\n",
|
||
|
" display: inline-block;\n",
|
||
|
" line-height: 1.2em;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-label-container {\n",
|
||
|
" text-align: center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator-specific */\n",
|
||
|
"#sk-container-id-1 div.sk-estimator {\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
|
" border-radius: 0.25em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" margin-bottom: 0.5em;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-estimator.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* on hover */\n",
|
||
|
"#sk-container-id-1 div.sk-estimator:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
|
"\n",
|
||
|
"/* Common style for \"i\" and \"?\" */\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link,\n",
|
||
|
"a:link.sk-estimator-doc-link,\n",
|
||
|
"a:visited.sk-estimator-doc-link {\n",
|
||
|
" float: right;\n",
|
||
|
" font-size: smaller;\n",
|
||
|
" line-height: 1em;\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" border-radius: 1em;\n",
|
||
|
" height: 1em;\n",
|
||
|
" width: 1em;\n",
|
||
|
" text-decoration: none !important;\n",
|
||
|
" margin-left: 1ex;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link.fitted,\n",
|
||
|
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
|
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover */\n",
|
||
|
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
|
".sk-estimator-doc-link:hover,\n",
|
||
|
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
|
".sk-estimator-doc-link:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
|
".sk-estimator-doc-link.fitted:hover,\n",
|
||
|
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
|
".sk-estimator-doc-link.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
|
".sk-estimator-doc-link span {\n",
|
||
|
" display: none;\n",
|
||
|
" z-index: 9999;\n",
|
||
|
" position: relative;\n",
|
||
|
" font-weight: normal;\n",
|
||
|
" right: .2ex;\n",
|
||
|
" padding: .5ex;\n",
|
||
|
" margin: .5ex;\n",
|
||
|
" width: min-content;\n",
|
||
|
" min-width: 20ex;\n",
|
||
|
" max-width: 50ex;\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link.fitted span {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
|
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link:hover span {\n",
|
||
|
" display: block;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 a.estimator_doc_link {\n",
|
||
|
" float: right;\n",
|
||
|
" font-size: 1rem;\n",
|
||
|
" line-height: 1em;\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" border-radius: 1rem;\n",
|
||
|
" height: 1rem;\n",
|
||
|
" width: 1rem;\n",
|
||
|
" text-decoration: none;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover */\n",
|
||
|
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5481283836040216,\n",
|
||
|
" 1.0: 5.694439980716696},\n",
|
||
|
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> Pipeline<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5481283836040216,\n",
|
||
|
" 1.0: 5.694439980716696},\n",
|
||
|
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
|
" 'nb_suppliers', 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min', 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet', 'fidelity',\n",
|
||
|
" 'is_email_true', 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male', 'gender_other', 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">num</label><div class=\"sk-toggleable__content \"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> StandardScaler<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content \"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator 
sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> LogisticRegression<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content \"><pre>LogisticRegression(class_weight={0.0: 0.5481283836040216,\n",
|
||
|
" 1.0: 5.694439980716696},\n",
|
||
|
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5481283836040216,\n",
|
||
|
" 1.0: 5.694439980716696},\n",
|
||
|
" max_iter=5000, solver='saga'))])"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 16,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Pipeline: preprocessing followed by a class-weighted logistic regression (saga solver)\n",
|
||
|
"pipeline = Pipeline(steps=[\n",
|
||
|
" ('preprocessor', preproc),\n",
|
||
|
" ('logreg', LogisticRegression(solver='saga', class_weight = weight_dict,\n",
|
||
|
" max_iter=5000)) \n",
|
||
|
"])\n",
|
||
|
"\n",
|
||
|
"pipeline.set_output(transform=\"pandas\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "ed415f60-9663-4179-877b-233faf6e1645",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Baseline"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 17,
|
||
|
"id": "2b467511-2ae5-4a16-a502-397c3460471d",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<style>#sk-container-id-2 {\n",
|
||
|
" /* Definition of color scheme common for light and dark mode */\n",
|
||
|
" --sklearn-color-text: black;\n",
|
||
|
" --sklearn-color-line: gray;\n",
|
||
|
" /* Definition of color scheme for unfitted estimators */\n",
|
||
|
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
|
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
|
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
|
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
|
" /* Definition of color scheme for fitted estimators */\n",
|
||
|
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
|
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
|
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
|
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
|
"\n",
|
||
|
" /* Specific color for light theme */\n",
|
||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
|
" --sklearn-color-icon: #696969;\n",
|
||
|
"\n",
|
||
|
" @media (prefers-color-scheme: dark) {\n",
|
||
|
" /* Redefinition of color scheme for dark theme */\n",
|
||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
|
" --sklearn-color-icon: #878787;\n",
|
||
|
" }\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 pre {\n",
|
||
|
" padding: 0;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 input.sk-hidden--visually {\n",
|
||
|
" border: 0;\n",
|
||
|
" clip: rect(1px 1px 1px 1px);\n",
|
||
|
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
|
" height: 1px;\n",
|
||
|
" margin: -1px;\n",
|
||
|
" overflow: hidden;\n",
|
||
|
" padding: 0;\n",
|
||
|
" position: absolute;\n",
|
||
|
" width: 1px;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-dashed-wrapped {\n",
|
||
|
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
|
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" padding-bottom: 0.4em;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-container {\n",
|
||
|
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
|
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
|
" so we also need the `!important` here to be able to override the\n",
|
||
|
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
|
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
|
" display: inline-block !important;\n",
|
||
|
" position: relative;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-text-repr-fallback {\n",
|
||
|
" display: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"div.sk-parallel-item,\n",
|
||
|
"div.sk-serial,\n",
|
||
|
"div.sk-item {\n",
|
||
|
" /* draw centered vertical line to link estimators */\n",
|
||
|
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
|
" background-size: 2px 100%;\n",
|
||
|
" background-repeat: no-repeat;\n",
|
||
|
" background-position: center center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Parallel-specific style estimator block */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-parallel-item::after {\n",
|
||
|
" content: \"\";\n",
|
||
|
" width: 100%;\n",
|
||
|
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
|
" flex-grow: 1;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-parallel {\n",
|
||
|
" display: flex;\n",
|
||
|
" align-items: stretch;\n",
|
||
|
" justify-content: center;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" position: relative;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-parallel-item {\n",
|
||
|
" display: flex;\n",
|
||
|
" flex-direction: column;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
|
||
|
" align-self: flex-end;\n",
|
||
|
" width: 50%;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
|
||
|
" align-self: flex-start;\n",
|
||
|
" width: 50%;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
|
||
|
" width: 0;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Serial-specific style estimator block */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-serial {\n",
|
||
|
" display: flex;\n",
|
||
|
" flex-direction: column;\n",
|
||
|
" align-items: center;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" padding-right: 1em;\n",
|
||
|
" padding-left: 1em;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
|
"clickable and can be expanded/collapsed.\n",
|
||
|
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
|
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
|
"*/\n",
|
||
|
"\n",
|
||
|
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-toggleable {\n",
|
||
|
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
|
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Toggleable label */\n",
|
||
|
"#sk-container-id-2 label.sk-toggleable__label {\n",
|
||
|
" cursor: pointer;\n",
|
||
|
" display: block;\n",
|
||
|
" width: 100%;\n",
|
||
|
" margin-bottom: 0;\n",
|
||
|
" padding: 0.5em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" text-align: center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
|
||
|
" /* Arrow on the left of the label */\n",
|
||
|
" content: \"▸\";\n",
|
||
|
" float: left;\n",
|
||
|
" margin-right: 0.25em;\n",
|
||
|
" color: var(--sklearn-color-icon);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Toggleable content - dropdown */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-toggleable__content {\n",
|
||
|
" max-height: 0;\n",
|
||
|
" max-width: 0;\n",
|
||
|
" overflow: hidden;\n",
|
||
|
" text-align: left;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-toggleable__content pre {\n",
|
||
|
" margin: 0.2em;\n",
|
||
|
" border-radius: 0.25em;\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
|
" /* Expand drop-down */\n",
|
||
|
" max-height: 200px;\n",
|
||
|
" max-width: 100%;\n",
|
||
|
" overflow: auto;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
|
" content: \"▾\";\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator-specific style */\n",
|
||
|
"\n",
|
||
|
"/* Colorize estimator box */\n",
|
||
|
"#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
|
||
|
"#sk-container-id-2 div.sk-label label {\n",
|
||
|
" /* The background is the default theme color */\n",
|
||
|
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover, darken the color of the background */\n",
|
||
|
"#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Label box, darken color on hover, fitted */\n",
|
||
|
"#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator label */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-label label {\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" font-weight: bold;\n",
|
||
|
" display: inline-block;\n",
|
||
|
" line-height: 1.2em;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-label-container {\n",
|
||
|
" text-align: center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator-specific */\n",
|
||
|
"#sk-container-id-2 div.sk-estimator {\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
|
" border-radius: 0.25em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" margin-bottom: 0.5em;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-estimator.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* on hover */\n",
|
||
|
"#sk-container-id-2 div.sk-estimator:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
|
"\n",
|
||
|
"/* Common style for \"i\" and \"?\" */\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link,\n",
|
||
|
"a:link.sk-estimator-doc-link,\n",
|
||
|
"a:visited.sk-estimator-doc-link {\n",
|
||
|
" float: right;\n",
|
||
|
" font-size: smaller;\n",
|
||
|
" line-height: 1em;\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" border-radius: 1em;\n",
|
||
|
" height: 1em;\n",
|
||
|
" width: 1em;\n",
|
||
|
" text-decoration: none !important;\n",
|
||
|
" margin-left: 1ex;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link.fitted,\n",
|
||
|
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
|
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover */\n",
|
||
|
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
|
".sk-estimator-doc-link:hover,\n",
|
||
|
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
|
".sk-estimator-doc-link:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
|
".sk-estimator-doc-link.fitted:hover,\n",
|
||
|
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
|
".sk-estimator-doc-link.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
|
".sk-estimator-doc-link span {\n",
|
||
|
" display: none;\n",
|
||
|
" z-index: 9999;\n",
|
||
|
" position: relative;\n",
|
||
|
" font-weight: normal;\n",
|
||
|
" right: .2ex;\n",
|
||
|
" padding: .5ex;\n",
|
||
|
" margin: .5ex;\n",
|
||
|
" width: min-content;\n",
|
||
|
" min-width: 20ex;\n",
|
||
|
" max-width: 50ex;\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link.fitted span {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
|
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link:hover span {\n",
|
||
|
" display: block;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 a.estimator_doc_link {\n",
|
||
|
" float: right;\n",
|
||
|
" font-size: 1rem;\n",
|
||
|
" line-height: 1em;\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" border-radius: 1rem;\n",
|
||
|
" height: 1rem;\n",
|
||
|
" width: 1rem;\n",
|
||
|
" text-decoration: none;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 a.estimator_doc_link.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover */\n",
|
||
|
"#sk-container-id-2 a.estimator_doc_link:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5481283836040216,\n",
|
||
|
" 1.0: 5.694439980716696},\n",
|
||
|
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> Pipeline<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5481283836040216,\n",
|
||
|
" 1.0: 5.694439980716696},\n",
|
||
|
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
|
" 'nb_suppliers', 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min', 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet', 'fidelity',\n",
|
||
|
" 'is_email_true', 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male', 'gender_other', 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">num</label><div class=\"sk-toggleable__content fitted\"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> StandardScaler<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">cat</label><div class=\"sk-toggleable__content fitted\"><pre>['opt_in']</pre></div> </div></div><div class=\"sk-serial\"><div 
class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> OneHotEncoder<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" ><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(class_weight={0.0: 0.5481283836040216,\n",
|
||
|
" 1.0: 5.694439980716696},\n",
|
||
|
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5481283836040216,\n",
|
||
|
" 1.0: 5.694439980716696},\n",
|
||
|
" max_iter=5000, solver='saga'))])"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 17,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"pipeline.fit(X_train, y_train)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 18,
|
||
|
"id": "6356e870-0dfc-4e60-9e48-e2de5e7f9f87",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Accuracy Score: 0.8489010627230468\n",
|
||
|
"F1 Score: 0.4775997086140958\n",
|
||
|
"Recall Score: 0.7887218045112782\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"y_pred = pipeline.predict(X_test)\n",
|
||
|
"\n",
|
||
|
"# Calculate the F1 score\n",
|
||
|
"acc = accuracy_score(y_test, y_pred)\n",
|
||
|
"print(f\"Accuracy Score: {acc}\")\n",
|
||
|
"\n",
|
||
|
"f1 = f1_score(y_test, y_pred)\n",
|
||
|
"print(f\"F1 Score: {f1}\")\n",
|
||
|
"\n",
|
||
|
"recall = recall_score(y_test, y_pred)\n",
|
||
|
"print(f\"Recall Score: {recall}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 19,
|
||
|
"id": "09387a09-0d53-4c54-baac-f3c2a57a629a",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAHFCAYAAADhWLMfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVn0lEQVR4nO3deVxV1f7/8dcR4QgoJwYBcShHEjUHTEVLLWdF8966DhRFmXnVNHLMzCErcPqp19kGpUyj0uyaKVfTskxRU8kxLWdTxIFwQiDcvz/8euoI5nQGPb2f93EeD9n7s9dee9u5fvistfY2GYZhICIiIuIGiri6AyIiIiL2osRGRERE3IYSGxEREXEbSmxERETEbSixEREREbehxEZERETchhIbERERcRtKbERERMRtKLERERERt6HERtza1q1befbZZylfvjzFihWjePHi1KlTh7Fjx3L69GmHnnvLli00adIEi8WCyWRi0qRJdj+HyWRi5MiRdm/3epKSkjCZTJhMJr755psC+w3DoFKlSphMJpo2bXpL55g+fTpJSUk3dcw333xzzT6JyN9DUVd3QMRR3nnnHXr16kV4eDgDBw4kIiKCvLw8fvjhB2bOnMm6detYtGiRw87/3HPPcf78eZKTk/H39+e+++6z+znWrVtHmTJl7N7ujSpRogTvvfdegeRl9erV7N27lxIlStxy29OnTycoKIi4uLgbPqZOnTqsW7eOiIiIWz6viNzdlNiIW1q3bh09e/akRYsWfP7555jNZuu+Fi1a0L9/f1JSUhzah+3bt9O9e3fatGnjsHM0aNDAYW3fiM6dOzNv3jymTZuGn5+fdft7771HVFQUZ86ccUo/8vLyMJlM+Pn5ufyeiIhraShK3FJCQgImk4m3337bJqm5wsvLiw4dOlh/vnTpEmPHjuX+++/HbDYTHBzM008/zZEjR2yOa9q0KdWrV2fjxo08/PDD+Pj4UKFCBUaPHs2lS5eAP4Zpfv/9d2bMmGEdsgEYOXKk9c9/duWYAwcOWLetWrWKpk2bEhgYiLe3N+XKlePxxx/nwoUL1pjChqK2b9/OY489hr+/P8WKFaNWrVq8//77NjFXhmw++ugjhg4dSlhYGH5+fjRv3pzdu3ff2E0GunbtCsBHH31k3ZaVlcXChQt57rnnCj3m9ddfp379+gQEBODn50edOnV47733+PP7eO+77z527NjB6tWrrffvSsXrSt/nzp1L//79KV26NGazmV9++aXAUNTJkycpW7YsDRs2JC8vz9r+zp078fX1JTY29oavVUTuDkpsxO3k5+ezatUqIiMjKVu27A0d07NnTwYPHkyLFi1YvHgxb7zxBikpKTRs2JCTJ0/axKanp/Pkk0/y1FNPsXjxYtq0acOQIUP48MMPAWjXrh3r1q0D4IknnmDdunXWn2/UgQMHaNeuHV5eXsyePZuUlBRGjx6Nr68vubm51zxu9+7dNGzYkB07djB58mQ+++wzIiIiiIuLY+zYsQXiX331VQ4ePMi7777L22+/zc8//0z79u3Jz8+/oX76+fnxxBNPMHv2bOu2jz76iCJFitC5c+drXluPHj345JNP+Oyzz/jnP/9Jnz59eOONN6wxixYtokKFCtSuXdt6/64eNhwyZAiHDh1i5syZfPHFFwQHBxc4V1BQEMnJyWzcuJHBgwcDcOHCBf71r39Rrlw5Zs6ceUPXKSJ3EUPEzaSnpxuA0aVLlxuK37VrlwEYvXr1stm+fv16AzBeffVV67YmTZoYgLF+/Xqb2IiICKNVq1Y22wCjd+/eNttGjBhhFPa1mzNnjgEY+/fvNwzDMBYsWGAARlpa2l/2HTBGjBhh/blLly6G2Ww2Dh06ZBPXpk0bw8fHx/jtt98MwzCMr7/+2gCMtm3b2sR98sknBmCsW7fuL897pb8bN260trV9+3bDMAzjwQcfNOLi4gzDMIxq1aoZTZo0uWY7+fn5Rl5enjFq1C
gjMDDQuHTpknXftY69cr7GjRtfc9/XX39ts33MmDEGYCxatMh45plnDG9vb2Pr1q1/eY0icndSxUb+9r7++muAApNU69WrR9WqVVm5cqXN9tDQUOrVq2ez7YEHHuDgwYN261OtWrXw8vLihRde4P3332ffvn03dNyqVato1qxZgUpVXFwcFy5cKFA5+vNwHFy+DuCmrqVJkyZUrFiR2bNns23bNjZu3HjNYagrfWzevDkWiwUPDw88PT0ZPnw4p06dIiMj44bP+/jjj99w7MCBA2nXrh1du3bl/fffZ8qUKdSoUeOGjxeRu4cSG3E7QUFB+Pj4sH///huKP3XqFAClSpUqsC8sLMy6/4rAwMACcWazmezs7FvobeEqVqzIV199RXBwML1796ZixYpUrFiR//znP3953KlTp655HVf2/9nV13JlPtLNXIvJZOLZZ5/lww8/ZObMmVSpUoWHH3640NgNGzbQsmVL4PKqte+//56NGzcydOjQmz5vYdf5V32Mi4vj4sWLhIaGam6NiBtTYiNux8PDg2bNmrFp06YCk38Lc+Uf92PHjhXYd/ToUYKCguzWt2LFigGQk5Njs/3qeTwADz/8MF988QVZWVmkpqYSFRVFfHw8ycnJ12w/MDDwmtcB2PVa/iwuLo6TJ08yc+ZMnn322WvGJScn4+npyZIlS+jUqRMNGzakbt26t3TOwiZhX8uxY8fo3bs3tWrV4tSpUwwYMOCWzikidz4lNuKWhgwZgmEYdO/evdDJtnl5eXzxxRcAPProowDWyb9XbNy4kV27dtGsWTO79evKyp6tW7fabL/Sl8J4eHhQv359pk2bBsDmzZuvGdusWTNWrVplTWSu+OCDD/Dx8XHYUujSpUszcOBA2rdvzzPPPHPNOJPJRNGiRfHw8LBuy87OZu7cuQVi7VUFy8/Pp2vXrphMJpYtW0ZiYiJTpkzhs88+u+22ReTOo+fYiFuKiopixowZ9OrVi8jISHr27Em1atXIy8tjy5YtvP3221SvXp327dsTHh7OCy+8wJQpUyhSpAht2rThwIEDDBs2jLJly/Lyyy/brV9t27YlICCAbt26MWrUKIoWLUpSUhKHDx+2iZs5cyarVq2iXbt2lCtXjosXL1pXHjVv3vya7Y8YMYIlS5bwyCOPMHz4cAICApg3bx5ffvklY8eOxWKx2O1arjZ69OjrxrRr144JEyYQExPDCy+8wKlTpxg/fnyhS/Jr1KhBcnIyH3/8MRUqVKBYsWK3NC9mxIgRfPfddyxfvpzQ0FD69+/P6tWr6datG7Vr16Z8+fI33aaI3LmU2Ijb6t69O/Xq1WPixImMGTOG9PR0PD09qVKlCjExMbz44ovW2BkzZlCxYkXee+89pk2bhsVioXXr1iQmJhY6p+ZW+fn5kZKSQnx8PE899RT33HMPzz//PG3atOH555+3xtWqVYvly5czYsQI0tPTKV68ONWrV2fx4sXWOSqFCQ8PZ+3atbz66qv07t2b7Oxsqlatypw5c27qCb6O8uijjzJ79mzGjBlD+/btKV26NN27dyc4OJhu3brZxL7++uscO3aM7t27c/bsWe69916b5/zciBUrVpCYmMiwYcNsKm9JSUnUrl2bzp07s2bNGry8vOxxeSJyBzAZxp+eiiUiIiJyF9McGxEREXEbSmxERETEbSixEREREbehxEZERETchhIbERERcRtKbERERMRtKLERERERt+GWD+jzrv3i9YNE/oZ+TBnr6i6I3HGqhPg4/Bz2+ncpe8tUu7TjzlSxEREREbfhlhUbERGRO4pJdQRnUWIjIiLiaCaTq3vwt6HERkRExNFUsXEa3WkRERFxG6rYiIiIOJqGopxGiY2IiIijaSjKaXSnRURExG2oYiMiIuJoGopyGiU2IiIijqahKKfRnRYRERG3oYqNiIiIo2koymmU2IiIiDiahqKcRndaRERE3IYqNiIiIo6moSinUWIjIiLiaBqKcholNiIiIo
6mio3TKIUUERERt6GKjYiIiKNpKMpplNiIiIg4mhIbp9GdFhEREbehio2IiIijFdHkYWdRYiMiIuJoGopyGt1pERE
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"draw_confusion_matrix(y_test, y_pred)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 22,
|
||
|
"id": "580b58d7-596f-4207-8c99-4365aba2bc9f",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUZfrG8e9MeieFUELvvYvSO0hRUEFk1cW2q8uuq9h7r1iwrfXnWldELKiACCqIFJUq0nsNISSQXidzfn8ciRwmYIJJzkzm/lxXLuacOTPzJOSekzx5z/s6DMMwEBERERERERERv+G0uwAREREREREREaleagiJiIiIiIiIiPgZNYRERERERERERPyMGkIiIiIiIiIiIn5GDSERERERERERET+jhpCIiIiIiIiIiJ9RQ0hERERERERExM+oISQiIiIiIiIi4mfUEBIRERERERER8TNqCImIiIj4uAcffJD4+Hj27t1rdykiIiLiI9QQEhERqULr16/nyiuvpGnTpoSGhhIZGUm3bt2YNm0aR48etaWmt99+G4fDwapVq6r0dfbs2YPD4Sj9cDqdxMbGMmTIEBYsWHDKx82fP5/Ro0dTu3ZtQkJCaNiwIZMnT2bTpk2nfMwPP/zAxRdfTFJSEsHBwcTExNC7d29eeeUVcnNzq+LTs8Xx/7s9e/ZY9t9///1ccMEFTJgwgaKiojIf+8ADD+BwOCqtlsWLF+NwOFi8eHGlPWdZmjRpwhVXXFGhxyxfvpwHHniAjIwMj/sGDhzIwIEDK6U2ERERX6aGkIiISBV544036N69OytXruTWW29l/vz5fPbZZ0yYMIFXX32Vq6++2u4Sq8X111/PihUr+OGHH3j66afZvn07o0aNYsmSJR7H3nbbbYwcORK3283LL7/MwoULuf/++1m5ciXdunXj008/9XjM/fffT//+/Tl48CAPP/wwCxcu5MMPP2TIkCE88MAD3HPPPdXxadru1VdfpXbt2kydOtXuUirVZ599xr333luhxyxfvpwHH3ywzIbQyy+/zMsvv1xJ1YmIiPiuQLsLEBERqYlWrFjBP/7xD4YNG8bs2bMJCQkpvW/YsGHcfPPNzJ8/v1prKi4urtQRIuXVqFEjzjnnHAD69OlDy5YtGTBgAG+++Sb9+/cvPW7GjBk89dRT/OMf/7D8wt6/f38mTZrEgAEDuPzyy+nSpQvNmjUDYNasWTz00ENcffXVvPHGG5bPb+TIkdx2222sWLGimj5TewUGBjJ37ly7y6h0Xbt2rdTna9euXaU+n4iIiK/SCCEREZEq8Nhjj+FwOHj99dctzaDjgoODOf/880u33W4306ZNo02bNoSEhJCYmMhf//pXDhw4YHncqS6fOfkymOOX87z33nvcfPPNJCUlERISwo4dO0qPOXbsGFdeeSVxcXFERERw3nnnsWvXLo/n/uabbxgyZAjR0dGEh4fTp08fvv322zP4qph69OgBwOHDhy37H330UWJjY3n66ac9HhMREcGLL75IXl4e06dPL93/0EMPERsbywsvvFBmsysqKorhw4efca0nGzhwIB06dGDFihX07t2bsLAwmjRpwltvvQXA3Llz6datG+Hh4XTs2LHMpt/SpUsZMmQIUVFRhIeH07t37zIbOT/++CN9+vQhNDSU+vXrc+edd1JcXFxmXTNnzqRXr15EREQQGRnJ8OHDWb16dbk+p5MfO2LECNauXVuBr4rVF198Qa9evQgPDycqKophw4aV2ZT7/PPP6dSpEyEhITRr1oznn3++zMvaTv6ed7vdPPLII7Ru3ZqwsDBq1apFp06deP755wHz0rhbb70VgKZNm5Zesnj80rayLhlLTk7m4osvJioqipiYGCZOnMiPP/6Iw+Hg7bffLj3uVJebXXHFFTRp0sSyr6ioiEceeaQ007Vr1+bKK6/kyJEj5ftCioiIVDE1hERERCpZSUkJ3333Hd27d6dhw4bles
w//vEPbr/9doYNG8YXX3zBww8/zPz58+nduzdpaWlnXMudd97Jvn37ePXVV/nyyy9JTEwsve/qq6/G6XTywQcf8Nxzz/Hzzz8zcOBAy2U277//PsOHDyc6Opp33nmHjz76iLi4OEaMGHHGTaHdu3cD0KpVq9J9hw4dYuPGjQwfPpzw8PAyH9erVy8SExNZuHBh6WM2bNhw2seUx/Hm2QMPPFCu41NSUrjyyiu55ppr+Pzzz+nYsSNXXXUVDz30EHfeeSe33XYbn3zyCZGRkYwbN47k5OTSx37//fcMHjyYzMxM3nzzTWbMmEFUVBTnnXceM2fOLD1u06ZNDBkyhIyMDN5++21effVV1q5dyyOPPOJRz2OPPcakSZNo164dH330Ee+++y5ZWVn069ePDRs2nPZzOfmx7733HtnZ2fTr1++0czadygcffMDYsWOJjo5mxowZvPnmmxw7doyBAweydOnS0uPmz5/PhRdeSHx8PDNnzmTatGnMmDGDd9555w9fY9q0aTzwwANMmjSJuXPnMnPmTK6++urS79trrrmG66+/HoBPP/2UFStWsGLFCrp161bm8+Xn5zN06FAWLFjA448/zqxZs6hbty4TJ06s8Od/nNvtZuzYsTzxxBP85S9/Ye7cuTzxxBMsXLiQgQMHkp+ff8bPLSIiUmkMERERqVQpKSkGYFxyySXlOn7z5s0GYEyZMsWy/6effjIA46677ird17hxY2Py5MkezzFgwABjwIABpduLFi0yAKN///4ex7711lsGYFxwwQWW/cuWLTMA45FHHjEMwzByc3ONuLg447zzzrMcV1JSYnTu3Nno2bPnaT+v3bt3G4Dx5JNPGsXFxUZBQYGxbt06o1evXka9evWM3bt3lx77448/GoBxxx13nPY5zz77bCMsLKxCj/kjixcvNgICAowHH3zwD48dMGCAARirVq0q3Zeenm4EBAQYYWFhxsGDB0v3r1u3zgCMF154oXTfOeecYyQmJhrZ2dml+1wul9GhQwejQYMGhtvtNgzDMCZOnGiEhYUZKSkpluPatGljAKVfu3379hmBgYHGP//5T0udWVlZRmJiojF+/PjSfffff79x4o9+xx97/fXXWx6bnZ1t1K1b17j44otP+7U4/j22aNEiwzDM74v69esbHTt2NEpKSizPl5iYaPTu3bt031lnnWU0bNjQKCwstBwXHx9vnPzj6cnf82PGjDG6dOly2tqeeuopy9fpRCdn5ZVXXjEA4/PPP7cc97e//c0AjLfeeuuUjz1u8uTJRuPGjUu3Z8yYYQDGJ598Yjlu5cqVBmC8/PLLp61fRESkOmiEkIiIiM0WLVoE4HEpWM+ePWnbtu2fujzroosuOuV9l156qWW7d+/eNG7cuLSe5cuXc/ToUSZPnozL5Sr9cLvdnHvuuaxcubJcK3jdfvvtBAUFERoaSpcuXdiwYQNffvmlxyU25WEYRqXPgzRgwABcLhf33XdfuY6vV68e3bt3L92Oi4sjMTGRLl26UL9+/dL9bdu2BShdCj43N5effvqJ8ePHExkZWXpcQEAAl19+OQcOHGDr1q2A+T0xZMgQ6tSpYznu5FErX3/9NS6Xi6uuusqyPyoqikGDBvH999+f8vM4/ti//vWvlv/f0NBQBgwYUOHVw7Zu3UpycjKXX345TufvP2JGRkZy0UUX8eOPP5KXl0dubi6rVq1i3LhxBAcHW44777zz/vB1evbsyS+//MKUKVP4+uuvycrKqlCdJ1u0aBFRUVGWSzgB/vKXv5zxc86ZM4datWpx3nnnWb62Xbp0oW7dulW+MpuIiEh5aFJpERGRSpaQkEB4eHjppVF/JD09HTAbDSerX79+aUPhTJT1nMfVrVu3zH3H6zk+x8/48eNP+RxHjx4lIiLitDXccMMNXHbZZRQWFvLjjz9yzz33MHbsWH755Rfi4+MBc+Jp4A+/Znv37i29DK+8j6lscXFxHvuCg4M99h9vdhQUFADmnE
2GYZzy/xl+/15IT08/5f/PiY7/H/Xu3dvj2OPNu1M5/tizzjqrzPtPbOqUxx99H7vd7tKvgWEYlmbXcWXtO9mdd95
|
||
|
"text/plain": [
|
||
|
"<Figure size 1400x800 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"draw_roc_curve(X_test, y_test)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 23,
|
||
|
"id": "ca5d0a55-adbb-47a0-a4c8-6af9ca75ca9d",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABA4AAAIjCAYAAACDPFmSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1gU1/s28HvpLB1EigERQcSCYBDFigULaqwhlgSxJUSNBRUlNjBEsWv0G2OKYozGJEaNBQsWjLHHnmAlIhpRLAFUFBd23j98d36u7MKysIvK/bkur2RmzpzzzLPLwJ49c45EEAQBREREREREREQqGFR2AERERERERET06mLHARERERERERGpxY4DIiIiIiIiIlKLHQdEREREREREpBY7DoiIiIiIiIhILXYcEBEREREREZFa7DggIiIiIiIiIrXYcUBEREREREREarHjgIiIiIiIiIjUYscBERERlcuuXbtgbGyMzZs3V3YoRDojk8nQsWNHBAcHQyaTVXY4lWrgwIHw8fHBvXv3KjsUItITdhwQEVWApKQkSCQS/Pnnn5UditbWrVuHxYsXV3YYOhcSEoKQkJDKDuOVl5aWhri4OGRkZJRY7ubNm3j//fexZMkS9OzZU+P64+LiIJFIyhRTfn4+4uLikJqaWuyY4mewtHhfV9rkS18yMjIgkUiQlJRU2aFUmMjISHh4eCjtMzY2xq+//oqnT59iwoQJas/V1z1GVYz68NVXX2Hv3r3YuXMnqlWrpvf2K5pEIkFcXJze29XXPUub60tOTlZ7joeHByIjI8sdF71+2HFAREQAqk7HAWkmLS0N8fHxJf5RW1hYiPfeew8ffvghRowYUab6hw0bhiNHjpTpnPz8fMTHx6vsOOjatSuOHDkCFxeXMtVJVBZWVlZITk7Gli1b8PPPP1d2OHp36tQpTJs2DcnJyahVq1Zlh1Mhjhw5gmHDhlV2GDqjzfUlJycjPj5e5bFNmzZh2rRpFREavWaMKjsAIiKqXPn5+ZBKpZUdBpVDZb2GRkZGOHToUJnOUcT61ltv4a233qqwWBwdHeHo6Fhh9RGp4+LigmvXrlV2GHrx8r2lcePGuHv3rk7aevLkCczMzPQ+sqZZs2Z6bU/fKvr6AgICKrQ+en1wxAERkY5ERkbC0tISFy9eRKdOnWBhYQEXFxckJiYCAI4ePYqWLVvCwsICderUwerVq5XOVwxjTElJweDBg2Fvbw8LCwt0794d//zzT7H2Vq5ciUaNGsHMzAz29vbo1asXLly4oDKm8+fPo2PHjrCyskL79u0REhKC7du34/r165BIJOI/hfj4eDRt2hT29vawtrZG48aN8d1330EQBKX6PTw80K1bN+zcuRONGzeGubk56tati5UrVxaL999//8WHH34INzc3mJiYwNXVFX379sWdO3fEMnl5eZgwYQJq1aoFExMT1KhRA2PHjsXjx49Lzb8gCJg7dy5q1qwJMzMzNG7cGDt27FBZVtN2fvnlFzRt2hQ2NjaQSqXw9PTEkCFDSo1FLpdj6dKl8Pf3h7m5OWxtbdGsWTNs2bJFLPPTTz+hY8eOcHFxgbm5OXx9fTF58uRiMah7DQEgJSUFPXr0wFtvvQUzMzN4eXnho48+Uvkc8sWLF9G/f384OTnB1NQU7u7uiIiIQEFBAZKSkvDuu+8CANq2bSu+H14cir5nzx60b98e1tbWkEqlaNGiBfbu3avUhmJ4/alTp9C3b1/Y2dmhdu3aSsdetG/fPoSEhMDBwQHm5uZwd3dHnz59kJ+fj4yMDLFjID4+XoxJMWRW1bBfde+Bl4eSqxsynJqaColEUmyEgybXfvfuXfH9bWpqCkdHR7Ro0QJ79uwp9lq8bPv27fD394epqSlq1aqF+fPnqyz3v//9D61bt0b16tVhYWGBhg0bYu7cucWevw8JCUGDBg1w4sQJtGrVSnzvJiYmQi6Xi+XkcjkSEhLg4+Mjvk/9/P
ywZMmSUmNW5cqVKxgwYACqV68OU1NT+Pr64n//+59G50okEowaNQqrVq0S4wkMDMTRo0chCALmzZuHWrVqwdLSEu3atcPVq1eL1aHJPRF4/vr7+PiIMX7//fcqY3r27BkSEhJQt25d8TWNjIxUumepo+rcwYMHa/whvDwxatpOSfcWTestKCjA+PHj4ezsDKlUitatW+PkyZPFhrcrfuZ2796NIUOGwNHREVKpFAUFBQCe3w+Dg4NhYWEBS0tLdOrUCadPn1Zq659//kG/fv3g6uoKU1NTODk5oX379jhz5oxYpqR7ioKqofx//fUXevToATs7O5iZmcHf37/Y72jF/eHHH3/ElClT4OrqCmtra3To0AGXLl0qNd/qaPq+/eabb1CnTh2YmpqiXr16WLduncrHV16+vvz8fPH3naKNwMBA/PjjjwCevw8UP6cv/j2guD+qelTh4sWL6Ny5M6RSKapVq4aoqChs3bq12P1T3WMOqh7vKc/vf9INjjggItIhmUyG3r17IyoqChMnTsS6desQGxuLvLw8/Prrr5g0aRLeeustLF26FJGRkWjQoAHefvttpTqGDh2K0NBQrFu3Djdu3MDUqVMREhKCc+fOwdbWFgAwe/ZsfPrpp+jfvz9mz56N+/fvIy4uDsHBwThx4gS8vb3F+p49e4Z33nkHH330ESZPnozCwkK89dZb+PDDD5Geno5NmzYVu46MjAx89NFHcHd3B/C80+OTTz7Bv//+i+nTpyuVPXv2LMaPH4/JkyfDyckJ3377LYYOHQovLy+0bt0awPNOgyZNmkAmk+HTTz+Fn58f7t+/j127duG///6Dk5MT8vPz0aZNG9y8eVMs8/fff2P69Ok4f/489uzZU+I3U/Hx8YiPj8fQoUPRt29f3LhxA8OHD0dRURF8fHzEcpq2c+TIEbz33nt47733EBcXBzMzM1y/fh379u0r9X0QGRmJH374AUOHDsXMmTNhYmKCU6dOKX1QvXLlCsLCwjB27FhYWFjg4sWLmDNnDo4fP16sDVWvIQCkp6cjODgYw4YNg42NDTIyMrBw4UK0bNkS58+fh7GxsfgatWzZEtWqVcPMmTPh7e2NrKwsbNmyBc+ePUPXrl0xa9YsfPrpp/jf//6Hxo0bA4D4of+HH35AREQEevTogdWrV8PY2BgrVqxAp06dsGvXLvHDhkLv3r3Rr18/REVFqf2jLyMjA127dkWrVq2wcuVK2Nra4t9//8XOnTvx7NkzuLi4YOfOnejcuTOGDh0qDr0taZSBpu+BstD02j/44AOcOnUKn3/+OerUqYOcnBycOnUK9+/fL7H+vXv3okePHggODsb69etRVFSEuXPnqvxwmp6ejgEDBoh/WJ89exaff/45Ll68WKyz7vbt2xg4cCDGjx+PGTNmYNOmTYiNjYWrqysiIiIAAHPnzkVcXBymTp2K1q1bQyaT4eLFi8jJySlzntLS0tC8eXO4u7tjwYIFcHZ2xq5duzB69Gjcu3cPM2bMKLWObdu24fTp00hMTIREIsGkSZPQtWtXDBo0CP/88w+WLVuG3NxcREdHo0+fPjhz5ox4T9D0npiUlITBgwejR48eWLBgAXJzcxEXF4eCggIYGPzf92tyuRw9evTAwYMHERMTg+bNm+P69euYNm0ajh07hpMnT6od9aPu3BkzZiAkJAR//vknzM3N1eahvDFq2g6g+t5SlnoHDx6Mn376CTExMWjXrh3S0tLQq1cv5OXlqWxvyJAh6Nq1K9asWYPHjx/D2NgYs2bNwtSpUzF48GBMnToVz549w7x589CqVSscP34c9erVAwCEhYWJPx/u7u64d+8eDh8+LL5fS7unqHu9Ll26hObNm6N69er44osv4ODggB9++EHsJIqJiVEq/+mnn6JFixb49ttvkZeXh0mTJqF79+64cOECDA0NS8z3yzR933799df46KOP0KdPHyxatAi5ubmIj48XO15KEh0djTVr1iAhIQEBAQ
F4/Pgx/vrrL/HeNG3aNDx+/BgbNmxQepxM3WNgd+7cQZs2bWBsbIwvv/wSTk5OWLt2LUaNGlWma39ReX//k44IRER
|
||
|
"text/plain": [
|
||
|
"<Figure size 1000x600 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"draw_features_importance(pipeline, 'logreg')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 24,
|
||
|
"id": "f3782ec2-9f2c-4c23-9691-79413c4e04be",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtUAAAIiCAYAAAAHJDTKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABd9ElEQVR4nO3de1iUdf7/8dcEw3AQRoE4KZKWkoZaaSq65RHQBDPbdKMQWw+1Vq6p26ZtK2ypm2bZWpnbWpaHdK10Sw2lUtPwSGJ5WDt53MAjoqLBiPfvD7/MzxGP3CDgPB/XxaXzud9z3+97Ptz48p57biyGYRgCAAAAUG43VHUDAAAAQE1HqAYAAABMIlQDAAAAJhGqAQAAAJMI1QAAAIBJhGoAAADAJEI1AAAAYBKhGgAAADCJUA0AAACYRKgGAFxXli5dKqvVqoULF1Z1KwDcCKEauIwZM2bIYrFo48aNF1yemJiom266yWXspptuUv/+/a9qO1lZWUpLS9PRo0fL1ygqxK5du2SxWDRjxoyqbqVSlO7fyy+/XGHrXLFihSwWiz788MPL1qalpclisbiMdezYUR07dnQZs1gsSktLcz7etm2b0tLStGvXrkuuf9++fXrkkUf02muvqVevXle4B9eX81+7a6F///5lfg7WJFu3btWQIUMUGxsrPz8/WSwWrVixoqrbQg1DqAYqwYIFC/T8889f1XOysrKUnp5OqMZ1beDAgVqzZs1l69asWaOBAwc6H2/btk3p6emXDNWnT59W3759NXjwYA0ZMqQi2oWb2LhxoxYuXKjAwEB16dKlqttBDeVZ1Q0A16M77rijqlu4ag6HQxaLRZ6e/FioaU6ePClfX9+qbuOK1KtXT/Xq1btsXdu2ba963Z6envr666/L01aNwrFa8VJSUpSamipJ+vDDD/Xpp59WcUeoiThTDVSC8y//OHPmjF588UVFR0fLx8dHtWvXVvPmzfXaa69JOvuW+J/+9CdJUoMGDWSxWFzefjxz5owmTJigW2+9VTabTSEhIerXr5/27dvnsl3DMDRu3DhFRUXJ29tbrVq1UmZmZpm310vfrp85c6ZGjBihunXrymaz6ccff9TBgwc1ZMgQNW3aVLVq1VJISIg6d+6sVatWuWyr9DKCiRMn6qWXXtJNN90kHx8fdezYUd9//70cDoeeffZZRUREyG636/7779eBAwfKvE6JiYlatGiR7rjjDvn4+KhJkyZatGiRpLOX3jRp0kR+fn5q3br1BS/B2bhxo3r27KnAwEB5e3vrjjvu0L///e8rmqdffvlFffr0kb+/v+x2u/r27au8vLwL1l7Jdk6ePKmRI0eqQYMG8vb2VmBgoFq1aqUPPvjgkn2UXmKUmZmpRx99VIGBgfLz81NSUpJ+/vlnl9qOHTsqJiZGX331ldq1aydfX1/9/ve/lyTt2bNHjzzyiEJCQmSz2dSkSRNNmjRJZ86cKbPNM2fOaOzYsapfv77ze+WLL75wqfnxxx/16KOPqlGjRvL19VXdunWVlJSk77777oL78euvv2r48OEKCwuTj4+POnTooE2bNrnUXOjyjws59xKGGTNm6MEHH5QkderUyXl8nHuJzueff64uXbooICBAvr6+at++fZn9OXjwoAYPHqzIyEjZbDbdeOONat++vT7//PNL9lLa86ZNm9S7d28FBATIbrfrkUce0cGDB11qr/RYvdglYldzrF6pKz2mL2XOnDmKjY1VrVq1VKtWLd1+++2aPn36JZ/zxhtv6J577lFISIj8/PzUrFkzTZgwQQ6Hw6Vu06ZNSkxMdH7fRkREqEePHi6v2fz589WmTRvZ7Xb5+vqqYcOGzu/7UseOHXMef15eXqpbt66GDRumwsLCy+7fDTcQh2Ae/80FrlBJSYlOnz5dZtwwjMs+d8KECUpLS9Nf/vIX3XPPPXI4HPrvf//rvNRj4MCBOnLkiKZMmaKPP/5Y4eHhkqSmTZtKkv
7whz/on//8p5588kklJiZq165dev7557VixQp98803Cg4OliQ999xzGj9+vAYPHqzevXtr7969GjhwoBwOhxo3blymr1GjRik2NlZvvfWWbrjhBoWEhDhDwpgxYxQWFqYTJ05owYIF6tixo7744osy176+8cYbat68ud544w0dPXpUI0aMUFJSktq0aSOr1ap33nlHu3fv1siRIzVw4EB98sknLs/fvHmzRo0apeeee052u13p6enq3bu3Ro0apS+++ELjxo2TxWLRn//8ZyUmJmrnzp3y8fGRJC1fvlzdunVTmzZt9NZbb8lut2vu3Lnq27evTp48ecnr2k+dOqWuXbvql19+0fjx49W4cWMtXrxYffv2LVN7pdsZPny4Zs6cqRdffFF33HGHCgsLtWXLFh0+fPjS3yD/Z8CAAYqLi9OcOXO0d+9e/eUvf1HHjh317bffqnbt2s663NxcPfLII3rmmWc0btw43XDDDTp48KDatWun4uJivfDCC7rpppu0aNEijRw5Uj/99JPefPNNl229/vrrioqK0uTJk51BsHv37lq5cqViY2Mlnf1PR1BQkP7+97/rxhtv1JEjR/Tee++pTZs22rRpk6Kjo13WOXr0aN15553617/+pYKCAqWlpaljx47atGmTGjZseEWvwYX06NFD48aN0+jRo/XGG2/ozjvvlCTdfPPNkqRZs2apX79+uu+++/Tee+/JarVq2rRpSkhI0NKlS51v56ekpOibb77R2LFj1bhxYx09elTffPPNFc/P/fffrz59+ujxxx/X1q1b9fzzz2vbtm1at26drFarpCs/Vq/WhY7VK3XkyBFJV35Mn++vf/2rXnjhBfXu3VsjRoyQ3W7Xli1btHv37ks+76efflJycrIz5G7evFljx47Vf//7X73zzjuSpMLCQsXFxalBgwZ64403FBoaqry8PC1fvlzHjx+XdPZSoL59+6pv375KS0uTt7e3du/erS+//NK5rZMnT6pDhw7at2+fRo8erebNm2vr1q3661//qu+++06ff/75Ff1nDjDFAHBJ7777riHpkl9RUVEuz4mKijJSU1OdjxMTE43bb7/9ktuZOHGiIcnYuXOny/j27dsNScaQIUNcxtetW2dIMkaPHm0YhmEcOXLEsNlsRt++fV3q1qxZY0gyOnTo4Bxbvny5Icm45557Lrv/p0+fNhwOh9GlSxfj/vvvd47v3LnTkGS0aNHCKCkpcY5PnjzZkGT07NnTZT3Dhg0zJBkFBQXOsaioKMPHx8fYt2+fcywnJ8eQZISHhxuFhYXO8YULFxqSjE8++cQ5duuttxp33HGH4XA4XLaVmJhohIeHu/R1vqlTpxqSjP/85z8u44MGDTIkGe++++5VbycmJsbo1avXRbd5MaXfY+e+voZhGF9//bUhyXjxxRedYx06dDAkGV988YVL7bPPPmtIMtatW+cy/oc//MGwWCzGjh07DMP4//MWERFhnDp1yll37NgxIzAw0OjatetF+zx9+rRRXFxsNGrUyHj66aed46XfT3feeadx5swZ5/iuXbsMq9VqDBw40Dk2ZswY4/x/ejp06ODy/WkYhiHJGDNmjPPx/PnzDUnG8uXLXeoKCwuNwMBAIykpyWW8pKTEaNGihdG6dWvnWK1atYxhw4ZddP8uprTnc/fZMAxj9uzZhiRj1qxZhmFc+bFqGGV/RpQ6/7W4mmO11Pmv3fkudkxfyM8//2x4eHgYDz/88CXrUlNTy/wcPFdJSYnhcDiM999/3/Dw8DCOHDliGIZhbNy40ZBkLFy48KLPffnllw1JxtGjRy9aM378eOOGG24wNmzY4DL+4YcfGpKMJUuWXLL/c13sew24HN7vAK7Q+++/rw0bNpT5+s1vfnPZ57Zu3VqbN2/WkCFDtHTpUh07duyKt7t8+XJJKnPWtXXr1mrSpInzLe61a9eqqKhIffr0calr27btRT+V/8ADD1xw/K233tKdd94pb29veXp6ym
q16osvvtD27dvL1N57770ub502adJE0tmzi+cqHd+zZ4/L+O233666deuWqevYsaPLdcKl46Vnx3788Uf997//1cM
|
||
|
"text/plain": [
|
||
|
"<Figure size 800x600 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"draw_prob_distribution(X_test)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "ae8e9bd3-0f6a-4f82-bb4c-470cbdc8d6bb",
|
||
|
"metadata": {
|
||
|
"jp-MarkdownHeadingCollapsed": true
|
||
|
},
|
||
|
"source": [
|
||
|
"## Cross Validation"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 40,
|
||
|
"id": "7f0535de-34f1-4e97-b993-b429ecf0a554",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"y_train = y_train['y_has_purchased']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 41,
|
||
|
"id": "f7fca463-d7d6-493b-8329-fdfa92457f78",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Best parameters found: {'logreg__C': 0.0009765625, 'logreg__class_weight': 'balanced', 'logreg__penalty': 'l1'}\n",
|
||
|
"Best cross-validation score: 0.65\n",
|
||
|
"Test set score: 0.64\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Cross validation\n",
|
||
|
"\n",
|
||
|
"grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring=recall_scorer, error_score='raise',\n",
|
||
|
" n_jobs=-1)\n",
|
||
|
"\n",
|
||
|
"grid_search.fit(X_train, y_train)\n",
|
||
|
"\n",
|
||
|
"# Print the best parameters and the best score\n",
|
||
|
"print(\"Best parameters found: \", grid_search.best_params_)\n",
|
||
|
"print(\"Best cross-validation score: {:.2f}\".format(grid_search.best_score_))\n",
|
||
|
"\n",
|
||
|
"# Evaluate the best model on the test set\n",
|
||
|
"test_score = grid_search.score(X_test, y_test)\n",
|
||
|
"print(\"Test set score: {:.2f}\".format(test_score))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 43,
|
||
|
"id": "56bd7828-4de1-4166-bea0-5d5e152b9d38",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi0AAAHFCAYAAAA+FskAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABQP0lEQVR4nO3de3yP9f/H8cdnp49tbMZsM6ecMxRRjEI5M/LtgFYrEXJsOaZy6mBOIWc60FdpiUilpVJKDGHOUU5LzBxmGLbZrt8ffj7fPjbZdH189uF57/a53ey6Xtd1va5PrV693u/3dVkMwzAQERERKeDcnJ2AiIiISF6oaBERERGXoKJFREREXIKKFhEREXEJKlpERETEJahoEREREZegokVERERcgooWERERcQkqWkRERMQlqGiRW9q2bdt49tlnKV++PIUKFaJw4cLcc889jB8/nlOnTjn02lu2bKFx48b4+/tjsViYMmWK6dewWCyMGjXK9PNez/z587FYLFgsFn788ccc+w3DoFKlSlgsFpo0aXJD15g5cybz58/P1zE//vjjNXMSEdfn4ewERBzlnXfeoXfv3lStWpXBgwcTFhZGZmYmv/76K7Nnz2bdunUsXbrUYdfv2rUraWlpxMbGEhAQwB133GH6NdatW0fp0qVNP29eFSlShPfeey9HYbJ69Wr27dtHkSJFbvjcM2fOJDAwkC5duuT5mHvuuYd169YRFhZ2w9cVkYJLRYvcktatW0evXr1o3rw5y5Ytw2q12vY1b96cgQMHEhcX59AcduzYQffu3WndurXDrlG/fn2HnTsvOnXqxEcffcSMGTPw8/OzbX/vvfcIDw/nzJkzNyWPzMxMLBYLfn5+Tv9ORMRxNDwkt6QxY8ZgsViYO3euXcFyhZeXF+3bt7f9nJ2dzfjx47nzzjuxWq0EBQXx9NNPc/jwYbvjmjRpQo0aNdi4cSMPPPAAPj4+VKhQgbFjx5KdnQ38b+jk0qVLzJo1yzaMAjBq1Cjbn//uyjEHDx60bVu1ahVNmjShePHieHt7U7ZsWR599FHOnz9vi8lteGjHjh08/PDDBAQEUKhQIWrVqsUHH3xgF3NlGOXjjz/mlVdeITQ0FD8/P5o1a8aePXvy9iUDTzzxBAAff/yxbVtqaipLliyha9euuR4zevRo6tWrR7FixfDz8+Oee+7hvffe4+/vbr3jjjvYuXMnq1evtn1/VzpVV3JfsGABAwcOpFSpUlitVv74448cw0MnTpygTJkyNGjQgMzMTNv5d+3aha+vL1FRUXm+VxFxPhUtcsvJyspi1apV1KlThzJlyuTpmF69ejF06FCaN2/O8uXLef3114mLi6NBgwacOHHCLjYpKYknn3ySp556iuXLl9O6dWuGDRvGhx9+CEDbtm1Zt24dAI899hjr1q2z/ZxXBw8epG3btnh5efH+++8TFxfH2LFj8fX1JSMj45rH7dmzhwYNGrBz506mTp3KZ599RlhYGF26dGH8+PE54l9++WUOHTrEu+++y9y5c/n9999p164dWVlZecrTz8+Pxx57jPfff9+27eOPP8bNzY1OnTpd89569uzJokWL+Oyzz3jkkUfo168fr7/+ui1m6dKlVKhQgdq1a9u+v6uH8oYNG0ZiYiKzZ8/miy++ICgoKMe1AgMDiY2NZePGjQwdOhSA8+fP8/jjj1O2bFlmz56dp/sUkQLCELnFJCUlGYDRuXPnPMXv3r3bAIzevXvbbV+/fr0BGC+//LJtW+PGjQ3AWL9+vV1sWFiY0bJlS7ttgNGnTx+7bSNHjjRy+7WbN2+eARgHDhwwDMMwFi9ebABGQkLCP+YOGCNHjrT93LlzZ8NqtRqJiYl2ca1btzZ8fHyM06dPG4ZhGD/88IMBGG3atLGLW7RokQEY69at+8frXsl348aNtnPt2LHDMAzDuPfee40uXboYhmEY1atXNxo3bnzN82RlZRmZmZnGa6+9ZhQvXt
zIzs627bvWsVeu16hRo2vu++GHH+y2jxs3zgCMpUuXGs8884zh7e1tbNu27R/vUUQKHnVa5Lb3ww8/AOSY8HnfffdRrVo1vv/+e7vtISEh3HfffXbb7rrrLg4dOmRaTrVq1cLLy4sePXrwwQcfsH///jwdt2rVKpo2bZqjw9SlSxfOnz+fo+Pz9yEyuHwfQL7upXHjxlSsWJH333+f7du3s3HjxmsODV3JsVmzZvj7++Pu7o6npycjRozg5MmTJCcn5/m6jz76aJ5jBw8eTNu2bXniiSf44IMPmDZtGjVr1szz8SJSMKhokVtOYGAgPj4+HDhwIE/xJ0+eBKBkyZI59oWGhtr2X1G8ePEccVarlQsXLtxAtrmrWLEi3333HUFBQfTp04eKFStSsWJF3n777X887uTJk9e8jyv7/+7qe7ky/yc/92KxWHj22Wf58MMPmT17NlWqVOGBBx7INXbDhg20aNECuLy665dffmHjxo288sor+b5ubvf5Tzl26dKFixcvEhISorksIi5KRYvcctzd3WnatCmbNm3KMZE2N1f+w3306NEc+44cOUJgYKBpuRUqVAiA9PR0u+1Xz5sBeOCBB/jiiy9ITU0lPj6e8PBwoqOjiY2Nveb5ixcvfs37AEy9l7/r0qULJ06cYPbs2Tz77LPXjIuNjcXT05Mvv/ySjh070qBBA+rWrXtD18xtQvO1HD16lD59+lCrVi1OnjzJoEGDbuiaIuJcKlrkljRs2DAMw6B79+65TlzNzMzkiy++AOChhx4CsE2kvWLjxo3s3r2bpk2bmpbXlRUw27Zts9t+JZfcuLu7U69ePWbMmAHA5s2brxnbtGlTVq1aZStSrvjvf/+Lj4+Pw5YDlypVisGDB9OuXTueeeaZa8ZZLBY8PDxwd3e3bbtw4QILFizIEWtW9yorK4snnngCi8XC119/TUxMDNOmTeOzzz771+cWkZtLz2mRW1J4eDizZs2id+/e1KlTh169elG9enUyMzPZsmULc+fOpUaNGrRr146qVavSo0cPpk2bhpubG61bt+bgwYMMHz6cMmXK8OKLL5qWV5s2bShWrBjdunXjtddew8PDg/nz5/Pnn3/axc2ePZtVq1bRtm1bypYty8WLF20rdJo1a3bN848cOZIvv/ySBx98kBEjRlCsWDE++ugjvvrqK8aPH4+/v79p93K1sWPHXjembdu2TJo0icjISHr06MHJkyeZOHFirsvSa9asSWxsLJ988gkVKlSgUKFCNzQPZeTIkfz888+sXLmSkJAQBg4cyOrVq+nWrRu1a9emfPny+T6niDiHiha5ZXXv3p377ruPyZMnM27cOJKSkvD09KRKlSpERkbSt29fW+ysWbOoWLEi7733HjNmzMDf359WrVoRExOT6xyWG+Xn50dcXBzR0dE89dRTFC1alOeee47WrVvz3HPP2eJq1arFypUrGTlyJElJSRQuXJgaNWqwfPly25yQ3FStWpW1a9fy8ssv06dPHy5cuEC1atWYN29evp4s6ygPPfQQ77//PuPGjaNdu3aUKlWK7t27ExQURLdu3exiR48ezdGjR+nevTtnz56lXLlyds+xyYtvv/2WmJgYhg8fbtcxmz9/PrVr16ZTp06sWbMGLy8vM25PRBzMYhh/e6KTiIiISAGlOS0iIiLiElS0iIiIiEtQ0SIiIiIuQUWLiIiIuAQVLSIiIuISVLSIiIiIS1DRIiIiIi7hlny4nHftvtcPErkNLV84ytkpiBQ4zas55p1cf2fWf5cubJluynlclTotIiIi4hJuyU6LiIhIgWJRj8AMKlpEREQczWJxdga3BBUtIiIijqZOiyn0LYqIiIhLUKdFRETE0TQ8ZAoVLSIiIo6m4SFT6FsUERERl6BOi4iIiKNpeMgUKlpEREQcTcNDptC3KCIiIi5BnRYRERFH0/CQKVS0iIiIOJqGh0yhb1FERERcgjotIiIijqbhIVOoaBEREXE0DQ+ZQkWLiI
iIo6nTYgqVfiIiIuIS1GkRERFxNA0PmUJFi4iIiKOpaDGFvkURERFxCeq0iIiIOJqbJuKaQUWLiIiIo2l4yBT6FkV
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"y_pred = grid_search.predict(X_test)\n",
|
||
|
"\n",
|
||
|
"draw_confusion_matrix(y_test, y_pred)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 44,
|
||
|
"id": "319fe0eb-4d4a-492c-bd50-3f08ab483021",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUddrG8e+k904ooSahSheBJIggioINBUTWvuqqrLr2tfeKBdtrXde6IqKIq7IIFsQA0hEBBRJ6JwnpPXPePw4MHAcwgSRnJnN/rotL5pwzyQNy54SH33l+DsMwDERERERERERExGf42V2AiIiIiIiIiIg0LjWERERERERERER8jBpCIiIiIiIiIiI+Rg0hEREREREREREfo4aQiIiIiIiIiIiPUUNIRERERERERMTHqCEkIiIiIiIiIuJj1BASEREREREREfExagiJiIiIiIiIiPgYNYREREREvNzDDz9MfHw8mzdvtrsUERER8RJqCImIiDSglStXcuWVV9KhQwdCQkKIiIigb9++TJw4kby8PFtqevfdd3E4HCxZsqRBP8+mTZtwOByuH35+fsTGxjJs2DBmzZp1xPfNnDmTs846i2bNmhEcHEybNm24/PLLWbNmzRHf89NPP3HhhReSlJREUFAQ0dHRpKen89prr1FSUtIQvzxbHPh/t2nTJsvxBx98kPPPP5+xY8dSWVl52Pc+9NBDOByOeqtlzpw5OBwO5syZU28f83Dat2/PFVdcUaf3zJ8/n4ceeoj8/Hy3c0OGDGHIkCH1UpuIiIg3U0NIRESkgbz11luceOKJLF68mDvuuIOZM2fy+eefM3bsWF5//XWuuuoqu0tsFDfeeCMLFizgp59+4tlnn2X9+vWMHDmSuXPnul175513MmLECJxOJ6+++iqzZ8/mwQcfZPHixfTt25dp06a5vefBBx9k8ODBbN++nUcffZTZs2fz8ccfM2zYMB566CHuu+++xvhl2u7111+nWbNm3HLLLXaXUq8+//xz7r///jq9Z/78+Tz88MOHbQi9+uqrvPrqq/VUnYiIiPcKsLsAERGRpmjBggVcf/31nH766UyfPp3g4GDXudNPP53bbruNmTNnNmpNVVVV9bpCpLbatm3LwIEDAcjIyKBjx46ccsopvP322wwePNh13eTJk3nmmWe4/vrrLX9hHzx4MOPHj+eUU07h0ksvpXfv3iQnJwMwdepUHnnkEa666ireeusty69vxIgR3HnnnSxYsKCRfqX2CggI4Ouvv7a7jHrXp0+fev143bp1q9ePJyIi4q20QkhERKQBPPHEEzgcDt58801LM+iAoKAgzj33XNdrp9PJxIkT6dKlC8HBwSQmJnLZZZexbds2y/uO9PjMHx+DOfA4zwcffMBtt91GUlISwcHBZGVlua7Zt28fV155JXFxcYSHh3POOeewYcMGt4/97bffMmzYMKKioggLCyMjI4PvvvvuGH5XTP369QNg9+7dluOPP/44sbGxPPvss27vCQ8P5+WXX6a0tJRJkya5jj/yyCPExsby0ksvHbbZFRkZyfDhw4+51j8aMmQI3bt3Z8GCBaSnpxMaGkr79u155513APj666/p27cvYWFh9OjR47BNv8zMTIYNG0ZkZCRhYWGkp6cftpHz888/k5GRQUhICK1ateLuu++mqqrqsHVNmTKFtLQ0wsPDiYiIYPjw4SxdurRWv6Y/vveMM85g+fLldfhdsfrvf/9LWloaYWFhREZGcvrppx+2KffFF1/Qs2dPgoODSU5O5sUXXzzsY21//DPvdDp57LHH6Ny5M6GhocTExNCzZ09efPFFwHw07o477gCgQ4cOrkcWDzzadrhHxnbs2MGFF15IZGQk0dHRjBs3jp9//hmHw8G7777ruu5Ij5tdccUVtG/f3nKssrKSxx57zJXpZs2aceWVV7J3797a/UaKiIg0MDWERERE6llNTQ3ff/89J554Im3atK
nVe66//nr++c9/cvrpp/Pf//6XRx99lJkzZ5Kenk5OTs4x13L33XezZcsWXn/9db788ksSExNd56666ir8/Pz46KOPeOGFF1i0aBFDhgyxPGbz4YcfMnz4cKKionjvvff45JNPiIuL44wzzjjmptDGjRsB6NSpk+vYzp07Wb16NcOHDycsLOyw70tLSyMxMZHZs2e73rNq1aqjvqc2DjTPHnrooVpdv2vXLq688kquvvpqvvjiC3r06MFf//pXHnnkEe6++27uvPNOPvvsMyIiIhg1ahQ7duxwvffHH3/k1FNPpaCggLfffpvJkycTGRnJOeecw5QpU1zXrVmzhmHDhpGfn8+7777L66+/zvLly3nsscfc6nniiScYP3483bp145NPPuH999+nsLCQk08+mVWrVh311/LH937wwQcUFRVx8sknH3Vm05F89NFHnHfeeURFRTF58mTefvtt9u3bx5AhQ8jMzHRdN3PmTC644ALi4+OZMmUKEydOZPLkybz33nt/+jkmTpzIQw89xPjx4/n666+ZMmUKV111levP7dVXX82NN94IwLRp01iwYAELFiygb9++h/14ZWVlnHbaacyaNYsnn3ySqVOn0qJFC8aNG1fnX/8BTqeT8847j6eeeoq//OUvfP311zz11FPMnj2bIUOGUFZWdswfW0REpN4YIiIiUq927dplAMZFF11Uq+t/++03AzAmTJhgOb5w4UIDMO655x7XsXbt2hmXX36528c45ZRTjFNOOcX1+ocffjAAY/DgwW7XvvPOOwZgnH/++Zbj8+bNMwDjscceMwzDMEpKSoy4uDjjnHPOsVxXU1Nj9OrVy+jfv/9Rf10bN240AOPpp582qqqqjPLycmPFihVGWlqa0bJlS2Pjxo2ua3/++WcDMO66666jfswBAwYYoaGhdXrPn5kzZ47h7+9vPPzww3967SmnnGIAxpIlS1zHcnNzDX9/fyM0NNTYvn276/iKFSsMwHjppZdcxwYOHGgkJiYaRUVFrmPV1dVG9+7djdatWxtOp9MwDMMYN26cERoaauzatctyXZcuXQzA9Xu3ZcsWIyAgwPj73/9uqbOwsNBITEw0xowZ4zr24IMPGod+63fgvTfeeKPlvUVFRUaLFi2MCy+88Ki/Fwf+jP3www+GYZh/Llq1amX06NHDqKmpsXy8xMREIz093XXspJNOMtq0aWNUVFRYrouPjzf++O3pH//Mn3322Ubv3r2PWtszzzxj+X061B+z8tprrxmA8cUXX1iuu+aaawzAeOedd4743gMuv/xyo127dq7XkydPNgDjs88+s1y3ePFiAzBeffXVo9YvIiLSGLRCSERExGY//PADgNujYP3796dr167H9XjW6NGjj3ju4osvtrxOT0+nXbt2rnrmz59PXl4el19+OdXV1a4fTqeTM888k8WLF9dqB69//vOfBAYGEhISQu/evVm1ahVffvml2yM2tWEYRr3PQTrllFOorq7mgQceqNX1LVu25MQTT3S9jouLIzExkd69e9OqVSvX8a5duwK4toIvKSlh4cKFjBkzhoiICNd1/v7+XHrppWzbto21a9cC5p+JYcOG0bx5c8t1f1y18s0331BdXc1f//pXy/HIyEiGDh3Kjz/+eMRfx4H3XnbZZZb/vyEhIZxyyil13j1s7dq17Nixg0svvRQ/v4PfYkZERDB69Gh+/vlnSktLKSkpYcmSJYwaNYqgoCDLdeecc86ffp7+/fvzyy+/MGHCBL755hsKCwvrVOcf/fDDD0RGRloe4QT4y1/+cswf86uvviImJoZzzjnH8nvbu3dvWrRo0eA7s4mIiNSGhkqLiIjUs4SEBMLCwlyPRv2Z3NxcwGw0/FGrVq1cDYVjcbiPeUCLFi0Oe+xAPQdm/IwZM+aIHyMvL4/w8PCj1vCPf/yDSy65hIqKCn7++Wfuu+8+zjvvPH755Rfi4+MBc/A08Ke/Z5s3b3Y9hlfb99S3uLg4t2NBQU
Fuxw80O8rLywFzZpNhGEf8/wwH/yzk5uYe8f/PoQ78P0pPT3e79kDz7kgOvPekk0467PlDmzq18Wd/jp1Op+v3wDA
|
||
|
"text/plain": [
|
||
|
"<Figure size 1400x800 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"draw_roc_curve(X_test, y_test)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "ab122f66-1591-43ea-a364-2564f09b2bb3",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Segmentation du score de prédiction"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 70,
|
||
|
"id": "210b931c-6d46-4ebf-a9c7-d1ee05c3fadf",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Création d'un dataframe avec le score\n",
|
||
|
"dataset_for_segmentation = dataset_test[['customer_id'] + numeric_features + categorical_features]\n",
|
||
|
"\n",
|
||
|
"y_predict_proba = pipeline.predict_proba(X_test)[:, 1]\n",
|
||
|
"\n",
|
||
|
"dataset_for_segmentation['prediction_probability'] = y_predict_proba\n",
|
||
|
"\n",
|
||
|
"# Arrondir les valeurs de la colonne 'prediction_probability' et les multiplier par 10\n",
|
||
|
"dataset_for_segmentation['category'] = dataset_for_segmentation['prediction_probability'].apply(lambda x: int(x * 10))\n",
|
||
|
"\n",
|
||
|
"dataset_for_segmentation['prediction'] = y_pred\n",
|
||
|
"\n",
|
||
|
"def premiere_partie(chaine):\n",
|
||
|
" if chaine:\n",
|
||
|
" return chaine.split('_')[0]\n",
|
||
|
" else:\n",
|
||
|
" return None\n",
|
||
|
"\n",
|
||
|
"dataset_for_segmentation['company_number'] = dataset_for_segmentation['customer_id'].apply(lambda x: premiere_partie(x))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 73,
|
||
|
"id": "969f1f92-d715-4d74-85a7-437e72838cb5",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead tr th {\n",
|
||
|
" text-align: left;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead tr:last-of-type th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>nb_tickets</th>\n",
|
||
|
" <th>nb_purchases</th>\n",
|
||
|
" <th>total_amount</th>\n",
|
||
|
" <th>nb_suppliers</th>\n",
|
||
|
" <th>vente_internet_max</th>\n",
|
||
|
" <th>purchase_date_min</th>\n",
|
||
|
" <th>purchase_date_max</th>\n",
|
||
|
" <th>time_between_purchase</th>\n",
|
||
|
" <th>nb_tickets_internet</th>\n",
|
||
|
" <th>fidelity</th>\n",
|
||
|
" <th>gender_female</th>\n",
|
||
|
" <th>gender_male</th>\n",
|
||
|
" <th>gender_other</th>\n",
|
||
|
" <th>nb_campaigns</th>\n",
|
||
|
" <th>nb_campaigns_opened</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>category</th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>0.113637</td>\n",
|
||
|
" <td>0.006274</td>\n",
|
||
|
" <td>1.586366</td>\n",
|
||
|
" <td>0.005821</td>\n",
|
||
|
" <td>0.000647</td>\n",
|
||
|
" <td>548.790455</td>\n",
|
||
|
" <td>548.773103</td>\n",
|
||
|
" <td>-0.977118</td>\n",
|
||
|
" <td>0.001585</td>\n",
|
||
|
" <td>0.000776</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000032</td>\n",
|
||
|
" <td>0.999968</td>\n",
|
||
|
" <td>13.984219</td>\n",
|
||
|
" <td>1.302720</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>0.810841</td>\n",
|
||
|
" <td>0.128432</td>\n",
|
||
|
" <td>9.611292</td>\n",
|
||
|
" <td>0.125295</td>\n",
|
||
|
" <td>0.018186</td>\n",
|
||
|
" <td>525.437516</td>\n",
|
||
|
" <td>525.275222</td>\n",
|
||
|
" <td>-0.729328</td>\n",
|
||
|
" <td>0.054312</td>\n",
|
||
|
" <td>0.111832</td>\n",
|
||
|
" <td>0.245480</td>\n",
|
||
|
" <td>0.495929</td>\n",
|
||
|
" <td>0.258591</td>\n",
|
||
|
" <td>18.413562</td>\n",
|
||
|
" <td>3.718711</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1.159419</td>\n",
|
||
|
" <td>0.339253</td>\n",
|
||
|
" <td>15.182143</td>\n",
|
||
|
" <td>0.337577</td>\n",
|
||
|
" <td>0.323824</td>\n",
|
||
|
" <td>501.529129</td>\n",
|
||
|
" <td>501.415505</td>\n",
|
||
|
" <td>-0.554439</td>\n",
|
||
|
" <td>0.969939</td>\n",
|
||
|
" <td>0.304757</td>\n",
|
||
|
" <td>0.392570</td>\n",
|
||
|
" <td>0.297258</td>\n",
|
||
|
" <td>0.310173</td>\n",
|
||
|
" <td>17.395042</td>\n",
|
||
|
" <td>2.608084</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>2.153080</td>\n",
|
||
|
" <td>0.744161</td>\n",
|
||
|
" <td>27.820044</td>\n",
|
||
|
" <td>0.734881</td>\n",
|
||
|
" <td>0.600982</td>\n",
|
||
|
" <td>287.051054</td>\n",
|
||
|
" <td>286.675385</td>\n",
|
||
|
" <td>0.105360</td>\n",
|
||
|
" <td>1.776035</td>\n",
|
||
|
" <td>0.659878</td>\n",
|
||
|
" <td>0.288813</td>\n",
|
||
|
" <td>0.253244</td>\n",
|
||
|
" <td>0.457943</td>\n",
|
||
|
" <td>16.790421</td>\n",
|
||
|
" <td>4.173954</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>2.044749</td>\n",
|
||
|
" <td>0.777640</td>\n",
|
||
|
" <td>27.353145</td>\n",
|
||
|
" <td>0.754549</td>\n",
|
||
|
" <td>0.079213</td>\n",
|
||
|
" <td>297.179255</td>\n",
|
||
|
" <td>295.019902</td>\n",
|
||
|
" <td>1.898178</td>\n",
|
||
|
" <td>0.293760</td>\n",
|
||
|
" <td>0.894877</td>\n",
|
||
|
" <td>0.666980</td>\n",
|
||
|
" <td>0.301424</td>\n",
|
||
|
" <td>0.031596</td>\n",
|
||
|
" <td>16.954707</td>\n",
|
||
|
" <td>6.060621</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>3.237988</td>\n",
|
||
|
" <td>0.958520</td>\n",
|
||
|
" <td>46.637380</td>\n",
|
||
|
" <td>0.807655</td>\n",
|
||
|
" <td>0.484785</td>\n",
|
||
|
" <td>387.464785</td>\n",
|
||
|
" <td>380.145068</td>\n",
|
||
|
" <td>7.111357</td>\n",
|
||
|
" <td>2.080397</td>\n",
|
||
|
" <td>1.164958</td>\n",
|
||
|
" <td>0.497758</td>\n",
|
||
|
" <td>0.259769</td>\n",
|
||
|
" <td>0.242473</td>\n",
|
||
|
" <td>27.006406</td>\n",
|
||
|
" <td>12.457719</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>3.592233</td>\n",
|
||
|
" <td>1.102881</td>\n",
|
||
|
" <td>49.989226</td>\n",
|
||
|
" <td>0.878014</td>\n",
|
||
|
" <td>0.599906</td>\n",
|
||
|
" <td>268.627019</td>\n",
|
||
|
" <td>250.949344</td>\n",
|
||
|
" <td>17.539247</td>\n",
|
||
|
" <td>2.525994</td>\n",
|
||
|
" <td>1.420921</td>\n",
|
||
|
" <td>0.534607</td>\n",
|
||
|
" <td>0.304259</td>\n",
|
||
|
" <td>0.161134</td>\n",
|
||
|
" <td>14.073285</td>\n",
|
||
|
" <td>4.604134</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>3.747016</td>\n",
|
||
|
" <td>1.391266</td>\n",
|
||
|
" <td>40.710335</td>\n",
|
||
|
" <td>0.914702</td>\n",
|
||
|
" <td>0.160990</td>\n",
|
||
|
" <td>309.716173</td>\n",
|
||
|
" <td>274.795570</td>\n",
|
||
|
" <td>34.796876</td>\n",
|
||
|
" <td>0.844250</td>\n",
|
||
|
" <td>1.963028</td>\n",
|
||
|
" <td>0.650364</td>\n",
|
||
|
" <td>0.263464</td>\n",
|
||
|
" <td>0.086172</td>\n",
|
||
|
" <td>26.186317</td>\n",
|
||
|
" <td>8.891703</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>5.698276</td>\n",
|
||
|
" <td>1.567006</td>\n",
|
||
|
" <td>63.033699</td>\n",
|
||
|
" <td>0.907915</td>\n",
|
||
|
" <td>0.334248</td>\n",
|
||
|
" <td>326.485952</td>\n",
|
||
|
" <td>257.940194</td>\n",
|
||
|
" <td>68.425460</td>\n",
|
||
|
" <td>2.794279</td>\n",
|
||
|
" <td>2.413009</td>\n",
|
||
|
" <td>0.606583</td>\n",
|
||
|
" <td>0.251567</td>\n",
|
||
|
" <td>0.141850</td>\n",
|
||
|
" <td>30.987461</td>\n",
|
||
|
" <td>11.676332</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>14.505956</td>\n",
|
||
|
" <td>3.211571</td>\n",
|
||
|
" <td>107.288514</td>\n",
|
||
|
" <td>1.011628</td>\n",
|
||
|
" <td>0.157119</td>\n",
|
||
|
" <td>369.696066</td>\n",
|
||
|
" <td>209.280306</td>\n",
|
||
|
" <td>160.348544</td>\n",
|
||
|
" <td>3.514464</td>\n",
|
||
|
" <td>5.394498</td>\n",
|
||
|
" <td>0.669314</td>\n",
|
||
|
" <td>0.223766</td>\n",
|
||
|
" <td>0.106920</td>\n",
|
||
|
" <td>45.928247</td>\n",
|
||
|
" <td>18.241634</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10</th>\n",
|
||
|
" <td>2262.859155</td>\n",
|
||
|
" <td>45.619718</td>\n",
|
||
|
" <td>11051.732394</td>\n",
|
||
|
" <td>1.464789</td>\n",
|
||
|
" <td>0.154930</td>\n",
|
||
|
" <td>467.111875</td>\n",
|
||
|
" <td>31.146796</td>\n",
|
||
|
" <td>435.950994</td>\n",
|
||
|
" <td>54.295775</td>\n",
|
||
|
" <td>64.704225</td>\n",
|
||
|
" <td>0.507042</td>\n",
|
||
|
" <td>0.295775</td>\n",
|
||
|
" <td>0.197183</td>\n",
|
||
|
" <td>53.352113</td>\n",
|
||
|
" <td>26.070423</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
|
" mean mean mean mean \n",
|
||
|
"category \n",
|
||
|
"0 0.113637 0.006274 1.586366 0.005821 \n",
|
||
|
"1 0.810841 0.128432 9.611292 0.125295 \n",
|
||
|
"2 1.159419 0.339253 15.182143 0.337577 \n",
|
||
|
"3 2.153080 0.744161 27.820044 0.734881 \n",
|
||
|
"4 2.044749 0.777640 27.353145 0.754549 \n",
|
||
|
"5 3.237988 0.958520 46.637380 0.807655 \n",
|
||
|
"6 3.592233 1.102881 49.989226 0.878014 \n",
|
||
|
"7 3.747016 1.391266 40.710335 0.914702 \n",
|
||
|
"8 5.698276 1.567006 63.033699 0.907915 \n",
|
||
|
"9 14.505956 3.211571 107.288514 1.011628 \n",
|
||
|
"10 2262.859155 45.619718 11051.732394 1.464789 \n",
|
||
|
"\n",
|
||
|
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
|
" mean mean mean \n",
|
||
|
"category \n",
|
||
|
"0 0.000647 548.790455 548.773103 \n",
|
||
|
"1 0.018186 525.437516 525.275222 \n",
|
||
|
"2 0.323824 501.529129 501.415505 \n",
|
||
|
"3 0.600982 287.051054 286.675385 \n",
|
||
|
"4 0.079213 297.179255 295.019902 \n",
|
||
|
"5 0.484785 387.464785 380.145068 \n",
|
||
|
"6 0.599906 268.627019 250.949344 \n",
|
||
|
"7 0.160990 309.716173 274.795570 \n",
|
||
|
"8 0.334248 326.485952 257.940194 \n",
|
||
|
"9 0.157119 369.696066 209.280306 \n",
|
||
|
"10 0.154930 467.111875 31.146796 \n",
|
||
|
"\n",
|
||
|
" time_between_purchase nb_tickets_internet fidelity gender_female \\\n",
|
||
|
" mean mean mean mean \n",
|
||
|
"category \n",
|
||
|
"0 -0.977118 0.001585 0.000776 0.000000 \n",
|
||
|
"1 -0.729328 0.054312 0.111832 0.245480 \n",
|
||
|
"2 -0.554439 0.969939 0.304757 0.392570 \n",
|
||
|
"3 0.105360 1.776035 0.659878 0.288813 \n",
|
||
|
"4 1.898178 0.293760 0.894877 0.666980 \n",
|
||
|
"5 7.111357 2.080397 1.164958 0.497758 \n",
|
||
|
"6 17.539247 2.525994 1.420921 0.534607 \n",
|
||
|
"7 34.796876 0.844250 1.963028 0.650364 \n",
|
||
|
"8 68.425460 2.794279 2.413009 0.606583 \n",
|
||
|
"9 160.348544 3.514464 5.394498 0.669314 \n",
|
||
|
"10 435.950994 54.295775 64.704225 0.507042 \n",
|
||
|
"\n",
|
||
|
" gender_male gender_other nb_campaigns nb_campaigns_opened \n",
|
||
|
" mean mean mean mean \n",
|
||
|
"category \n",
|
||
|
"0 0.000032 0.999968 13.984219 1.302720 \n",
|
||
|
"1 0.495929 0.258591 18.413562 3.718711 \n",
|
||
|
"2 0.297258 0.310173 17.395042 2.608084 \n",
|
||
|
"3 0.253244 0.457943 16.790421 4.173954 \n",
|
||
|
"4 0.301424 0.031596 16.954707 6.060621 \n",
|
||
|
"5 0.259769 0.242473 27.006406 12.457719 \n",
|
||
|
"6 0.304259 0.161134 14.073285 4.604134 \n",
|
||
|
"7 0.263464 0.086172 26.186317 8.891703 \n",
|
||
|
"8 0.251567 0.141850 30.987461 11.676332 \n",
|
||
|
"9 0.223766 0.106920 45.928247 18.241634 \n",
|
||
|
"10 0.295775 0.197183 53.352113 26.070423 "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 73,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Grouper le DataFrame par la colonne 'category' et calculer les statistiques descriptives de chaque variable numérique\n",
|
||
|
"summary_stats = dataset_for_segmentation.groupby('category')[numeric_features].describe()\n",
|
||
|
"\n",
|
||
|
"# Sélectionner uniquement la colonne 'mean' pour chaque variable numérique\n",
|
||
|
"mean_stats = summary_stats.loc[:, (slice(None), 'mean')]\n",
|
||
|
"\n",
|
||
|
"# Afficher le DataFrame résultant\n",
|
||
|
"mean_stats"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.11.6"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|