2071 lines
454 KiB
Plaintext
2071 lines
454 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "3415114e-9577-4487-89eb-4931620ad9f0",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Predict Sales"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "f271eb45-1470-4764-8c2e-31374efa1fe5",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"import numpy as np\n",
|
||
|
"import os\n",
|
||
|
"import s3fs\n",
|
||
|
"import re\n",
|
||
|
"from sklearn.linear_model import LogisticRegression\n",
|
||
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
||
|
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
|
||
|
"from sklearn.utils import class_weight\n",
|
||
|
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||
|
"from sklearn.pipeline import Pipeline\n",
|
||
|
"from sklearn.compose import ColumnTransformer\n",
|
||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
||
|
"from sklearn.impute import SimpleImputer\n",
|
||
|
"from sklearn.model_selection import GridSearchCV\n",
|
||
|
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
|
||
|
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
|
||
|
"from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
|
||
|
"\n",
|
||
|
"import pickle\n",
|
||
|
"import warnings\n",
|
||
|
"#import scikitplot as skplt"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"id": "3fecb606-22e5-4dee-8efa-f8dff0832299",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"warnings.filterwarnings('ignore')\n",
|
||
|
"warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
|
||
|
"warnings.filterwarnings(\"ignore\", category=DataConversionWarning)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "ae591854-3003-4c75-a0c7-5abf04246e81",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Load Data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"id": "59dd4694-a812-4923-b995-a2ee86c74f85",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Create filesystem object\n",
|
||
|
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
|
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"id": "017f7e9a-3ba0-40fa-bdc8-51b98cc1fdb3",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def load_train_test():\n",
|
||
|
" BUCKET = \"projet-bdc2324-team1/Generalization/musee\"\n",
|
||
|
" File_path_train = BUCKET + \"/Train_set.csv\"\n",
|
||
|
" File_path_test = BUCKET + \"/Test_set.csv\"\n",
|
||
|
" \n",
|
||
|
" with fs.open( File_path_train, mode=\"rb\") as file_in:\n",
|
||
|
" dataset_train = pd.read_csv(file_in, sep=\",\")\n",
|
||
|
" # dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n",
|
||
|
"\n",
|
||
|
" with fs.open(File_path_test, mode=\"rb\") as file_in:\n",
|
||
|
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
|
||
|
" # dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n",
|
||
|
" \n",
|
||
|
" return dataset_train, dataset_test"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "c479b230-b4bd-4cfb-b76b-d9faf6d95772",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"dataset_train, dataset_test = load_train_test()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 26,
|
||
|
"id": "c24c446d-4e1c-4ac1-a048-f0b8d8559f36",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"customer_id 0\n",
|
||
|
"nb_tickets 0\n",
|
||
|
"nb_purchases 0\n",
|
||
|
"total_amount 0\n",
|
||
|
"nb_suppliers 0\n",
|
||
|
"vente_internet_max 0\n",
|
||
|
"purchase_date_min 0\n",
|
||
|
"purchase_date_max 0\n",
|
||
|
"time_between_purchase 0\n",
|
||
|
"nb_tickets_internet 0\n",
|
||
|
"street_id 0\n",
|
||
|
"structure_id 389658\n",
|
||
|
"mcp_contact_id 150354\n",
|
||
|
"fidelity 0\n",
|
||
|
"tenant_id 0\n",
|
||
|
"is_partner 0\n",
|
||
|
"deleted_at 434278\n",
|
||
|
"gender 0\n",
|
||
|
"is_email_true 0\n",
|
||
|
"opt_in 0\n",
|
||
|
"last_buying_date 183987\n",
|
||
|
"max_price 183987\n",
|
||
|
"ticket_sum 0\n",
|
||
|
"average_price 94783\n",
|
||
|
"average_purchase_delay 183987\n",
|
||
|
"average_price_basket 183987\n",
|
||
|
"average_ticket_basket 183987\n",
|
||
|
"total_price 89204\n",
|
||
|
"purchase_count 0\n",
|
||
|
"first_buying_date 183987\n",
|
||
|
"country 141237\n",
|
||
|
"gender_label 0\n",
|
||
|
"gender_female 0\n",
|
||
|
"gender_male 0\n",
|
||
|
"gender_other 0\n",
|
||
|
"country_fr 141237\n",
|
||
|
"nb_campaigns 0\n",
|
||
|
"nb_campaigns_opened 0\n",
|
||
|
"time_to_open 258182\n",
|
||
|
"y_has_purchased 0\n",
|
||
|
"dtype: int64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 26,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"dataset_train.isna().sum()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 27,
|
||
|
"id": "825d14a3-6967-4733-bfd4-64bf61c2bd43",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def features_target_split(dataset_train, dataset_test):\n",
|
||
|
" features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
|
||
|
" 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n",
|
||
|
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||
|
" X_train = dataset_train[features_l]\n",
|
||
|
" y_train = dataset_train[['y_has_purchased']]\n",
|
||
|
"\n",
|
||
|
" X_test = dataset_test[features_l]\n",
|
||
|
" y_test = dataset_test[['y_has_purchased']]\n",
|
||
|
" return X_train, X_test, y_train, y_test"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 28,
|
||
|
"id": "69eaec12-b30f-4d30-a461-ea520d5cbf77",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 29,
|
||
|
"id": "d039f31d-0093-46c6-9743-ddec1381f758",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Shape train : (434278, 17)\n",
|
||
|
"Shape test : (186120, 17)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(\"Shape train : \", X_train.shape)\n",
|
||
|
"print(\"Shape test : \", X_test.shape)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "a1d6de94-4e11-481a-a0ce-412bf29f692c",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Prepare preprocessing and Hyperparameters"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 30,
|
||
|
"id": "b808da43-c444-4e94-995a-7ec6ccd01e2d",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"{0.0: 0.5223906809346011, 1.0: 11.665359406898034}"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 30,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Compute Weights\n",
|
||
|
"weights = class_weight.compute_class_weight(class_weight = 'balanced', classes = np.unique(y_train['y_has_purchased']),\n",
|
||
|
" y = y_train['y_has_purchased'])\n",
|
||
|
"\n",
|
||
|
"weight_dict = {np.unique(y_train['y_has_purchased'])[i]: weights[i] for i in range(len(np.unique(y_train['y_has_purchased'])))}\n",
|
||
|
"weight_dict"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 59,
|
||
|
"id": "b32a79ea-907f-4dfc-9832-6c74bef3200c",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
|
||
|
" 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n",
|
||
|
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||
|
"\n",
|
||
|
"numeric_transformer = Pipeline(steps=[\n",
|
||
|
" #(\"imputer\", SimpleImputer(strategy=\"mean\")), \n",
|
||
|
" (\"scaler\", StandardScaler()) \n",
|
||
|
"])\n",
|
||
|
"\n",
|
||
|
"categorical_features = ['opt_in'] \n",
|
||
|
"\n",
|
||
|
"# Transformer for the categorical features\n",
|
||
|
"categorical_transformer = Pipeline(steps=[\n",
|
||
|
" #(\"imputer\", SimpleImputer(strategy=\"most_frequent\")), # Impute missing values with the most frequent\n",
|
||
|
" (\"onehot\", OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n",
|
||
|
"])\n",
|
||
|
"\n",
|
||
|
"preproc = ColumnTransformer(\n",
|
||
|
" transformers=[\n",
|
||
|
" (\"num\", numeric_transformer, numeric_features),\n",
|
||
|
" (\"cat\", categorical_transformer, categorical_features)\n",
|
||
|
" ]\n",
|
||
|
")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 32,
|
||
|
"id": "9809a688-bfbc-4685-a77f-17a8b2b79ab3",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Set loss\n",
|
||
|
"balanced_scorer = make_scorer(balanced_accuracy_score)\n",
|
||
|
"recall_scorer = make_scorer(recall_score)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 33,
|
||
|
"id": "4f9b2bbf-5f8a-4ac1-8e6c-51bd0dd8ac85",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def draw_confusion_matrix(y_test, y_pred):\n",
|
||
|
" conf_matrix = confusion_matrix(y_test, y_pred)\n",
|
||
|
" sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Class 0', 'Class 1'], yticklabels=['Class 0', 'Class 1'])\n",
|
||
|
" plt.xlabel('Predicted')\n",
|
||
|
" plt.ylabel('Actual')\n",
|
||
|
" plt.title('Confusion Matrix')\n",
|
||
|
" plt.show()\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"def draw_roc_curve(X_test, y_test):\n",
|
||
|
" y_pred_prob = pipeline.predict_proba(X_test)[:, 1]\n",
|
||
|
"\n",
|
||
|
" # Calcul des taux de faux positifs (FPR) et de vrais positifs (TPR)\n",
|
||
|
" fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob, pos_label=1)\n",
|
||
|
" \n",
|
||
|
" # Calcul de l'aire sous la courbe ROC (AUC)\n",
|
||
|
" roc_auc = auc(fpr, tpr)\n",
|
||
|
" \n",
|
||
|
" plt.figure(figsize = (14, 8))\n",
|
||
|
" plt.plot(fpr, tpr, label=\"ROC curve(area = %0.3f)\" % roc_auc)\n",
|
||
|
" plt.plot([0, 1], [0, 1], color=\"red\",label=\"Random Baseline\", linestyle=\"--\")\n",
|
||
|
" plt.grid(color='gray', linestyle='--', linewidth=0.5)\n",
|
||
|
" plt.xlabel('Taux de faux positifs (FPR)')\n",
|
||
|
" plt.ylabel('Taux de vrais positifs (TPR)')\n",
|
||
|
" plt.title('Courbe ROC : modèle logistique')\n",
|
||
|
" plt.legend(loc=\"lower right\")\n",
|
||
|
" plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 34,
|
||
|
"id": "206d9a95-7c37-4506-949b-e77d225e42c5",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Hyperparameter\n",
|
||
|
"param_grid = {'logreg__C': np.logspace(-10, 6, 17, base=2),\n",
|
||
|
" 'logreg__penalty': ['l1', 'l2'],\n",
|
||
|
" 'logreg__class_weight': ['balanced', weight_dict]} "
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 35,
|
||
|
"id": "7ff2f7bd-efc1-4f7c-a3c9-caa916aa2f2b",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<style>#sk-container-id-4 {\n",
|
||
|
" /* Definition of color scheme common for light and dark mode */\n",
|
||
|
" --sklearn-color-text: black;\n",
|
||
|
" --sklearn-color-line: gray;\n",
|
||
|
" /* Definition of color scheme for unfitted estimators */\n",
|
||
|
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
|
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
|
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
|
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
|
" /* Definition of color scheme for fitted estimators */\n",
|
||
|
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
|
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
|
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
|
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
|
"\n",
|
||
|
" /* Specific color for light theme */\n",
|
||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
|
" --sklearn-color-icon: #696969;\n",
|
||
|
"\n",
|
||
|
" @media (prefers-color-scheme: dark) {\n",
|
||
|
" /* Redefinition of color scheme for dark theme */\n",
|
||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
|
" --sklearn-color-icon: #878787;\n",
|
||
|
" }\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 pre {\n",
|
||
|
" padding: 0;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 input.sk-hidden--visually {\n",
|
||
|
" border: 0;\n",
|
||
|
" clip: rect(1px 1px 1px 1px);\n",
|
||
|
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
|
" height: 1px;\n",
|
||
|
" margin: -1px;\n",
|
||
|
" overflow: hidden;\n",
|
||
|
" padding: 0;\n",
|
||
|
" position: absolute;\n",
|
||
|
" width: 1px;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-dashed-wrapped {\n",
|
||
|
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
|
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" padding-bottom: 0.4em;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-container {\n",
|
||
|
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
|
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
|
" so we also need the `!important` here to be able to override the\n",
|
||
|
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
|
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
|
" display: inline-block !important;\n",
|
||
|
" position: relative;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-text-repr-fallback {\n",
|
||
|
" display: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"div.sk-parallel-item,\n",
|
||
|
"div.sk-serial,\n",
|
||
|
"div.sk-item {\n",
|
||
|
" /* draw centered vertical line to link estimators */\n",
|
||
|
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
|
" background-size: 2px 100%;\n",
|
||
|
" background-repeat: no-repeat;\n",
|
||
|
" background-position: center center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Parallel-specific style estimator block */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-parallel-item::after {\n",
|
||
|
" content: \"\";\n",
|
||
|
" width: 100%;\n",
|
||
|
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
|
" flex-grow: 1;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-parallel {\n",
|
||
|
" display: flex;\n",
|
||
|
" align-items: stretch;\n",
|
||
|
" justify-content: center;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" position: relative;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-parallel-item {\n",
|
||
|
" display: flex;\n",
|
||
|
" flex-direction: column;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
|
||
|
" align-self: flex-end;\n",
|
||
|
" width: 50%;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
|
||
|
" align-self: flex-start;\n",
|
||
|
" width: 50%;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
|
||
|
" width: 0;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Serial-specific style estimator block */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-serial {\n",
|
||
|
" display: flex;\n",
|
||
|
" flex-direction: column;\n",
|
||
|
" align-items: center;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" padding-right: 1em;\n",
|
||
|
" padding-left: 1em;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
|
"clickable and can be expanded/collapsed.\n",
|
||
|
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
|
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
|
"*/\n",
|
||
|
"\n",
|
||
|
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-toggleable {\n",
|
||
|
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
|
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Toggleable label */\n",
|
||
|
"#sk-container-id-4 label.sk-toggleable__label {\n",
|
||
|
" cursor: pointer;\n",
|
||
|
" display: block;\n",
|
||
|
" width: 100%;\n",
|
||
|
" margin-bottom: 0;\n",
|
||
|
" padding: 0.5em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" text-align: center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
|
||
|
" /* Arrow on the left of the label */\n",
|
||
|
" content: \"▸\";\n",
|
||
|
" float: left;\n",
|
||
|
" margin-right: 0.25em;\n",
|
||
|
" color: var(--sklearn-color-icon);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Toggleable content - dropdown */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-toggleable__content {\n",
|
||
|
" max-height: 0;\n",
|
||
|
" max-width: 0;\n",
|
||
|
" overflow: hidden;\n",
|
||
|
" text-align: left;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-toggleable__content pre {\n",
|
||
|
" margin: 0.2em;\n",
|
||
|
" border-radius: 0.25em;\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
|
" /* Expand drop-down */\n",
|
||
|
" max-height: 200px;\n",
|
||
|
" max-width: 100%;\n",
|
||
|
" overflow: auto;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
|
" content: \"▾\";\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator-specific style */\n",
|
||
|
"\n",
|
||
|
"/* Colorize estimator box */\n",
|
||
|
"#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
|
||
|
"#sk-container-id-4 div.sk-label label {\n",
|
||
|
" /* The background is the default theme color */\n",
|
||
|
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover, darken the color of the background */\n",
|
||
|
"#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Label box, darken color on hover, fitted */\n",
|
||
|
"#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator label */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-label label {\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" font-weight: bold;\n",
|
||
|
" display: inline-block;\n",
|
||
|
" line-height: 1.2em;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-label-container {\n",
|
||
|
" text-align: center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator-specific */\n",
|
||
|
"#sk-container-id-4 div.sk-estimator {\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
|
" border-radius: 0.25em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" margin-bottom: 0.5em;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-estimator.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* on hover */\n",
|
||
|
"#sk-container-id-4 div.sk-estimator:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
|
"\n",
|
||
|
"/* Common style for \"i\" and \"?\" */\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link,\n",
|
||
|
"a:link.sk-estimator-doc-link,\n",
|
||
|
"a:visited.sk-estimator-doc-link {\n",
|
||
|
" float: right;\n",
|
||
|
" font-size: smaller;\n",
|
||
|
" line-height: 1em;\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" border-radius: 1em;\n",
|
||
|
" height: 1em;\n",
|
||
|
" width: 1em;\n",
|
||
|
" text-decoration: none !important;\n",
|
||
|
" margin-left: 1ex;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link.fitted,\n",
|
||
|
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
|
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover */\n",
|
||
|
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
|
".sk-estimator-doc-link:hover,\n",
|
||
|
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
|
".sk-estimator-doc-link:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
|
".sk-estimator-doc-link.fitted:hover,\n",
|
||
|
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
|
".sk-estimator-doc-link.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
|
".sk-estimator-doc-link span {\n",
|
||
|
" display: none;\n",
|
||
|
" z-index: 9999;\n",
|
||
|
" position: relative;\n",
|
||
|
" font-weight: normal;\n",
|
||
|
" right: .2ex;\n",
|
||
|
" padding: .5ex;\n",
|
||
|
" margin: .5ex;\n",
|
||
|
" width: min-content;\n",
|
||
|
" min-width: 20ex;\n",
|
||
|
" max-width: 50ex;\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link.fitted span {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
|
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link:hover span {\n",
|
||
|
" display: block;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 a.estimator_doc_link {\n",
|
||
|
" float: right;\n",
|
||
|
" font-size: 1rem;\n",
|
||
|
" line-height: 1em;\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" border-radius: 1rem;\n",
|
||
|
" height: 1rem;\n",
|
||
|
" width: 1rem;\n",
|
||
|
" text-decoration: none;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 a.estimator_doc_link.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover */\n",
|
||
|
"#sk-container-id-4 a.estimator_doc_link:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
|
||
|
" 1.0: 11.665359406898034},\n",
|
||
|
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-22\" type=\"checkbox\" ><label for=\"sk-estimator-id-22\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> Pipeline<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
|
||
|
" 1.0: 11.665359406898034},\n",
|
||
|
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-23\" type=\"checkbox\" ><label for=\"sk-estimator-id-23\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content \"><pre>ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
|
" 'nb_suppliers', 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min', 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet', 'fidelity',\n",
|
||
|
" 'is_email_true', 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male', 'gender_other', 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-24\" type=\"checkbox\" ><label for=\"sk-estimator-id-24\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">num</label><div class=\"sk-toggleable__content \"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-25\" type=\"checkbox\" ><label for=\"sk-estimator-id-25\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> StandardScaler<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content \"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-26\" type=\"checkbox\" ><label for=\"sk-estimator-id-26\" class=\"sk-toggleable__label sk-toggleable__label-arrow \">cat</label><div class=\"sk-toggleable__content \"><pre>['opt_in']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div 
class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-27\" type=\"checkbox\" ><label for=\"sk-estimator-id-27\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> OneHotEncoder<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content \"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-28\" type=\"checkbox\" ><label for=\"sk-estimator-id-28\" class=\"sk-toggleable__label sk-toggleable__label-arrow \"> LogisticRegression<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content \"><pre>LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
|
||
|
" 1.0: 11.665359406898034},\n",
|
||
|
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
|
||
|
" 1.0: 11.665359406898034},\n",
|
||
|
" max_iter=5000, solver='saga'))])"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 35,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Pipeline\n",
|
||
|
"pipeline = Pipeline(steps=[\n",
|
||
|
" ('preprocessor', preproc),\n",
|
||
|
" ('logreg', LogisticRegression(solver='saga', class_weight = weight_dict,\n",
|
||
|
" max_iter=5000)) \n",
|
||
|
"])\n",
|
||
|
"\n",
|
||
|
"pipeline.set_output(transform=\"pandas\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "ed415f60-9663-4179-877b-233faf6e1645",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Baseline"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 36,
|
||
|
"id": "2b467511-2ae5-4a16-a502-397c3460471d",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<style>#sk-container-id-5 {\n",
|
||
|
" /* Definition of color scheme common for light and dark mode */\n",
|
||
|
" --sklearn-color-text: black;\n",
|
||
|
" --sklearn-color-line: gray;\n",
|
||
|
" /* Definition of color scheme for unfitted estimators */\n",
|
||
|
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
|
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
|
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
|
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
|
" /* Definition of color scheme for fitted estimators */\n",
|
||
|
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
|
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
|
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
|
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
|
"\n",
|
||
|
" /* Specific color for light theme */\n",
|
||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
|
" --sklearn-color-icon: #696969;\n",
|
||
|
"\n",
|
||
|
" @media (prefers-color-scheme: dark) {\n",
|
||
|
" /* Redefinition of color scheme for dark theme */\n",
|
||
|
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
|
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
|
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
|
" --sklearn-color-icon: #878787;\n",
|
||
|
" }\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 pre {\n",
|
||
|
" padding: 0;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 input.sk-hidden--visually {\n",
|
||
|
" border: 0;\n",
|
||
|
" clip: rect(1px 1px 1px 1px);\n",
|
||
|
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
|
" height: 1px;\n",
|
||
|
" margin: -1px;\n",
|
||
|
" overflow: hidden;\n",
|
||
|
" padding: 0;\n",
|
||
|
" position: absolute;\n",
|
||
|
" width: 1px;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-dashed-wrapped {\n",
|
||
|
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
|
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" padding-bottom: 0.4em;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-container {\n",
|
||
|
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
|
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
|
" so we also need the `!important` here to be able to override the\n",
|
||
|
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
|
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
|
" display: inline-block !important;\n",
|
||
|
" position: relative;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-text-repr-fallback {\n",
|
||
|
" display: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"div.sk-parallel-item,\n",
|
||
|
"div.sk-serial,\n",
|
||
|
"div.sk-item {\n",
|
||
|
" /* draw centered vertical line to link estimators */\n",
|
||
|
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
|
" background-size: 2px 100%;\n",
|
||
|
" background-repeat: no-repeat;\n",
|
||
|
" background-position: center center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Parallel-specific style estimator block */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-parallel-item::after {\n",
|
||
|
" content: \"\";\n",
|
||
|
" width: 100%;\n",
|
||
|
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
|
" flex-grow: 1;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-parallel {\n",
|
||
|
" display: flex;\n",
|
||
|
" align-items: stretch;\n",
|
||
|
" justify-content: center;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" position: relative;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-parallel-item {\n",
|
||
|
" display: flex;\n",
|
||
|
" flex-direction: column;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-parallel-item:first-child::after {\n",
|
||
|
" align-self: flex-end;\n",
|
||
|
" width: 50%;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-parallel-item:last-child::after {\n",
|
||
|
" align-self: flex-start;\n",
|
||
|
" width: 50%;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-parallel-item:only-child::after {\n",
|
||
|
" width: 0;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Serial-specific style estimator block */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-serial {\n",
|
||
|
" display: flex;\n",
|
||
|
" flex-direction: column;\n",
|
||
|
" align-items: center;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" padding-right: 1em;\n",
|
||
|
" padding-left: 1em;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
|
"clickable and can be expanded/collapsed.\n",
|
||
|
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
|
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
|
"*/\n",
|
||
|
"\n",
|
||
|
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-toggleable {\n",
|
||
|
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
|
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Toggleable label */\n",
|
||
|
"#sk-container-id-5 label.sk-toggleable__label {\n",
|
||
|
" cursor: pointer;\n",
|
||
|
" display: block;\n",
|
||
|
" width: 100%;\n",
|
||
|
" margin-bottom: 0;\n",
|
||
|
" padding: 0.5em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" text-align: center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 label.sk-toggleable__label-arrow:before {\n",
|
||
|
" /* Arrow on the left of the label */\n",
|
||
|
" content: \"▸\";\n",
|
||
|
" float: left;\n",
|
||
|
" margin-right: 0.25em;\n",
|
||
|
" color: var(--sklearn-color-icon);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Toggleable content - dropdown */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-toggleable__content {\n",
|
||
|
" max-height: 0;\n",
|
||
|
" max-width: 0;\n",
|
||
|
" overflow: hidden;\n",
|
||
|
" text-align: left;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-toggleable__content.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-toggleable__content pre {\n",
|
||
|
" margin: 0.2em;\n",
|
||
|
" border-radius: 0.25em;\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-toggleable__content.fitted pre {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
|
" /* Expand drop-down */\n",
|
||
|
" max-height: 200px;\n",
|
||
|
" max-width: 100%;\n",
|
||
|
" overflow: auto;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
|
" content: \"▾\";\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator-specific style */\n",
|
||
|
"\n",
|
||
|
"/* Colorize estimator box */\n",
|
||
|
"#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-label label.sk-toggleable__label,\n",
|
||
|
"#sk-container-id-5 div.sk-label label {\n",
|
||
|
" /* The background is the default theme color */\n",
|
||
|
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover, darken the color of the background */\n",
|
||
|
"#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Label box, darken color on hover, fitted */\n",
|
||
|
"#sk-container-id-5 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator label */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-label label {\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" font-weight: bold;\n",
|
||
|
" display: inline-block;\n",
|
||
|
" line-height: 1.2em;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-label-container {\n",
|
||
|
" text-align: center;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Estimator-specific */\n",
|
||
|
"#sk-container-id-5 div.sk-estimator {\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
|
" border-radius: 0.25em;\n",
|
||
|
" box-sizing: border-box;\n",
|
||
|
" margin-bottom: 0.5em;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-estimator.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* on hover */\n",
|
||
|
"#sk-container-id-5 div.sk-estimator:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 div.sk-estimator.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
|
"\n",
|
||
|
"/* Common style for \"i\" and \"?\" */\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link,\n",
|
||
|
"a:link.sk-estimator-doc-link,\n",
|
||
|
"a:visited.sk-estimator-doc-link {\n",
|
||
|
" float: right;\n",
|
||
|
" font-size: smaller;\n",
|
||
|
" line-height: 1em;\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" border-radius: 1em;\n",
|
||
|
" height: 1em;\n",
|
||
|
" width: 1em;\n",
|
||
|
" text-decoration: none !important;\n",
|
||
|
" margin-left: 1ex;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link.fitted,\n",
|
||
|
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
|
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover */\n",
|
||
|
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
|
".sk-estimator-doc-link:hover,\n",
|
||
|
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
|
".sk-estimator-doc-link:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
|
".sk-estimator-doc-link.fitted:hover,\n",
|
||
|
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
|
".sk-estimator-doc-link.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
|
".sk-estimator-doc-link span {\n",
|
||
|
" display: none;\n",
|
||
|
" z-index: 9999;\n",
|
||
|
" position: relative;\n",
|
||
|
" font-weight: normal;\n",
|
||
|
" right: .2ex;\n",
|
||
|
" padding: .5ex;\n",
|
||
|
" margin: .5ex;\n",
|
||
|
" width: min-content;\n",
|
||
|
" min-width: 20ex;\n",
|
||
|
" max-width: 50ex;\n",
|
||
|
" color: var(--sklearn-color-text);\n",
|
||
|
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
|
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link.fitted span {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
|
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
".sk-estimator-doc-link:hover span {\n",
|
||
|
" display: block;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 a.estimator_doc_link {\n",
|
||
|
" float: right;\n",
|
||
|
" font-size: 1rem;\n",
|
||
|
" line-height: 1em;\n",
|
||
|
" font-family: monospace;\n",
|
||
|
" background-color: var(--sklearn-color-background);\n",
|
||
|
" border-radius: 1rem;\n",
|
||
|
" height: 1rem;\n",
|
||
|
" width: 1rem;\n",
|
||
|
" text-decoration: none;\n",
|
||
|
" /* unfitted */\n",
|
||
|
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
|
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 a.estimator_doc_link.fitted {\n",
|
||
|
" /* fitted */\n",
|
||
|
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
|
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"/* On hover */\n",
|
||
|
"#sk-container-id-5 a.estimator_doc_link:hover {\n",
|
||
|
" /* unfitted */\n",
|
||
|
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
|
" color: var(--sklearn-color-background);\n",
|
||
|
" text-decoration: none;\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"#sk-container-id-5 a.estimator_doc_link.fitted:hover {\n",
|
||
|
" /* fitted */\n",
|
||
|
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
|
"}\n",
|
||
|
"</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
|
||
|
" 1.0: 11.665359406898034},\n",
|
||
|
" max_iter=5000, solver='saga'))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-29\" type=\"checkbox\" ><label for=\"sk-estimator-id-29\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> Pipeline<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
|
||
|
" 1.0: 11.665359406898034},\n",
|
||
|
" max_iter=5000, solver='saga'))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-30\" type=\"checkbox\" ><label for=\"sk-estimator-id-30\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler', StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases', 'total_amount',\n",
|
||
|
" 'nb_suppliers', 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min', 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet', 'fidelity',\n",
|
||
|
" 'is_email_true', 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male', 'gender_other', 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-31\" type=\"checkbox\" ><label for=\"sk-estimator-id-31\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">num</label><div class=\"sk-toggleable__content fitted\"><pre>['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-32\" type=\"checkbox\" ><label for=\"sk-estimator-id-32\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> StandardScaler<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.StandardScaler.html\">?<span>Documentation for StandardScaler</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>StandardScaler()</pre></div> </div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-33\" type=\"checkbox\" ><label for=\"sk-estimator-id-33\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">cat</label><div class=\"sk-toggleable__content fitted\"><pre>['opt_in']</pre></div> </div></div><div class=\"sk-serial\"><div 
class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-34\" type=\"checkbox\" ><label for=\"sk-estimator-id-34\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> OneHotEncoder<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>OneHotEncoder(handle_unknown='ignore', sparse_output=False)</pre></div> </div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-35\" type=\"checkbox\" ><label for=\"sk-estimator-id-35\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
|
||
|
" 1.0: 11.665359406898034},\n",
|
||
|
" max_iter=5000, solver='saga')</pre></div> </div></div></div></div></div></div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
"Pipeline(steps=[('preprocessor',\n",
|
||
|
" ColumnTransformer(transformers=[('num',\n",
|
||
|
" Pipeline(steps=[('scaler',\n",
|
||
|
" StandardScaler())]),\n",
|
||
|
" ['nb_tickets', 'nb_purchases',\n",
|
||
|
" 'total_amount',\n",
|
||
|
" 'nb_suppliers',\n",
|
||
|
" 'vente_internet_max',\n",
|
||
|
" 'purchase_date_min',\n",
|
||
|
" 'purchase_date_max',\n",
|
||
|
" 'time_between_purchase',\n",
|
||
|
" 'nb_tickets_internet',\n",
|
||
|
" 'fidelity', 'is_email_true',\n",
|
||
|
" 'opt_in', 'gender_female',\n",
|
||
|
" 'gender_male',\n",
|
||
|
" 'gender_other',\n",
|
||
|
" 'nb_campaigns',\n",
|
||
|
" 'nb_campaigns_opened']),\n",
|
||
|
" ('cat',\n",
|
||
|
" Pipeline(steps=[('onehot',\n",
|
||
|
" OneHotEncoder(handle_unknown='ignore',\n",
|
||
|
" sparse_output=False))]),\n",
|
||
|
" ['opt_in'])])),\n",
|
||
|
" ('logreg',\n",
|
||
|
" LogisticRegression(class_weight={0.0: 0.5223906809346011,\n",
|
||
|
" 1.0: 11.665359406898034},\n",
|
||
|
" max_iter=5000, solver='saga'))])"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 36,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"pipeline.fit(X_train, y_train)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 38,
|
||
|
"id": "6356e870-0dfc-4e60-9e48-e2de5e7f9f87",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Accuracy Score: 0.9083440790887599\n",
|
||
|
"F1 Score: 0.4349266289045679\n",
|
||
|
"Recall Score: 0.8231974921630094\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"y_pred = pipeline.predict(X_test)\n",
|
||
|
"\n",
|
||
|
"# Evaluate on the test set: accuracy, F1 and recall\n",
|
||
|
"acc = accuracy_score(y_test, y_pred)\n",
|
||
|
"print(f\"Accuracy Score: {acc}\")\n",
|
||
|
"\n",
|
||
|
"f1 = f1_score(y_test, y_pred)\n",
|
||
|
"print(f\"F1 Score: {f1}\")\n",
|
||
|
"\n",
|
||
|
"recall = recall_score(y_test, y_pred)\n",
|
||
|
"print(f\"Recall Score: {recall}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 39,
|
||
|
"id": "09387a09-0d53-4c54-baac-f3c2a57a629a",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAHFCAYAAADhWLMfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABi10lEQVR4nO3deVxU9f7H8deIMCLKhCLguKSWkoilYSlqqbngguatrgtFUoZetbwkblRuLeKWVu7ZYtfsUjfTq2WEZWmmuJCkqGnlgiaIC+GSAuH8/vDn3EZQwWbR8f3scR4P55zP+Z7vGRv58Pl+v2cMFovFgoiIiIgbKOfqDoiIiIjYixIbERERcRtKbERERMRtKLERERERt6HERkRERNyGEhsRERFxG0psRERExG0osRERERG3ocRGRERE3IYSG3Fr27Zt44knnqBu3bpUqFCBSpUqcffddzNlyhROnDjh0Gtv3bqVNm3aYDKZMBgMvPbaa3a/hsFgYPz48XZv92oWLlyIwWDAYDDwzTffFDtusVi4/fbbMRgMtG3b9pquMWfOHBYuXFimc7755pvL9klEbg7lXd0BEUdZsGABgwcPJjg4mBEjRhASEkJhYSFbtmxh3rx5bNiwgaVLlzrs+k8++SRnzpwhKSkJPz8/6tSpY/drbNiwgZo1a9q93dKqXLkyb7/9drHkZc2aNfzyyy9Urlz5mtueM2cO/v7+xMTElPqcu+++mw0bNhASEnLN1xWRG5sSG3FLGzZsYNCgQXTs2JFly5ZhNBqtxzp27Eh8fDzJyckO7UNGRgaxsbF06dLFYddo0aKFw9oujd69e7N48WJmz56Nr6+vdf/bb79NeHg4J0+edEo/CgsLMRgM+Pr6uvw9ERHX0lCUuKWJEydiMBh48803bZKai7y8vOjRo4f19fnz55kyZQp33HEHRqORgIAAHn/8cQ4dOmRzXtu2bQkNDWXz5s3cd999VKxYkXr16jFp0iTOnz8P/G+Y5o8//mDu3LnWIRuA8ePHW//8ZxfP2b9/v3Xf6tWradu2LVWrVsXb25vatWvz8MMP8/vvv1tjShqKysjI4MEHH8TPz48KFSrQpEkT3nvvPZuYi0M2//73v3n++ecxm834+vrSoUMHdu/eXbo3Gejbty8A//73v6378vLyWLJkCU8++WSJ50yYMIHmzZtTpUoVfH19ufvuu3n77bf58/fx1qlThx07drBmzRrr+3ex4nWx74sWLSI+Pp4aNWpgNBr5+eefiw1FHTt2jFq1atGyZUsKCwut7e/cuRMfHx+io6NLfa8icmNQYiNup6ioiNWrVxMWFkatWrVKdc6gQYMYNWoUHTt2ZPny5bz00kskJyfTsmVLjh07ZhObnZ3No48+ymOPPcby5cvp0qULCQkJvP/++wB069aNDRs2APDII4+wYcMG6+vS2r9/P926dcPLy4t33nmH5ORkJk2ahI+PDwUFBZc9b/fu3bRs2ZIdO3bwxhtv8MknnxASEkJMTAxTpkwpFv/cc89x4MAB3nrrLd58801++uknunfvTlFRUan66evryyOPPMI777xj3ffvf/+bcuXK0bt378ve28CBA/noo4/45JNPeOihh3jmmWd46aWXrDFLly6lXr16NG3a1Pr+XTpsmJCQQGZmJvPmzWPFihUEBAQUu5a/vz9JSUls3ryZUaNGAfD777/z97//ndq1azNv3rxS3aeI3EAsIm4mOzvbAlj69OlTqvhdu3ZZAMvgwYNt9m/cuNECWJ577jnrvjZt2lgAy8aNG21iQ0JCLBERETb7AMuQIUNs9o0bN85S0sfu3XfftQCWffv2WSwWi+Xjjz+2AJb09PQr9h2wjBs3zvq6T58+FqPRaMnMzLSJ69Kli6VixYqW3377zWKxWCxff/21BbB07drVJu6jjz6yAJYNGzZc8boX+7t582ZrWxkZGRaLxWK55557LDExMRaLxWJp1KiRpU2bNpdtp6
ioyFJYWGh58cUXLVWrVrWcP3/eeuxy51683v3333/ZY19//bXN/smTJ1sAy9KlSy39+vWzeHt7W7Zt23bFexSRG5MqNnLT+/rrrwGKTVK99957adiwIV999ZXN/qCgIO69916bfXfeeScHDhywW5+aNGmCl5cXAwYM4L333mPv3r2lOm/16tW0b9++WKUqJiaG33//vVjl6M/DcXDhPoAy3UubNm247bbbeOedd9i+fTubN2++7DDUxT526NABk8mEh4cHnp6ejB07luPHj5OTk1Pq6z788MOljh0xYgTdunWjb9++vPfee8ycOZPGjRuX+nwRuXEosRG34+/vT8WKFdm3b1+p4o8fPw5A9erVix0zm83W4xdVrVq1WJzRaOTs2bPX0NuS3XbbbXz55ZcEBAQwZMgQbrvtNm677TZef/31K553/Pjxy97HxeN/dum9XJyPVJZ7MRgMPPHEE7z//vvMmzePBg0acN9995UYu2nTJjp16gRcWLX23XffsXnzZp5//vkyX7ek+7xSH2NiYjh37hxBQUGaWyPixpTYiNvx8PCgffv2pKWlFZv8W5KLP9yzsrKKHTt8+DD+/v5261uFChUAyM/Pt9l/6TwegPvuu48VK1aQl5dHamoq4eHhxMXFkZSUdNn2q1atetn7AOx6L38WExPDsWPHmDdvHk888cRl45KSkvD09OTTTz+lV69etGzZkmbNml3TNUuahH05WVlZDBkyhCZNmnD8+HGGDx9+TdcUkeufEhtxSwkJCVgsFmJjY0ucbFtYWMiKFSsAeOCBBwCsk38v2rx5M7t27aJ9+/Z269fFlT3btm2z2X+xLyXx8PCgefPmzJ49G4Dvv//+srHt27dn9erV1kTmon/9619UrFjRYUuha9SowYgRI+jevTv9+vW7bJzBYKB8+fJ4eHhY9509e5ZFixYVi7VXFayoqIi+fftiMBj4/PPPSUxMZObMmXzyySd/uW0Ruf7oOTbilsLDw5k7dy6DBw8mLCyMQYMG0ahRIwoLC9m6dStvvvkmoaGhdO/eneDgYAYMGMDMmTMpV64cXbp0Yf/+/YwZM4ZatWrx7LPP2q1fXbt2pUqVKvTv358XX3yR8uXLs3DhQg4ePGgTN2/ePFavXk23bt2oXbs2586ds6486tChw2XbHzduHJ9++int2rVj7NixVKlShcWLF/PZZ58xZcoUTCaT3e7lUpMmTbpqTLdu3Zg+fTpRUVEMGDCA48ePM23atBKX5Ddu3JikpCQ+/PBD6tWrR4UKFa5pXsy4ceP49ttvSUlJISgoiPj4eNasWUP//v1p2rQpdevWLXObInL9UmIjbis2NpZ7772XGTNmMHnyZLKzs/H09KRBgwZERUXx9NNPW2Pnzp3Lbbfdxttvv83s2bMxmUx07tyZxMTEEufUXCtfX1+Sk5OJi4vjscce45ZbbuGpp56iS5cuPPXUU9a4Jk2akJKSwrhx48jOzqZSpUqEhoayfPly6xyVkgQHB7N+/Xqee+45hgwZwtmzZ2nYsCHvvvtumZ7g6ygPPPAA77zzDpMnT6Z79+7UqFGD2NhYAgIC6N+/v03shAkTyMrKIjY2llOnTnHrrbfaPOenNFatWkViYiJjxoyxqbwtXLiQpk2b0rt3b9atW4eXl5c9bk9ErgMGi+VPT8USERERuYFpjo2IiIi4DSU2IiIi4jaU2IiIiIjbUGIjIiIibkOJjYiIiLgNJTYiIiLiNpTYiIiIiNtwywf0eTd9+upBIjehvd9Md3UXRK471U2Of0CjvX4und06yy7tuDNVbERERMRtKLERERFxNEM5+2xltHbtWrp3747ZbMZgMLBs2bJiMbt27aJHjx6YTCYqV65MixYtyMzMtB7Pz8/nmWeewd/fHx8fH3r06MGhQ4ds2sjNzSU6OhqTyYTJZCI6OprffvvNJiYzM5Pu3bvj4+ODv78/Q4cOLfYlxdu3b6dNmzZ4e3tTo0YNXnzxRcr6BQlKbE
RERBzNYLDPVkZnzpzhrrvuYtaskoewfvnlF1q3bs0dd9zBN998ww8//MCYMWOoUKGCNSYuLo6lS5eSlJTEunXrOH3
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"draw_confusion_matrix(y_test, y_pred)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 40,
|
||
|
"id": "580b58d7-596f-4207-8c99-4365aba2bc9f",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUddrG8e9MeieF0Lv0XgQBpQpIUVBAZO0ruyq7umJbsXcUC7ZVd33trogo4gqIoIiIoIKACIggvQVIIL1NOe8fBwKHCZhAkjOTuT/XlYuZM2dmniTcM8mTX3EYhmEgIiIiIiIiIiJBw2l3ASIiIiIiIiIiUrXUEBIRERERERERCTJqCImIiIiIiIiIBBk1hEREREREREREgowaQiIiIiIiIiIiQUYNIRERERERERGRIKOGkIiIiIiIiIhIkFFDSEREREREREQkyKghJCIiIiIiIiISZNQQEhEREQlwDz30EMnJyezYscPuUkRERCRAqCEkIiJSidauXcu1115LkyZNiIyMJDY2li5dujB16lQOHTpkS01vvfUWDoeDlStXVurzbN++HYfDUfLhdDpJTExk4MCBLFiw4KT3mz9/PsOHD6dmzZpERETQoEEDrr76ajZs2HDS+3z77bdceuml1KtXj/DwcBISEujVqxevvPIKeXl5lfHp2eLo92779u2W4w888AAXX3wxY8eOpbi4uNT7PvjggzgcjgqrZfHixTgcDhYvXlxhj1maxo0bc80115TrPsuWLePBBx8kMzPT57Z+/frRr1+/CqlNREQkkKkhJCIiUklee+01unbtyooVK7jjjjuYP38+n3zyCWPHjuXVV1/luuuus7vEKnHTTTexfPlyvv32W55++mk2b97MsGHDWLJkic+5d955J0OHDsXr9fLyyy+zcOFCHnjgAVasWEGXLl2YNWuWz30eeOAB+vTpw549e3jkkUdYuHAhH3zwAQMHDuTBBx/k3nvvrYpP03avvvoqNWvWZNKkSXaXUqE++eQT7rvvvnLdZ9myZTz00EOlNoRefvllXn755QqqTkREJHCF2l2AiIhIdbR8+XJuvPFGBg0axOzZs4mIiCi5bdCgQdx2223Mnz+/SmtyuVwVOkKkrBo2bMg555wDQO/evWnevDl9+/bl9ddfp0+fPiXnTZ8+naeeeoobb7zR8gt7nz59GD9+PH379uXKK6+kU6dONG3aFICZM2fy8MMPc9111/Haa69ZPr+hQ4dy5513snz58ir6TO0VGhrK3Llz7S6jwnXu3LlCH69NmzYV+ngiIiKBSiOEREREKsHjjz+Ow+HgP//5j6UZdFR4eDgXXXRRyXWv18vUqVNp1aoVERERpKamctVVV7F7927L/U42febEaTBHp/O8++673HbbbdSrV4+IiAh+//33knMOHz7MtddeS1JSEjExMVx44YVs3brV57G//PJLBg4cSHx8PNHR0fTu3ZuvvvrqNL4qpm7dugGwf/9+y/HHHnuMxMREnn76aZ/7xMTE8OKLL5Kfn8+0adNKjj/88MMkJibywgsvlNrsiouLY/Dgwadd64n69etHu3btWL58Ob169SIqKorGjRvz5ptvAjB37ly6dOlCdHQ07du3L7Xpt3TpUgYOHEhcXBzR0dH06tWr1EbO999/T+/evYmMjKRu3bpMnjwZl8tVal0zZsygZ8+exMTEEBsby+DBg/npp5/K9DmdeN8hQ4awevXqcnxVrP73v//Rs2dPoqOjiYuLY9CgQaU25T799FM6dOhAREQETZs25fnnny91WtuJ/+e9Xi+PPvooLVu2JCoqiho1atChQweef/55wJwad8cddwDQpEmTkimLR6e2lTZlbO/evVx66aXExcWRkJDAuHHj+P7773E4HLz11lsl551sutk111xD48aNLceKi4t59NFHSzJds2ZNrr32Wg4ePFi2L6SIiEglU0NIRESkgnk8HhYtWkTXrl1p0KBBme
5z44038s9//pNBgwbxv//9j0ceeYT58+fTq1cv0tPTT7uWyZMns3PnTl599VU+++wzUlNTS2677rrrcDqdvP/++zz33HP8+OOP9OvXzzLN5r333mPw4MHEx8fz9ttv8+GHH5KUlMSQIUNOuym0bds2AFq0aFFybN++faxfv57BgwcTHR1d6v169uxJamoqCxcuLLnPunXrTnmfsjjaPHvwwQfLdH5aWhrXXnstEyZM4NNPP6V9+/b8+c9/5uGHH2by5MnceeedfPzxx8TGxjJq1Cj27t1bct9vvvmGAQMGkJWVxeuvv8706dOJi4vjwgsvZMaMGSXnbdiwgYEDB5KZmclbb73Fq6++yurVq3n00Ud96nn88ccZP348bdq04cMPP+Sdd94hOzub8847j3Xr1p3ycznxvu+++y45OTmcd955p1yz6WTef/99Ro4cSXx8PNOnT+f111/n8OHD9OvXj6VLl5acN3/+fC655BKSk5OZMWMGU6dOZfr06bz99tt/+BxTp07lwQcfZPz48cydO5cZM2Zw3XXXlfy/nTBhAjfddBMAs2bNYvny5SxfvpwuXbqU+ngFBQWcf/75LFiwgClTpjBz5kxq167NuHHjyv35H+X1ehk5ciRPPPEEf/rTn5g7dy5PPPEECxcupF+/fhQUFJz2Y4uIiFQYQ0RERCpUWlqaARiXXXZZmc7/9ddfDcCYOHGi5fgPP/xgAMbdd99dcqxRo0bG1Vdf7fMYffv2Nfr27Vty/euvvzYAo0+fPj7nvvnmmwZgXHzxxZbj3333nQEYjz76qGEYhpGXl2ckJSUZF154oeU8j8djdOzY0ejevfspP69t27YZgPHkk08aLpfLKCwsNNasWWP07NnTqFOnjrFt27aSc7///nsDMO66665TPmaPHj2MqKioct3njyxevNgICQkxHnrooT88t2/fvgZgrFy5suRYRkaGERISYkRFRRl79uwpOb5mzRoDMF544YWSY+ecc46Rmppq5OTklBxzu91Gu3btjPr16xter9cwDMMYN26cERUVZaSlpVnOa9WqlQGUfO127txphIaGGn/7298sdWZnZxupqanGmDFjSo498MADxvE/+h2970033WS5b05OjlG7dm3j0ksvPeXX4uj/sa+//towDPP/Rd26dY327dsbHo/H8nipqalGr169So6dffbZRoMGDYyioiLLecnJycaJP56e+H9+xIgRRqdOnU5Z21NPPWX5Oh3vxKy88sorBmB8+umnlvP+8pe/GIDx5ptvnvS+R1199dVGo0aNSq5Pnz7dAIyPP/7Yct6KFSsMwHj55ZdPWb+IiEhV0AghERERm3399dcAPlPBunfvTuvWrc9oetbo0aNPetvll19uud6rVy8aNWpUUs+yZcs4dOgQV199NW63u+TD6/VywQUXsGLFijLt4PXPf/6TsLAwIiMj6dSpE+vWreOzzz7zmWJTFoZhVPg6SH379sXtdnP//feX6fw6derQtWvXkutJSUmkpqbSqVMn6tatW3K8devWACVbwefl5fHDDz8wZswYYmNjS84LCQnhyiuvZPfu3fz222+A+X9i4MCB1KpVy3LeiaNWvvjiC9xuN3/+858tx+Pi4ujfvz/ffPPNST+Po/e96qqrLN/fyMhI+vbtW+7dw3777Tf27t3LlVdeidN57EfM2NhYRo8ezffff09+fj55eXmsXLmSUaNGER4ebjnvwgsv/MPn6d69Oz///DMTJ07kiy++IDs7u1x1nujrr78mLi7OMoUT4E9/+tNpP+acOXOoUaMGF154oeVr26lTJ2rXrl3pO7OJiIiUhRaVFhERqWApKSlER0eXTI36IxkZGYDZaDhR3bp1SxoKp6O0xzyqdu3apR47Ws/RNX7GjBlz0sc4dOgQMTExp6zhH//4B1dccQVFRUV8//333HvvvYwcOZKff/6Z5ORkwFx4GvjDr9mOHTtKpuGV9T4VLSkpyedYeHi4z/GjzY7Cwk
LAXLPJMIyTfp/h2P+FjIyMk35/jnf0e9SrVy+fc482707m6H3PPvvsUm8/vqlTFn/0/9jr9ZZ8DQzDsDS7jirt2Ik
|
||
|
"text/plain": [
|
||
|
"<Figure size 1400x800 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"draw_roc_curve(X_test, y_test)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "ae8e9bd3-0f6a-4f82-bb4c-470cbdc8d6bb",
|
||
|
"metadata": {
|
||
|
"jp-MarkdownHeadingCollapsed": true
|
||
|
},
|
||
|
"source": [
|
||
|
"## Cross Validation"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 40,
|
||
|
"id": "7f0535de-34f1-4e97-b993-b429ecf0a554",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Reduce the target to the single 'y_has_purchased' column.\n",
|
||
|
"# Guarded so that re-running this cell is a no-op instead of a KeyError\n",
|
||
|
"# once y_train has already been reduced to a Series.\n",
|
||
|
"if not isinstance(y_train, pd.Series):\n",
|
||
|
"    y_train = y_train['y_has_purchased']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 41,
|
||
|
"id": "f7fca463-d7d6-493b-8329-fdfa92457f78",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Best parameters found: {'logreg__C': 0.0009765625, 'logreg__class_weight': 'balanced', 'logreg__penalty': 'l1'}\n",
|
||
|
"Best cross-validation score: 0.65\n",
|
||
|
"Test set score: 0.64\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Cross validation\n",
|
||
|
"\n",
|
||
|
"grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring=recall_scorer, error_score='raise',\n",
|
||
|
" n_jobs=-1)\n",
|
||
|
"\n",
|
||
|
"grid_search.fit(X_train, y_train)\n",
|
||
|
"\n",
|
||
|
"# Print the best parameters and the best score\n",
|
||
|
"print(\"Best parameters found: \", grid_search.best_params_)\n",
|
||
|
"print(\"Best cross-validation score: {:.2f}\".format(grid_search.best_score_))\n",
|
||
|
"\n",
|
||
|
"# Evaluate the best model on the test set\n",
|
||
|
"test_score = grid_search.score(X_test, y_test)\n",
|
||
|
"print(\"Test set score: {:.2f}\".format(test_score))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 43,
|
||
|
"id": "56bd7828-4de1-4166-bea0-5d5e152b9d38",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi0AAAHFCAYAAAA+FskAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABQP0lEQVR4nO3de3yP9f/H8cdnp49tbMZsM6ecMxRRjEI5M/LtgFYrEXJsOaZy6mBOIWc60FdpiUilpVJKDGHOUU5LzBxmGLbZrt8ffj7fPjbZdH189uF57/a53ey6Xtd1va5PrV693u/3dVkMwzAQERERKeDcnJ2AiIiISF6oaBERERGXoKJFREREXIKKFhEREXEJKlpERETEJahoEREREZegokVERERcgooWERERcQkqWkRERMQlqGiRW9q2bdt49tlnKV++PIUKFaJw4cLcc889jB8/nlOnTjn02lu2bKFx48b4+/tjsViYMmWK6dewWCyMGjXK9PNez/z587FYLFgsFn788ccc+w3DoFKlSlgsFpo0aXJD15g5cybz58/P1zE//vjjNXMSEdfn4ewERBzlnXfeoXfv3lStWpXBgwcTFhZGZmYmv/76K7Nnz2bdunUsXbrUYdfv2rUraWlpxMbGEhAQwB133GH6NdatW0fp0qVNP29eFSlShPfeey9HYbJ69Wr27dtHkSJFbvjcM2fOJDAwkC5duuT5mHvuuYd169YRFhZ2w9cVkYJLRYvcktatW0evXr1o3rw5y5Ytw2q12vY1b96cgQMHEhcX59AcduzYQffu3WndurXDrlG/fn2HnTsvOnXqxEcffcSMGTPw8/OzbX/vvfcIDw/nzJkzNyWPzMxMLBYLfn5+Tv9ORMRxNDwkt6QxY8ZgsViYO3euXcFyhZeXF+3bt7f9nJ2dzfjx47nzzjuxWq0EBQXx9NNPc/jwYbvjmjRpQo0aNdi4cSMPPPAAPj4+VKhQgbFjx5KdnQ38b+jk0qVLzJo1yzaMAjBq1Cjbn//uyjEHDx60bVu1ahVNmjShePHieHt7U7ZsWR599FHOnz9vi8lteGjHjh08/PDDBAQEUKhQIWrVqsUHH3xgF3NlGOXjjz/mlVdeITQ0FD8/P5o1a8aePXvy9iUDTzzxBAAff/yxbVtqaipLliyha9euuR4zevRo6tWrR7FixfDz8+Oee+7hvffe4+/vbr3jjjvYuXMnq1evtn1/VzpVV3JfsGABAwcOpFSpUlitVv74448cw0MnTpygTJkyNGjQgMzMTNv5d+3aha+vL1FRUXm+VxFxPhUtcsvJyspi1apV1KlThzJlyuTpmF69ejF06FCaN2/O8uXLef3114mLi6NBgwacOHHCLjYpKYknn3ySp556iuXLl9O6dWuGDRvGhx9+CEDbtm1Zt24dAI899hjr1q2z/ZxXBw8epG3btnh5efH+++8TFxfH2LFj8fX1JSMj45rH7dmzhwYNGrBz506mTp3KZ599RlhYGF26dGH8+PE54l9++WUOHTrEu+++y9y5c/n9999p164dWVlZecrTz8+Pxx57jPfff9+27eOPP8bNzY1OnTpd89569uzJokWL+Oyzz3jkkUfo168fr7/+ui1m6dKlVKhQgdq1a9u+v6uH8oYNG0ZiYiKzZ8/miy++ICgoKMe1AgMDiY2NZePGjQwdOhSA8+fP8/jjj1O2bFlmz56dp/sUkQLCELnFJCUlGYDRuXPnPMXv3r3bAIzevXvbbV+/fr0BGC+//LJtW+PGjQ3AWL9+vV1sWFiY0bJlS7ttgNGnTx+7bSNHjjRy+7WbN2+eARgHDhwwDMMwFi9ebABGQkLCP+YOGCNHjrT93LlzZ8NqtRqJiYl2ca1btzZ8fHyM06dPG4ZhGD/88IMBGG3atLGLW7RokQEY69at+8frXsl348aNtnPt2LHDMAzDuPfee40uXboYhmEY1atXNxo3bnzN82RlZRmZmZnGa6+9ZhQvXt
zIzs627bvWsVeu16hRo2vu++GHH+y2jxs3zgCMpUuXGs8884zh7e1tbNu27R/vUUQKHnVa5Lb3ww8/AOSY8HnfffdRrVo1vv/+e7vtISEh3HfffXbb7rrrLg4dOmRaTrVq1cLLy4sePXrwwQcfsH///jwdt2rVKpo2bZqjw9SlSxfOnz+fo+Pz9yEyuHwfQL7upXHjxlSsWJH333+f7du3s3HjxmsODV3JsVmzZvj7++Pu7o6npycjRozg5MmTJCcn5/m6jz76aJ5jBw8eTNu2bXniiSf44IMPmDZtGjVr1szz8SJSMKhokVtOYGAgPj4+HDhwIE/xJ0+eBKBkyZI59oWGhtr2X1G8ePEccVarlQsXLtxAtrmrWLEi3333HUFBQfTp04eKFStSsWJF3n777X887uTJk9e8jyv7/+7qe7ky/yc/92KxWHj22Wf58MMPmT17NlWqVOGBBx7INXbDhg20aNECuLy665dffmHjxo288sor+b5ubvf5Tzl26dKFixcvEhISorksIi5KRYvcctzd3WnatCmbNm3KMZE2N1f+w3306NEc+44cOUJgYKBpuRUqVAiA9PR0u+1Xz5sBeOCBB/jiiy9ITU0lPj6e8PBwoqOjiY2Nveb5ixcvfs37AEy9l7/r0qULJ06cYPbs2Tz77LPXjIuNjcXT05Mvv/ySjh070qBBA+rWrXtD18xtQvO1HD16lD59+lCrVi1OnjzJoEGDbuiaIuJcKlrkljRs2DAMw6B79+65TlzNzMzkiy++AOChhx4CsE2kvWLjxo3s3r2bpk2bmpbXlRUw27Zts9t+JZfcuLu7U69ePWbMmAHA5s2brxnbtGlTVq1aZStSrvjvf/+Lj4+Pw5YDlypVisGDB9OuXTueeeaZa8ZZLBY8PDxwd3e3bbtw4QILFizIEWtW9yorK4snnngCi8XC119/TUxMDNOmTeOzzz771+cWkZtLz2mRW1J4eDizZs2id+/e1KlTh169elG9enUyMzPZsmULc+fOpUaNGrRr146qVavSo0cPpk2bhpubG61bt+bgwYMMHz6cMmXK8OKLL5qWV5s2bShWrBjdunXjtddew8PDg/nz5/Pnn3/axc2ePZtVq1bRtm1bypYty8WLF20rdJo1a3bN848cOZIvv/ySBx98kBEjRlCsWDE++ugjvvrqK8aPH4+/v79p93K1sWPHXjembdu2TJo0icjISHr06MHJkyeZOHFirsvSa9asSWxsLJ988gkVKlSgUKFCNzQPZeTIkfz888+sXLmSkJAQBg4cyOrVq+nWrRu1a9emfPny+T6niDiHiha5ZXXv3p377ruPyZMnM27cOJKSkvD09KRKlSpERkbSt29fW+ysWbOoWLEi7733HjNmzMDf359WrVoRExOT6xyWG+Xn50dcXBzR0dE89dRTFC1alOeee47WrVvz3HPP2eJq1arFypUrGTlyJElJSRQuXJgaNWqwfPly25yQ3FStWpW1a9fy8ssv06dPHy5cuEC1atWYN29evp4s6ygPPfQQ77//PuPGjaNdu3aUKlWK7t27ExQURLdu3exiR48ezdGjR+nevTtnz56lXLlyds+xyYtvv/2WmJgYhg8fbtcxmz9/PrVr16ZTp06sWbMGLy8vM25PRBzMYhh/e6KTiIiISAGlOS0iIiLiElS0iIiIiEtQ0SIiIiIuQUWLiIiIuAQVLSIiIuISVLSIiIiIS1DRIiIiIi7hlny4nHftvtcPErkNLV84ytkpiBQ4zas55p1cf2fWf5cubJluynlclTotIiIi4hJuyU6LiIhIgWJRj8AMKlpEREQczWJxdga3BBUtIiIijqZOiyn0LYqIiIhLUKdFRETE0TQ8ZAoVLSIiIo6m4SFT6FsUERERl6BOi4iIiKNpeMgUKlpEREQcTcNDptC3KCIiIi5BnRYRERFH0/CQKVS0iIiIOJqGh0yhb1FERERcgjotIiIijqbhIVOoaBEREXE0DQ+ZQkWLiI
iIo6nTYgqVfiIiIuIS1GkRERFxNA0PmUJFi4iIiKOpaDGFvkURERFxCeq0iIiIOJqbJuKaQUWLiIiIo2l4yBT6FkV
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"y_pred = grid_search.predict(X_test)\n",
|
||
|
"\n",
|
||
|
"draw_confusion_matrix(y_test, y_pred)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 44,
|
||
|
"id": "319fe0eb-4d4a-492c-bd50-3f08ab483021",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAK8CAYAAACeK2TMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUddrG8e+k904ooSahSheBJIggioINBUTWvuqqrLr2tfeKBdtrXde6IqKIq7IIFsQA0hEBBRJ6JwnpPXPePw4MHAcwgSRnJnN/rotL5pwzyQNy54SH33l+DsMwDERERERERERExGf42V2AiIiIiIiIiIg0LjWERERERERERER8jBpCIiIiIiIiIiI+Rg0hEREREREREREfo4aQiIiIiIiIiIiPUUNIRERERERERMTHqCEkIiIiIiIiIuJj1BASEREREREREfExagiJiIiIiIiIiPgYNYREREREvNzDDz9MfHw8mzdvtrsUERER8RJqCImIiDSglStXcuWVV9KhQwdCQkKIiIigb9++TJw4kby8PFtqevfdd3E4HCxZsqRBP8+mTZtwOByuH35+fsTGxjJs2DBmzZp1xPfNnDmTs846i2bNmhEcHEybNm24/PLLWbNmzRHf89NPP3HhhReSlJREUFAQ0dHRpKen89prr1FSUtIQvzxbHPh/t2nTJsvxBx98kPPPP5+xY8dSWVl52Pc+9NBDOByOeqtlzpw5OBwO5syZU28f83Dat2/PFVdcUaf3zJ8/n4ceeoj8/Hy3c0OGDGHIkCH1UpuIiIg3U0NIRESkgbz11luceOKJLF68mDvuuIOZM2fy+eefM3bsWF5//XWuuuoqu0tsFDfeeCMLFizgp59+4tlnn2X9+vWMHDmSuXPnul175513MmLECJxOJ6+++iqzZ8/mwQcfZPHixfTt25dp06a5vefBBx9k8ODBbN++nUcffZTZs2fz8ccfM2zYMB566CHuu+++xvhl2u7111+nWbNm3HLLLXaXUq8+//xz7r///jq9Z/78+Tz88MOHbQi9+uqrvPrqq/VUnYiIiPcKsLsAERGRpmjBggVcf/31nH766UyfPp3g4GDXudNPP53bbruNmTNnNmpNVVVV9bpCpLbatm3LwIEDAcjIyKBjx46ccsopvP322wwePNh13eTJk3nmmWe4/vrrLX9hHzx4MOPHj+eUU07h0ksvpXfv3iQnJwMwdepUHnnkEa666ireeusty69vxIgR3HnnnSxYsKCRfqX2CggI4Ouvv7a7jHrXp0+fev143bp1q9ePJyIi4q20QkhERKQBPPHEEzgcDt58801LM+iAoKAgzj33XNdrp9PJxIkT6dKlC8HBwSQmJnLZZZexbds2y/uO9PjMHx+DOfA4zwcffMBtt91GUlISwcHBZGVlua7Zt28fV155JXFxcYSHh3POOeewYcMGt4/97bffMmzYMKKioggLCyMjI4PvvvvuGH5XTP369QNg9+7dluOPP/44sbGxPPvss27vCQ8P5+WXX6a0tJRJkya5jj/yyCPExsby0ksvHbbZFRkZyfDhw4+51j8aMmQI3bt3Z8GCBaSnpxMaGkr79u155513APj666/p27cvYWFh9OjR47BNv8zMTIYNG0ZkZCRhYWGkp6cftpHz888/k5GRQUhICK1ateLuu++mqqrqsHVNmTKFtLQ0wsPDiYiIYPjw4SxdurRWv6Y/vveMM85g+fLldfhdsfrvf/9LWloaYWFhREZGcvrppx+2KffFF1/Qs2dPgoODSU5O5sUXXzzsY21//DPvdDp57LHH6Ny5M6GhocTExNCzZ09efPFFwHw07o477gCgQ4cOrkcWDzzadrhHxnbs2MGFF15IZGQk0dHRjBs3jp9//hmHw8G7777ruu5Ij5tdccUVtG/f3nKssrKSxx57zJXpZs2aceWVV7J3797a/UaKiIg0MDWERERE6llNTQ3ff/89J554Im3atK
nVe66//nr++c9/cvrpp/Pf//6XRx99lJkzZ5Kenk5OTs4x13L33XezZcsWXn/9db788ksSExNd56666ir8/Pz46KOPeOGFF1i0aBFDhgyxPGbz4YcfMnz4cKKionjvvff45JNPiIuL44wzzjjmptDGjRsB6NSpk+vYzp07Wb16NcOHDycsLOyw70tLSyMxMZHZs2e73rNq1aqjvqc2DjTPHnrooVpdv2vXLq688kquvvpqvvjiC3r06MFf//pXHnnkEe6++27uvPNOPvvsMyIiIhg1ahQ7duxwvffHH3/k1FNPpaCggLfffpvJkycTGRnJOeecw5QpU1zXrVmzhmHDhpGfn8+7777L66+/zvLly3nsscfc6nniiScYP3483bp145NPPuH999+nsLCQk08+mVWrVh311/LH937wwQcUFRVx8sknH3Vm05F89NFHnHfeeURFRTF58mTefvtt9u3bx5AhQ8jMzHRdN3PmTC644ALi4+OZMmUKEydOZPLkybz33nt/+jkmTpzIQw89xPjx4/n666+ZMmUKV111levP7dVXX82NN94IwLRp01iwYAELFiygb9++h/14ZWVlnHbaacyaNYsnn3ySqVOn0qJFC8aNG1fnX/8BTqeT8847j6eeeoq//OUvfP311zz11FPMnj2bIUOGUFZWdswfW0REpN4YIiIiUq927dplAMZFF11Uq+t/++03AzAmTJhgOb5w4UIDMO655x7XsXbt2hmXX36528c45ZRTjFNOOcX1+ocffjAAY/DgwW7XvvPOOwZgnH/++Zbj8+bNMwDjscceMwzDMEpKSoy4uDjjnHPOsVxXU1Nj9OrVy+jfv/9Rf10bN240AOPpp582qqqqjPLycmPFihVGWlqa0bJlS2Pjxo2ua3/++WcDMO66666jfswBAwYYoaGhdXrPn5kzZ47h7+9vPPzww3967SmnnGIAxpIlS1zHcnNzDX9/fyM0NNTYvn276/iKFSsMwHjppZdcxwYOHGgkJiYaRUVFrmPV1dVG9+7djdatWxtOp9MwDMMYN26cERoaauzatctyXZcuXQzA9Xu3ZcsWIyAgwPj73/9uqbOwsNBITEw0xowZ4zr24IMPGod+63fgvTfeeKPlvUVFRUaLFi2MCy+88Ki/Fwf+jP3www+GYZh/Llq1amX06NHDqKmpsXy8xMREIz093XXspJNOMtq0aWNUVFRYrouPjzf++O3pH//Mn3322Ubv3r2PWtszzzxj+X061B+z8tprrxmA8cUXX1iuu+aaawzAeOedd4743gMuv/xyo127dq7XkydPNgDjs88+s1y3ePFiAzBeffXVo9YvIiLSGLRCSERExGY//PADgNujYP3796dr167H9XjW6NGjj3ju4osvtrxOT0+nXbt2rnrmz59PXl4el19+OdXV1a4fTqeTM888k8WLF9dqB69//vOfBAYGEhISQu/evVm1ahVffvml2yM2tWEYRr3PQTrllFOorq7mgQceqNX1LVu25MQTT3S9jouLIzExkd69e9OqVSvX8a5duwK4toIvKSlh4cKFjBkzhoiICNd1/v7+XHrppWzbto21a9cC5p+JYcOG0bx5c8t1f1y18s0331BdXc1f//pXy/HIyEiGDh3Kjz/+eMRfx4H3XnbZZZb/vyEhIZxyyil13j1s7dq17Nixg0svvRQ/v4PfYkZERDB69Gh+/vlnSktLKSkpYcmSJYwaNYqgoCDLdeecc86ffp7+/fvzyy+/MGHCBL755hsKCwvrVOcf/fDDD0RGRloe4QT4y1/+cswf86uvviImJoZzzjnH8nvbu3dvWrRo0eA7s4mIiNSGhkqLiIjUs4SEBMLCwlyPRv2Z3NxcwGw0/FGrVq1cDYVjcbiPeUCLFi0Oe+xAPQdm/IwZM+aIHyMvL4/w8PCj1vCPf/yDSy65hIqKCn7++Wfuu+8+zjvvPH755Rfi4+MBc/A08Ke/Z5s3b3Y9hlfb99S3uLg4t2NBQU
Fuxw80O8rLywFzZpNhGEf8/wwH/yzk5uYe8f/PoQ78P0pPT3e79kDz7kgOvPekk0467PlDmzq18Wd/jp1Op+v3wDA
|
||
|
"text/plain": [
|
||
|
"<Figure size 1400x800 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"draw_roc_curve(X_test, y_test)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "ab122f66-1591-43ea-a364-2564f09b2bb3",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Segmentation du score de prédiction"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 61,
|
||
|
"id": "279e18c7-29d8-4328-963a-18babd13c2c8",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABA4AAAIjCAYAAACDPFmSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD8YklEQVR4nOzdd1gUV/s38O8ufekgUgyICCIWFIMoVixYUCOWYEsQW0LUWFBRYgNDFAuW6BNjEhVjNKapsaCIBWPsUVGfYIlERB9RogZQUVzYef/w3fm5sgtLE2W/n+vySubMmTP3zD0M7NkzZySCIAggIiIiIiIiIlJDWt0BEBEREREREdHrix0HRERERERERKQROw6IiIiIiIiISCN2HBARERERERGRRuw4ICIiIiIiIiKN2HFARERERERERBqx44CIiIiIiIiINGLHARERERERERFpxI4DIiIiIiIiItKIHQdERERUIUlJSTAwMMD27durOxSiKiOXy9GtWzf4+/tDLpdXdzjVatiwYfD09MS9e/eqOxQiekXYcUBEVAkSEhIgkUjwxx9/VHco5bZ582YsX768usOocgEBAQgICKjuMF57aWlpiI6ORkZGRon1bt26hffeew8rVqxAcHCw1u1HR0dDIpGUKab8/HxER0cjJSWl2Drlz2Bp8b6pynO+XpWMjAxIJBIkJCRUdyiVJiwsDK6uriplBgYG+OWXX/D06VNMnTpV47av6h6jLsZX4csvv8SBAwewd+9e1KpV65Xvv7JJJBJER0e/8v2+qntWeY4vMTFR4zaurq4ICwurcFz05mHHARERAdCdjgPSTlpaGmJiYkr8o7awsBCDBg3CBx98gLFjx5ap/dGjR+P48eNl2iY/Px8xMTFqOw569eqF48ePw9HRsUxtEpWFubk5EhMTsWPHDvz444/VHc4rd/bsWcyePRuJiYmoV69edYdTKY4fP47Ro0dXdxhVpjzHl5iYiJiYGLXrtm3bhtmzZ1dGaPSG0a/uAIiIqHrl5+dDJpNVdxhUAdWVQ319fRw9erRM2yhjfeutt/DWW29VWix2dnaws7OrtPaINHF0dMT169erO4xX4uV7S4sWLfDPP/9Uyb6ePHkCY2PjVz6ypnXr1q90f69aZR+fj49PpbZHbw6OOCAiqiJhYWEwMzPD5cuX0b17d5iamsLR0RFxcXEAgBMnTqBdu3YwNTVFgwYNsGHDBpXtlcMYk5OTMWLECNjY2MDU1BR9+vTB33//XWx/69atQ7NmzWBsbAwbGxv069cPly5dUhvTxYsX0a1bN5ibm6NLly4ICAjA7t27cePGDUgkEvGfUkxMDFq1agUbGxtYWFigRYsWWLt2LQRBUGnf1dUVvXv3xt69e9GiRQuYmJigYcOGWLduXbF4//e//+GDDz6As7MzDA0N4eTkhIEDB+Lu3btinby8PEydOhX16tWDoaEh6tSpg0mTJuHx48elnn9BELBo0SLUrVsXxsbGaNGiBfbs2aO2rrb7+emnn9CqVStYWlpCJpPBzc0NI0eOLDUWhUKBlStXonnz5jAxMYGVlRVat26NHTt2iHV++OEHdOvWDY6OjjAxMYGXlxdmzJhRLAZNOQSA5ORk9O3bF2+99RaMjY3h7u6ODz/8UO1zyJcvX8aQIUNgb28PIyMjuLi4IDQ0FAUFBUhISMC7774LAOjUqZN4Pbw4FH3//v3o0qULLCwsIJPJ0LZtWxw4cEBlH8rh9WfPnsXAgQNhbW2N+vXrq6x70cGDBxEQEABbW1uYmJjAxcUFAwYMQH5+PjIyMsSOgZiYGDEm5ZBZdcN+NV0DLw8l1zRkOCUlBRKJpNgIB22O/Z9//hGvbyMjI9jZ2aFt27bYv39/sVy8bPfu3WjevDmMjIxQr149LFmyRG29//znP+jQoQNq164NU1NTNG3aFIsWLSr2/H1AQACaNGmC06dPo3379uK1GxcXB4VCIdZTKBSIjY2Fp6
eneJ16e3tjxYoVpcaszl9//YWhQ4eidu3aMDIygpeXF/7zn/9ota1EIsH48eOxfv16MR5fX1+cOHECgiBg8eLFqFevHszMzNC5c2dcu3atWBva3BOB5/n39PQUY/z222/VxvTs2TPExsaiYcOGYk7DwsJU7lmaqNt2xIgRWn8Ir0iM2u6npHuLtu0WFBRgypQpcHBwgEwmQ4cOHXDmzJliw9uVP3P79u3DyJEjYWdnB5lMhoKCAgDP74f+/v4wNTWFmZkZunfvjnPnzqns6++//8bgwYPh5OQEIyMj2Nvbo0uXLkhNTRXrlHRPUVI3lP+///0v+vbtC2traxgbG6N58+bFfkcr7w/ff/89Zs6cCScnJ1hYWKBr1664cuVKqedbE22v26+//hoNGjSAkZERGjVqhM2bN6t9fOXl48vPzxd/3yn34evri++//x7A8+tA+XP64t8DyvujukcVLl++jB49ekAmk6FWrVoIDw/Hzp07i90/NT3moO7xnor8/qeqwREHRERVSC6Xo3///ggPD8e0adOwefNmREVFIS8vD7/88gumT5+Ot956CytXrkRYWBiaNGmCt99+W6WNUaNGITAwEJs3b8bNmzcxa9YsBAQE4MKFC7CysgIALFiwAJ988gmGDBmCBQsW4P79+4iOjoa/vz9Onz4NDw8Psb1nz57hnXfewYcffogZM2agsLAQb731Fj744AOkp6dj27ZtxY4jIyMDH374IVxcXAA87/T4+OOP8b///Q9z5sxRqXv+/HlMmTIFM2bMgL29Pb755huMGjUK7u7u6NChA4DnnQYtW7aEXC7HJ598Am9vb9y/fx9JSUn4999/YW9vj/z8fHTs2BG3bt0S6/z555+YM2cOLl68iP3795f4zVRMTAxiYmIwatQoDBw4EDdv3sSYMWNQVFQET09PsZ62+zl+/DgGDRqEQYMGITo6GsbGxrhx4wYOHjxY6nUQFhaG7777DqNGjcK8efNgaGiIs2fPqnxQ/euvvxAUFIRJkybB1NQUly9fxsKFC3Hq1Kli+1CXQwBIT0+Hv78/Ro8eDUtLS2RkZGDp0qVo164dLl68CAMDAzFH7dq1Q61atTBv3jx4eHggKysLO3bswLNnz9CrVy/Mnz8fn3zyCf7zn/+gRYsWACB+6P/uu+8QGhqKvn37YsOGDTAwMMCaNWvQvXt3JCUliR82lPr374/BgwcjPDxc4x99GRkZ6NWrF9q3b49169bBysoK//vf/7B37148e/YMjo6O2Lt3L3r06IFRo0aJQ29LGmWg7TVQFtoe+/vvv4+zZ8/is88+Q4MGDZCTk4OzZ8/i/v37JbZ/4MAB9O3bF/7+/tiyZQuKioqwaNEitR9O09PTMXToUPEP6/Pnz+Ozzz7D5cuXi3XW3blzB8OGDcOUKVMwd+5cbNu2DVFRUXByckJoaCgAYNGiRYiOjsasWbPQoUMHyOVyXL58GTk5OWU+T2lpaWjTpg1cXFwQHx8PBwcHJCUlYcKECbh37x7mzp1bahu7du3CuXPnEBcXB4lEgunTp6NXr14YPnw4/v77b6xatQq5ubmIiIjAgAEDkJqaKt4TtL0nJiQkYMSIEejbty/i4+ORm5uL6OhoFBQUQCr9v+/XFAoF+vbtiyNHjiAyMhJt2rTBjRs3MHv2bJw8eRJnzpzROOpH07Zz585FQEAA/vjjD5iYmGg8DxWNUdv9AOrvLWVpd8SIEfjhhx8QGRmJzp07Iy0tDf369UNeXp7a/Y0cORK9evXCxo0b8fjxYxgYGGD+/PmYNWsWRowYgVmzZuHZs2dYvHgx2rdvj1OnTqFRo0YAgKCgIPHnw8XFBffu3cOxY8fE67W0e4qmfF25cgVt2rRB7dq18fnnn8PW1hbfffed2EkUGRmpUv+TTz5B27Zt8c033yAvLw/Tp09Hnz59cOnSJejp6ZV4vl+m7XX71Vdf4cMPP8SAAQOwbNky5ObmIiYmRux4KUlERAQ2btyI2N
hY+Pj44PHjx/jvf/8r3ptmz56Nx48f4+eff1Z5nEzTY2B3795Fx44dYWBggC+++AL29vbYtGkTxo8fX6Zjf1FFf/9
|
||
|
"text/plain": [
|
||
|
"<Figure size 1000x600 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Coefficients of the fitted logistic regression, one per transformed feature.\n",
|
||
|
"coefficients = pipeline.named_steps['logreg'].coef_[0]\n",
|
||
|
"# Take the column names from the preprocessing step: 'logreg' only exposes\n",
|
||
|
"# feature_names_in_ when it was fitted on a DataFrame, which is not what a\n",
|
||
|
"# ColumnTransformer outputs by default.\n",
|
||
|
"feature_names = pipeline.named_steps['preprocessor'].get_feature_names_out()\n",
|
||
|
"\n",
|
||
|
"# Plot one horizontal bar per feature coefficient\n",
|
||
|
"plt.figure(figsize=(10, 6))\n",
|
||
|
"plt.barh(feature_names, coefficients, color='skyblue')\n",
|
||
|
"plt.xlabel('Importance des caractéristiques')\n",
|
||
|
"plt.ylabel('Caractéristiques')\n",
|
||
|
"plt.title('Importance des caractéristiques dans le modèle de régression logistique')\n",
|
||
|
"plt.grid(True)\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 70,
|
||
|
"id": "210b931c-6d46-4ebf-a9c7-d1ee05c3fadf",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Build a per-customer scoring frame from the test set.\n",
|
||
|
"# .copy() makes it an independent frame, so the column assignments below do not\n",
|
||
|
"# write into a slice of dataset_test.\n",
|
||
|
"dataset_for_segmentation = dataset_test[['customer_id'] + numeric_features + categorical_features].copy()\n",
|
||
|
"\n",
|
||
|
"# Probability of the positive class (column 1 of predict_proba).\n",
|
||
|
"y_predict_proba = pipeline.predict_proba(X_test)[:, 1]\n",
|
||
|
"\n",
|
||
|
"dataset_for_segmentation['prediction_probability'] = y_predict_proba\n",
|
||
|
"\n",
|
||
|
"# Bucket the probability by truncating 10 * p to an integer (this truncates, it does not round):\n",
|
||
|
"# 0 for [0, 0.1), ..., 9 for [0.9, 1.0); a probability of exactly 1.0 lands in bucket 10.\n",
|
||
|
"dataset_for_segmentation['category'] = dataset_for_segmentation['prediction_probability'].apply(lambda x: int(x * 10))\n",
|
||
|
"\n",
|
||
|
"# Take the label from the same fitted pipeline as the probability above, rather than\n",
|
||
|
"# from whatever y_pred an earlier cell left in the kernel (it was last set from grid_search).\n",
|
||
|
"dataset_for_segmentation['prediction'] = pipeline.predict(X_test)\n",
|
||
|
"\n",
|
||
|
"def premiere_partie(chaine):\n",
|
||
|
"    \"\"\"Return the text before the first '_' in chaine, or None if chaine is empty or not a string.\"\"\"\n",
|
||
|
"    if isinstance(chaine, str) and chaine:\n",
|
||
|
"        return chaine.split('_')[0]\n",
|
||
|
"    return None\n",
|
||
|
"\n",
|
||
|
"# The prefix of customer_id (before the first '_') is stored as the company number.\n",
|
||
|
"dataset_for_segmentation['company_number'] = dataset_for_segmentation['customer_id'].apply(premiere_partie)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "055e47dd-9ff3-4853-a46d-d5a5edc1f361",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 73,
|
||
|
"id": "969f1f92-d715-4d74-85a7-437e72838cb5",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead tr th {\n",
|
||
|
" text-align: left;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead tr:last-of-type th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>nb_tickets</th>\n",
|
||
|
" <th>nb_purchases</th>\n",
|
||
|
" <th>total_amount</th>\n",
|
||
|
" <th>nb_suppliers</th>\n",
|
||
|
" <th>vente_internet_max</th>\n",
|
||
|
" <th>purchase_date_min</th>\n",
|
||
|
" <th>purchase_date_max</th>\n",
|
||
|
" <th>time_between_purchase</th>\n",
|
||
|
" <th>nb_tickets_internet</th>\n",
|
||
|
" <th>fidelity</th>\n",
|
||
|
" <th>gender_female</th>\n",
|
||
|
" <th>gender_male</th>\n",
|
||
|
" <th>gender_other</th>\n",
|
||
|
" <th>nb_campaigns</th>\n",
|
||
|
" <th>nb_campaigns_opened</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>category</th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>0.113637</td>\n",
|
||
|
" <td>0.006274</td>\n",
|
||
|
" <td>1.586366</td>\n",
|
||
|
" <td>0.005821</td>\n",
|
||
|
" <td>0.000647</td>\n",
|
||
|
" <td>548.790455</td>\n",
|
||
|
" <td>548.773103</td>\n",
|
||
|
" <td>-0.977118</td>\n",
|
||
|
" <td>0.001585</td>\n",
|
||
|
" <td>0.000776</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>0.000032</td>\n",
|
||
|
" <td>0.999968</td>\n",
|
||
|
" <td>13.984219</td>\n",
|
||
|
" <td>1.302720</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>0.810841</td>\n",
|
||
|
" <td>0.128432</td>\n",
|
||
|
" <td>9.611292</td>\n",
|
||
|
" <td>0.125295</td>\n",
|
||
|
" <td>0.018186</td>\n",
|
||
|
" <td>525.437516</td>\n",
|
||
|
" <td>525.275222</td>\n",
|
||
|
" <td>-0.729328</td>\n",
|
||
|
" <td>0.054312</td>\n",
|
||
|
" <td>0.111832</td>\n",
|
||
|
" <td>0.245480</td>\n",
|
||
|
" <td>0.495929</td>\n",
|
||
|
" <td>0.258591</td>\n",
|
||
|
" <td>18.413562</td>\n",
|
||
|
" <td>3.718711</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1.159419</td>\n",
|
||
|
" <td>0.339253</td>\n",
|
||
|
" <td>15.182143</td>\n",
|
||
|
" <td>0.337577</td>\n",
|
||
|
" <td>0.323824</td>\n",
|
||
|
" <td>501.529129</td>\n",
|
||
|
" <td>501.415505</td>\n",
|
||
|
" <td>-0.554439</td>\n",
|
||
|
" <td>0.969939</td>\n",
|
||
|
" <td>0.304757</td>\n",
|
||
|
" <td>0.392570</td>\n",
|
||
|
" <td>0.297258</td>\n",
|
||
|
" <td>0.310173</td>\n",
|
||
|
" <td>17.395042</td>\n",
|
||
|
" <td>2.608084</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>2.153080</td>\n",
|
||
|
" <td>0.744161</td>\n",
|
||
|
" <td>27.820044</td>\n",
|
||
|
" <td>0.734881</td>\n",
|
||
|
" <td>0.600982</td>\n",
|
||
|
" <td>287.051054</td>\n",
|
||
|
" <td>286.675385</td>\n",
|
||
|
" <td>0.105360</td>\n",
|
||
|
" <td>1.776035</td>\n",
|
||
|
" <td>0.659878</td>\n",
|
||
|
" <td>0.288813</td>\n",
|
||
|
" <td>0.253244</td>\n",
|
||
|
" <td>0.457943</td>\n",
|
||
|
" <td>16.790421</td>\n",
|
||
|
" <td>4.173954</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>2.044749</td>\n",
|
||
|
" <td>0.777640</td>\n",
|
||
|
" <td>27.353145</td>\n",
|
||
|
" <td>0.754549</td>\n",
|
||
|
" <td>0.079213</td>\n",
|
||
|
" <td>297.179255</td>\n",
|
||
|
" <td>295.019902</td>\n",
|
||
|
" <td>1.898178</td>\n",
|
||
|
" <td>0.293760</td>\n",
|
||
|
" <td>0.894877</td>\n",
|
||
|
" <td>0.666980</td>\n",
|
||
|
" <td>0.301424</td>\n",
|
||
|
" <td>0.031596</td>\n",
|
||
|
" <td>16.954707</td>\n",
|
||
|
" <td>6.060621</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>3.237988</td>\n",
|
||
|
" <td>0.958520</td>\n",
|
||
|
" <td>46.637380</td>\n",
|
||
|
" <td>0.807655</td>\n",
|
||
|
" <td>0.484785</td>\n",
|
||
|
" <td>387.464785</td>\n",
|
||
|
" <td>380.145068</td>\n",
|
||
|
" <td>7.111357</td>\n",
|
||
|
" <td>2.080397</td>\n",
|
||
|
" <td>1.164958</td>\n",
|
||
|
" <td>0.497758</td>\n",
|
||
|
" <td>0.259769</td>\n",
|
||
|
" <td>0.242473</td>\n",
|
||
|
" <td>27.006406</td>\n",
|
||
|
" <td>12.457719</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>3.592233</td>\n",
|
||
|
" <td>1.102881</td>\n",
|
||
|
" <td>49.989226</td>\n",
|
||
|
" <td>0.878014</td>\n",
|
||
|
" <td>0.599906</td>\n",
|
||
|
" <td>268.627019</td>\n",
|
||
|
" <td>250.949344</td>\n",
|
||
|
" <td>17.539247</td>\n",
|
||
|
" <td>2.525994</td>\n",
|
||
|
" <td>1.420921</td>\n",
|
||
|
" <td>0.534607</td>\n",
|
||
|
" <td>0.304259</td>\n",
|
||
|
" <td>0.161134</td>\n",
|
||
|
" <td>14.073285</td>\n",
|
||
|
" <td>4.604134</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>3.747016</td>\n",
|
||
|
" <td>1.391266</td>\n",
|
||
|
" <td>40.710335</td>\n",
|
||
|
" <td>0.914702</td>\n",
|
||
|
" <td>0.160990</td>\n",
|
||
|
" <td>309.716173</td>\n",
|
||
|
" <td>274.795570</td>\n",
|
||
|
" <td>34.796876</td>\n",
|
||
|
" <td>0.844250</td>\n",
|
||
|
" <td>1.963028</td>\n",
|
||
|
" <td>0.650364</td>\n",
|
||
|
" <td>0.263464</td>\n",
|
||
|
" <td>0.086172</td>\n",
|
||
|
" <td>26.186317</td>\n",
|
||
|
" <td>8.891703</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>5.698276</td>\n",
|
||
|
" <td>1.567006</td>\n",
|
||
|
" <td>63.033699</td>\n",
|
||
|
" <td>0.907915</td>\n",
|
||
|
" <td>0.334248</td>\n",
|
||
|
" <td>326.485952</td>\n",
|
||
|
" <td>257.940194</td>\n",
|
||
|
" <td>68.425460</td>\n",
|
||
|
" <td>2.794279</td>\n",
|
||
|
" <td>2.413009</td>\n",
|
||
|
" <td>0.606583</td>\n",
|
||
|
" <td>0.251567</td>\n",
|
||
|
" <td>0.141850</td>\n",
|
||
|
" <td>30.987461</td>\n",
|
||
|
" <td>11.676332</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>14.505956</td>\n",
|
||
|
" <td>3.211571</td>\n",
|
||
|
" <td>107.288514</td>\n",
|
||
|
" <td>1.011628</td>\n",
|
||
|
" <td>0.157119</td>\n",
|
||
|
" <td>369.696066</td>\n",
|
||
|
" <td>209.280306</td>\n",
|
||
|
" <td>160.348544</td>\n",
|
||
|
" <td>3.514464</td>\n",
|
||
|
" <td>5.394498</td>\n",
|
||
|
" <td>0.669314</td>\n",
|
||
|
" <td>0.223766</td>\n",
|
||
|
" <td>0.106920</td>\n",
|
||
|
" <td>45.928247</td>\n",
|
||
|
" <td>18.241634</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10</th>\n",
|
||
|
" <td>2262.859155</td>\n",
|
||
|
" <td>45.619718</td>\n",
|
||
|
" <td>11051.732394</td>\n",
|
||
|
" <td>1.464789</td>\n",
|
||
|
" <td>0.154930</td>\n",
|
||
|
" <td>467.111875</td>\n",
|
||
|
" <td>31.146796</td>\n",
|
||
|
" <td>435.950994</td>\n",
|
||
|
" <td>54.295775</td>\n",
|
||
|
" <td>64.704225</td>\n",
|
||
|
" <td>0.507042</td>\n",
|
||
|
" <td>0.295775</td>\n",
|
||
|
" <td>0.197183</td>\n",
|
||
|
" <td>53.352113</td>\n",
|
||
|
" <td>26.070423</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
|
" mean mean mean mean \n",
|
||
|
"category \n",
|
||
|
"0 0.113637 0.006274 1.586366 0.005821 \n",
|
||
|
"1 0.810841 0.128432 9.611292 0.125295 \n",
|
||
|
"2 1.159419 0.339253 15.182143 0.337577 \n",
|
||
|
"3 2.153080 0.744161 27.820044 0.734881 \n",
|
||
|
"4 2.044749 0.777640 27.353145 0.754549 \n",
|
||
|
"5 3.237988 0.958520 46.637380 0.807655 \n",
|
||
|
"6 3.592233 1.102881 49.989226 0.878014 \n",
|
||
|
"7 3.747016 1.391266 40.710335 0.914702 \n",
|
||
|
"8 5.698276 1.567006 63.033699 0.907915 \n",
|
||
|
"9 14.505956 3.211571 107.288514 1.011628 \n",
|
||
|
"10 2262.859155 45.619718 11051.732394 1.464789 \n",
|
||
|
"\n",
|
||
|
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
|
" mean mean mean \n",
|
||
|
"category \n",
|
||
|
"0 0.000647 548.790455 548.773103 \n",
|
||
|
"1 0.018186 525.437516 525.275222 \n",
|
||
|
"2 0.323824 501.529129 501.415505 \n",
|
||
|
"3 0.600982 287.051054 286.675385 \n",
|
||
|
"4 0.079213 297.179255 295.019902 \n",
|
||
|
"5 0.484785 387.464785 380.145068 \n",
|
||
|
"6 0.599906 268.627019 250.949344 \n",
|
||
|
"7 0.160990 309.716173 274.795570 \n",
|
||
|
"8 0.334248 326.485952 257.940194 \n",
|
||
|
"9 0.157119 369.696066 209.280306 \n",
|
||
|
"10 0.154930 467.111875 31.146796 \n",
|
||
|
"\n",
|
||
|
" time_between_purchase nb_tickets_internet fidelity gender_female \\\n",
|
||
|
" mean mean mean mean \n",
|
||
|
"category \n",
|
||
|
"0 -0.977118 0.001585 0.000776 0.000000 \n",
|
||
|
"1 -0.729328 0.054312 0.111832 0.245480 \n",
|
||
|
"2 -0.554439 0.969939 0.304757 0.392570 \n",
|
||
|
"3 0.105360 1.776035 0.659878 0.288813 \n",
|
||
|
"4 1.898178 0.293760 0.894877 0.666980 \n",
|
||
|
"5 7.111357 2.080397 1.164958 0.497758 \n",
|
||
|
"6 17.539247 2.525994 1.420921 0.534607 \n",
|
||
|
"7 34.796876 0.844250 1.963028 0.650364 \n",
|
||
|
"8 68.425460 2.794279 2.413009 0.606583 \n",
|
||
|
"9 160.348544 3.514464 5.394498 0.669314 \n",
|
||
|
"10 435.950994 54.295775 64.704225 0.507042 \n",
|
||
|
"\n",
|
||
|
" gender_male gender_other nb_campaigns nb_campaigns_opened \n",
|
||
|
" mean mean mean mean \n",
|
||
|
"category \n",
|
||
|
"0 0.000032 0.999968 13.984219 1.302720 \n",
|
||
|
"1 0.495929 0.258591 18.413562 3.718711 \n",
|
||
|
"2 0.297258 0.310173 17.395042 2.608084 \n",
|
||
|
"3 0.253244 0.457943 16.790421 4.173954 \n",
|
||
|
"4 0.301424 0.031596 16.954707 6.060621 \n",
|
||
|
"5 0.259769 0.242473 27.006406 12.457719 \n",
|
||
|
"6 0.304259 0.161134 14.073285 4.604134 \n",
|
||
|
"7 0.263464 0.086172 26.186317 8.891703 \n",
|
||
|
"8 0.251567 0.141850 30.987461 11.676332 \n",
|
||
|
"9 0.223766 0.106920 45.928247 18.241634 \n",
|
||
|
"10 0.295775 0.197183 53.352113 26.070423 "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 73,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Grouper le DataFrame par la colonne 'category' et calculer la moyenne pour chaque groupe\n",
|
||
|
"summary_stats = dataset_for_segmentation.groupby('category')[numeric_features].describe()\n",
|
||
|
"\n",
|
||
|
"# Sélectionner uniquement la colonne 'mean' pour chaque variable numérique\n",
|
||
|
"mean_stats = summary_stats.loc[:, (slice(None), 'mean')]\n",
|
||
|
"\n",
|
||
|
"# Afficher le DataFrame résultant\n",
|
||
|
"mean_stats"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 75,
|
||
|
"id": "14da601e-7b1b-469c-bab1-de8fad4047f2",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtUAAAIiCAYAAAAHJDTKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUOklEQVR4nO3de1iUdf7/8dcIw1GcBAJEkbSQNNRMV0I3DykewTUr3SjU1kOtlZm6bebuCpvppmm2Wua2luYhWyv9lhlBZabhWbE8ZCcT3UA8ICoajHD//nCZnyN4gJuDOM/HdXHVfO733Pf7ns/c+vKee24shmEYAgAAAFBhdWq6AQAAAKC2I1QDAAAAJhGqAQAAAJMI1QAAAIBJhGoAAADAJEI1AAAAYBKhGgAAADCJUA0AAACYRKgGAAAATCJUAwCuK5988omsVqtWrlxZ060AcCGEauAKFixYIIvFoq1bt5a5PC4uTjfddJPT2E033aShQ4eWazvp6elKSkrSiRMnKtYoKsXPP/8si8WiBQsW1HQrVaJk/1588cVKW+cXX3whi8Wid99994q1SUlJslgsTmNdunRRly5dnMYsFouSkpIcj/fs2aOkpCT9/PPPl13/oUOH9NBDD+nll19W//79r3IPri8Xv3bVYejQoaX+HKxNdu/erVGjRikmJka+vr6yWCz64osvarot1DKEaqAKrFixQn/961/L9Zz09HQlJycTqnFdGz58uDZs2HDFug0bNmj48OGOx3v27FFycvJlQ/W5c+c0aNAgjRw5UqNGjaqMduEitm7dqpUrV8rf31/dunWr6XZQS7nXdAPA9ahNmzY13UK52e12WSwWubvzx0Jtc+bMGfn4+NR0G1elUaNGatSo0RXr7rzzznKv293dXV999VVF2qpVOFYrX2JiooYMGSJJevfdd/Xhhx/WcEeojThTDVSBiy//KC4u1uTJkxUZGSlvb2/dcMMNatWqlV5++WVJ5z8S/9Of/iRJatKkiSwWi9PHj8XFxZo2bZpuvfVWeXp6KigoSIMHD9ahQ4ectmsYhqZMmaLw8HB5eXmpXbt2SktLK/XxesnH9YsWLdK4cePUsGFDeXp66ocfftCRI0c0atQotWjRQnXr1lVQUJDuvvturVu3zmlbJZcRTJ8+XS+88IJuuukmeXt7q0uXLvruu+9kt9v1zDPPKDQ0VDabTffcc49ycnJKvU5xcXFatWqV2rRpI29vbzVv3lyrVq2SdP7Sm+bNm8vX11ft27cv8xKcrVu3ql+/fvL395eXl5fatGmj//znP1c1T7/88osGDhwoPz8/2Ww2DRo0SNnZ2WXWXs12zpw5o/Hjx6tJkyby8vKSv7+/2rVrp7fffvuyfZRcYpSWlqaHH35Y/v7+8vX1VXx8vH766Sen2i5duigqKkpffvmlOnToIB8fH/3hD3+QJGVmZuqhhx5SUFCQPD091bx5c82YMUPFxcWltllcXKznn39ejRs3drxXPvvsM6eaH374QQ8//LAiIiLk4+Ojhg0bKj4+Xt98802Z+/Hrr79q7NixCgkJkbe3tzp37qwdO3Y41ZR1+UdZLryEYcGCBbr//vslSV27dnUcHxdeovPpp5+qW7duqlevnnx8fNSxY8dS+3PkyBGNHDlSYWFh8vT01I033qiOHTvq008/vWwvJT3v2LFDAwYMUL169WSz2fTQQw/pyJEjTrVXe6xe6hKx8hyrV+tqj+nLWbp0qWJiYlS3bl3VrVtXt99+u+bPn3/Z57zyyivq1KmTgoKC5Ovrq5YtW2ratGmy2+1OdTt27FBcXJzjfRsaGqq+ffs6vWbLly9XdHS0bDabfHx81LRpU8f7vsTJkycdx5+Hh4caNmyoMWPGKD8//4r7V6cOcQjm8c9c4CoVFRXp3LlzpcYNw7jic6dNm6akpCT95S9/UadOnWS32/Xtt986LvUYPny4jh8/rtmzZ+v9999XgwYNJEktWrSQJP
3xj3/Uv/71Lz3++OOKi4vTzz//rL/+9a/64osvtH37dgUGBkqSJk6cqKlTp2rkyJEaMGCADh48qOHDh8tut6tZs2al+powYYJiYmL02muvqU6dOgoKCnKEhEmTJikkJESnT5/WihUr1KVLF3322Welrn195ZVX1KpVK73yyis6ceKExo0bp/j4eEVHR8tqteqNN97QgQMHNH78eA0fPlwffPCB0/N37typCRMmaOLEibLZbEpOTtaAAQM0YcIEffbZZ5oyZYosFov+/Oc/Ky4uTvv375e3t7ckac2aNerVq5eio6P12muvyWazadmyZRo0aJDOnDlz2evaz549q+7du+uXX37R1KlT1axZM3300UcaNGhQqdqr3c7YsWO1aNEiTZ48WW3atFF+fr527dqlY8eOXf4N8j/Dhg1TbGysli5dqoMHD+ovf/mLunTpoq+//lo33HCDoy4rK0sPPfSQnn76aU2ZMkV16tTRkSNH1KFDBxUWFuq5557TTTfdpFWrVmn8+PH68ccf9eqrrzpta86cOQoPD9esWbMcQbB3795au3atYmJiJJ3/R0dAQID+8Y9/6MYbb9Tx48e1cOFCRUdHa8eOHYqMjHRa57PPPqs77rhD//73v5WXl6ekpCR16dJFO3bsUNOmTa/qNShL3759NWXKFD377LN65ZVXdMcdd0iSbr75ZknS4sWLNXjwYP3ud7/TwoULZbVaNW/ePPXs2VOffPKJ4+P8xMREbd++Xc8//7yaNWumEydOaPv27Vc9P/fcc48GDhyoRx99VLt379Zf//pX7dmzR5s2bZLVapV09cdqeZV1rF6t48ePS7r6Y/pif/vb3/Tcc89pwIABGjdunGw2m3bt2qUDBw5c9nk//vijEhISHCF3586dev755/Xtt9/qjTfekCTl5+crNjZWTZo00SuvvKLg4GBlZ2drzZo1OnXqlKTzlwINGjRIgwYNUlJSkry8vHTgwAF9/vnnjm2dOXNGnTt31qFDh/Tss8+qVatW2r17t/72t7/pm2++0aeffnpV/5gDTDEAXNabb75pSLrsT3h4uNNzwsPDjSFDhjgex8XFGbfffvtltzN9+nRDkrF//36n8b179xqSjFGjRjmNb9q0yZBkPPvss4ZhGMbx48cNT09PY9CgQU51GzZsMCQZnTt3doytWbPGkGR06tTpivt/7tw5w263G926dTPuuecex/j+/fsNSUbr1q2NoqIix/isWbMMSUa/fv2c1jNmzBhDkpGXl+cYCw8PN7y9vY1Dhw45xjIyMgxJRoMGDYz8/HzH+MqVKw1JxgcffOAYu/XWW402bdoYdrvdaVtxcXFGgwYNnPq62Ny5cw1Jxv/93/85jY8YMcKQZLz55pvl3k5UVJTRv3//S27zUkreYxe+voZhGF999ZUhyZg8ebJjrHPnzoYk47PPPnOqfeaZZwxJxqZNm5zG//jHPxoWi8XYt2+fYRj/f95CQ0ONs2fPOupOnjxp+Pv7G927d79kn+fOnTMKCwuNiIgI46mnnnKMl7yf7rjjDqO4uNgx/vPPPxtWq9UYPny4Y2zSpEnGxX/1dO7c2en9aRiGIcmYNGmS4/Hy5csNScaaNWuc6vLz8w1/f38jPj7eabyoqMho3bq10b59e8dY3bp1jTFjxlxy/y6lpOcL99kwDGPJkiWGJGPx4sWGYVz9sWoYpf+MKHHxa1GeY7XExa/dxS51TJflp59+Mtzc3IwHH3zwsnVDhgwp9efghYqKigy73W689dZbhpubm3H8+HHDMAxj69athiRj5cqVl3zuiy++aEgyTpw4ccmaqVOnGnXq1DG2bNniNP7uu+8akozVq1dftv8LXeq9BlwJn3cAV+mtt97Sli1bSv389re/veJz27dvr507d2rUqFH65JNPdPLkyave7po1aySp1FnX9u3bq3nz5o6PuDdu3KiCggINHDjQqe7OO++85Lfy77333jLHX3vtNd1xxx3y8vKSu7u7rF
arPvvsM+3du7dUbZ8+fZw+Om3evLmk82cXL1QynpmZ6TR+++23q2HDhqXqunTp4nSdcMl4ydmxH374Qd9++60efPB
|
||
|
"text/plain": [
|
||
|
"<Figure size 800x600 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Plot histogram\n",
|
||
|
"plt.figure(figsize=(8, 6))\n",
|
||
|
"plt.hist(y_predict_proba, bins=10, range=(0, 1), color='blue', alpha=0.7)\n",
|
||
|
"\n",
|
||
|
"# Réglage des limites des axes x et y\n",
|
||
|
"plt.xlim(0, 1)\n",
|
||
|
"plt.ylim(0, None) # Laissez le maximum sur l'axe y pour s'ajuster automatiquement\n",
|
||
|
"\n",
|
||
|
"plt.title('Histogramme des probabilités pour la classe 1')\n",
|
||
|
"plt.xlabel('Probabilité')\n",
|
||
|
"plt.ylabel('Fréquence')\n",
|
||
|
"plt.grid(True)\n",
|
||
|
"plt.show()\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.11.6"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|