3271 lines
115 KiB
Plaintext
3271 lines
115 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "56949d8f-4eaf-4685-9989-ba0b4b1945b7",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Baseline logit on spectacle companies with statmodels"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "72480e84-2ccc-481a-9353-1199e4358d62",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import os\n",
|
||
"import s3fs\n",
|
||
"import re\n",
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||
"from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n",
|
||
"from sklearn.utils import class_weight\n",
|
||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||
"from sklearn.pipeline import Pipeline\n",
|
||
"from sklearn.compose import ColumnTransformer\n",
|
||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||
"from sklearn.impute import SimpleImputer\n",
|
||
"from sklearn.model_selection import GridSearchCV\n",
|
||
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n",
|
||
"from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n",
|
||
"import seaborn as sns\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n",
|
||
"from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n",
|
||
"\n",
|
||
"import statsmodels.api as sm\n",
|
||
"\n",
|
||
"import pickle\n",
|
||
"import warnings"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "7090dc21-7889-4776-a0a4-f7c6a5416d53",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Create filesystem object\n",
|
||
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
|
||
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "2f0d08c9-5b26-4eff-9c89-4a46f427dbf7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def load_train_test():\n",
|
||
" BUCKET = \"projet-bdc2324-team1/Generalization/musique\"\n",
|
||
" File_path_train = BUCKET + \"/Train_set.csv\"\n",
|
||
" File_path_test = BUCKET + \"/Test_set.csv\"\n",
|
||
" \n",
|
||
" with fs.open( File_path_train, mode=\"rb\") as file_in:\n",
|
||
" dataset_train = pd.read_csv(file_in, sep=\",\")\n",
|
||
" # dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n",
|
||
"\n",
|
||
" with fs.open(File_path_test, mode=\"rb\") as file_in:\n",
|
||
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
|
||
" # dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n",
|
||
" \n",
|
||
" return dataset_train, dataset_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "438d0138-a254-464c-9e94-f7436576c1d5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def features_target_split(dataset_train, dataset_test):\n",
|
||
" features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
|
||
" 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n",
|
||
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||
" X_train = dataset_train[features_l]\n",
|
||
" y_train = dataset_train[['y_has_purchased']]\n",
|
||
"\n",
|
||
" X_test = dataset_test[features_l]\n",
|
||
" y_test = dataset_test[['y_has_purchased']]\n",
|
||
" return X_train, X_test, y_train, y_test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "ebe9a887-61a4-4a5e-ac64-231307dd7647",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_570/3642896088.py:7: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" dataset_train = pd.read_csv(file_in, sep=\",\")\n",
|
||
"/tmp/ipykernel_570/3642896088.py:11: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
||
" dataset_test = pd.read_csv(file_in, sep=\",\")\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"dataset_train, dataset_test = load_train_test()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "b21fdea2-02c4-4222-b4e0-635e423f91c2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"customer_id 0\n",
|
||
"nb_tickets 0\n",
|
||
"nb_purchases 0\n",
|
||
"total_amount 0\n",
|
||
"nb_suppliers 0\n",
|
||
"vente_internet_max 0\n",
|
||
"purchase_date_min 0\n",
|
||
"purchase_date_max 0\n",
|
||
"time_between_purchase 0\n",
|
||
"nb_tickets_internet 0\n",
|
||
"street_id 0\n",
|
||
"structure_id 327067\n",
|
||
"mcp_contact_id 135224\n",
|
||
"fidelity 0\n",
|
||
"tenant_id 0\n",
|
||
"is_partner 0\n",
|
||
"deleted_at 354365\n",
|
||
"gender 0\n",
|
||
"is_email_true 0\n",
|
||
"opt_in 0\n",
|
||
"last_buying_date 119201\n",
|
||
"max_price 119201\n",
|
||
"ticket_sum 0\n",
|
||
"average_price 115193\n",
|
||
"average_purchase_delay 119203\n",
|
||
"average_price_basket 119203\n",
|
||
"average_ticket_basket 119203\n",
|
||
"total_price 4008\n",
|
||
"purchase_count 0\n",
|
||
"first_buying_date 119201\n",
|
||
"country 56856\n",
|
||
"gender_label 0\n",
|
||
"gender_female 0\n",
|
||
"gender_male 0\n",
|
||
"gender_other 0\n",
|
||
"country_fr 56856\n",
|
||
"nb_campaigns 0\n",
|
||
"nb_campaigns_opened 0\n",
|
||
"time_to_open 224310\n",
|
||
"y_has_purchased 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dataset_train.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "42c4d034-8bc1-4ebb-a1ff-60c0a86f8f7c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "94b4498d-6ae8-4c96-adbc-7ba1b8348160",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Shape train : (354365, 17)\n",
|
||
"Shape test : (151874, 17)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(\"Shape train : \", X_train.shape)\n",
|
||
"print(\"Shape test : \", X_test.shape)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "6224fd31-c190-4168-b395-e0bf5806d79d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{0.0: 0.5481283836040216, 1.0: 5.694439980716696}"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Compute Weights\n",
|
||
"weights = class_weight.compute_class_weight(class_weight = 'balanced', classes = np.unique(y_train['y_has_purchased']),\n",
|
||
" y = y_train['y_has_purchased'])\n",
|
||
"\n",
|
||
"weight_dict = {np.unique(y_train['y_has_purchased'])[i]: weights[i] for i in range(len(np.unique(y_train['y_has_purchased'])))}\n",
|
||
"weight_dict"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"id": "4680f202-979e-483f-89b8-9df877203bcf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([0.54812838, 0.54812838, 0.54812838, ..., 5.69443998, 0.54812838,\n",
|
||
" 0.54812838])"
|
||
]
|
||
},
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Calcul des poids inverses à la fréquence des classes\n",
|
||
"class_counts = np.bincount(y_train['y_has_purchased'])\n",
|
||
"class_weights = len(y_train['y_has_purchased']) / (2 * class_counts)\n",
|
||
"\n",
|
||
"# Sélection des poids correspondants à chaque observation\n",
|
||
"weights = class_weights[y_train['y_has_purchased'].values.astype(int)]\n",
|
||
"weights"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"id": "5f747be4-e70b-491c-8f0a-46cb278a2dee",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[354365. 354365. 354365. ... 354365. 354365. 354365.]\n",
|
||
"354365\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(2 * weights * class_counts[y_train['y_has_purchased'].values.astype(int)])\n",
|
||
"print(len(y_train['y_has_purchased']))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 124,
|
||
"id": "648fb542-0186-493d-b274-be2c26a11967",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# model logit\n",
|
||
"X = X_train.astype(int)\n",
|
||
"# X = sm.add_constant(X.drop(\"gender_other\", axis=1))\n",
|
||
"y = y_train['y_has_purchased'].values\n",
|
||
"\n",
|
||
"# print(X,y)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 125,
|
||
"id": "978b9ebc-aa97-41d7-a48f-d1f79c1ed482",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>13</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>9</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354360</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354361</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354362</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354363</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>55</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354364</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>354365 rows × 17 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 0 0 0 0 \n",
|
||
"1 0 0 0 0 \n",
|
||
"2 0 0 0 0 \n",
|
||
"3 0 0 0 0 \n",
|
||
"4 0 0 0 0 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 0 0 0 0 \n",
|
||
"354361 0 0 0 0 \n",
|
||
"354362 2 2 50 1 \n",
|
||
"354363 1 1 55 1 \n",
|
||
"354364 0 0 0 0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0 550 550 \n",
|
||
"1 0 550 550 \n",
|
||
"2 0 550 550 \n",
|
||
"3 0 550 550 \n",
|
||
"4 0 550 550 \n",
|
||
"... ... ... ... \n",
|
||
"354360 0 550 550 \n",
|
||
"354361 0 550 550 \n",
|
||
"354362 0 91 91 \n",
|
||
"354363 0 52 52 \n",
|
||
"354364 0 550 550 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet fidelity is_email_true \\\n",
|
||
"0 -1 0 1 1 \n",
|
||
"1 -1 0 0 1 \n",
|
||
"2 -1 0 1 1 \n",
|
||
"3 -1 0 0 1 \n",
|
||
"4 -1 0 0 1 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 -1 0 0 1 \n",
|
||
"354361 -1 0 0 1 \n",
|
||
"354362 0 0 4 1 \n",
|
||
"354363 0 0 1 1 \n",
|
||
"354364 -1 0 0 1 \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male gender_other nb_campaigns \\\n",
|
||
"0 1 1 0 0 13 \n",
|
||
"1 1 0 0 1 10 \n",
|
||
"2 1 0 1 0 14 \n",
|
||
"3 0 0 0 1 9 \n",
|
||
"4 0 0 0 1 4 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"354360 0 0 0 1 7 \n",
|
||
"354361 1 0 1 0 11 \n",
|
||
"354362 0 1 0 0 6 \n",
|
||
"354363 1 0 1 0 3 \n",
|
||
"354364 0 0 1 0 7 \n",
|
||
"\n",
|
||
" nb_campaigns_opened \n",
|
||
"0 4 \n",
|
||
"1 9 \n",
|
||
"2 0 \n",
|
||
"3 0 \n",
|
||
"4 0 \n",
|
||
"... ... \n",
|
||
"354360 0 \n",
|
||
"354361 2 \n",
|
||
"354362 6 \n",
|
||
"354363 0 \n",
|
||
"354364 0 \n",
|
||
"\n",
|
||
"[354365 rows x 17 columns]"
|
||
]
|
||
},
|
||
"execution_count": 125,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 138,
|
||
"id": "81b38ceb-5005-417d-a9a6-b2dac181a8fb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>354365.000000</td>\n",
|
||
" <td>354365.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>406.981861</td>\n",
|
||
" <td>396.551502</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>189.343612</td>\n",
|
||
" <td>195.881681</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>0.009640</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>188.475293</td>\n",
|
||
" <td>153.457966</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" purchase_date_min purchase_date_max\n",
|
||
"count 354365.000000 354365.000000\n",
|
||
"mean 406.981861 396.551502\n",
|
||
"std 189.343612 195.881681\n",
|
||
"min 0.009640 0.000000\n",
|
||
"25% 188.475293 153.457966\n",
|
||
"50% 550.000000 550.000000\n",
|
||
"75% 550.000000 550.000000\n",
|
||
"max 550.000000 550.000000"
|
||
]
|
||
},
|
||
"execution_count": 138,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train[[\"purchase_date_min\", \"purchase_date_max\"]].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 143,
|
||
"id": "60effd66-2914-4cf9-aa0c-4e2f9dd13895",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"count 354365.000000\n",
|
||
"mean 10.430360\n",
|
||
"std 56.442718\n",
|
||
"min 0.000000\n",
|
||
"25% 0.000000\n",
|
||
"50% 0.000000\n",
|
||
"75% 0.000000\n",
|
||
"max 547.443350\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 143,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"(X_train[\"purchase_date_min\"] - X_train[\"purchase_date_max\"]).describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 145,
|
||
"id": "7a99e480-9e11-448d-806e-3b71925a19db",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354358</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354359</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354360</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354361</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354364</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>550.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>179675 rows × 17 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 0.0 0.0 0.0 0.0 \n",
|
||
"1 0.0 0.0 0.0 0.0 \n",
|
||
"2 0.0 0.0 0.0 0.0 \n",
|
||
"3 0.0 0.0 0.0 0.0 \n",
|
||
"4 0.0 0.0 0.0 0.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"354358 0.0 0.0 0.0 0.0 \n",
|
||
"354359 0.0 0.0 0.0 0.0 \n",
|
||
"354360 0.0 0.0 0.0 0.0 \n",
|
||
"354361 0.0 0.0 0.0 0.0 \n",
|
||
"354364 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 550.0 550.0 \n",
|
||
"1 0.0 550.0 550.0 \n",
|
||
"2 0.0 550.0 550.0 \n",
|
||
"3 0.0 550.0 550.0 \n",
|
||
"4 0.0 550.0 550.0 \n",
|
||
"... ... ... ... \n",
|
||
"354358 0.0 550.0 550.0 \n",
|
||
"354359 0.0 550.0 550.0 \n",
|
||
"354360 0.0 550.0 550.0 \n",
|
||
"354361 0.0 550.0 550.0 \n",
|
||
"354364 0.0 550.0 550.0 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet fidelity is_email_true \\\n",
|
||
"0 -1.0 0.0 1 True \n",
|
||
"1 -1.0 0.0 0 True \n",
|
||
"2 -1.0 0.0 1 True \n",
|
||
"3 -1.0 0.0 0 True \n",
|
||
"4 -1.0 0.0 0 True \n",
|
||
"... ... ... ... ... \n",
|
||
"354358 -1.0 0.0 0 True \n",
|
||
"354359 -1.0 0.0 0 True \n",
|
||
"354360 -1.0 0.0 0 True \n",
|
||
"354361 -1.0 0.0 0 True \n",
|
||
"354364 -1.0 0.0 0 True \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male gender_other nb_campaigns \\\n",
|
||
"0 True 1 0 0 13.0 \n",
|
||
"1 True 0 0 1 10.0 \n",
|
||
"2 True 0 1 0 14.0 \n",
|
||
"3 False 0 0 1 9.0 \n",
|
||
"4 False 0 0 1 4.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"354358 False 1 0 0 1.0 \n",
|
||
"354359 True 0 1 0 12.0 \n",
|
||
"354360 False 0 0 1 7.0 \n",
|
||
"354361 True 0 1 0 11.0 \n",
|
||
"354364 False 0 1 0 7.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened \n",
|
||
"0 4.0 \n",
|
||
"1 9.0 \n",
|
||
"2 0.0 \n",
|
||
"3 0.0 \n",
|
||
"4 0.0 \n",
|
||
"... ... \n",
|
||
"354358 0.0 \n",
|
||
"354359 2.0 \n",
|
||
"354360 0.0 \n",
|
||
"354361 2.0 \n",
|
||
"354364 0.0 \n",
|
||
"\n",
|
||
"[179675 rows x 17 columns]"
|
||
]
|
||
},
|
||
"execution_count": 145,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train[X_train[\"time_between_purchase\"]==-1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 126,
|
||
"id": "2475f2fe-3d1f-4845-9ede-0416dac83271",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Colonnes à standardiser\n",
|
||
"\n",
|
||
"\"\"\"\n",
|
||
"var_num = ['nb_tickets', 'nb_purchases', \"total_amount\", \"nb_suppliers\", \"vente_internet_max\",\n",
|
||
" \"purchase_date_min\", \"purchase_date_max\", \"time_between_purchase\", \"nb_tickets_internet\",\n",
|
||
" \"fidelity\", \"nb_campaigns\", \"nb_campaigns_opened\"]\n",
|
||
" \"\"\"\n",
|
||
"\n",
|
||
"numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
|
||
" 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n",
|
||
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||
"\n",
|
||
"# Standardisation des colonnes sélectionnées\n",
|
||
"scaler = StandardScaler()\n",
|
||
"X[var_num] = scaler.fit_transform(X[var_num])\n",
|
||
"X[numeric_features] = scaler.fit_transform(X[numeric_features])\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 128,
|
||
"id": "1763bad4-36b5-4ebb-9702-b77ba19fb30e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.004316</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>1.151186</td>\n",
|
||
" <td>1.071079</td>\n",
|
||
" <td>-0.775306</td>\n",
|
||
" <td>-0.434568</td>\n",
|
||
" <td>0.607945</td>\n",
|
||
" <td>0.522567</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>1.151186</td>\n",
|
||
" <td>-0.933638</td>\n",
|
||
" <td>-0.775306</td>\n",
|
||
" <td>2.301137</td>\n",
|
||
" <td>0.306155</td>\n",
|
||
" <td>1.701843</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.004316</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>1.151186</td>\n",
|
||
" <td>-0.933638</td>\n",
|
||
" <td>1.289813</td>\n",
|
||
" <td>-0.434568</td>\n",
|
||
" <td>0.708542</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>-0.868669</td>\n",
|
||
" <td>-0.933638</td>\n",
|
||
" <td>-0.775306</td>\n",
|
||
" <td>2.301137</td>\n",
|
||
" <td>0.205558</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>-0.868669</td>\n",
|
||
" <td>-0.933638</td>\n",
|
||
" <td>-0.775306</td>\n",
|
||
" <td>2.301137</td>\n",
|
||
" <td>-0.297426</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354360</th>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>-0.868669</td>\n",
|
||
" <td>-0.933638</td>\n",
|
||
" <td>-0.775306</td>\n",
|
||
" <td>2.301137</td>\n",
|
||
" <td>0.004365</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354361</th>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>1.151186</td>\n",
|
||
" <td>-0.933638</td>\n",
|
||
" <td>1.289813</td>\n",
|
||
" <td>-0.434568</td>\n",
|
||
" <td>0.406752</td>\n",
|
||
" <td>0.050856</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354362</th>\n",
|
||
" <td>-0.000838</td>\n",
|
||
" <td>0.092966</td>\n",
|
||
" <td>-0.009150</td>\n",
|
||
" <td>1.219633</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>-1.665887</td>\n",
|
||
" <td>-1.557073</td>\n",
|
||
" <td>-0.175269</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>0.069949</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>-0.868669</td>\n",
|
||
" <td>1.071079</td>\n",
|
||
" <td>-0.775306</td>\n",
|
||
" <td>-0.434568</td>\n",
|
||
" <td>-0.096232</td>\n",
|
||
" <td>0.994277</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354363</th>\n",
|
||
" <td>-0.012631</td>\n",
|
||
" <td>0.021122</td>\n",
|
||
" <td>-0.005227</td>\n",
|
||
" <td>1.219633</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>-1.871668</td>\n",
|
||
" <td>-1.755983</td>\n",
|
||
" <td>-0.175269</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.004316</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>1.151186</td>\n",
|
||
" <td>-0.933638</td>\n",
|
||
" <td>1.289813</td>\n",
|
||
" <td>-0.434568</td>\n",
|
||
" <td>-0.398023</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354364</th>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>0.058193</td>\n",
|
||
" <td>-0.868669</td>\n",
|
||
" <td>-0.933638</td>\n",
|
||
" <td>1.289813</td>\n",
|
||
" <td>-0.434568</td>\n",
|
||
" <td>0.004365</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>354365 rows × 17 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"1 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"2 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"3 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"4 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"354361 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"354362 -0.000838 0.092966 -0.009150 1.219633 \n",
|
||
"354363 -0.012631 0.021122 -0.005227 1.219633 \n",
|
||
"354364 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 -0.599511 0.755994 0.783940 \n",
|
||
"1 -0.599511 0.755994 0.783940 \n",
|
||
"2 -0.599511 0.755994 0.783940 \n",
|
||
"3 -0.599511 0.755994 0.783940 \n",
|
||
"4 -0.599511 0.755994 0.783940 \n",
|
||
"... ... ... ... \n",
|
||
"354360 -0.599511 0.755994 0.783940 \n",
|
||
"354361 -0.599511 0.755994 0.783940 \n",
|
||
"354362 -0.599511 -1.665887 -1.557073 \n",
|
||
"354363 -0.599511 -1.871668 -1.755983 \n",
|
||
"354364 -0.599511 0.755994 0.783940 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet fidelity is_email_true \\\n",
|
||
"0 -0.192978 -0.264693 -0.004316 0.058193 \n",
|
||
"1 -0.192978 -0.264693 -0.029071 0.058193 \n",
|
||
"2 -0.192978 -0.264693 -0.004316 0.058193 \n",
|
||
"3 -0.192978 -0.264693 -0.029071 0.058193 \n",
|
||
"4 -0.192978 -0.264693 -0.029071 0.058193 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 -0.192978 -0.264693 -0.029071 0.058193 \n",
|
||
"354361 -0.192978 -0.264693 -0.029071 0.058193 \n",
|
||
"354362 -0.175269 -0.264693 0.069949 0.058193 \n",
|
||
"354363 -0.175269 -0.264693 -0.004316 0.058193 \n",
|
||
"354364 -0.192978 -0.264693 -0.029071 0.058193 \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male gender_other nb_campaigns \\\n",
|
||
"0 1.151186 1.071079 -0.775306 -0.434568 0.607945 \n",
|
||
"1 1.151186 -0.933638 -0.775306 2.301137 0.306155 \n",
|
||
"2 1.151186 -0.933638 1.289813 -0.434568 0.708542 \n",
|
||
"3 -0.868669 -0.933638 -0.775306 2.301137 0.205558 \n",
|
||
"4 -0.868669 -0.933638 -0.775306 2.301137 -0.297426 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"354360 -0.868669 -0.933638 -0.775306 2.301137 0.004365 \n",
|
||
"354361 1.151186 -0.933638 1.289813 -0.434568 0.406752 \n",
|
||
"354362 -0.868669 1.071079 -0.775306 -0.434568 -0.096232 \n",
|
||
"354363 1.151186 -0.933638 1.289813 -0.434568 -0.398023 \n",
|
||
"354364 -0.868669 -0.933638 1.289813 -0.434568 0.004365 \n",
|
||
"\n",
|
||
" nb_campaigns_opened \n",
|
||
"0 0.522567 \n",
|
||
"1 1.701843 \n",
|
||
"2 -0.420854 \n",
|
||
"3 -0.420854 \n",
|
||
"4 -0.420854 \n",
|
||
"... ... \n",
|
||
"354360 -0.420854 \n",
|
||
"354361 0.050856 \n",
|
||
"354362 0.994277 \n",
|
||
"354363 -0.420854 \n",
|
||
"354364 -0.420854 \n",
|
||
"\n",
|
||
"[354365 rows x 17 columns]"
|
||
]
|
||
},
|
||
"execution_count": 128,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 122,
|
||
"id": "23d6c06c-8708-4714-906b-a1ed664377bb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>const</th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.004316</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.607945</td>\n",
|
||
" <td>0.522567</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.306155</td>\n",
|
||
" <td>1.701843</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.004316</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.708542</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.205558</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>-0.297426</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354360</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.004365</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354361</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.406752</td>\n",
|
||
" <td>0.050856</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354362</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.000838</td>\n",
|
||
" <td>0.092966</td>\n",
|
||
" <td>-0.009150</td>\n",
|
||
" <td>1.219633</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>-1.665887</td>\n",
|
||
" <td>-1.557073</td>\n",
|
||
" <td>-0.175269</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>0.069949</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>-0.096232</td>\n",
|
||
" <td>0.994277</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354363</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.012631</td>\n",
|
||
" <td>0.021122</td>\n",
|
||
" <td>-0.005227</td>\n",
|
||
" <td>1.219633</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>-1.871668</td>\n",
|
||
" <td>-1.755983</td>\n",
|
||
" <td>-0.175269</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.004316</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>-0.398023</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354364</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.024425</td>\n",
|
||
" <td>-0.050722</td>\n",
|
||
" <td>-0.048383</td>\n",
|
||
" <td>-0.768294</td>\n",
|
||
" <td>-0.599511</td>\n",
|
||
" <td>0.755994</td>\n",
|
||
" <td>0.783940</td>\n",
|
||
" <td>-0.192978</td>\n",
|
||
" <td>-0.264693</td>\n",
|
||
" <td>-0.029071</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.004365</td>\n",
|
||
" <td>-0.420854</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>354365 rows × 17 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" const nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"1 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"2 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"3 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"4 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"354360 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"354361 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"354362 1.0 -0.000838 0.092966 -0.009150 1.219633 \n",
|
||
"354363 1.0 -0.012631 0.021122 -0.005227 1.219633 \n",
|
||
"354364 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 -0.599511 0.755994 0.783940 \n",
|
||
"1 -0.599511 0.755994 0.783940 \n",
|
||
"2 -0.599511 0.755994 0.783940 \n",
|
||
"3 -0.599511 0.755994 0.783940 \n",
|
||
"4 -0.599511 0.755994 0.783940 \n",
|
||
"... ... ... ... \n",
|
||
"354360 -0.599511 0.755994 0.783940 \n",
|
||
"354361 -0.599511 0.755994 0.783940 \n",
|
||
"354362 -0.599511 -1.665887 -1.557073 \n",
|
||
"354363 -0.599511 -1.871668 -1.755983 \n",
|
||
"354364 -0.599511 0.755994 0.783940 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet fidelity is_email_true \\\n",
|
||
"0 -0.192978 -0.264693 -0.004316 1 \n",
|
||
"1 -0.192978 -0.264693 -0.029071 1 \n",
|
||
"2 -0.192978 -0.264693 -0.004316 1 \n",
|
||
"3 -0.192978 -0.264693 -0.029071 1 \n",
|
||
"4 -0.192978 -0.264693 -0.029071 1 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 -0.192978 -0.264693 -0.029071 1 \n",
|
||
"354361 -0.192978 -0.264693 -0.029071 1 \n",
|
||
"354362 -0.175269 -0.264693 0.069949 1 \n",
|
||
"354363 -0.175269 -0.264693 -0.004316 1 \n",
|
||
"354364 -0.192978 -0.264693 -0.029071 1 \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male nb_campaigns nb_campaigns_opened \n",
|
||
"0 1 1 0 0.607945 0.522567 \n",
|
||
"1 1 0 0 0.306155 1.701843 \n",
|
||
"2 1 0 1 0.708542 -0.420854 \n",
|
||
"3 0 0 0 0.205558 -0.420854 \n",
|
||
"4 0 0 0 -0.297426 -0.420854 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"354360 0 0 0 0.004365 -0.420854 \n",
|
||
"354361 1 0 1 0.406752 0.050856 \n",
|
||
"354362 0 1 0 -0.096232 0.994277 \n",
|
||
"354363 1 0 1 -0.398023 -0.420854 \n",
|
||
"354364 0 0 1 0.004365 -0.420854 \n",
|
||
"\n",
|
||
"[354365 rows x 17 columns]"
|
||
]
|
||
},
|
||
"execution_count": 122,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 133,
|
||
"id": "0e968aa1-fbec-47db-b570-4730ef7eebf2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/opt/mamba/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:2385: RuntimeWarning: overflow encountered in exp\n",
|
||
" return 1/(1+np.exp(-X))\n",
|
||
"/opt/mamba/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:2443: RuntimeWarning: divide by zero encountered in log\n",
|
||
" return np.sum(np.log(self.cdf(q * linpred)))\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Warning: Maximum number of iterations has been exceeded.\n",
|
||
" Current function value: inf\n",
|
||
" Iterations: 35\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/opt/mamba/lib/python3.11/site-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
|
||
" warnings.warn(\"Maximum Likelihood optimization failed to \"\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" Logit Regression Results \n",
|
||
"==============================================================================\n",
|
||
"Dep. Variable: y No. Observations: 354365\n",
|
||
"Model: Logit Df Residuals: 354349\n",
|
||
"Method: MLE Df Model: 15\n",
|
||
"Date: Thu, 14 Mar 2024 Pseudo R-squ.: -inf\n",
|
||
"Time: 10:47:16 Log-Likelihood: -inf\n",
|
||
"converged: False LL-Null: -1.0540e+05\n",
|
||
"Covariance Type: nonrobust LLR p-value: 1.000\n",
|
||
"=========================================================================================\n",
|
||
" coef std err z P>|z| [0.025 0.975]\n",
|
||
"-----------------------------------------------------------------------------------------\n",
|
||
"nb_tickets 4.9213 0.267 18.448 0.000 4.398 5.444\n",
|
||
"nb_purchases -7.9446 0.140 -56.905 0.000 -8.218 -7.671\n",
|
||
"total_amount 0.3039 0.061 4.945 0.000 0.183 0.424\n",
|
||
"nb_suppliers 0.1067 0.008 13.678 0.000 0.091 0.122\n",
|
||
"vente_internet_max -0.2784 0.008 -34.612 0.000 -0.294 -0.263\n",
|
||
"purchase_date_min -41.9693 2.640 -15.895 0.000 -47.144 -36.794\n",
|
||
"purchase_date_max 43.2793 2.734 15.829 0.000 37.920 48.638\n",
|
||
"time_between_purchase 12.7237 0.789 16.132 0.000 11.178 14.270\n",
|
||
"nb_tickets_internet -0.0212 0.014 -1.510 0.131 -0.049 0.006\n",
|
||
"fidelity 22.0749 0.222 99.561 0.000 21.640 22.509\n",
|
||
"is_email_true 0.0225 0.004 6.145 0.000 0.015 0.030\n",
|
||
"opt_in -0.1245 0.004 -30.646 0.000 -0.133 -0.117\n",
|
||
"gender_female 0.0018 nan nan nan nan nan\n",
|
||
"gender_male 0.0118 nan nan nan nan nan\n",
|
||
"gender_other -0.0182 nan nan nan nan nan\n",
|
||
"nb_campaigns -0.0049 0.005 -0.961 0.336 -0.015 0.005\n",
|
||
"nb_campaigns_opened 0.0867 0.005 18.211 0.000 0.077 0.096\n",
|
||
"=========================================================================================\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Création du modèle de régression logistique avec poids équilibrés\n",
|
||
"# model_logit = sm.Logit(y, X, weights=weights)\n",
|
||
"model_logit = sm.Logit(y, X)\n",
|
||
"\n",
|
||
"# Ajustement du modèle aux données\n",
|
||
"result = model_logit.fit()\n",
|
||
"\n",
|
||
"# Affichage des résultats\n",
|
||
"print(result.summary())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 130,
|
||
"id": "d1660ef9-438f-4427-ac2d-aa8179614e40",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([ 1.07107945, -0.93363755])"
|
||
]
|
||
},
|
||
"execution_count": 130,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X[\"gender_female\"].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 131,
|
||
"id": "2079bae6-bce3-4de7-bf49-180177c31a55",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
|
||
" 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n",
|
||
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n",
|
||
"\n",
|
||
"numeric_transformer = Pipeline(steps=[\n",
|
||
" #(\"imputer\", SimpleImputer(strategy=\"mean\")), \n",
|
||
" (\"scaler\", StandardScaler()) \n",
|
||
"])\n",
|
||
"\n",
|
||
"categorical_features = ['opt_in'] \n",
|
||
"\n",
|
||
"# Transformer for the categorical features\n",
|
||
"categorical_transformer = Pipeline(steps=[\n",
|
||
" #(\"imputer\", SimpleImputer(strategy=\"most_frequent\")), # Impute missing values with the most frequent\n",
|
||
" (\"onehot\", OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n",
|
||
"])\n",
|
||
"\n",
|
||
"preproc = ColumnTransformer(\n",
|
||
" transformers=[\n",
|
||
" (\"num\", numeric_transformer, numeric_features),\n",
|
||
" (\"cat\", categorical_transformer, categorical_features)\n",
|
||
" ]\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 105,
|
||
"id": "a9fe1c60-0732-426f-b176-9c95718e546f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>const</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354360</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354361</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354362</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354363</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354364</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>354365 rows × 2 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" const gender_other\n",
|
||
"0 1.0 0\n",
|
||
"1 1.0 1\n",
|
||
"2 1.0 0\n",
|
||
"3 1.0 1\n",
|
||
"4 1.0 1\n",
|
||
"... ... ...\n",
|
||
"354360 1.0 1\n",
|
||
"354361 1.0 0\n",
|
||
"354362 1.0 0\n",
|
||
"354363 1.0 0\n",
|
||
"354364 1.0 0\n",
|
||
"\n",
|
||
"[354365 rows x 2 columns]"
|
||
]
|
||
},
|
||
"execution_count": 105,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sm.add_constant(X[\"gender_other\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 106,
|
||
"id": "b8c92b7c-1df0-4384-82e7-1e8cc0d333fa",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>13</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>9</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354360</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354361</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354362</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354363</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>55</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354364</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>354365 rows × 16 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 0 0 0 0 \n",
|
||
"1 0 0 0 0 \n",
|
||
"2 0 0 0 0 \n",
|
||
"3 0 0 0 0 \n",
|
||
"4 0 0 0 0 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 0 0 0 0 \n",
|
||
"354361 0 0 0 0 \n",
|
||
"354362 2 2 50 1 \n",
|
||
"354363 1 1 55 1 \n",
|
||
"354364 0 0 0 0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0 550 550 \n",
|
||
"1 0 550 550 \n",
|
||
"2 0 550 550 \n",
|
||
"3 0 550 550 \n",
|
||
"4 0 550 550 \n",
|
||
"... ... ... ... \n",
|
||
"354360 0 550 550 \n",
|
||
"354361 0 550 550 \n",
|
||
"354362 0 91 91 \n",
|
||
"354363 0 52 52 \n",
|
||
"354364 0 550 550 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet fidelity is_email_true \\\n",
|
||
"0 -1 0 1 1 \n",
|
||
"1 -1 0 0 1 \n",
|
||
"2 -1 0 1 1 \n",
|
||
"3 -1 0 0 1 \n",
|
||
"4 -1 0 0 1 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 -1 0 0 1 \n",
|
||
"354361 -1 0 0 1 \n",
|
||
"354362 0 0 4 1 \n",
|
||
"354363 0 0 1 1 \n",
|
||
"354364 -1 0 0 1 \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male nb_campaigns nb_campaigns_opened \n",
|
||
"0 1 1 0 13 4 \n",
|
||
"1 1 0 0 10 9 \n",
|
||
"2 1 0 1 14 0 \n",
|
||
"3 0 0 0 9 0 \n",
|
||
"4 0 0 0 4 0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"354360 0 0 0 7 0 \n",
|
||
"354361 1 0 1 11 2 \n",
|
||
"354362 0 1 0 6 6 \n",
|
||
"354363 1 0 1 3 0 \n",
|
||
"354364 0 0 1 7 0 \n",
|
||
"\n",
|
||
"[354365 rows x 16 columns]"
|
||
]
|
||
},
|
||
"execution_count": 106,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X.drop(\"gender_other\", axis=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "061dcabd-383d-4b76-a9f0-8647daed2c9e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 107,
|
||
"id": "fc4ffbf6-ab7e-47cf-a717-c25477d92493",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>13</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>9</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354360</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354361</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354362</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354363</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>55</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354364</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>550</td>\n",
|
||
" <td>-1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>354365 rows × 17 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 0 0 0 0 \n",
|
||
"1 0 0 0 0 \n",
|
||
"2 0 0 0 0 \n",
|
||
"3 0 0 0 0 \n",
|
||
"4 0 0 0 0 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 0 0 0 0 \n",
|
||
"354361 0 0 0 0 \n",
|
||
"354362 2 2 50 1 \n",
|
||
"354363 1 1 55 1 \n",
|
||
"354364 0 0 0 0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0 550 550 \n",
|
||
"1 0 550 550 \n",
|
||
"2 0 550 550 \n",
|
||
"3 0 550 550 \n",
|
||
"4 0 550 550 \n",
|
||
"... ... ... ... \n",
|
||
"354360 0 550 550 \n",
|
||
"354361 0 550 550 \n",
|
||
"354362 0 91 91 \n",
|
||
"354363 0 52 52 \n",
|
||
"354364 0 550 550 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet fidelity is_email_true \\\n",
|
||
"0 -1 0 1 1 \n",
|
||
"1 -1 0 0 1 \n",
|
||
"2 -1 0 1 1 \n",
|
||
"3 -1 0 0 1 \n",
|
||
"4 -1 0 0 1 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 -1 0 0 1 \n",
|
||
"354361 -1 0 0 1 \n",
|
||
"354362 0 0 4 1 \n",
|
||
"354363 0 0 1 1 \n",
|
||
"354364 -1 0 0 1 \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male gender_other nb_campaigns \\\n",
|
||
"0 1 1 0 0 13 \n",
|
||
"1 1 0 0 1 10 \n",
|
||
"2 1 0 1 0 14 \n",
|
||
"3 0 0 0 1 9 \n",
|
||
"4 0 0 0 1 4 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"354360 0 0 0 1 7 \n",
|
||
"354361 1 0 1 0 11 \n",
|
||
"354362 0 1 0 0 6 \n",
|
||
"354363 1 0 1 0 3 \n",
|
||
"354364 0 0 1 0 7 \n",
|
||
"\n",
|
||
" nb_campaigns_opened \n",
|
||
"0 4 \n",
|
||
"1 9 \n",
|
||
"2 0 \n",
|
||
"3 0 \n",
|
||
"4 0 \n",
|
||
"... ... \n",
|
||
"354360 0 \n",
|
||
"354361 2 \n",
|
||
"354362 6 \n",
|
||
"354363 0 \n",
|
||
"354364 0 \n",
|
||
"\n",
|
||
"[354365 rows x 17 columns]"
|
||
]
|
||
},
|
||
"execution_count": 107,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"id": "f15b0d69-8470-4a36-bd25-9536a36c4756",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(354365,)"
|
||
]
|
||
},
|
||
"execution_count": 73,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"weights.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 74,
|
||
"id": "e97e26f6-b854-41e3-bbdf-318065b03254",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(354365, 17)"
|
||
]
|
||
},
|
||
"execution_count": 74,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 75,
|
||
"id": "49621874-1e8c-4cb5-84a9-a5c9715f3b06",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(354365,)"
|
||
]
|
||
},
|
||
"execution_count": 75,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"id": "8072cd81-d63f-430e-b0b2-c0589cf18871",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"nb_tickets 0\n",
|
||
"nb_purchases 0\n",
|
||
"total_amount 0\n",
|
||
"nb_suppliers 0\n",
|
||
"vente_internet_max 0\n",
|
||
"purchase_date_min 0\n",
|
||
"purchase_date_max 0\n",
|
||
"time_between_purchase 0\n",
|
||
"nb_tickets_internet 0\n",
|
||
"fidelity 0\n",
|
||
"is_email_true 0\n",
|
||
"opt_in 0\n",
|
||
"gender_female 0\n",
|
||
"gender_male 0\n",
|
||
"gender_other 0\n",
|
||
"nb_campaigns 0\n",
|
||
"nb_campaigns_opened 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 76,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"id": "6f07a66f-5a46-4409-b0b6-ff5e212296f0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([0., 1.])"
|
||
]
|
||
},
|
||
"execution_count": 80,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_train[\"y_has_purchased\"].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 134,
|
||
"id": "4587c36f-94bf-458b-b819-60250eb17c59",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>nb_tickets</th>\n",
|
||
" <th>nb_purchases</th>\n",
|
||
" <th>total_amount</th>\n",
|
||
" <th>nb_suppliers</th>\n",
|
||
" <th>vente_internet_max</th>\n",
|
||
" <th>purchase_date_min</th>\n",
|
||
" <th>purchase_date_max</th>\n",
|
||
" <th>time_between_purchase</th>\n",
|
||
" <th>nb_tickets_internet</th>\n",
|
||
" <th>fidelity</th>\n",
|
||
" <th>is_email_true</th>\n",
|
||
" <th>opt_in</th>\n",
|
||
" <th>gender_female</th>\n",
|
||
" <th>gender_male</th>\n",
|
||
" <th>gender_other</th>\n",
|
||
" <th>nb_campaigns</th>\n",
|
||
" <th>nb_campaigns_opened</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>-1.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>-1.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>-1.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>-1.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>-1.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354360</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>-1.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354361</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>-1.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354362</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>91.030556</td>\n",
|
||
" <td>91.020139</td>\n",
|
||
" <td>0.010417</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354363</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>55.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>52.284028</td>\n",
|
||
" <td>52.284028</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>354364</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>550.000000</td>\n",
|
||
" <td>-1.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>354365 rows × 17 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||
"0 0.0 0.0 0.0 0.0 \n",
|
||
"1 0.0 0.0 0.0 0.0 \n",
|
||
"2 0.0 0.0 0.0 0.0 \n",
|
||
"3 0.0 0.0 0.0 0.0 \n",
|
||
"4 0.0 0.0 0.0 0.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 0.0 0.0 0.0 0.0 \n",
|
||
"354361 0.0 0.0 0.0 0.0 \n",
|
||
"354362 2.0 2.0 50.0 1.0 \n",
|
||
"354363 1.0 1.0 55.0 1.0 \n",
|
||
"354364 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||
"0 0.0 550.000000 550.000000 \n",
|
||
"1 0.0 550.000000 550.000000 \n",
|
||
"2 0.0 550.000000 550.000000 \n",
|
||
"3 0.0 550.000000 550.000000 \n",
|
||
"4 0.0 550.000000 550.000000 \n",
|
||
"... ... ... ... \n",
|
||
"354360 0.0 550.000000 550.000000 \n",
|
||
"354361 0.0 550.000000 550.000000 \n",
|
||
"354362 0.0 91.030556 91.020139 \n",
|
||
"354363 0.0 52.284028 52.284028 \n",
|
||
"354364 0.0 550.000000 550.000000 \n",
|
||
"\n",
|
||
" time_between_purchase nb_tickets_internet fidelity is_email_true \\\n",
|
||
"0 -1.000000 0.0 1 True \n",
|
||
"1 -1.000000 0.0 0 True \n",
|
||
"2 -1.000000 0.0 1 True \n",
|
||
"3 -1.000000 0.0 0 True \n",
|
||
"4 -1.000000 0.0 0 True \n",
|
||
"... ... ... ... ... \n",
|
||
"354360 -1.000000 0.0 0 True \n",
|
||
"354361 -1.000000 0.0 0 True \n",
|
||
"354362 0.010417 0.0 4 True \n",
|
||
"354363 0.000000 0.0 1 True \n",
|
||
"354364 -1.000000 0.0 0 True \n",
|
||
"\n",
|
||
" opt_in gender_female gender_male gender_other nb_campaigns \\\n",
|
||
"0 True 1 0 0 13.0 \n",
|
||
"1 True 0 0 1 10.0 \n",
|
||
"2 True 0 1 0 14.0 \n",
|
||
"3 False 0 0 1 9.0 \n",
|
||
"4 False 0 0 1 4.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"354360 False 0 0 1 7.0 \n",
|
||
"354361 True 0 1 0 11.0 \n",
|
||
"354362 False 1 0 0 6.0 \n",
|
||
"354363 True 0 1 0 3.0 \n",
|
||
"354364 False 0 1 0 7.0 \n",
|
||
"\n",
|
||
" nb_campaigns_opened \n",
|
||
"0 4.0 \n",
|
||
"1 9.0 \n",
|
||
"2 0.0 \n",
|
||
"3 0.0 \n",
|
||
"4 0.0 \n",
|
||
"... ... \n",
|
||
"354360 0.0 \n",
|
||
"354361 2.0 \n",
|
||
"354362 6.0 \n",
|
||
"354363 0.0 \n",
|
||
"354364 0.0 \n",
|
||
"\n",
|
||
"[354365 rows x 17 columns]"
|
||
]
|
||
},
|
||
"execution_count": 134,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_train"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|