diff --git a/Spectacle/2_bis_logit_baseline_statsmodels.ipynb b/Spectacle/2_bis_logit_baseline_statsmodels.ipynb new file mode 100644 index 0000000..515f8cb --- /dev/null +++ b/Spectacle/2_bis_logit_baseline_statsmodels.ipynb @@ -0,0 +1,3270 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "56949d8f-4eaf-4685-9989-ba0b4b1945b7", + "metadata": {}, + "source": [ + "# Baseline logit on spectacle companies with statsmodels" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "72480e84-2ccc-481a-9353-1199e4358d62", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import s3fs\n", + "import re\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n", + "from sklearn.utils import class_weight\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n", + "from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n", + "from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n", + "\n", + "import statsmodels.api as sm\n", + "\n", + "import pickle\n", + "import warnings" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7090dc21-7889-4776-a0a4-f7c6a5416d53", + "metadata": {}, + "outputs": [], + "source": [ + "# Create filesystem object\n", + "S3_ENDPOINT_URL = 
\"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", + "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2f0d08c9-5b26-4eff-9c89-4a46f427dbf7", + "metadata": {}, + "outputs": [], + "source": [ + "def load_train_test():\n", + " BUCKET = \"projet-bdc2324-team1/Generalization/musique\"\n", + " File_path_train = BUCKET + \"/Train_set.csv\"\n", + " File_path_test = BUCKET + \"/Test_set.csv\"\n", + " \n", + " with fs.open( File_path_train, mode=\"rb\") as file_in:\n", + " dataset_train = pd.read_csv(file_in, sep=\",\")\n", + " # dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n", + "\n", + " with fs.open(File_path_test, mode=\"rb\") as file_in:\n", + " dataset_test = pd.read_csv(file_in, sep=\",\")\n", + " # dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n", + " \n", + " return dataset_train, dataset_test" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "438d0138-a254-464c-9e94-f7436576c1d5", + "metadata": {}, + "outputs": [], + "source": [ + "def features_target_split(dataset_train, dataset_test):\n", + " features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n", + " 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n", + " 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n", + " X_train = dataset_train[features_l]\n", + " y_train = dataset_train[['y_has_purchased']]\n", + "\n", + " X_test = dataset_test[features_l]\n", + " y_test = dataset_test[['y_has_purchased']]\n", + " return X_train, X_test, y_train, y_test" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ebe9a887-61a4-4a5e-ac64-231307dd7647", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/tmp/ipykernel_570/3642896088.py:7: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " dataset_train = pd.read_csv(file_in, sep=\",\")\n", + "/tmp/ipykernel_570/3642896088.py:11: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " dataset_test = pd.read_csv(file_in, sep=\",\")\n" + ] + } + ], + "source": [ + "dataset_train, dataset_test = load_train_test()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b21fdea2-02c4-4222-b4e0-635e423f91c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer_id 0\n", + "nb_tickets 0\n", + "nb_purchases 0\n", + "total_amount 0\n", + "nb_suppliers 0\n", + "vente_internet_max 0\n", + "purchase_date_min 0\n", + "purchase_date_max 0\n", + "time_between_purchase 0\n", + "nb_tickets_internet 0\n", + "street_id 0\n", + "structure_id 327067\n", + "mcp_contact_id 135224\n", + "fidelity 0\n", + "tenant_id 0\n", + "is_partner 0\n", + "deleted_at 354365\n", + "gender 0\n", + "is_email_true 0\n", + "opt_in 0\n", + "last_buying_date 119201\n", + "max_price 119201\n", + "ticket_sum 0\n", + "average_price 115193\n", + "average_purchase_delay 119203\n", + "average_price_basket 119203\n", + "average_ticket_basket 119203\n", + "total_price 4008\n", + "purchase_count 0\n", + "first_buying_date 119201\n", + "country 56856\n", + "gender_label 0\n", + "gender_female 0\n", + "gender_male 0\n", + "gender_other 0\n", + "country_fr 56856\n", + "nb_campaigns 0\n", + "nb_campaigns_opened 0\n", + "time_to_open 224310\n", + "y_has_purchased 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset_train.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "42c4d034-8bc1-4ebb-a1ff-60c0a86f8f7c", + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = 
features_target_split(dataset_train, dataset_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "94b4498d-6ae8-4c96-adbc-7ba1b8348160", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape train : (354365, 17)\n", + "Shape test : (151874, 17)\n" + ] + } + ], + "source": [ + "print(\"Shape train : \", X_train.shape)\n", + "print(\"Shape test : \", X_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6224fd31-c190-4168-b395-e0bf5806d79d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0.0: 0.5481283836040216, 1.0: 5.694439980716696}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compute Weights\n", + "weights = class_weight.compute_class_weight(class_weight = 'balanced', classes = np.unique(y_train['y_has_purchased']),\n", + " y = y_train['y_has_purchased'])\n", + "\n", + "weight_dict = {np.unique(y_train['y_has_purchased'])[i]: weights[i] for i in range(len(np.unique(y_train['y_has_purchased'])))}\n", + "weight_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "4680f202-979e-483f-89b8-9df877203bcf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.54812838, 0.54812838, 0.54812838, ..., 5.69443998, 0.54812838,\n", + " 0.54812838])" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calcul des poids inverses à la fréquence des classes\n", + "class_counts = np.bincount(y_train['y_has_purchased'])\n", + "class_weights = len(y_train['y_has_purchased']) / (2 * class_counts)\n", + "\n", + "# Sélection des poids correspondants à chaque observation\n", + "weights = class_weights[y_train['y_has_purchased'].values.astype(int)]\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "5f747be4-e70b-491c-8f0a-46cb278a2dee", + "metadata": 
{}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[354365. 354365. 354365. ... 354365. 354365. 354365.]\n", + "354365\n" + ] + } + ], + "source": [ + "print(2 * weights * class_counts[y_train['y_has_purchased'].values.astype(int)])\n", + "print(len(y_train['y_has_purchased']))" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "648fb542-0186-493d-b274-be2c26a11967", + "metadata": {}, + "outputs": [], + "source": [ + "# model logit\n", + "X = X_train.astype(int)\n", + "# X = sm.add_constant(X.drop(\"gender_other\", axis=1))\n", + "y = y_train['y_has_purchased'].values\n", + "\n", + "# print(X,y)" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "id": "978b9ebc-aa97-41d7-a48f-d1f79c1ed482", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelityis_email_trueopt_ingender_femalegender_malegender_othernb_campaignsnb_campaigns_opened
000000550550-10111100134
100000550550-10011001109
200000550550-10111010140
300000550550-1001000190
400000550550-1001000140
......................................................
35436000000550550-1001000170
35436100000550550-10011010112
35436222501091910041010066
35436311551052520011101030
35436400000550550-1001001070
\n", + "

354365 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " nb_tickets nb_purchases total_amount nb_suppliers \\\n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "... ... ... ... ... \n", + "354360 0 0 0 0 \n", + "354361 0 0 0 0 \n", + "354362 2 2 50 1 \n", + "354363 1 1 55 1 \n", + "354364 0 0 0 0 \n", + "\n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 0 550 550 \n", + "1 0 550 550 \n", + "2 0 550 550 \n", + "3 0 550 550 \n", + "4 0 550 550 \n", + "... ... ... ... \n", + "354360 0 550 550 \n", + "354361 0 550 550 \n", + "354362 0 91 91 \n", + "354363 0 52 52 \n", + "354364 0 550 550 \n", + "\n", + " time_between_purchase nb_tickets_internet fidelity is_email_true \\\n", + "0 -1 0 1 1 \n", + "1 -1 0 0 1 \n", + "2 -1 0 1 1 \n", + "3 -1 0 0 1 \n", + "4 -1 0 0 1 \n", + "... ... ... ... ... \n", + "354360 -1 0 0 1 \n", + "354361 -1 0 0 1 \n", + "354362 0 0 4 1 \n", + "354363 0 0 1 1 \n", + "354364 -1 0 0 1 \n", + "\n", + " opt_in gender_female gender_male gender_other nb_campaigns \\\n", + "0 1 1 0 0 13 \n", + "1 1 0 0 1 10 \n", + "2 1 0 1 0 14 \n", + "3 0 0 0 1 9 \n", + "4 0 0 0 1 4 \n", + "... ... ... ... ... ... \n", + "354360 0 0 0 1 7 \n", + "354361 1 0 1 0 11 \n", + "354362 0 1 0 0 6 \n", + "354363 1 0 1 0 3 \n", + "354364 0 0 1 0 7 \n", + "\n", + " nb_campaigns_opened \n", + "0 4 \n", + "1 9 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "... ... \n", + "354360 0 \n", + "354361 2 \n", + "354362 6 \n", + "354363 0 \n", + "354364 0 \n", + "\n", + "[354365 rows x 17 columns]" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "id": "81b38ceb-5005-417d-a9a6-b2dac181a8fb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
purchase_date_minpurchase_date_max
count354365.000000354365.000000
mean406.981861396.551502
std189.343612195.881681
min0.0096400.000000
25%188.475293153.457966
50%550.000000550.000000
75%550.000000550.000000
max550.000000550.000000
\n", + "
" + ], + "text/plain": [ + " purchase_date_min purchase_date_max\n", + "count 354365.000000 354365.000000\n", + "mean 406.981861 396.551502\n", + "std 189.343612 195.881681\n", + "min 0.009640 0.000000\n", + "25% 188.475293 153.457966\n", + "50% 550.000000 550.000000\n", + "75% 550.000000 550.000000\n", + "max 550.000000 550.000000" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train[[\"purchase_date_min\", \"purchase_date_max\"]].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "id": "60effd66-2914-4cf9-aa0c-4e2f9dd13895", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 354365.000000\n", + "mean 10.430360\n", + "std 56.442718\n", + "min 0.000000\n", + "25% 0.000000\n", + "50% 0.000000\n", + "75% 0.000000\n", + "max 547.443350\n", + "dtype: float64" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(X_train[\"purchase_date_min\"] - X_train[\"purchase_date_max\"]).describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "7a99e480-9e11-448d-806e-3b71925a19db", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelityis_email_trueopt_ingender_femalegender_malegender_othernb_campaignsnb_campaigns_opened
00.00.00.00.00.0550.0550.0-1.00.01TrueTrue10013.04.0
10.00.00.00.00.0550.0550.0-1.00.00TrueTrue00110.09.0
20.00.00.00.00.0550.0550.0-1.00.01TrueTrue01014.00.0
30.00.00.00.00.0550.0550.0-1.00.00TrueFalse0019.00.0
40.00.00.00.00.0550.0550.0-1.00.00TrueFalse0014.00.0
......................................................
3543580.00.00.00.00.0550.0550.0-1.00.00TrueFalse1001.00.0
3543590.00.00.00.00.0550.0550.0-1.00.00TrueTrue01012.02.0
3543600.00.00.00.00.0550.0550.0-1.00.00TrueFalse0017.00.0
3543610.00.00.00.00.0550.0550.0-1.00.00TrueTrue01011.02.0
3543640.00.00.00.00.0550.0550.0-1.00.00TrueFalse0107.00.0
\n", + "

179675 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " nb_tickets nb_purchases total_amount nb_suppliers \\\n", + "0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 \n", + "... ... ... ... ... \n", + "354358 0.0 0.0 0.0 0.0 \n", + "354359 0.0 0.0 0.0 0.0 \n", + "354360 0.0 0.0 0.0 0.0 \n", + "354361 0.0 0.0 0.0 0.0 \n", + "354364 0.0 0.0 0.0 0.0 \n", + "\n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 0.0 550.0 550.0 \n", + "1 0.0 550.0 550.0 \n", + "2 0.0 550.0 550.0 \n", + "3 0.0 550.0 550.0 \n", + "4 0.0 550.0 550.0 \n", + "... ... ... ... \n", + "354358 0.0 550.0 550.0 \n", + "354359 0.0 550.0 550.0 \n", + "354360 0.0 550.0 550.0 \n", + "354361 0.0 550.0 550.0 \n", + "354364 0.0 550.0 550.0 \n", + "\n", + " time_between_purchase nb_tickets_internet fidelity is_email_true \\\n", + "0 -1.0 0.0 1 True \n", + "1 -1.0 0.0 0 True \n", + "2 -1.0 0.0 1 True \n", + "3 -1.0 0.0 0 True \n", + "4 -1.0 0.0 0 True \n", + "... ... ... ... ... \n", + "354358 -1.0 0.0 0 True \n", + "354359 -1.0 0.0 0 True \n", + "354360 -1.0 0.0 0 True \n", + "354361 -1.0 0.0 0 True \n", + "354364 -1.0 0.0 0 True \n", + "\n", + " opt_in gender_female gender_male gender_other nb_campaigns \\\n", + "0 True 1 0 0 13.0 \n", + "1 True 0 0 1 10.0 \n", + "2 True 0 1 0 14.0 \n", + "3 False 0 0 1 9.0 \n", + "4 False 0 0 1 4.0 \n", + "... ... ... ... ... ... \n", + "354358 False 1 0 0 1.0 \n", + "354359 True 0 1 0 12.0 \n", + "354360 False 0 0 1 7.0 \n", + "354361 True 0 1 0 11.0 \n", + "354364 False 0 1 0 7.0 \n", + "\n", + " nb_campaigns_opened \n", + "0 4.0 \n", + "1 9.0 \n", + "2 0.0 \n", + "3 0.0 \n", + "4 0.0 \n", + "... ... 
\n", + "354358 0.0 \n", + "354359 2.0 \n", + "354360 0.0 \n", + "354361 2.0 \n", + "354364 0.0 \n", + "\n", + "[179675 rows x 17 columns]" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train[X_train[\"time_between_purchase\"]==-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "2475f2fe-3d1f-4845-9ede-0416dac83271", + "metadata": {}, + "outputs": [], + "source": [ + "# Colonnes à standardiser\n", + "\n", + "\"\"\"\n", + "var_num = ['nb_tickets', 'nb_purchases', \"total_amount\", \"nb_suppliers\", \"vente_internet_max\",\n", + " \"purchase_date_min\", \"purchase_date_max\", \"time_between_purchase\", \"nb_tickets_internet\",\n", + " \"fidelity\", \"nb_campaigns\", \"nb_campaigns_opened\"]\n", + " \"\"\"\n", + "\n", + "numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n", + " 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n", + " 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n", + "\n", + "# Standardisation des colonnes sélectionnées\n", + "scaler = StandardScaler()\n", + "X[var_num] = scaler.fit_transform(X[var_num])\n", + "X[numeric_features] = scaler.fit_transform(X[numeric_features])\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "id": "1763bad4-36b5-4ebb-9702-b77ba19fb30e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelityis_email_trueopt_ingender_femalegender_malegender_othernb_campaignsnb_campaigns_opened
0-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.0043160.0581931.1511861.071079-0.775306-0.4345680.6079450.522567
1-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.0290710.0581931.151186-0.933638-0.7753062.3011370.3061551.701843
2-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.0043160.0581931.151186-0.9336381.289813-0.4345680.708542-0.420854
3-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.0290710.058193-0.868669-0.933638-0.7753062.3011370.205558-0.420854
4-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.0290710.058193-0.868669-0.933638-0.7753062.301137-0.297426-0.420854
......................................................
354360-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.0290710.058193-0.868669-0.933638-0.7753062.3011370.004365-0.420854
354361-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.0290710.0581931.151186-0.9336381.289813-0.4345680.4067520.050856
354362-0.0008380.092966-0.0091501.219633-0.599511-1.665887-1.557073-0.175269-0.2646930.0699490.058193-0.8686691.071079-0.775306-0.434568-0.0962320.994277
354363-0.0126310.021122-0.0052271.219633-0.599511-1.871668-1.755983-0.175269-0.264693-0.0043160.0581931.151186-0.9336381.289813-0.434568-0.398023-0.420854
354364-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.0290710.058193-0.868669-0.9336381.289813-0.4345680.004365-0.420854
\n", + "

354365 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " nb_tickets nb_purchases total_amount nb_suppliers \\\n", + "0 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "1 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "2 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "3 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "4 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "... ... ... ... ... \n", + "354360 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "354361 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "354362 -0.000838 0.092966 -0.009150 1.219633 \n", + "354363 -0.012631 0.021122 -0.005227 1.219633 \n", + "354364 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "\n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 -0.599511 0.755994 0.783940 \n", + "1 -0.599511 0.755994 0.783940 \n", + "2 -0.599511 0.755994 0.783940 \n", + "3 -0.599511 0.755994 0.783940 \n", + "4 -0.599511 0.755994 0.783940 \n", + "... ... ... ... \n", + "354360 -0.599511 0.755994 0.783940 \n", + "354361 -0.599511 0.755994 0.783940 \n", + "354362 -0.599511 -1.665887 -1.557073 \n", + "354363 -0.599511 -1.871668 -1.755983 \n", + "354364 -0.599511 0.755994 0.783940 \n", + "\n", + " time_between_purchase nb_tickets_internet fidelity is_email_true \\\n", + "0 -0.192978 -0.264693 -0.004316 0.058193 \n", + "1 -0.192978 -0.264693 -0.029071 0.058193 \n", + "2 -0.192978 -0.264693 -0.004316 0.058193 \n", + "3 -0.192978 -0.264693 -0.029071 0.058193 \n", + "4 -0.192978 -0.264693 -0.029071 0.058193 \n", + "... ... ... ... ... 
\n", + "354360 -0.192978 -0.264693 -0.029071 0.058193 \n", + "354361 -0.192978 -0.264693 -0.029071 0.058193 \n", + "354362 -0.175269 -0.264693 0.069949 0.058193 \n", + "354363 -0.175269 -0.264693 -0.004316 0.058193 \n", + "354364 -0.192978 -0.264693 -0.029071 0.058193 \n", + "\n", + " opt_in gender_female gender_male gender_other nb_campaigns \\\n", + "0 1.151186 1.071079 -0.775306 -0.434568 0.607945 \n", + "1 1.151186 -0.933638 -0.775306 2.301137 0.306155 \n", + "2 1.151186 -0.933638 1.289813 -0.434568 0.708542 \n", + "3 -0.868669 -0.933638 -0.775306 2.301137 0.205558 \n", + "4 -0.868669 -0.933638 -0.775306 2.301137 -0.297426 \n", + "... ... ... ... ... ... \n", + "354360 -0.868669 -0.933638 -0.775306 2.301137 0.004365 \n", + "354361 1.151186 -0.933638 1.289813 -0.434568 0.406752 \n", + "354362 -0.868669 1.071079 -0.775306 -0.434568 -0.096232 \n", + "354363 1.151186 -0.933638 1.289813 -0.434568 -0.398023 \n", + "354364 -0.868669 -0.933638 1.289813 -0.434568 0.004365 \n", + "\n", + " nb_campaigns_opened \n", + "0 0.522567 \n", + "1 1.701843 \n", + "2 -0.420854 \n", + "3 -0.420854 \n", + "4 -0.420854 \n", + "... ... \n", + "354360 -0.420854 \n", + "354361 0.050856 \n", + "354362 0.994277 \n", + "354363 -0.420854 \n", + "354364 -0.420854 \n", + "\n", + "[354365 rows x 17 columns]" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "id": "23d6c06c-8708-4714-906b-a1ed664377bb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
constnb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelityis_email_trueopt_ingender_femalegender_malenb_campaignsnb_campaigns_opened
01.0-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.00431611100.6079450.522567
11.0-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.02907111000.3061551.701843
21.0-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.00431611010.708542-0.420854
31.0-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.02907110000.205558-0.420854
41.0-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.0290711000-0.297426-0.420854
......................................................
3543601.0-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.02907110000.004365-0.420854
3543611.0-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.02907111010.4067520.050856
3543621.0-0.0008380.092966-0.0091501.219633-0.599511-1.665887-1.557073-0.175269-0.2646930.0699491010-0.0962320.994277
3543631.0-0.0126310.021122-0.0052271.219633-0.599511-1.871668-1.755983-0.175269-0.264693-0.0043161101-0.398023-0.420854
3543641.0-0.024425-0.050722-0.048383-0.768294-0.5995110.7559940.783940-0.192978-0.264693-0.02907110010.004365-0.420854
\n", + "

354365 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " const nb_tickets nb_purchases total_amount nb_suppliers \\\n", + "0 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "1 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "2 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "3 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "4 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "... ... ... ... ... ... \n", + "354360 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "354361 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "354362 1.0 -0.000838 0.092966 -0.009150 1.219633 \n", + "354363 1.0 -0.012631 0.021122 -0.005227 1.219633 \n", + "354364 1.0 -0.024425 -0.050722 -0.048383 -0.768294 \n", + "\n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 -0.599511 0.755994 0.783940 \n", + "1 -0.599511 0.755994 0.783940 \n", + "2 -0.599511 0.755994 0.783940 \n", + "3 -0.599511 0.755994 0.783940 \n", + "4 -0.599511 0.755994 0.783940 \n", + "... ... ... ... \n", + "354360 -0.599511 0.755994 0.783940 \n", + "354361 -0.599511 0.755994 0.783940 \n", + "354362 -0.599511 -1.665887 -1.557073 \n", + "354363 -0.599511 -1.871668 -1.755983 \n", + "354364 -0.599511 0.755994 0.783940 \n", + "\n", + " time_between_purchase nb_tickets_internet fidelity is_email_true \\\n", + "0 -0.192978 -0.264693 -0.004316 1 \n", + "1 -0.192978 -0.264693 -0.029071 1 \n", + "2 -0.192978 -0.264693 -0.004316 1 \n", + "3 -0.192978 -0.264693 -0.029071 1 \n", + "4 -0.192978 -0.264693 -0.029071 1 \n", + "... ... ... ... ... 
\n", + "354360 -0.192978 -0.264693 -0.029071 1 \n", + "354361 -0.192978 -0.264693 -0.029071 1 \n", + "354362 -0.175269 -0.264693 0.069949 1 \n", + "354363 -0.175269 -0.264693 -0.004316 1 \n", + "354364 -0.192978 -0.264693 -0.029071 1 \n", + "\n", + " opt_in gender_female gender_male nb_campaigns nb_campaigns_opened \n", + "0 1 1 0 0.607945 0.522567 \n", + "1 1 0 0 0.306155 1.701843 \n", + "2 1 0 1 0.708542 -0.420854 \n", + "3 0 0 0 0.205558 -0.420854 \n", + "4 0 0 0 -0.297426 -0.420854 \n", + "... ... ... ... ... ... \n", + "354360 0 0 0 0.004365 -0.420854 \n", + "354361 1 0 1 0.406752 0.050856 \n", + "354362 0 1 0 -0.096232 0.994277 \n", + "354363 1 0 1 -0.398023 -0.420854 \n", + "354364 0 0 1 0.004365 -0.420854 \n", + "\n", + "[354365 rows x 17 columns]" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "id": "0e968aa1-fbec-47db-b570-4730ef7eebf2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/mamba/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:2385: RuntimeWarning: overflow encountered in exp\n", + " return 1/(1+np.exp(-X))\n", + "/opt/mamba/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:2443: RuntimeWarning: divide by zero encountered in log\n", + " return np.sum(np.log(self.cdf(q * linpred)))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warning: Maximum number of iterations has been exceeded.\n", + " Current function value: inf\n", + " Iterations: 35\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/mamba/lib/python3.11/site-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. 
Check mle_retvals\n", + " warnings.warn(\"Maximum Likelihood optimization failed to \"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Logit Regression Results \n", + "==============================================================================\n", + "Dep. Variable: y No. Observations: 354365\n", + "Model: Logit Df Residuals: 354349\n", + "Method: MLE Df Model: 15\n", + "Date: Thu, 14 Mar 2024 Pseudo R-squ.: -inf\n", + "Time: 10:47:16 Log-Likelihood: -inf\n", + "converged: False LL-Null: -1.0540e+05\n", + "Covariance Type: nonrobust LLR p-value: 1.000\n", + "=========================================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "-----------------------------------------------------------------------------------------\n", + "nb_tickets 4.9213 0.267 18.448 0.000 4.398 5.444\n", + "nb_purchases -7.9446 0.140 -56.905 0.000 -8.218 -7.671\n", + "total_amount 0.3039 0.061 4.945 0.000 0.183 0.424\n", + "nb_suppliers 0.1067 0.008 13.678 0.000 0.091 0.122\n", + "vente_internet_max -0.2784 0.008 -34.612 0.000 -0.294 -0.263\n", + "purchase_date_min -41.9693 2.640 -15.895 0.000 -47.144 -36.794\n", + "purchase_date_max 43.2793 2.734 15.829 0.000 37.920 48.638\n", + "time_between_purchase 12.7237 0.789 16.132 0.000 11.178 14.270\n", + "nb_tickets_internet -0.0212 0.014 -1.510 0.131 -0.049 0.006\n", + "fidelity 22.0749 0.222 99.561 0.000 21.640 22.509\n", + "is_email_true 0.0225 0.004 6.145 0.000 0.015 0.030\n", + "opt_in -0.1245 0.004 -30.646 0.000 -0.133 -0.117\n", + "gender_female 0.0018 nan nan nan nan nan\n", + "gender_male 0.0118 nan nan nan nan nan\n", + "gender_other -0.0182 nan nan nan nan nan\n", + "nb_campaigns -0.0049 0.005 -0.961 0.336 -0.015 0.005\n", + "nb_campaigns_opened 0.0867 0.005 18.211 0.000 0.077 0.096\n", + "=========================================================================================\n" + ] + } + ], + "source": [ + "# Création du modèle de 
régression logistique avec poids équilibrés\n", + "# model_logit = sm.Logit(y, X, weights=weights)\n", + "model_logit = sm.Logit(y, X)\n", + "\n", + "# Ajustement du modèle aux données\n", + "result = model_logit.fit()\n", + "\n", + "# Affichage des résultats\n", + "print(result.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "d1660ef9-438f-4427-ac2d-aa8179614e40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1.07107945, -0.93363755])" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X[\"gender_female\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "id": "2079bae6-bce3-4de7-bf49-180177c31a55", + "metadata": {}, + "outputs": [], + "source": [ + "numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n", + " 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n", + " 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n", + "\n", + "numeric_transformer = Pipeline(steps=[\n", + " #(\"imputer\", SimpleImputer(strategy=\"mean\")), \n", + " (\"scaler\", StandardScaler()) \n", + "])\n", + "\n", + "categorical_features = ['opt_in'] \n", + "\n", + "# Transformer for the categorical features\n", + "categorical_transformer = Pipeline(steps=[\n", + " #(\"imputer\", SimpleImputer(strategy=\"most_frequent\")), # Impute missing values with the most frequent\n", + " (\"onehot\", OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n", + "])\n", + "\n", + "preproc = ColumnTransformer(\n", + " transformers=[\n", + " (\"num\", numeric_transformer, numeric_features),\n", + " (\"cat\", categorical_transformer, categorical_features)\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": 
"a9fe1c60-0732-426f-b176-9c95718e546f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
constgender_other
01.00
11.01
21.00
31.01
41.01
.........
3543601.01
3543611.00
3543621.00
3543631.00
3543641.00
\n", + "

354365 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " const gender_other\n", + "0 1.0 0\n", + "1 1.0 1\n", + "2 1.0 0\n", + "3 1.0 1\n", + "4 1.0 1\n", + "... ... ...\n", + "354360 1.0 1\n", + "354361 1.0 0\n", + "354362 1.0 0\n", + "354363 1.0 0\n", + "354364 1.0 0\n", + "\n", + "[354365 rows x 2 columns]" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sm.add_constant(X[\"gender_other\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "b8c92b7c-1df0-4384-82e7-1e8cc0d333fa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelityis_email_trueopt_ingender_femalegender_malenb_campaignsnb_campaigns_opened
000000550550-1011110134
100000550550-1001100109
200000550550-1011101140
300000550550-100100090
400000550550-100100040
...................................................
35436000000550550-100100070
35436100000550550-1001101112
3543622250109191004101066
3543631155105252001110130
35436400000550550-100100170
\n", + "

354365 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " nb_tickets nb_purchases total_amount nb_suppliers \\\n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "... ... ... ... ... \n", + "354360 0 0 0 0 \n", + "354361 0 0 0 0 \n", + "354362 2 2 50 1 \n", + "354363 1 1 55 1 \n", + "354364 0 0 0 0 \n", + "\n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 0 550 550 \n", + "1 0 550 550 \n", + "2 0 550 550 \n", + "3 0 550 550 \n", + "4 0 550 550 \n", + "... ... ... ... \n", + "354360 0 550 550 \n", + "354361 0 550 550 \n", + "354362 0 91 91 \n", + "354363 0 52 52 \n", + "354364 0 550 550 \n", + "\n", + " time_between_purchase nb_tickets_internet fidelity is_email_true \\\n", + "0 -1 0 1 1 \n", + "1 -1 0 0 1 \n", + "2 -1 0 1 1 \n", + "3 -1 0 0 1 \n", + "4 -1 0 0 1 \n", + "... ... ... ... ... \n", + "354360 -1 0 0 1 \n", + "354361 -1 0 0 1 \n", + "354362 0 0 4 1 \n", + "354363 0 0 1 1 \n", + "354364 -1 0 0 1 \n", + "\n", + " opt_in gender_female gender_male nb_campaigns nb_campaigns_opened \n", + "0 1 1 0 13 4 \n", + "1 1 0 0 10 9 \n", + "2 1 0 1 14 0 \n", + "3 0 0 0 9 0 \n", + "4 0 0 0 4 0 \n", + "... ... ... ... ... ... \n", + "354360 0 0 0 7 0 \n", + "354361 1 0 1 11 2 \n", + "354362 0 1 0 6 6 \n", + "354363 1 0 1 3 0 \n", + "354364 0 0 1 7 0 \n", + "\n", + "[354365 rows x 16 columns]" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.drop(\"gender_other\", axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "061dcabd-383d-4b76-a9f0-8647daed2c9e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "fc4ffbf6-ab7e-47cf-a717-c25477d92493", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelityis_email_trueopt_ingender_femalegender_malegender_othernb_campaignsnb_campaigns_opened
000000550550-10111100134
100000550550-10011001109
200000550550-10111010140
300000550550-1001000190
400000550550-1001000140
......................................................
35436000000550550-1001000170
35436100000550550-10011010112
35436222501091910041010066
35436311551052520011101030
35436400000550550-1001001070
\n", + "

354365 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " nb_tickets nb_purchases total_amount nb_suppliers \\\n", + "0 0 0 0 0 \n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "... ... ... ... ... \n", + "354360 0 0 0 0 \n", + "354361 0 0 0 0 \n", + "354362 2 2 50 1 \n", + "354363 1 1 55 1 \n", + "354364 0 0 0 0 \n", + "\n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 0 550 550 \n", + "1 0 550 550 \n", + "2 0 550 550 \n", + "3 0 550 550 \n", + "4 0 550 550 \n", + "... ... ... ... \n", + "354360 0 550 550 \n", + "354361 0 550 550 \n", + "354362 0 91 91 \n", + "354363 0 52 52 \n", + "354364 0 550 550 \n", + "\n", + " time_between_purchase nb_tickets_internet fidelity is_email_true \\\n", + "0 -1 0 1 1 \n", + "1 -1 0 0 1 \n", + "2 -1 0 1 1 \n", + "3 -1 0 0 1 \n", + "4 -1 0 0 1 \n", + "... ... ... ... ... \n", + "354360 -1 0 0 1 \n", + "354361 -1 0 0 1 \n", + "354362 0 0 4 1 \n", + "354363 0 0 1 1 \n", + "354364 -1 0 0 1 \n", + "\n", + " opt_in gender_female gender_male gender_other nb_campaigns \\\n", + "0 1 1 0 0 13 \n", + "1 1 0 0 1 10 \n", + "2 1 0 1 0 14 \n", + "3 0 0 0 1 9 \n", + "4 0 0 0 1 4 \n", + "... ... ... ... ... ... \n", + "354360 0 0 0 1 7 \n", + "354361 1 0 1 0 11 \n", + "354362 0 1 0 0 6 \n", + "354363 1 0 1 0 3 \n", + "354364 0 0 1 0 7 \n", + "\n", + " nb_campaigns_opened \n", + "0 4 \n", + "1 9 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "... ... 
\n", + "354360 0 \n", + "354361 2 \n", + "354362 6 \n", + "354363 0 \n", + "354364 0 \n", + "\n", + "[354365 rows x 17 columns]" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "f15b0d69-8470-4a36-bd25-9536a36c4756", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(354365,)" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weights.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "e97e26f6-b854-41e3-bbdf-318065b03254", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(354365, 17)" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "49621874-1e8c-4cb5-84a9-a5c9715f3b06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(354365,)" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "8072cd81-d63f-430e-b0b2-c0589cf18871", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nb_tickets 0\n", + "nb_purchases 0\n", + "total_amount 0\n", + "nb_suppliers 0\n", + "vente_internet_max 0\n", + "purchase_date_min 0\n", + "purchase_date_max 0\n", + "time_between_purchase 0\n", + "nb_tickets_internet 0\n", + "fidelity 0\n", + "is_email_true 0\n", + "opt_in 0\n", + "gender_female 0\n", + "gender_male 0\n", + "gender_other 0\n", + "nb_campaigns 0\n", + "nb_campaigns_opened 0\n", + "dtype: int64" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": 
"6f07a66f-5a46-4409-b0b6-ff5e212296f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0., 1.])" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train[\"y_has_purchased\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "id": "4587c36f-94bf-458b-b819-60250eb17c59", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nb_ticketsnb_purchasestotal_amountnb_suppliersvente_internet_maxpurchase_date_minpurchase_date_maxtime_between_purchasenb_tickets_internetfidelityis_email_trueopt_ingender_femalegender_malegender_othernb_campaignsnb_campaigns_opened
00.00.00.00.00.0550.000000550.000000-1.0000000.01TrueTrue10013.04.0
10.00.00.00.00.0550.000000550.000000-1.0000000.00TrueTrue00110.09.0
20.00.00.00.00.0550.000000550.000000-1.0000000.01TrueTrue01014.00.0
30.00.00.00.00.0550.000000550.000000-1.0000000.00TrueFalse0019.00.0
40.00.00.00.00.0550.000000550.000000-1.0000000.00TrueFalse0014.00.0
......................................................
3543600.00.00.00.00.0550.000000550.000000-1.0000000.00TrueFalse0017.00.0
3543610.00.00.00.00.0550.000000550.000000-1.0000000.00TrueTrue01011.02.0
3543622.02.050.01.00.091.03055691.0201390.0104170.04TrueFalse1006.06.0
3543631.01.055.01.00.052.28402852.2840280.0000000.01TrueTrue0103.00.0
3543640.00.00.00.00.0550.000000550.000000-1.0000000.00TrueFalse0107.00.0
\n", + "

354365 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " nb_tickets nb_purchases total_amount nb_suppliers \\\n", + "0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 \n", + "... ... ... ... ... \n", + "354360 0.0 0.0 0.0 0.0 \n", + "354361 0.0 0.0 0.0 0.0 \n", + "354362 2.0 2.0 50.0 1.0 \n", + "354363 1.0 1.0 55.0 1.0 \n", + "354364 0.0 0.0 0.0 0.0 \n", + "\n", + " vente_internet_max purchase_date_min purchase_date_max \\\n", + "0 0.0 550.000000 550.000000 \n", + "1 0.0 550.000000 550.000000 \n", + "2 0.0 550.000000 550.000000 \n", + "3 0.0 550.000000 550.000000 \n", + "4 0.0 550.000000 550.000000 \n", + "... ... ... ... \n", + "354360 0.0 550.000000 550.000000 \n", + "354361 0.0 550.000000 550.000000 \n", + "354362 0.0 91.030556 91.020139 \n", + "354363 0.0 52.284028 52.284028 \n", + "354364 0.0 550.000000 550.000000 \n", + "\n", + " time_between_purchase nb_tickets_internet fidelity is_email_true \\\n", + "0 -1.000000 0.0 1 True \n", + "1 -1.000000 0.0 0 True \n", + "2 -1.000000 0.0 1 True \n", + "3 -1.000000 0.0 0 True \n", + "4 -1.000000 0.0 0 True \n", + "... ... ... ... ... \n", + "354360 -1.000000 0.0 0 True \n", + "354361 -1.000000 0.0 0 True \n", + "354362 0.010417 0.0 4 True \n", + "354363 0.000000 0.0 1 True \n", + "354364 -1.000000 0.0 0 True \n", + "\n", + " opt_in gender_female gender_male gender_other nb_campaigns \\\n", + "0 True 1 0 0 13.0 \n", + "1 True 0 0 1 10.0 \n", + "2 True 0 1 0 14.0 \n", + "3 False 0 0 1 9.0 \n", + "4 False 0 0 1 4.0 \n", + "... ... ... ... ... ... \n", + "354360 False 0 0 1 7.0 \n", + "354361 True 0 1 0 11.0 \n", + "354362 False 1 0 0 6.0 \n", + "354363 True 0 1 0 3.0 \n", + "354364 False 0 1 0 7.0 \n", + "\n", + " nb_campaigns_opened \n", + "0 4.0 \n", + "1 9.0 \n", + "2 0.0 \n", + "3 0.0 \n", + "4 0.0 \n", + "... ... 
\n", + "354360 0.0 \n", + "354361 2.0 \n", + "354362 6.0 \n", + "354363 0.0 \n", + "354364 0.0 \n", + "\n", + "[354365 rows x 17 columns]" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}