diff --git a/Spectacle/2_bis_logit_baseline_statsmodels.ipynb b/Spectacle/2_bis_logit_baseline_statsmodels.ipynb new file mode 100644 index 0000000..515f8cb --- /dev/null +++ b/Spectacle/2_bis_logit_baseline_statsmodels.ipynb @@ -0,0 +1,3270 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "56949d8f-4eaf-4685-9989-ba0b4b1945b7", + "metadata": {}, + "source": [ + "# Baseline logit on spectacle companies with statsmodels" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "72480e84-2ccc-481a-9353-1199e4358d62", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import s3fs\n", + "import re\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n", + "from sklearn.utils import class_weight\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n", + "from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n", + "from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n", + "\n", + "import statsmodels.api as sm\n", + "\n", + "import pickle\n", + "import warnings" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7090dc21-7889-4776-a0a4-f7c6a5416d53", + "metadata": {}, + "outputs": [], + "source": [ + "# Create filesystem object\n", + "S3_ENDPOINT_URL = 
\"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", + "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2f0d08c9-5b26-4eff-9c89-4a46f427dbf7", + "metadata": {}, + "outputs": [], + "source": [ + "def load_train_test():\n", + " BUCKET = \"projet-bdc2324-team1/Generalization/musique\"\n", + " File_path_train = BUCKET + \"/Train_set.csv\"\n", + " File_path_test = BUCKET + \"/Test_set.csv\"\n", + " \n", + " with fs.open( File_path_train, mode=\"rb\") as file_in:\n", + " dataset_train = pd.read_csv(file_in, sep=\",\")\n", + " # dataset_train['y_has_purchased'] = dataset_train['y_has_purchased'].fillna(0)\n", + "\n", + " with fs.open(File_path_test, mode=\"rb\") as file_in:\n", + " dataset_test = pd.read_csv(file_in, sep=\",\")\n", + " # dataset_test['y_has_purchased'] = dataset_test['y_has_purchased'].fillna(0)\n", + " \n", + " return dataset_train, dataset_test" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "438d0138-a254-464c-9e94-f7436576c1d5", + "metadata": {}, + "outputs": [], + "source": [ + "def features_target_split(dataset_train, dataset_test):\n", + " features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n", + " 'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',\n", + " 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']\n", + " X_train = dataset_train[features_l]\n", + " y_train = dataset_train[['y_has_purchased']]\n", + "\n", + " X_test = dataset_test[features_l]\n", + " y_test = dataset_test[['y_has_purchased']]\n", + " return X_train, X_test, y_train, y_test" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ebe9a887-61a4-4a5e-ac64-231307dd7647", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/tmp/ipykernel_570/3642896088.py:7: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " dataset_train = pd.read_csv(file_in, sep=\",\")\n", + "/tmp/ipykernel_570/3642896088.py:11: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " dataset_test = pd.read_csv(file_in, sep=\",\")\n" + ] + } + ], + "source": [ + "dataset_train, dataset_test = load_train_test()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b21fdea2-02c4-4222-b4e0-635e423f91c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer_id 0\n", + "nb_tickets 0\n", + "nb_purchases 0\n", + "total_amount 0\n", + "nb_suppliers 0\n", + "vente_internet_max 0\n", + "purchase_date_min 0\n", + "purchase_date_max 0\n", + "time_between_purchase 0\n", + "nb_tickets_internet 0\n", + "street_id 0\n", + "structure_id 327067\n", + "mcp_contact_id 135224\n", + "fidelity 0\n", + "tenant_id 0\n", + "is_partner 0\n", + "deleted_at 354365\n", + "gender 0\n", + "is_email_true 0\n", + "opt_in 0\n", + "last_buying_date 119201\n", + "max_price 119201\n", + "ticket_sum 0\n", + "average_price 115193\n", + "average_purchase_delay 119203\n", + "average_price_basket 119203\n", + "average_ticket_basket 119203\n", + "total_price 4008\n", + "purchase_count 0\n", + "first_buying_date 119201\n", + "country 56856\n", + "gender_label 0\n", + "gender_female 0\n", + "gender_male 0\n", + "gender_other 0\n", + "country_fr 56856\n", + "nb_campaigns 0\n", + "nb_campaigns_opened 0\n", + "time_to_open 224310\n", + "y_has_purchased 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset_train.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "42c4d034-8bc1-4ebb-a1ff-60c0a86f8f7c", + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = 
features_target_split(dataset_train, dataset_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "94b4498d-6ae8-4c96-adbc-7ba1b8348160", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape train : (354365, 17)\n", + "Shape test : (151874, 17)\n" + ] + } + ], + "source": [ + "print(\"Shape train : \", X_train.shape)\n", + "print(\"Shape test : \", X_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6224fd31-c190-4168-b395-e0bf5806d79d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0.0: 0.5481283836040216, 1.0: 5.694439980716696}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compute Weights\n", + "weights = class_weight.compute_class_weight(class_weight = 'balanced', classes = np.unique(y_train['y_has_purchased']),\n", + " y = y_train['y_has_purchased'])\n", + "\n", + "weight_dict = {np.unique(y_train['y_has_purchased'])[i]: weights[i] for i in range(len(np.unique(y_train['y_has_purchased'])))}\n", + "weight_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "4680f202-979e-483f-89b8-9df877203bcf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.54812838, 0.54812838, 0.54812838, ..., 5.69443998, 0.54812838,\n", + " 0.54812838])" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calcul des poids inverses à la fréquence des classes\n", + "class_counts = np.bincount(y_train['y_has_purchased'])\n", + "class_weights = len(y_train['y_has_purchased']) / (2 * class_counts)\n", + "\n", + "# Sélection des poids correspondants à chaque observation\n", + "weights = class_weights[y_train['y_has_purchased'].values.astype(int)]\n", + "weights" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "5f747be4-e70b-491c-8f0a-46cb278a2dee", + "metadata": 
{}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[354365. 354365. 354365. ... 354365. 354365. 354365.]\n", + "354365\n" + ] + } + ], + "source": [ + "print(2 * weights * class_counts[y_train['y_has_purchased'].values.astype(int)])\n", + "print(len(y_train['y_has_purchased']))" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "648fb542-0186-493d-b274-be2c26a11967", + "metadata": {}, + "outputs": [], + "source": [ + "# model logit\n", + "X = X_train.astype(int)\n", + "# X = sm.add_constant(X.drop(\"gender_other\", axis=1))\n", + "y = y_train['y_has_purchased'].values\n", + "\n", + "# print(X,y)" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "id": "978b9ebc-aa97-41d7-a48f-d1f79c1ed482", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | nb_tickets | \n", + "nb_purchases | \n", + "total_amount | \n", + "nb_suppliers | \n", + "vente_internet_max | \n", + "purchase_date_min | \n", + "purchase_date_max | \n", + "time_between_purchase | \n", + "nb_tickets_internet | \n", + "fidelity | \n", + "is_email_true | \n", + "opt_in | \n", + "gender_female | \n", + "gender_male | \n", + "gender_other | \n", + "nb_campaigns | \n", + "nb_campaigns_opened | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "13 | \n", + "4 | \n", + "
1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "10 | \n", + "9 | \n", + "
2 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "14 | \n", + "0 | \n", + "
3 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "9 | \n", + "0 | \n", + "
4 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "4 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
354360 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "7 | \n", + "0 | \n", + "
354361 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "11 | \n", + "2 | \n", + "
354362 | \n", + "2 | \n", + "2 | \n", + "50 | \n", + "1 | \n", + "0 | \n", + "91 | \n", + "91 | \n", + "0 | \n", + "0 | \n", + "4 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "6 | \n", + "6 | \n", + "
354363 | \n", + "1 | \n", + "1 | \n", + "55 | \n", + "1 | \n", + "0 | \n", + "52 | \n", + "52 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "3 | \n", + "0 | \n", + "
354364 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "7 | \n", + "0 | \n", + "
354365 rows × 17 columns
\n", + "\n", + " | purchase_date_min | \n", + "purchase_date_max | \n", + "
---|---|---|
count | \n", + "354365.000000 | \n", + "354365.000000 | \n", + "
mean | \n", + "406.981861 | \n", + "396.551502 | \n", + "
std | \n", + "189.343612 | \n", + "195.881681 | \n", + "
min | \n", + "0.009640 | \n", + "0.000000 | \n", + "
25% | \n", + "188.475293 | \n", + "153.457966 | \n", + "
50% | \n", + "550.000000 | \n", + "550.000000 | \n", + "
75% | \n", + "550.000000 | \n", + "550.000000 | \n", + "
max | \n", + "550.000000 | \n", + "550.000000 | \n", + "
\n", + " | nb_tickets | \n", + "nb_purchases | \n", + "total_amount | \n", + "nb_suppliers | \n", + "vente_internet_max | \n", + "purchase_date_min | \n", + "purchase_date_max | \n", + "time_between_purchase | \n", + "nb_tickets_internet | \n", + "fidelity | \n", + "is_email_true | \n", + "opt_in | \n", + "gender_female | \n", + "gender_male | \n", + "gender_other | \n", + "nb_campaigns | \n", + "nb_campaigns_opened | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "1 | \n", + "True | \n", + "True | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "13.0 | \n", + "4.0 | \n", + "
1 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "True | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "10.0 | \n", + "9.0 | \n", + "
2 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "1 | \n", + "True | \n", + "True | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "14.0 | \n", + "0.0 | \n", + "
3 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "False | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "9.0 | \n", + "0.0 | \n", + "
4 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "False | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "4.0 | \n", + "0.0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
354358 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "False | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1.0 | \n", + "0.0 | \n", + "
354359 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "True | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "12.0 | \n", + "2.0 | \n", + "
354360 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "False | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "7.0 | \n", + "0.0 | \n", + "
354361 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "True | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "11.0 | \n", + "2.0 | \n", + "
354364 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.0 | \n", + "550.0 | \n", + "-1.0 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "False | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "7.0 | \n", + "0.0 | \n", + "
179675 rows × 17 columns
\n", + "\n", + " | nb_tickets | \n", + "nb_purchases | \n", + "total_amount | \n", + "nb_suppliers | \n", + "vente_internet_max | \n", + "purchase_date_min | \n", + "purchase_date_max | \n", + "time_between_purchase | \n", + "nb_tickets_internet | \n", + "fidelity | \n", + "is_email_true | \n", + "opt_in | \n", + "gender_female | \n", + "gender_male | \n", + "gender_other | \n", + "nb_campaigns | \n", + "nb_campaigns_opened | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.004316 | \n", + "0.058193 | \n", + "1.151186 | \n", + "1.071079 | \n", + "-0.775306 | \n", + "-0.434568 | \n", + "0.607945 | \n", + "0.522567 | \n", + "
1 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "0.058193 | \n", + "1.151186 | \n", + "-0.933638 | \n", + "-0.775306 | \n", + "2.301137 | \n", + "0.306155 | \n", + "1.701843 | \n", + "
2 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.004316 | \n", + "0.058193 | \n", + "1.151186 | \n", + "-0.933638 | \n", + "1.289813 | \n", + "-0.434568 | \n", + "0.708542 | \n", + "-0.420854 | \n", + "
3 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "0.058193 | \n", + "-0.868669 | \n", + "-0.933638 | \n", + "-0.775306 | \n", + "2.301137 | \n", + "0.205558 | \n", + "-0.420854 | \n", + "
4 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "0.058193 | \n", + "-0.868669 | \n", + "-0.933638 | \n", + "-0.775306 | \n", + "2.301137 | \n", + "-0.297426 | \n", + "-0.420854 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
354360 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "0.058193 | \n", + "-0.868669 | \n", + "-0.933638 | \n", + "-0.775306 | \n", + "2.301137 | \n", + "0.004365 | \n", + "-0.420854 | \n", + "
354361 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "0.058193 | \n", + "1.151186 | \n", + "-0.933638 | \n", + "1.289813 | \n", + "-0.434568 | \n", + "0.406752 | \n", + "0.050856 | \n", + "
354362 | \n", + "-0.000838 | \n", + "0.092966 | \n", + "-0.009150 | \n", + "1.219633 | \n", + "-0.599511 | \n", + "-1.665887 | \n", + "-1.557073 | \n", + "-0.175269 | \n", + "-0.264693 | \n", + "0.069949 | \n", + "0.058193 | \n", + "-0.868669 | \n", + "1.071079 | \n", + "-0.775306 | \n", + "-0.434568 | \n", + "-0.096232 | \n", + "0.994277 | \n", + "
354363 | \n", + "-0.012631 | \n", + "0.021122 | \n", + "-0.005227 | \n", + "1.219633 | \n", + "-0.599511 | \n", + "-1.871668 | \n", + "-1.755983 | \n", + "-0.175269 | \n", + "-0.264693 | \n", + "-0.004316 | \n", + "0.058193 | \n", + "1.151186 | \n", + "-0.933638 | \n", + "1.289813 | \n", + "-0.434568 | \n", + "-0.398023 | \n", + "-0.420854 | \n", + "
354364 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "0.058193 | \n", + "-0.868669 | \n", + "-0.933638 | \n", + "1.289813 | \n", + "-0.434568 | \n", + "0.004365 | \n", + "-0.420854 | \n", + "
354365 rows × 17 columns
\n", + "\n", + " | const | \n", + "nb_tickets | \n", + "nb_purchases | \n", + "total_amount | \n", + "nb_suppliers | \n", + "vente_internet_max | \n", + "purchase_date_min | \n", + "purchase_date_max | \n", + "time_between_purchase | \n", + "nb_tickets_internet | \n", + "fidelity | \n", + "is_email_true | \n", + "opt_in | \n", + "gender_female | \n", + "gender_male | \n", + "nb_campaigns | \n", + "nb_campaigns_opened | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "1.0 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.004316 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0.607945 | \n", + "0.522567 | \n", + "
1 | \n", + "1.0 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0.306155 | \n", + "1.701843 | \n", + "
2 | \n", + "1.0 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.004316 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0.708542 | \n", + "-0.420854 | \n", + "
3 | \n", + "1.0 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0.205558 | \n", + "-0.420854 | \n", + "
4 | \n", + "1.0 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "-0.297426 | \n", + "-0.420854 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
354360 | \n", + "1.0 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0.004365 | \n", + "-0.420854 | \n", + "
354361 | \n", + "1.0 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0.406752 | \n", + "0.050856 | \n", + "
354362 | \n", + "1.0 | \n", + "-0.000838 | \n", + "0.092966 | \n", + "-0.009150 | \n", + "1.219633 | \n", + "-0.599511 | \n", + "-1.665887 | \n", + "-1.557073 | \n", + "-0.175269 | \n", + "-0.264693 | \n", + "0.069949 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "-0.096232 | \n", + "0.994277 | \n", + "
354363 | \n", + "1.0 | \n", + "-0.012631 | \n", + "0.021122 | \n", + "-0.005227 | \n", + "1.219633 | \n", + "-0.599511 | \n", + "-1.871668 | \n", + "-1.755983 | \n", + "-0.175269 | \n", + "-0.264693 | \n", + "-0.004316 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "-0.398023 | \n", + "-0.420854 | \n", + "
354364 | \n", + "1.0 | \n", + "-0.024425 | \n", + "-0.050722 | \n", + "-0.048383 | \n", + "-0.768294 | \n", + "-0.599511 | \n", + "0.755994 | \n", + "0.783940 | \n", + "-0.192978 | \n", + "-0.264693 | \n", + "-0.029071 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0.004365 | \n", + "-0.420854 | \n", + "
354365 rows × 17 columns
\n", + "\n", + " | const | \n", + "gender_other | \n", + "
---|---|---|
0 | \n", + "1.0 | \n", + "0 | \n", + "
1 | \n", + "1.0 | \n", + "1 | \n", + "
2 | \n", + "1.0 | \n", + "0 | \n", + "
3 | \n", + "1.0 | \n", + "1 | \n", + "
4 | \n", + "1.0 | \n", + "1 | \n", + "
... | \n", + "... | \n", + "... | \n", + "
354360 | \n", + "1.0 | \n", + "1 | \n", + "
354361 | \n", + "1.0 | \n", + "0 | \n", + "
354362 | \n", + "1.0 | \n", + "0 | \n", + "
354363 | \n", + "1.0 | \n", + "0 | \n", + "
354364 | \n", + "1.0 | \n", + "0 | \n", + "
354365 rows × 2 columns
\n", + "\n", + " | nb_tickets | \n", + "nb_purchases | \n", + "total_amount | \n", + "nb_suppliers | \n", + "vente_internet_max | \n", + "purchase_date_min | \n", + "purchase_date_max | \n", + "time_between_purchase | \n", + "nb_tickets_internet | \n", + "fidelity | \n", + "is_email_true | \n", + "opt_in | \n", + "gender_female | \n", + "gender_male | \n", + "nb_campaigns | \n", + "nb_campaigns_opened | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "13 | \n", + "4 | \n", + "
1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "10 | \n", + "9 | \n", + "
2 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "14 | \n", + "0 | \n", + "
3 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "9 | \n", + "0 | \n", + "
4 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "4 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
354360 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "7 | \n", + "0 | \n", + "
354361 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "11 | \n", + "2 | \n", + "
354362 | \n", + "2 | \n", + "2 | \n", + "50 | \n", + "1 | \n", + "0 | \n", + "91 | \n", + "91 | \n", + "0 | \n", + "0 | \n", + "4 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "6 | \n", + "6 | \n", + "
354363 | \n", + "1 | \n", + "1 | \n", + "55 | \n", + "1 | \n", + "0 | \n", + "52 | \n", + "52 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "3 | \n", + "0 | \n", + "
354364 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "7 | \n", + "0 | \n", + "
354365 rows × 16 columns
\n", + "\n", + " | nb_tickets | \n", + "nb_purchases | \n", + "total_amount | \n", + "nb_suppliers | \n", + "vente_internet_max | \n", + "purchase_date_min | \n", + "purchase_date_max | \n", + "time_between_purchase | \n", + "nb_tickets_internet | \n", + "fidelity | \n", + "is_email_true | \n", + "opt_in | \n", + "gender_female | \n", + "gender_male | \n", + "gender_other | \n", + "nb_campaigns | \n", + "nb_campaigns_opened | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "13 | \n", + "4 | \n", + "
1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "10 | \n", + "9 | \n", + "
2 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "14 | \n", + "0 | \n", + "
3 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "9 | \n", + "0 | \n", + "
4 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "4 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
354360 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "7 | \n", + "0 | \n", + "
354361 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "11 | \n", + "2 | \n", + "
354362 | \n", + "2 | \n", + "2 | \n", + "50 | \n", + "1 | \n", + "0 | \n", + "91 | \n", + "91 | \n", + "0 | \n", + "0 | \n", + "4 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "6 | \n", + "6 | \n", + "
354363 | \n", + "1 | \n", + "1 | \n", + "55 | \n", + "1 | \n", + "0 | \n", + "52 | \n", + "52 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "3 | \n", + "0 | \n", + "
354364 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "550 | \n", + "550 | \n", + "-1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "7 | \n", + "0 | \n", + "
354365 rows × 17 columns
\n", + "\n", + " | nb_tickets | \n", + "nb_purchases | \n", + "total_amount | \n", + "nb_suppliers | \n", + "vente_internet_max | \n", + "purchase_date_min | \n", + "purchase_date_max | \n", + "time_between_purchase | \n", + "nb_tickets_internet | \n", + "fidelity | \n", + "is_email_true | \n", + "opt_in | \n", + "gender_female | \n", + "gender_male | \n", + "gender_other | \n", + "nb_campaigns | \n", + "nb_campaigns_opened | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.000000 | \n", + "550.000000 | \n", + "-1.000000 | \n", + "0.0 | \n", + "1 | \n", + "True | \n", + "True | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "13.0 | \n", + "4.0 | \n", + "
1 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.000000 | \n", + "550.000000 | \n", + "-1.000000 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "True | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "10.0 | \n", + "9.0 | \n", + "
2 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.000000 | \n", + "550.000000 | \n", + "-1.000000 | \n", + "0.0 | \n", + "1 | \n", + "True | \n", + "True | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "14.0 | \n", + "0.0 | \n", + "
3 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.000000 | \n", + "550.000000 | \n", + "-1.000000 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "False | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "9.0 | \n", + "0.0 | \n", + "
4 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.000000 | \n", + "550.000000 | \n", + "-1.000000 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "False | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "4.0 | \n", + "0.0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
354360 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.000000 | \n", + "550.000000 | \n", + "-1.000000 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "False | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "7.0 | \n", + "0.0 | \n", + "
354361 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.000000 | \n", + "550.000000 | \n", + "-1.000000 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "True | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "11.0 | \n", + "2.0 | \n", + "
354362 | \n", + "2.0 | \n", + "2.0 | \n", + "50.0 | \n", + "1.0 | \n", + "0.0 | \n", + "91.030556 | \n", + "91.020139 | \n", + "0.010417 | \n", + "0.0 | \n", + "4 | \n", + "True | \n", + "False | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "6.0 | \n", + "6.0 | \n", + "
354363 | \n", + "1.0 | \n", + "1.0 | \n", + "55.0 | \n", + "1.0 | \n", + "0.0 | \n", + "52.284028 | \n", + "52.284028 | \n", + "0.000000 | \n", + "0.0 | \n", + "1 | \n", + "True | \n", + "True | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "3.0 | \n", + "0.0 | \n", + "
354364 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "550.000000 | \n", + "550.000000 | \n", + "-1.000000 | \n", + "0.0 | \n", + "0 | \n", + "True | \n", + "False | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "7.0 | \n", + "0.0 | \n", + "
354365 rows × 17 columns
\n", + "