{ "cells": [ { "cell_type": "markdown", "id": "56949d8f-4eaf-4685-9989-ba0b4b1945b7", "metadata": {}, "source": [ "# Baseline logit on spectacle companies with statsmodels" ] }, { "cell_type": "markdown", "id": "eae443dc-6c28-401a-a30e-e02f5f4da2df", "metadata": {}, "source": [ "## Importation des packages et des données" ] }, { "cell_type": "code", "execution_count": 1, "id": "72480e84-2ccc-481a-9353-1199e4358d62", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "import s3fs\n", "import re\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, recall_score\n", "from sklearn.utils import class_weight\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.compose import ColumnTransformer\n", "from sklearn.preprocessing import OneHotEncoder\n", "from sklearn.impute import SimpleImputer\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler\n", "from sklearn.metrics import make_scorer, f1_score, balanced_accuracy_score\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score\n", "from sklearn.exceptions import ConvergenceWarning, DataConversionWarning\n", "\n", "import statsmodels.api as sm\n", "\n", "import pickle\n", "import warnings" ] }, { "cell_type": "code", "execution_count": 2, "id": "7090dc21-7889-4776-a0a4-f7c6a5416d53", "metadata": {}, "outputs": [], "source": [ "# Create filesystem object\n", "S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n", "fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})" ] }, { "cell_type": "code", "execution_count": 3, "id":
"2f0d08c9-5b26-4eff-9c89-4a46f427dbf7", "metadata": {}, "outputs": [], "source": [
 "def load_train_test():\n",
 "    \"\"\"Read the train and test CSV files and return them as two DataFrames.\n",
 "\n",
 "    The files are opened through the notebook-level `fs` filesystem object,\n",
 "    which must exist before this function is called.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    tuple of pandas.DataFrame\n",
 "        (dataset_train, dataset_test)\n",
 "    \"\"\"\n",
 "    BUCKET = \"projet-bdc2324-team1/Generalization/musique\"\n",
 "    File_path_train = BUCKET + \"/Train_set.csv\"\n",
 "    File_path_test = BUCKET + \"/Test_set.csv\"\n",
 "\n",
 "    # low_memory=False lets pandas infer each column dtype from the whole file\n",
 "    # instead of chunk by chunk, which is what triggered the mixed-types DtypeWarning.\n",
 "    with fs.open(File_path_train, mode=\"rb\") as file_in:\n",
 "        dataset_train = pd.read_csv(file_in, sep=\",\", low_memory=False)\n",
 "\n",
 "    with fs.open(File_path_test, mode=\"rb\") as file_in:\n",
 "        dataset_test = pd.read_csv(file_in, sep=\",\", low_memory=False)\n",
 "\n",
 "    return dataset_train, dataset_test"
 ] }, { "cell_type": "code", "execution_count": 4, "id": "438d0138-a254-464c-9e94-f7436576c1d5", "metadata": {}, "outputs": [], "source": [
 "def features_target_split(dataset_train, dataset_test):\n",
 "    \"\"\"Split the train and test datasets into feature frames and target frames.\n",
 "\n",
 "    Parameters\n",
 "    ----------\n",
 "    dataset_train, dataset_test : pandas.DataFrame\n",
 "        Frames containing every column of `features_l` plus 'y_has_purchased'.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    X_train, X_test, y_train, y_test : pandas.DataFrame\n",
 "        The targets are one-column DataFrames (not Series).\n",
 "    \"\"\"\n",
 "    # 'is_partner' is deliberately left out of the feature list\n",
 "    features_l = [\n",
 "        'nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max',\n",
 "        'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
 "        'fidelity', 'is_email_true', 'opt_in',\n",
 "        'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened',\n",
 "    ]\n",
 "    target_l = ['y_has_purchased']\n",
 "\n",
 "    X_train = dataset_train[features_l]\n",
 "    y_train = dataset_train[target_l]\n",
 "\n",
 "    X_test = dataset_test[features_l]\n",
 "    y_test = dataset_test[target_l]\n",
 "    return X_train, X_test, y_train, y_test"
 ] }, { "cell_type": "code", "execution_count": 5, "id": "ebe9a887-61a4-4a5e-ac64-231307dd7647", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_426/3642896088.py:7: DtypeWarning: Columns (38) have mixed types. Specify dtype option on import or set low_memory=False.\n", " dataset_train = pd.read_csv(file_in, sep=\",\")\n", "/tmp/ipykernel_426/3642896088.py:11: DtypeWarning: Columns (38) have mixed types.
Specify dtype option on import or set low_memory=False.\n", " dataset_test = pd.read_csv(file_in, sep=\",\")\n" ] } ], "source": [ "dataset_train, dataset_test = load_train_test()" ] }, { "cell_type": "code", "execution_count": 6, "id": "b21fdea2-02c4-4222-b4e0-635e423f91c2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "customer_id 0\n", "nb_tickets 0\n", "nb_purchases 0\n", "total_amount 0\n", "nb_suppliers 0\n", "vente_internet_max 0\n", "purchase_date_min 0\n", "purchase_date_max 0\n", "time_between_purchase 0\n", "nb_tickets_internet 0\n", "street_id 0\n", "structure_id 327067\n", "mcp_contact_id 135224\n", "fidelity 0\n", "tenant_id 0\n", "is_partner 0\n", "deleted_at 354365\n", "gender 0\n", "is_email_true 0\n", "opt_in 0\n", "last_buying_date 119201\n", "max_price 119201\n", "ticket_sum 0\n", "average_price 115193\n", "average_purchase_delay 119203\n", "average_price_basket 119203\n", "average_ticket_basket 119203\n", "total_price 4008\n", "purchase_count 0\n", "first_buying_date 119201\n", "country 56856\n", "gender_label 0\n", "gender_female 0\n", "gender_male 0\n", "gender_other 0\n", "country_fr 56856\n", "nb_campaigns 0\n", "nb_campaigns_opened 0\n", "time_to_open 224310\n", "y_has_purchased 0\n", "dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_train.isna().sum()" ] }, { "cell_type": "code", "execution_count": 7, "id": "42c4d034-8bc1-4ebb-a1ff-60c0a86f8f7c", "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = features_target_split(dataset_train, dataset_test)" ] }, { "cell_type": "code", "execution_count": 8, "id": "94b4498d-6ae8-4c96-adbc-7ba1b8348160", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Shape train : (354365, 17)\n", "Shape test : (151874, 17)\n" ] } ], "source": [ "print(\"Shape train : \", X_train.shape)\n", "print(\"Shape test : \", X_test.shape)" ] }, { "cell_type": "markdown", "id": 
"29206597-bce8-41e0-9b68-9b9a2843787a", "metadata": {}, "source": [ "## optionnel : calcul des poids\n", "On pourrait utiliser les poids pour gérer le déséquilibre de classe, mais dans une optique exploratoire, c'est pas indispensable et ça a pas été utilisé ici !" ] }, { "cell_type": "code", "execution_count": 9, "id": "6224fd31-c190-4168-b395-e0bf5806d79d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0.0: 0.5481283836040216, 1.0: 5.694439980716696}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Per-class 'balanced' weights, stored as a {class label: weight} dict\n",
 "y_classes = np.unique(y_train['y_has_purchased'])  # computed once and reused below\n",
 "balanced_weights = class_weight.compute_class_weight(class_weight='balanced', classes=y_classes,\n",
 "                                                     y=y_train['y_has_purchased'])\n",
 "\n",
 "weight_dict = dict(zip(y_classes, balanced_weights))\n",
 "weight_dict"
 ] }, { "cell_type": "code", "execution_count": 10, "id": "4680f202-979e-483f-89b8-9df877203bcf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.54812838, 0.54812838, 0.54812838, ..., 5.69443998, 0.54812838,\n", " 0.54812838])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Weights inversely proportional to the class frequencies.\n",
 "# np.bincount needs integer input, so the labels are cast once and the same\n",
 "# integer array is reused for the counts and for the per-observation lookup.\n",
 "y_train_int = y_train['y_has_purchased'].values.astype(int)\n",
 "class_counts = np.bincount(y_train_int)\n",
 "class_weights = len(y_train_int) / (2 * class_counts)\n",
 "\n",
 "# Select the weight matching the class of each observation\n",
 "weights = class_weights[y_train_int]\n",
 "weights"
 ] }, { "cell_type": "code", "execution_count": 11, "id": "5f747be4-e70b-491c-8f0a-46cb278a2dee", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[354365. 354365. 354365. ... 354365. 354365.
354365.]\n", "354365\n" ] } ], "source": [ "# verif\n", "print(2 * weights * class_counts[y_train['y_has_purchased'].values.astype(int)])\n", "print(len(y_train['y_has_purchased']))" ] }, { "cell_type": "markdown", "id": "bd1f7d9d-1aff-49e4-81ca-038f732b1595", "metadata": {}, "source": [ "## définition des variables X et y" ] }, { "cell_type": "code", "execution_count": 12, "id": "ab25a901-28da-4504-a7d1-bf41fa5068bc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | nb_tickets | \n", "nb_purchases | \n", "total_amount | \n", "nb_suppliers | \n", "vente_internet_max | \n", "purchase_date_min | \n", "purchase_date_max | \n", "time_between_purchase | \n", "nb_tickets_internet | \n", "fidelity | \n", "is_email_true | \n", "opt_in | \n", "gender_female | \n", "gender_male | \n", "gender_other | \n", "nb_campaigns | \n", "nb_campaigns_opened | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.000000 | \n", "550.000000 | \n", "-1.000000 | \n", "0.0 | \n", "1 | \n", "True | \n", "True | \n", "1 | \n", "0 | \n", "0 | \n", "13.0 | \n", "4.0 | \n", "
1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.000000 | \n", "550.000000 | \n", "-1.000000 | \n", "0.0 | \n", "0 | \n", "True | \n", "True | \n", "0 | \n", "0 | \n", "1 | \n", "10.0 | \n", "9.0 | \n", "
2 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.000000 | \n", "550.000000 | \n", "-1.000000 | \n", "0.0 | \n", "1 | \n", "True | \n", "True | \n", "0 | \n", "1 | \n", "0 | \n", "14.0 | \n", "0.0 | \n", "
3 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.000000 | \n", "550.000000 | \n", "-1.000000 | \n", "0.0 | \n", "0 | \n", "True | \n", "False | \n", "0 | \n", "0 | \n", "1 | \n", "9.0 | \n", "0.0 | \n", "
4 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.000000 | \n", "550.000000 | \n", "-1.000000 | \n", "0.0 | \n", "0 | \n", "True | \n", "False | \n", "0 | \n", "0 | \n", "1 | \n", "4.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
354360 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.000000 | \n", "550.000000 | \n", "-1.000000 | \n", "0.0 | \n", "0 | \n", "True | \n", "False | \n", "0 | \n", "0 | \n", "1 | \n", "7.0 | \n", "0.0 | \n", "
354361 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.000000 | \n", "550.000000 | \n", "-1.000000 | \n", "0.0 | \n", "0 | \n", "True | \n", "True | \n", "0 | \n", "1 | \n", "0 | \n", "11.0 | \n", "2.0 | \n", "
354362 | \n", "2.0 | \n", "2.0 | \n", "50.0 | \n", "1.0 | \n", "0.0 | \n", "91.030556 | \n", "91.020139 | \n", "0.010417 | \n", "0.0 | \n", "4 | \n", "True | \n", "False | \n", "1 | \n", "0 | \n", "0 | \n", "6.0 | \n", "6.0 | \n", "
354363 | \n", "1.0 | \n", "1.0 | \n", "55.0 | \n", "1.0 | \n", "0.0 | \n", "52.284028 | \n", "52.284028 | \n", "0.000000 | \n", "0.0 | \n", "1 | \n", "True | \n", "True | \n", "0 | \n", "1 | \n", "0 | \n", "3.0 | \n", "0.0 | \n", "
354364 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.000000 | \n", "550.000000 | \n", "-1.000000 | \n", "0.0 | \n", "0 | \n", "True | \n", "False | \n", "0 | \n", "1 | \n", "0 | \n", "7.0 | \n", "0.0 | \n", "
354365 rows × 17 columns
\n", "\n", " | nb_tickets | \n", "nb_purchases | \n", "total_amount | \n", "nb_suppliers | \n", "vente_internet_max | \n", "purchase_date_min | \n", "purchase_date_max | \n", "time_between_purchase | \n", "nb_tickets_internet | \n", "fidelity | \n", "is_email_true | \n", "opt_in | \n", "gender_female | \n", "gender_male | \n", "gender_other | \n", "nb_campaigns | \n", "nb_campaigns_opened | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "-1 | \n", "0 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "13 | \n", "4 | \n", "
1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "-1 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "10 | \n", "9 | \n", "
2 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "-1 | \n", "0 | \n", "1 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "14 | \n", "0 | \n", "
3 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "-1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "9 | \n", "0 | \n", "
4 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "-1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "4 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
354360 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "-1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "7 | \n", "0 | \n", "
354361 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "-1 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "11 | \n", "2 | \n", "
354362 | \n", "2 | \n", "2 | \n", "50 | \n", "1 | \n", "0 | \n", "91 | \n", "91 | \n", "0 | \n", "0 | \n", "4 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "6 | \n", "6 | \n", "
354363 | \n", "1 | \n", "1 | \n", "55 | \n", "1 | \n", "0 | \n", "52 | \n", "52 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "3 | \n", "0 | \n", "
354364 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "-1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "7 | \n", "0 | \n", "
354365 rows × 17 columns
\n", "\n", " | purchase_date_min | \n", "purchase_date_max | \n", "
---|---|---|
count | \n", "354365.000000 | \n", "354365.000000 | \n", "
mean | \n", "406.981861 | \n", "396.551502 | \n", "
std | \n", "189.343612 | \n", "195.881681 | \n", "
min | \n", "0.009640 | \n", "0.000000 | \n", "
25% | \n", "188.475293 | \n", "153.457966 | \n", "
50% | \n", "550.000000 | \n", "550.000000 | \n", "
75% | \n", "550.000000 | \n", "550.000000 | \n", "
max | \n", "550.000000 | \n", "550.000000 | \n", "
\n", " | nb_tickets | \n", "nb_purchases | \n", "total_amount | \n", "nb_suppliers | \n", "vente_internet_max | \n", "purchase_date_min | \n", "purchase_date_max | \n", "time_between_purchase | \n", "nb_tickets_internet | \n", "fidelity | \n", "is_email_true | \n", "opt_in | \n", "gender_female | \n", "gender_male | \n", "gender_other | \n", "nb_campaigns | \n", "nb_campaigns_opened | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "1 | \n", "True | \n", "True | \n", "1 | \n", "0 | \n", "0 | \n", "13.0 | \n", "4.0 | \n", "
1 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "0 | \n", "True | \n", "True | \n", "0 | \n", "0 | \n", "1 | \n", "10.0 | \n", "9.0 | \n", "
2 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "1 | \n", "True | \n", "True | \n", "0 | \n", "1 | \n", "0 | \n", "14.0 | \n", "0.0 | \n", "
3 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "0 | \n", "True | \n", "False | \n", "0 | \n", "0 | \n", "1 | \n", "9.0 | \n", "0.0 | \n", "
4 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "0 | \n", "True | \n", "False | \n", "0 | \n", "0 | \n", "1 | \n", "4.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
354358 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "0 | \n", "True | \n", "False | \n", "1 | \n", "0 | \n", "0 | \n", "1.0 | \n", "0.0 | \n", "
354359 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "0 | \n", "True | \n", "True | \n", "0 | \n", "1 | \n", "0 | \n", "12.0 | \n", "2.0 | \n", "
354360 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "0 | \n", "True | \n", "False | \n", "0 | \n", "0 | \n", "1 | \n", "7.0 | \n", "0.0 | \n", "
354361 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "0 | \n", "True | \n", "True | \n", "0 | \n", "1 | \n", "0 | \n", "11.0 | \n", "2.0 | \n", "
354364 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "550.0 | \n", "550.0 | \n", "-1.0 | \n", "0.0 | \n", "0 | \n", "True | \n", "False | \n", "0 | \n", "1 | \n", "0 | \n", "7.0 | \n", "0.0 | \n", "
179675 rows × 17 columns
\n", "\n", " | const | \n", "nb_tickets | \n", "nb_purchases | \n", "total_amount | \n", "nb_suppliers | \n", "vente_internet_max | \n", "purchase_date_min | \n", "purchase_date_max | \n", "nb_tickets_internet | \n", "is_email_true | \n", "opt_in | \n", "gender_female | \n", "gender_male | \n", "nb_campaigns | \n", "nb_campaigns_opened | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "0 | \n", "1 | \n", "1 | \n", "1 | \n", "0 | \n", "13 | \n", "4 | \n", "
1 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "10 | \n", "9 | \n", "
2 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "14 | \n", "0 | \n", "
3 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "9 | \n", "0 | \n", "
4 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "4 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
354360 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "7 | \n", "0 | \n", "
354361 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "11 | \n", "2 | \n", "
354362 | \n", "1.0 | \n", "2 | \n", "2 | \n", "50 | \n", "1 | \n", "0 | \n", "91 | \n", "91 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "6 | \n", "6 | \n", "
354363 | \n", "1.0 | \n", "1 | \n", "1 | \n", "55 | \n", "1 | \n", "0 | \n", "52 | \n", "52 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "3 | \n", "0 | \n", "
354364 | \n", "1.0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "550 | \n", "550 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "7 | \n", "0 | \n", "
354365 rows × 15 columns
\n", "\n", " | const | \n", "nb_tickets | \n", "nb_purchases | \n", "total_amount | \n", "nb_suppliers | \n", "vente_internet_max | \n", "purchase_date_min | \n", "purchase_date_max | \n", "nb_tickets_internet | \n", "is_email_true | \n", "opt_in | \n", "gender_female | \n", "gender_male | \n", "nb_campaigns | \n", "nb_campaigns_opened | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1.0 | \n", "-0.024425 | \n", "-0.050722 | \n", "-0.048383 | \n", "-0.768294 | \n", "-0.599511 | \n", "0.755994 | \n", "0.783940 | \n", "-0.264693 | \n", "1 | \n", "1 | \n", "1 | \n", "0 | \n", "0.607945 | \n", "0.522567 | \n", "
1 | \n", "1.0 | \n", "-0.024425 | \n", "-0.050722 | \n", "-0.048383 | \n", "-0.768294 | \n", "-0.599511 | \n", "0.755994 | \n", "0.783940 | \n", "-0.264693 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0.306155 | \n", "1.701843 | \n", "
2 | \n", "1.0 | \n", "-0.024425 | \n", "-0.050722 | \n", "-0.048383 | \n", "-0.768294 | \n", "-0.599511 | \n", "0.755994 | \n", "0.783940 | \n", "-0.264693 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0.708542 | \n", "-0.420854 | \n", "
3 | \n", "1.0 | \n", "-0.024425 | \n", "-0.050722 | \n", "-0.048383 | \n", "-0.768294 | \n", "-0.599511 | \n", "0.755994 | \n", "0.783940 | \n", "-0.264693 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0.205558 | \n", "-0.420854 | \n", "
4 | \n", "1.0 | \n", "-0.024425 | \n", "-0.050722 | \n", "-0.048383 | \n", "-0.768294 | \n", "-0.599511 | \n", "0.755994 | \n", "0.783940 | \n", "-0.264693 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "-0.297426 | \n", "-0.420854 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
354360 | \n", "1.0 | \n", "-0.024425 | \n", "-0.050722 | \n", "-0.048383 | \n", "-0.768294 | \n", "-0.599511 | \n", "0.755994 | \n", "0.783940 | \n", "-0.264693 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0.004365 | \n", "-0.420854 | \n", "
354361 | \n", "1.0 | \n", "-0.024425 | \n", "-0.050722 | \n", "-0.048383 | \n", "-0.768294 | \n", "-0.599511 | \n", "0.755994 | \n", "0.783940 | \n", "-0.264693 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "0.406752 | \n", "0.050856 | \n", "
354362 | \n", "1.0 | \n", "-0.000838 | \n", "0.092966 | \n", "-0.009150 | \n", "1.219633 | \n", "-0.599511 | \n", "-1.665887 | \n", "-1.557073 | \n", "-0.264693 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "-0.096232 | \n", "0.994277 | \n", "
354363 | \n", "1.0 | \n", "-0.012631 | \n", "0.021122 | \n", "-0.005227 | \n", "1.219633 | \n", "-0.599511 | \n", "-1.871668 | \n", "-1.755983 | \n", "-0.264693 | \n", "1 | \n", "1 | \n", "0 | \n", "1 | \n", "-0.398023 | \n", "-0.420854 | \n", "
354364 | \n", "1.0 | \n", "-0.024425 | \n", "-0.050722 | \n", "-0.048383 | \n", "-0.768294 | \n", "-0.599511 | \n", "0.755994 | \n", "0.783940 | \n", "-0.264693 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0.004365 | \n", "-0.420854 | \n", "
354365 rows × 15 columns
\n", "