base_test_train
This commit is contained in:
parent
71a5cb2a3e
commit
66754f957e
File diff suppressed because it is too large
Load Diff
814
code_base_train_test.ipynb
Normal file
814
code_base_train_test.ipynb
Normal file
|
@ -0,0 +1,814 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "bf34b03c-536f-4f93-93a5-e452552653aa",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Choisissez le type de compagnie : sport ? musique ? musee ? musique\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n",
|
||||
"Couverture Company 10 : 2016-03-07 - 2023-09-25\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n",
|
||||
"Couverture Company 11 : 2015-06-26 - 2023-11-08\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n",
|
||||
"Couverture Company 12 : 2016-06-14 - 2023-11-08\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n",
|
||||
"Couverture Company 13 : 2010-07-31 - 2023-11-08\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n",
|
||||
"Couverture Company 14 : 1901-01-01 - 2023-11-08\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset test : SUCCESS\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_10/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_10/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_10/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset train : SUCCESS\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset test : SUCCESS\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_11/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_11/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_11/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset train : SUCCESS\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset test : SUCCESS\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_12/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_12/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_12/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset train : SUCCESS\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset test : SUCCESS\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_13/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_13/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_13/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset train : SUCCESS\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset test : SUCCESS\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_14/customerplus_cleaned.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_14/campaigns_information.csv\n",
|
||||
"File path : projet-bdc2324-team1/0_Input/Company_14/products_purchased_reduced.csv\n",
|
||||
"Data filtering : SUCCESS\n",
|
||||
"KPIs construction : SUCCESS\n",
|
||||
"Explanatory variable construction : SUCCESS\n",
|
||||
"Explained variable construction : SUCCESS\n",
|
||||
"Exportation dataset train : SUCCESS\n",
|
||||
"FIN DE LA GENERATION DES DATASETS : SUCCESS\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Business Data Challenge - Team 1\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import s3fs\n",
|
||||
"import re\n",
|
||||
"import warnings\n",
|
||||
"from datetime import date, timedelta, datetime\n",
|
||||
"\n",
|
# Create the S3 filesystem object; the endpoint host is read from the
# AWS_S3_ENDPOINT environment variable (KeyError if it is not set).
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})


# Import KPI construction functions by executing the helper script in this
# namespace. The file is read inside a `with` block so that its handle is
# closed deterministically instead of being left to the garbage collector.
with open('0_KPI_functions.py') as kpi_source:
    exec(kpi_source.read())

# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas SettingWithCopyWarning messages, which
# makes chained-assignment mistakes harder to spot - consider narrowing it.
warnings.filterwarnings('ignore')
|
||||
"\n",
|
||||
"\n",
|
def display_covering_time(df, company, datecover):
    """
    Record the daily purchase coverage of one company and print its bounds.

    The earliest and latest values of ``df['purchase_date']`` are truncated to
    calendar days. Every day from the first one (included) up to the last one
    (excluded) is stored, as a ``datetime`` at midnight, in
    ``datecover[company]``. The dictionary is modified in place and returned.
    """
    purchase_dates = df['purchase_date']
    min_date = purchase_dates.min().strftime("%Y-%m-%d")
    max_date = purchase_dates.max().strftime("%Y-%m-%d")

    # Parse the day strings back so that the time of day is dropped
    first_day = datetime.strptime(min_date, "%Y-%m-%d")
    last_day = datetime.strptime(max_date, "%Y-%m-%d")

    covered_days = []
    for offset in range((last_day - first_day).days):
        covered_days.append(first_day + timedelta(days=offset))
    datecover[company] = covered_days

    print(f'Couverture Company {company} : {min_date} - {max_date}')
    return datecover
|
||||
"\n",
|
||||
"\n",
|
def compute_time_intersection(datecover):
    """
    Return the days that are covered by every company.

    Parameters
    ----------
    datecover : dict
        Maps a company identifier to the list of ``datetime`` days it covers.

    Returns
    -------
    list of str
        The days present in all lists, formatted as ``YYYY-MM-DD`` and sorted
        in ascending order. An empty mapping yields an empty list.
    """
    # set.intersection() called with no argument raises a TypeError, so the
    # "no company at all" case has to be handled explicitly.
    if not datecover:
        return []

    day_sets = [set(days) for days in datecover.values()]
    common_days = set.intersection(*day_sets)
    return sorted(day.strftime("%Y-%m-%d") for day in common_days)
|
||||
"\n",
|
||||
"\n",
|
def df_coverage_modelization(sport, coverage_train = 0.7):
    """
    Compute the three dates used to build the train and test datasets.

    Parameters
    ----------
    sport : iterable of str
        Company identifiers to compare (despite its name, the argument is
        used for whatever list of companies the caller passes).
    coverage_train : float, default 0.7
        Share of the common coverage period, counted in days from its start,
        that is kept for feature construction.

    Returns
    -------
    tuple of str
        ``(start_date, end_of_features, final_date)`` as ``YYYY-MM-DD``
        strings taken from the period covered by every company.

    Raises
    ------
    ValueError
        If the companies share no common day of coverage.
    """
    datecover = {}
    for company in sport:
        # display_databases is not defined in this cell - presumably provided
        # by 0_KPI_functions.py, which is executed at the top of the notebook.
        df_products_purchased_reduced = display_databases(company, file_name = "products_purchased_reduced",
                                                          datetime_col = ['purchase_date'])
        datecover = display_covering_time(df_products_purchased_reduced, company, datecover)

    dt_coverage = compute_time_intersection(datecover)
    if not dt_coverage:
        raise ValueError("No common coverage period between companies : " + ", ".join(str(c) for c in sport))

    # Use the requested share (it used to be hard-coded to 0.7) and clamp the
    # index so that coverage_train = 1 selects the last day instead of
    # running past the end of the list.
    split_index = min(int(coverage_train * len(dt_coverage)), len(dt_coverage) - 1)

    start_date = dt_coverage[0]
    end_of_features = dt_coverage[split_index]
    final_date = dt_coverage[-1]
    return start_date, end_of_features, final_date
|
||||
" \n",
|
||||
"\n",
|
def dataset_construction(min_date, end_features_date, max_date, directory_path):
    """
    Build the modelling dataset (explanatory variables and target) of one company.

    Explanatory variables are KPIs computed from the campaigns sent and the
    purchases made between ``min_date`` and ``end_features_date`` (both
    included), merged with the customer table. The target ``y_has_purchased``
    is 1 for customers with at least one purchase after ``end_features_date``
    and up to ``max_date`` (included), and 0 otherwise.

    Parameters
    ----------
    min_date, end_features_date, max_date : str or datetime-like
        Bounds of the feature and target windows; each one is parsed with
        ``pd.to_datetime(..., utc=True, format='ISO8601')``.
    directory_path : str
        Company identifier handed to ``display_databases``; it is also used
        as the prefix of the returned ``customer_id`` values.

    Returns
    -------
    pandas.DataFrame
        Explanatory variables and ``y_has_purchased``, with ``customer_id``
        rewritten as ``"<directory_path>_<customer_id>"``.
    """
    # Import customerplus, campaigns and purchases
    # (display_databases and the *_kpi_function helpers are not defined in
    # this cell - presumably they come from 0_KPI_functions.py)
    df_customerplus_clean_0 = display_databases(directory_path, file_name = "customerplus_cleaned")
    df_campaigns_information = display_databases(directory_path, file_name = "campaigns_information", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])
    df_all_purchases = display_databases(directory_path, file_name = "products_purchased_reduced", datetime_col = ['purchase_date'])

    # Consistency filter: express the three bounds as UTC timestamps
    max_date = pd.to_datetime(max_date, utc = True, format = 'ISO8601')
    end_features_date = pd.to_datetime(end_features_date, utc = True, format = 'ISO8601')
    min_date = pd.to_datetime(min_date, utc = True, format = 'ISO8601')

    # Filter df_campaigns_information: keep campaigns sent inside the feature
    # window. The explicit copy makes the assignment below act on this frame
    # rather than on a temporary slice of the original one.
    sent_in_window = (df_campaigns_information['sent_at'] <= end_features_date) & (df_campaigns_information['sent_at'] >= min_date)
    df_campaigns_information = df_campaigns_information[sent_in_window].copy()
    # An opening that happens once the feature window is over is not known yet
    opened_too_late = df_campaigns_information['opened_at'] >= end_features_date
    df_campaigns_information.loc[opened_too_late, 'opened_at'] = pd.NaT

    # Filter the purchases used for the features. The unfiltered table is kept
    # in df_all_purchases because the target needs the later purchases.
    bought_in_window = (df_all_purchases['purchase_date'] <= end_features_date) & (df_all_purchases['purchase_date'] >= min_date)
    df_products_purchased_reduced = df_all_purchases[bought_in_window]

    print("Data filtering : SUCCESS")

    # Merge everything and build the KPIs

    # KPIs on advertising campaigns
    df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)

    # KPIs on purchasing behaviour
    df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)

    # KPIs on socio-demographic data
    df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)

    print("KPIs construction : SUCCESS")

    # Merge with the customer-related KPIs
    df_customer = pd.merge(df_customerplus_clean, df_campaigns_kpi, on = 'customer_id', how = 'left')

    # Fill NaN values: customers without any campaign get zero counts
    campaign_counts = ['nb_campaigns', 'nb_campaigns_opened']
    df_customer[campaign_counts] = df_customer[campaign_counts].fillna(0)

    # Merge with the purchasing-behaviour KPIs
    df_customer_product = pd.merge(df_tickets_kpi, df_customer, on = 'customer_id', how = 'outer')

    # Fill NaN values: customers without any purchase get zero counts
    ticket_counts = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']
    df_customer_product[ticket_counts] = df_customer_product[ticket_counts].fillna(0)

    print("Explanatory variable construction : SUCCESS")

    # 2. Construction of the explained variable
    # The target window is read from the unfiltered purchases: the feature
    # table stops at end_features_date, so filtering it for later dates would
    # always give an empty result.
    bought_in_target_window = (df_all_purchases['purchase_date'] <= max_date) & (df_all_purchases['purchase_date'] > end_features_date)

    # Purchase indicator: one row per customer who bought in the target window
    y = df_all_purchases.loc[bought_in_target_window, ['customer_id']].drop_duplicates()
    y['y_has_purchased'] = 1

    print("Explained variable construction : SUCCESS")

    # 3. Merge between explained and explanatory variables
    dataset = pd.merge(df_customer_product, y, on = ['customer_id'], how = 'left')

    # 0 if there is no purchase (fillna returns a new object, so the result
    # has to be assigned back to the column)
    dataset['y_has_purchased'] = dataset['y_has_purchased'].fillna(0)

    # add id_company prefix to customer_id
    dataset['customer_id'] = directory_path + '_' + dataset['customer_id'].astype('str')

    return dataset
|
||||
"\n",
|
## Exportation

# Company identifiers grouped by type of activity
companies = {'musee' : ['1', '2', '3', '4', '101'],
             'sport': ['5', '6', '7', '8', '9'],
             'musique' : ['10', '11', '12', '13', '14']}

# The answer is stripped and lower-cased so that a stray space or capital
# letter is accepted; anything else stops with an explicit message instead of
# a bare KeyError.
type_of_comp = input('Choisissez le type de compagnie : sport ? musique ? musee ?').strip().lower()
if type_of_comp not in companies:
    raise ValueError(f"Unknown company type {type_of_comp!r} : expected one of {sorted(companies)}")
list_of_comp = companies[type_of_comp]

# Export folder
BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{type_of_comp}'

# Create the test dataset and the train dataset of every company of the chosen type

start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_train = 0.7)

for company in list_of_comp:
    # Dataset test
    dataset_test = dataset_construction(min_date = start_date, end_features_date = end_of_features,
                                        max_date = final_date, directory_path = company)

    # Export
    FILE_KEY_OUT_S3 = "dataset_test" + company + ".csv"
    FILE_PATH_OUT_S3 = BUCKET_OUT + "/Test_set/" + FILE_KEY_OUT_S3

    with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
        dataset_test.to_csv(file_out, index = False)

    print("Exportation dataset test : SUCCESS")

    # Dataset train
    # NOTE(review): this call receives exactly the same arguments as the test
    # one above, so both exported files hold the same rows - confirm which
    # dates the train window is meant to use.
    dataset_train = dataset_construction(min_date = start_date, end_features_date = end_of_features,
                                         max_date = final_date, directory_path = company)

    # Export
    # NOTE(review): the folder is "Train_test" while the other one is
    # "Test_set" - possibly meant to be "Train_set"; left unchanged because
    # other code may read from this path.
    FILE_KEY_OUT_S3 = "dataset_train" + company + ".csv"
    FILE_PATH_OUT_S3 = BUCKET_OUT + "/Train_test/" + FILE_KEY_OUT_S3

    with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
        dataset_train.to_csv(file_out, index = False)

    print("Exportation dataset train : SUCCESS")


print("FIN DE LA GENERATION DES DATASETS : SUCCESS")
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "3721427e-5957-4556-b278-2e7ffca892f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'projet-bdc2324-team1/Generalization/musique/Train_test/dataset_train14.csv'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"FILE_PATH_OUT_S3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "a3bfeeb6-2db0-4f1d-866c-8721343e97c5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"customer_id 0.000000\n",
|
||||
"nb_tickets 0.000000\n",
|
||||
"nb_purchases 0.000000\n",
|
||||
"total_amount 0.000000\n",
|
||||
"nb_suppliers 0.000000\n",
|
||||
"vente_internet_max 0.000000\n",
|
||||
"purchase_date_min 0.858950\n",
|
||||
"purchase_date_max 0.858950\n",
|
||||
"time_between_purchase 0.858950\n",
|
||||
"nb_tickets_internet 0.000000\n",
|
||||
"street_id 0.000000\n",
|
||||
"structure_id 0.869838\n",
|
||||
"mcp_contact_id 0.276677\n",
|
||||
"fidelity 0.000000\n",
|
||||
"tenant_id 0.000000\n",
|
||||
"is_partner 0.000000\n",
|
||||
"deleted_at 1.000000\n",
|
||||
"gender 0.000000\n",
|
||||
"is_email_true 0.000000\n",
|
||||
"opt_in 0.000000\n",
|
||||
"last_buying_date 0.709626\n",
|
||||
"max_price 0.709626\n",
|
||||
"ticket_sum 0.000000\n",
|
||||
"average_price 0.709626\n",
|
||||
"average_purchase_delay 0.709731\n",
|
||||
"average_price_basket 0.709731\n",
|
||||
"average_ticket_basket 0.709731\n",
|
||||
"total_price 0.000000\n",
|
||||
"purchase_count 0.000000\n",
|
||||
"first_buying_date 0.709626\n",
|
||||
"country 0.152090\n",
|
||||
"gender_label 0.000000\n",
|
||||
"gender_female 0.000000\n",
|
||||
"gender_male 0.000000\n",
|
||||
"gender_other 0.000000\n",
|
||||
"country_fr 0.152090\n",
|
||||
"has_tags 0.000000\n",
|
||||
"nb_campaigns 0.000000\n",
|
||||
"nb_campaigns_opened 0.000000\n",
|
||||
"time_to_open 0.848079\n",
|
||||
"y_has_purchased 1.000000\n",
|
||||
"dtype: float64"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
" dataset_train.isna().sum()/dataset_train.shape[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "a4c4e994-231b-4467-aa1b-0a5283c59dd5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>customer_id</th>\n",
|
||||
" <th>nb_tickets</th>\n",
|
||||
" <th>nb_purchases</th>\n",
|
||||
" <th>total_amount</th>\n",
|
||||
" <th>nb_suppliers</th>\n",
|
||||
" <th>vente_internet_max</th>\n",
|
||||
" <th>purchase_date_min</th>\n",
|
||||
" <th>purchase_date_max</th>\n",
|
||||
" <th>time_between_purchase</th>\n",
|
||||
" <th>nb_tickets_internet</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>gender_label</th>\n",
|
||||
" <th>gender_female</th>\n",
|
||||
" <th>gender_male</th>\n",
|
||||
" <th>gender_other</th>\n",
|
||||
" <th>country_fr</th>\n",
|
||||
" <th>has_tags</th>\n",
|
||||
" <th>nb_campaigns</th>\n",
|
||||
" <th>nb_campaigns_opened</th>\n",
|
||||
" <th>time_to_open</th>\n",
|
||||
" <th>y_has_purchased</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>14_1</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>70.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1464.938449</td>\n",
|
||||
" <td>1464.938021</td>\n",
|
||||
" <td>0.000428</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>other</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0 days 00:36:13</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>14_2</td>\n",
|
||||
" <td>7.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>145.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1466.030116</td>\n",
|
||||
" <td>365.335000</td>\n",
|
||||
" <td>1100.695116</td>\n",
|
||||
" <td>7.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>0 days 02:30:09.250000</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>14_3</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>70.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1476.907894</td>\n",
|
||||
" <td>1476.907662</td>\n",
|
||||
" <td>0.000231</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>female</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0 days 20:58:45</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>14_4</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>32.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1465.907894</td>\n",
|
||||
" <td>1465.907465</td>\n",
|
||||
" <td>0.000428</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>14_5</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>70.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1465.373866</td>\n",
|
||||
" <td>1465.373819</td>\n",
|
||||
" <td>0.000046</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>female</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>7.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>343121</th>\n",
|
||||
" <td>14_6884748</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>343122</th>\n",
|
||||
" <td>14_6884749</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>343123</th>\n",
|
||||
" <td>14_6884750</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>343124</th>\n",
|
||||
" <td>14_6884751</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>female</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>343125</th>\n",
|
||||
" <td>14_6884753</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaT</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>343126 rows × 41 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
|
||||
"0 14_1 2.0 2.0 70.0 1.0 \n",
|
||||
"1 14_2 7.0 3.0 145.0 2.0 \n",
|
||||
"2 14_3 2.0 2.0 70.0 1.0 \n",
|
||||
"3 14_4 2.0 2.0 32.0 1.0 \n",
|
||||
"4 14_5 2.0 2.0 70.0 1.0 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"343121 14_6884748 0.0 0.0 0.0 0.0 \n",
|
||||
"343122 14_6884749 0.0 0.0 0.0 0.0 \n",
|
||||
"343123 14_6884750 0.0 0.0 0.0 0.0 \n",
|
||||
"343124 14_6884751 0.0 0.0 0.0 0.0 \n",
|
||||
"343125 14_6884753 0.0 0.0 0.0 0.0 \n",
|
||||
"\n",
|
||||
" vente_internet_max purchase_date_min purchase_date_max \\\n",
|
||||
"0 1.0 1464.938449 1464.938021 \n",
|
||||
"1 1.0 1466.030116 365.335000 \n",
|
||||
"2 1.0 1476.907894 1476.907662 \n",
|
||||
"3 1.0 1465.907894 1465.907465 \n",
|
||||
"4 1.0 1465.373866 1465.373819 \n",
|
||||
"... ... ... ... \n",
|
||||
"343121 0.0 NaN NaN \n",
|
||||
"343122 0.0 NaN NaN \n",
|
||||
"343123 0.0 NaN NaN \n",
|
||||
"343124 0.0 NaN NaN \n",
|
||||
"343125 0.0 NaN NaN \n",
|
||||
"\n",
|
||||
" time_between_purchase nb_tickets_internet ... gender_label \\\n",
|
||||
"0 0.000428 2.0 ... other \n",
|
||||
"1 1100.695116 7.0 ... male \n",
|
||||
"2 0.000231 2.0 ... female \n",
|
||||
"3 0.000428 2.0 ... male \n",
|
||||
"4 0.000046 2.0 ... female \n",
|
||||
"... ... ... ... ... \n",
|
||||
"343121 NaN 0.0 ... male \n",
|
||||
"343122 NaN 0.0 ... male \n",
|
||||
"343123 NaN 0.0 ... male \n",
|
||||
"343124 NaN 0.0 ... female \n",
|
||||
"343125 NaN 0.0 ... male \n",
|
||||
"\n",
|
||||
" gender_female gender_male gender_other country_fr has_tags \\\n",
|
||||
"0 0 0 1 1.0 0 \n",
|
||||
"1 0 1 0 1.0 1 \n",
|
||||
"2 1 0 0 1.0 0 \n",
|
||||
"3 0 1 0 1.0 0 \n",
|
||||
"4 1 0 0 1.0 0 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"343121 0 1 0 1.0 0 \n",
|
||||
"343122 0 1 0 1.0 0 \n",
|
||||
"343123 0 1 0 1.0 0 \n",
|
||||
"343124 1 0 0 1.0 0 \n",
|
||||
"343125 0 1 0 1.0 0 \n",
|
||||
"\n",
|
||||
" nb_campaigns nb_campaigns_opened time_to_open \\\n",
|
||||
"0 9.0 1.0 0 days 00:36:13 \n",
|
||||
"1 9.0 4.0 0 days 02:30:09.250000 \n",
|
||||
"2 6.0 1.0 0 days 20:58:45 \n",
|
||||
"3 6.0 0.0 NaT \n",
|
||||
"4 7.0 0.0 NaT \n",
|
||||
"... ... ... ... \n",
|
||||
"343121 0.0 0.0 NaT \n",
|
||||
"343122 0.0 0.0 NaT \n",
|
||||
"343123 0.0 0.0 NaT \n",
|
||||
"343124 0.0 0.0 NaT \n",
|
||||
"343125 0.0 0.0 NaT \n",
|
||||
"\n",
|
||||
" y_has_purchased \n",
|
||||
"0 NaN \n",
|
||||
"1 NaN \n",
|
||||
"2 NaN \n",
|
||||
"3 NaN \n",
|
||||
"4 NaN \n",
|
||||
"... ... \n",
|
||||
"343121 NaN \n",
|
||||
"343122 NaN \n",
|
||||
"343123 NaN \n",
|
||||
"343124 NaN \n",
|
||||
"343125 NaN \n",
|
||||
"\n",
|
||||
"[343126 rows x 41 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75f9a672-641f-49a2-a8d6-7673845506f5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user