diff --git a/0_2_Dataset_construction.py b/0_2_Dataset_construction.py index 0ceb67a..9d246cd 100644 --- a/0_2_Dataset_construction.py +++ b/0_2_Dataset_construction.py @@ -115,7 +115,10 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path): max_interval = (end_features_date - min_date) / np.timedelta64(1, 'D') + 1 df_customer_product[['purchase_date_max', 'purchase_date_min']] = df_customer_product[['purchase_date_max', 'purchase_date_min']].fillna(max_interval) df_customer_product[['time_between_purchase']] = df_customer_product[['time_between_purchase']].fillna(-1) - + + # Customers who have neither received an e-mail nor made a purchase during the feature estimation period are removed + df_customer_product = df_customer_product[(df_customer_product['nb_purchases'] > 0) | (df_customer_product['nb_campaigns'] > 0)] + print("Explanatory variable construction : SUCCESS") # 2. Construction of the explained variable