diff --git a/0_2_Dataset_construction.py b/0_2_Dataset_construction.py index 1c410f5..ae96532 100644 --- a/0_2_Dataset_construction.py +++ b/0_2_Dataset_construction.py @@ -42,7 +42,7 @@ def compute_time_intersection(datecover): return sorted(formated_dates) -def df_coverage_modelization(sport, coverage_train = 0.7): +def df_coverage_modelization(sport, coverage_features = 0.7): """ This function returns start_date, end_of_features and final dates that help to construct train and test datasets @@ -81,7 +81,7 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path): df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT') #Filtre de la base df_products_purchased_reduced - df_products_purchased_reduced = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)] + df_products_purchased_features = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)] print("Data filtering : SUCCESS") @@ -91,7 +91,7 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path): df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information) # KPI sur le comportement d'achat - df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced) + df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_features) # KPI sur les données socio-démographiques df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0) @@ -146,7 +146,7 @@ BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{type_of_comp}' # Create test dataset and train dataset for sport companies -# start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_train = 0.7) +#start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_features = 0.7) start_date = "2021-05-01" end_of_features = "2022-11-01" final_date = "2023-11-01"