generalization #7
|
@ -42,7 +42,7 @@ def compute_time_intersection(datecover):
|
|||
return sorted(formated_dates)
|
||||
|
||||
|
||||
def df_coverage_modelization(sport, coverage_train = 0.7):
|
||||
def df_coverage_modelization(sport, coverage_features = 0.7):
|
||||
"""
|
||||
This function returns the start_date, end_of_features and final_date values
|
||||
that help to construct train and test datasets
|
||||
|
@ -81,7 +81,7 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
|
|||
df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')
|
||||
|
||||
# Filter df_products_purchased_reduced to purchases made between min_date and end_features_date (inclusive)
|
||||
df_products_purchased_reduced = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)]
|
||||
df_products_purchased_features = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)]
|
||||
|
||||
print("Data filtering : SUCCESS")
|
||||
|
||||
|
@ -91,7 +91,7 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
|
|||
df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)
|
||||
|
||||
# KPIs on purchasing behaviour
|
||||
df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)
|
||||
df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_features)
|
||||
|
||||
# KPIs on socio-demographic data
|
||||
df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)
|
||||
|
@ -146,7 +146,7 @@ BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{type_of_comp}'
|
|||
|
||||
# Create test dataset and train dataset for sport companies
|
||||
|
||||
# start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_train = 0.7)
|
||||
#start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_features = 0.7)
|
||||
start_date = "2021-05-01"
|
||||
end_of_features = "2022-11-01"
|
||||
final_date = "2023-11-01"
|
||||
|
|
Loading…
Reference in New Issue
Block a user