generalization #7
|
@ -42,7 +42,7 @@ def compute_time_intersection(datecover):
|
||||||
return sorted(formated_dates)
|
return sorted(formated_dates)
|
||||||
|
|
||||||
|
|
||||||
def df_coverage_modelization(sport, coverage_train = 0.7):
|
def df_coverage_modelization(sport, coverage_features = 0.7):
|
||||||
"""
|
"""
|
||||||
This function returns start_date, end_of_features and final dates
|
This function returns start_date, end_of_features and final dates
|
||||||
that help to construct train and test datasets
|
that help to construct train and test datasets
|
||||||
|
@ -81,7 +81,7 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
|
||||||
df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')
|
df_campaigns_information['opened_at'][df_campaigns_information['opened_at'] >= end_features_date] = np.datetime64('NaT')
|
||||||
|
|
||||||
#Filtre de la base df_products_purchased_reduced
|
#Filtre de la base df_products_purchased_reduced
|
||||||
df_products_purchased_reduced = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)]
|
df_products_purchased_features = df_products_purchased_reduced[(df_products_purchased_reduced['purchase_date'] <= end_features_date) & (df_products_purchased_reduced['purchase_date'] >= min_date)]
|
||||||
|
|
||||||
print("Data filtering : SUCCESS")
|
print("Data filtering : SUCCESS")
|
||||||
|
|
||||||
|
@ -91,7 +91,7 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
|
||||||
df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)
|
df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_information)
|
||||||
|
|
||||||
# KPI sur le comportement d'achat
|
# KPI sur le comportement d'achat
|
||||||
df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)
|
df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_features)
|
||||||
|
|
||||||
# KPI sur les données socio-démographiques
|
# KPI sur les données socio-démographiques
|
||||||
df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)
|
df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)
|
||||||
|
@ -146,7 +146,7 @@ BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{type_of_comp}'
|
||||||
|
|
||||||
# Create test dataset and train dataset for sport companies
|
# Create test dataset and train dataset for sport companies
|
||||||
|
|
||||||
# start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_train = 0.7)
|
#start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_features = 0.7)
|
||||||
start_date = "2021-05-01"
|
start_date = "2021-05-01"
|
||||||
end_of_features = "2022-11-01"
|
end_of_features = "2022-11-01"
|
||||||
final_date = "2023-11-01"
|
final_date = "2023-11-01"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user