generalization #7

Merged
arevelle-ensae merged 9 commits from generalization into main 2024-03-07 13:58:03 +01:00
3 changed files with 230 additions and 275 deletions
Showing only changes of commit c7ca8c560e - Show all commits

View File

@ -146,13 +146,22 @@ BUCKET_OUT = f'projet-bdc2324-team1/Generalization/{type_of_comp}'
# Create test dataset and train dataset for sport companies
<<<<<<< HEAD
#start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_features = 0.7)
=======
# start_date, end_of_features, final_date = df_coverage_modelization(list_of_comp, coverage_train = 0.7)
>>>>>>> main
# Hard-coded date window, used in place of the values that the commented-out
# df_coverage_modelization call would assign to these same three names.
# start_date is passed to dataset_construction as min_date and
# end_of_features as end_features_date.
start_date = "2021-05-01"
end_of_features = "2022-11-01"
final_date = "2023-11-01"
<<<<<<< HEAD
# Id of the anonymous customer for each company, keyed by company number.
# The ids must be quoted: a bare 1_1 is parsed by Python as the integer 11
# (underscores are digit separators in numeric literals), so it could never
# match a "<company>_<id>" value when filtering on customer_id.
# NOTE(review): presumably customer_id is stored as a "<company>_<id>" string;
# confirm against the output of dataset_construction.
anonymous_customer = {'1' : '1_1', '2' : '2_12184', '3' : '3_1', '4' : '4_2', '101' : '101_1',
                      '5' : '5_191835', '6' : '6_591412', '7' : '7_49632', '8' : '8_1942', '9' : '9_19683'}
=======
anonymous_customer = {'1' : 1, '2' : 12184, '3' : 1, '4' : 2, '101' : 1,
'5' : 191835, '6' : 591412, '7' : 49632, '8' : 1942, '9' : 19683}
>>>>>>> main
for company in list_of_comp:
dataset = dataset_construction(min_date = start_date, end_features_date = end_of_features,
@ -161,6 +170,7 @@ for company in list_of_comp:
# On retire le client anonyme
dataset = dataset[dataset['customer_id'] != anonymous_customer[company]]
<<<<<<< HEAD
#train test set
np.random.seed(42)
@ -170,6 +180,10 @@ for company in list_of_comp:
dataset = dataset.sample(frac=1).reset_index(drop=True)
dataset_train = dataset.iloc[:split_index]
dataset_test = dataset.iloc[split_index:]
=======
# On retire le client anonyme
dataset_test = dataset_test[dataset_test['customer_id'] != anonymous_customer[company]]
>>>>>>> main
# Exportation
FILE_KEY_OUT_S3 = "dataset_test" + company + ".csv"

View File

@ -90,6 +90,11 @@ def tickets_kpi_function(tickets_information = None):
# tickets_kpi = tickets_kpi.merge(avg_amount, how='left', on= 'event_type_id')
#Taux de ticket payé par internet selon les compagnies
#tickets_kpi["Taux_ticket_internet"] = tickets_kpi["nb_tickets_internet"]*100 / tickets_kpi["nb_tickets"]
#tickets_kpi['Taux_ticket_internet'] = tickets_kpi['Taux_ticket_internet'].fillna(0)
return tickets_kpi
def customerplus_kpi_function(customerplus_clean = None):

File diff suppressed because one or more lines are too long