diff --git a/utils_ml.py b/utils_ml.py index 3222188..4dbdbc9 100644 --- a/utils_ml.py +++ b/utils_ml.py @@ -28,7 +28,7 @@ import warnings def load_train_test(type_of_activity, type_of_model): - BUCKET = f"projet-bdc2324-team1/Generalization_v2/{type_of_activity}" + BUCKET = f"projet-bdc2324-team1/1_Temp/1_0_Modelling_Datasets/{type_of_activity}" File_path_train = BUCKET + "/Train_set.csv" File_path_test = BUCKET + "/Test_set.csv" @@ -52,7 +52,7 @@ def save_file_s3(File_name, type_of_activity, type_of_model, model): image_buffer = io.BytesIO() plt.savefig(image_buffer, format='png') image_buffer.seek(0) - FILE_PATH = f"projet-bdc2324-team1/{type_of_model}/{type_of_activity}/{model}/" + FILE_PATH = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/{model}/" FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + '_' + model + '.png' with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file: s3_file.write(image_buffer.read()) @@ -61,16 +61,16 @@ def save_file_s3(File_name, type_of_activity, type_of_model, model): def save_result_set_s3(result_set, File_name, type_of_activity, type_of_model, model=None, model_path=False): if model_path: - FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/{type_of_model}/{type_of_activity}/{model}/" + File_name + '.csv' + FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/{model}/" + File_name + '.csv' else: - FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/{type_of_model}/{type_of_activity}/" + File_name + '.csv' + FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/" + File_name + '.csv' with fs.open(FILE_PATH_OUT_S3, 'w') as file_out: result_set.to_csv(file_out, index = False) def save_model_s3(File_name, type_of_activity, type_of_model, model, classifier): model_bytes = pickle.dumps(classifier) - FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/{type_of_model}/{type_of_activity}/{model}/" + File_name + '.pkl' + FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/{model}/" + File_name + '.pkl' with fs.open(FILE_PATH_OUT_S3, 'wb') as f: f.write(model_bytes) diff --git a/utils_stat_desc.py b/utils_stat_desc.py index 681e623..471fe19 100644 --- a/utils_stat_desc.py +++ b/utils_stat_desc.py @@ -50,7 +50,6 @@ def load_files(nb_compagnie): df_campaigns_kpi["customer_id"]= directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str') df_customerplus_clean["customer_id"]= directory_path + '_' + df_customerplus_clean['customer_id'].astype('str') df_products_purchased_reduced["customer_id"]= directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str') -<<<<<<< HEAD # Remove companies' outliers df_tickets_kpi = remove_outlier_total_amount(df_tickets_kpi) @@ -59,11 +58,9 @@ def load_files(nb_compagnie): for dataset in [df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean, df_target_information]: dataset = dataset[dataset['customer_id'].isin(customer_id)] -======= df_target_KPI["customer_id"]= directory_path + '_' + df_target_KPI['customer_id'].astype('str') ->>>>>>> main # Concaténation customer = pd.concat([customer, df_customerplus_clean], ignore_index=True) campaigns_kpi = pd.concat([campaigns_kpi, df_campaigns_kpi], ignore_index=True) @@ -89,7 +86,7 @@ def save_file_s3(File_name, type_of_activity): image_buffer = io.BytesIO() plt.savefig(image_buffer, format='png') image_buffer.seek(0) - FILE_PATH = f"projet-bdc2324-team1/stat_desc/{type_of_activity}/" + FILE_PATH = f"projet-bdc2324-team1/2_Output/2_0_Descriptive_Statistics/{type_of_activity}/" FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + '.png' with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file: s3_file.write(image_buffer.read())