generalization #16

Merged
arevelle-ensae merged 2 commits from generalization into main 2024-03-29 11:15:56 +01:00
2 changed files with 6 additions and 9 deletions
Showing only changes of commit d6e2b2c57a - Show all commits

View File

@ -28,7 +28,7 @@ import warnings
def load_train_test(type_of_activity, type_of_model):
BUCKET = f"projet-bdc2324-team1/Generalization_v2/{type_of_activity}"
BUCKET = f"projet-bdc2324-team1/1_Temp/1_0_Modelling_Datasets/{type_of_activity}"
File_path_train = BUCKET + "/Train_set.csv"
File_path_test = BUCKET + "/Test_set.csv"
@ -52,7 +52,7 @@ def save_file_s3(File_name, type_of_activity, type_of_model, model):
image_buffer = io.BytesIO()
plt.savefig(image_buffer, format='png')
image_buffer.seek(0)
FILE_PATH = f"projet-bdc2324-team1/{type_of_model}/{type_of_activity}/{model}/"
FILE_PATH = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/{model}/"
FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + '_' + model + '.png'
with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:
s3_file.write(image_buffer.read())
@ -61,16 +61,16 @@ def save_file_s3(File_name, type_of_activity, type_of_model, model):
def save_result_set_s3(result_set, File_name, type_of_activity, type_of_model, model=None, model_path=False):
    """Write ``result_set`` as a CSV object under the modelling-results prefix.

    The destination key is built under
    ``projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/``.
    When ``model_path`` is truthy, a ``{model}/`` sub-folder is inserted before
    the file name.

    Args:
        result_set: Object exposing ``to_csv(file, index=False)``
            (presumably a pandas DataFrame -- confirm against callers).
        File_name: Base name of the output file; ``.csv`` is appended.
        type_of_activity: Path component placed after ``type_of_model``.
        type_of_model: Path component placed directly after the results prefix.
        model: Sub-folder name, only used when ``model_path`` is truthy.
        model_path: Selects the per-model sub-folder layout when truthy.
    """
    # Only the current results layout is kept: the previous bucket-root path
    # was assigned and then immediately overwritten, so it never took effect.
    if model_path:
        # NOTE(review): with the default model=None this key contains the
        # literal text "None"; callers are expected to pass `model` here.
        FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/{model}/" + File_name + '.csv'
    else:
        FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/" + File_name + '.csv'

    # `fs` is not defined in this block; it is expected to be a module-level
    # filesystem object whose `open` returns a writable text handle.
    with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
        result_set.to_csv(file_out, index=False)
def save_model_s3(File_name, type_of_activity, type_of_model, model, classifier):
    """Pickle ``classifier`` and write it under the modelling-results prefix.

    The destination key is
    ``projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/{model}/{File_name}.pkl``.

    Args:
        File_name: Base name of the output file; ``.pkl`` is appended.
        type_of_activity: Path component placed after ``type_of_model``.
        type_of_model: Path component placed directly after the results prefix.
        model: Sub-folder name that receives the pickle file.
        classifier: Object to serialize with ``pickle.dumps``.
    """
    # Serialize before opening the destination so that a pickling failure
    # does not leave an empty object behind.
    model_bytes = pickle.dumps(classifier)

    # Only the current results layout is kept: the previous bucket-root path
    # was assigned and then immediately overwritten, so it never took effect.
    FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/{type_of_model}/{type_of_activity}/{model}/" + File_name + '.pkl'

    # `fs` is not defined in this block; it is expected to be a module-level
    # filesystem object whose `open` returns a writable binary handle.
    # NOTE(review): the resulting .pkl must only be unpickled from a trusted
    # location, since loading a pickle can execute arbitrary code.
    with fs.open(FILE_PATH_OUT_S3, 'wb') as f:
        f.write(model_bytes)

View File

@ -50,7 +50,6 @@ def load_files(nb_compagnie):
df_campaigns_kpi["customer_id"]= directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str')
df_customerplus_clean["customer_id"]= directory_path + '_' + df_customerplus_clean['customer_id'].astype('str')
df_products_purchased_reduced["customer_id"]= directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str')
<<<<<<< HEAD
# Remove companies' outliers
df_tickets_kpi = remove_outlier_total_amount(df_tickets_kpi)
@ -59,11 +58,9 @@ def load_files(nb_compagnie):
for dataset in [df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean, df_target_information]:
dataset = dataset[dataset['customer_id'].isin(customer_id)]
=======
df_target_KPI["customer_id"]= directory_path + '_' + df_target_KPI['customer_id'].astype('str')
>>>>>>> main
# Concaténation
customer = pd.concat([customer, df_customerplus_clean], ignore_index=True)
campaigns_kpi = pd.concat([campaigns_kpi, df_campaigns_kpi], ignore_index=True)
@ -89,7 +86,7 @@ def save_file_s3(File_name, type_of_activity):
image_buffer = io.BytesIO()
plt.savefig(image_buffer, format='png')
image_buffer.seek(0)
FILE_PATH = f"projet-bdc2324-team1/stat_desc/{type_of_activity}/"
FILE_PATH = f"projet-bdc2324-team1/2_Output/2_0_Descriptive_Statistics/{type_of_activity}/"
FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + '.png'
with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:
s3_file.write(image_buffer.read())