save model to pickle

This commit is contained in:
Alexis REVELLE 2024-03-20 12:06:47 +00:00
parent fbfc03a572
commit 605876dfb1
2 changed files with 15 additions and 2 deletions

View File

@ -89,6 +89,7 @@ print("Naive Bayes : Done")
# Logistic Regression
# Run the benchmark logistic-regression pipeline on the train/test split.
# The pipeline returns the result set, which is rebound to model_result so
# later steps keep working on the same accumulated results.
model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
print("Logistic : Done")
"""
model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
print("Logistic CV : Done")
@ -97,6 +98,6 @@ model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, mode
print("Random Forest : Done")
model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
print("Random Forest CV: Done")
"""
# Save result
# Persist the accumulated result set as a CSV file named "resultat".
# model_path is left at its default (False), so the file is written directly
# under the activity-level output folder rather than a per-model sub-folder.
save_result_set_s3(model_result , "resultat", type_of_activity)

View File

@ -56,13 +56,20 @@ def save_file_s3(File_name, type_of_activity, model):
def save_result_set_s3(result_set, File_name, type_of_activity, model=None, model_path=False):
    """Write ``result_set`` as a CSV file in the project's output folder.

    The destination is
    ``projet-bdc2324-team1/Output_model/<type_of_activity>/<File_name>.csv``,
    or, when ``model_path`` is truthy,
    ``projet-bdc2324-team1/Output_model/<type_of_activity>/<model>/<File_name>.csv``.

    Args:
        result_set: Object exposing ``to_csv(file, index=False)``
            (presumably a pandas DataFrame -- confirm against callers).
        File_name: Base name of the output file, without the ``.csv`` suffix.
        type_of_activity: Name of the activity sub-folder.
        model: Name of the per-model sub-folder; only used when
            ``model_path`` is truthy.
        model_path: When truthy, store the file inside the ``model``
            sub-folder instead of directly under the activity folder.

    Note:
        If ``model_path`` is truthy while ``model`` is left at ``None``, the
        path contains the literal folder name ``None``.
    """
    # Build the destination once. The path is assembled with an explicit "/"
    # after the model folder so the file name is never glued to the folder name.
    output_dir = f"projet-bdc2324-team1/Output_model/{type_of_activity}/"
    if model_path:
        output_dir += f"{model}/"
    FILE_PATH_OUT_S3 = output_dir + File_name + '.csv'

    # ``fs`` is the module-level filesystem object defined elsewhere in the file.
    with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
        result_set.to_csv(file_out, index=False)
def save_model_s3(File_name, type_of_activity, model, classifier):
    """Pickle ``classifier`` and store it in the per-model output folder.

    The object is serialized with ``pickle.dumps`` and written in binary mode
    to
    ``projet-bdc2324-team1/Output_model/<type_of_activity>/<model>/<File_name>.pkl``
    through the module-level ``fs`` filesystem object (defined outside this
    block).

    Args:
        File_name: Base name of the output file, without the ``.pkl`` suffix.
        type_of_activity: Name of the activity sub-folder.
        model: Name of the per-model sub-folder.
        classifier: Any picklable object to persist (the callers visible in
            this file pass fitted pipelines / grid-search objects).
    """
    serialized = pickle.dumps(classifier)
    target_dir = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/"
    destination = target_dir + File_name + '.pkl'
    with fs.open(destination, 'wb') as handle:
        handle.write(serialized)
def compute_recall(group):
    """Return ``recall_score`` of the predictions in ``group``.

    Args:
        group: Mapping-like object indexable by column name (presumably a
            DataFrame or a groupby chunk -- confirm against callers) that
            holds the true labels under ``'y_has_purchased'`` and the
            predicted labels under ``'prediction'``.

    Returns:
        Whatever ``recall_score`` returns for those two columns.
    """
    y_true = group['y_has_purchased']
    y_pred = group['prediction']
    return recall_score(y_true, y_pred)
@ -208,6 +215,7 @@ def pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result):
draw_prob_distribution(y_pred_prob, model)
draw_prob_distribution_companies(y_pred_prob, model)
draw_calibration_curve(X_test, y_pred_prob, model)
save_model_s3('LogisticRegression_Benchmark', type_of_activity, model, pipeline)
return model_result
@ -244,6 +252,7 @@ def pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result):
draw_prob_distribution(y_pred_prob, model)
draw_prob_distribution_companies(y_pred_prob, model)
draw_calibration_curve(X_test, y_pred_prob, model)
save_model_s3('LogisticRegression_cv', type_of_activity, model, grid_search)
return model_result
@ -275,6 +284,7 @@ def pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result):
draw_prob_distribution(y_pred_prob, model)
draw_prob_distribution_companies(y_pred_prob, model)
draw_calibration_curve(X_test, y_pred_prob, model)
save_model_s3('randomF_Benchmark', type_of_activity, model, pipeline)
return model_result
@ -315,6 +325,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
draw_prob_distribution(y_pred_prob, model)
draw_prob_distribution_companies(y_pred_prob, model)
draw_calibration_curve(X_test, y_pred_prob, model)
save_model_s3('randomF_cv', type_of_activity, model, gridsearch)
return model_result
@ -343,4 +354,5 @@ def pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result
draw_roc_curve(X_test, y_pred_prob, model)
draw_prob_distribution(y_pred_prob, model)
draw_calibration_curve(X_test, y_pred_prob, model)
save_model_s3('Naive_Bayes_Benchmark', type_of_activity, model, pipeline)
return model_result