save model to pickle

2024-03-20 12:06:47 +00:00 · 2024-03-20 12:06:47 +00:00 · 605876dfb1
commit 605876dfb1
parent fbfc03a572
2 changed files with 15 additions and 2 deletions
--- a/0_5_Machine_Learning.py
+++ b/0_5_Machine_Learning.py
@@ -89,6 +89,7 @@ print("Naive Bayes : Done")
 # Logistic Regression
 model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
 print("Logistic : Done")
+"""
 model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
 print("Logistic CV : Done")

@@ -97,6 +98,6 @@ model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, mode
 print("Random Forest : Done")
 model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
 print("Random Forest CV: Done")
-
+"""
 # Save result
 save_result_set_s3(model_result , "resultat", type_of_activity)
--- a/utils_ml.py
+++ b/utils_ml.py
@@ -56,13 +56,20 @@ def save_file_s3(File_name, type_of_activity, model):

 def save_result_set_s3(result_set, File_name, type_of_activity, model=None, model_path=False):
    if model_path:
-        FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}" + File_name + '.csv'
+        FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/" + File_name + '.csv'
    else:
        FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/Output_model/{type_of_activity}/" + File_name + '.csv'
    with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
        result_set.to_csv(file_out, index = False)


+def save_model_s3(File_name, type_of_activity, model, classifier):
+    model_bytes = pickle.dumps(classifier)
+    FILE_PATH_OUT_S3 = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/" + File_name + '.pkl'
+    with fs.open(FILE_PATH_OUT_S3, 'wb') as f:
+        f.write(model_bytes)
+    
+
 def compute_recall(group):
    return recall_score(group['y_has_purchased'], group['prediction'])
    
@@ -208,6 +215,7 @@ def pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result):
    draw_prob_distribution(y_pred_prob, model)
    draw_prob_distribution_companies(y_pred_prob, model)
    draw_calibration_curve(X_test, y_pred_prob, model)
+    save_model_s3('LogisticRegression_Benchmark', type_of_activity, model, pipeline)
    return model_result


@@ -244,6 +252,7 @@ def pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result):
    draw_prob_distribution(y_pred_prob, model)
    draw_prob_distribution_companies(y_pred_prob, model)
    draw_calibration_curve(X_test, y_pred_prob, model)
+    save_model_s3('LogisticRegression_cv', type_of_activity, model, grid_search)
    return model_result


@@ -275,6 +284,7 @@ def pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result):
    draw_prob_distribution(y_pred_prob, model)
    draw_prob_distribution_companies(y_pred_prob, model)
    draw_calibration_curve(X_test, y_pred_prob, model)
+    save_model_s3('randomF_Benchmark', type_of_activity, model, pipeline)
    return model_result


@@ -315,6 +325,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
    draw_prob_distribution(y_pred_prob, model)
    draw_prob_distribution_companies(y_pred_prob, model)
    draw_calibration_curve(X_test, y_pred_prob, model)
+    save_model_s3('randomF_cv', type_of_activity, model, gridsearch)
    return model_result


@@ -343,4 +354,5 @@ def pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result
    draw_roc_curve(X_test, y_pred_prob, model)
    draw_prob_distribution(y_pred_prob, model)
    draw_calibration_curve(X_test, y_pred_prob, model)
+    save_model_s3('Naive_Bayes_Benchmark', type_of_activity, model, pipeline)
    return model_result