save model to pickle
This commit is contained in:
parent
fbfc03a572
commit
605876dfb1
|
@ -89,6 +89,7 @@ print("Naive Bayes : Done")
|
|||
# Logistic Regression
|
||||
model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
|
||||
print("Logistic : Done")
|
||||
"""
|
||||
model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
|
||||
print("Logistic CV : Done")
|
||||
|
||||
|
@ -97,6 +98,6 @@ model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, mode
|
|||
print("Random Forest : Done")
|
||||
model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
|
||||
print("Random Forest CV: Done")
|
||||
|
||||
"""
|
||||
# Save result
|
||||
save_result_set_s3(model_result , "resultat", type_of_activity)
|
14
utils_ml.py
14
utils_ml.py
|
@ -56,13 +56,20 @@ def save_file_s3(File_name, type_of_activity, model):
|
|||
|
||||
def save_result_set_s3(result_set, File_name, type_of_activity, model=None, model_path=False):
    """Write ``result_set`` as a CSV file under the project's output folder.

    The destination is
    ``projet-bdc2324-team1/Output_model/<type_of_activity>/[<model>/]<File_name>.csv``;
    the ``<model>/`` level is inserted only when ``model_path`` is truthy.

    Args:
        result_set: Object exposing ``to_csv(file, index=False)``
            (presumably a pandas DataFrame -- confirm against callers).
        File_name: Base name of the output file, without extension.
        type_of_activity: Name of the sub-folder directly under ``Output_model``.
        model: Name of the per-model sub-folder. It is interpolated as-is,
            so leaving it as ``None`` while ``model_path`` is truthy yields
            a folder literally named ``None``.
        model_path: When truthy, store the file inside the ``model`` sub-folder.
    """
    output_dir = f"projet-bdc2324-team1/Output_model/{type_of_activity}/"
    if model_path:
        # The path is built once, with an explicit "/" between the model
        # folder and the file name; the earlier form without the separator
        # glued the model name onto the file name and was a dead store.
        output_dir += f"{model}/"
    FILE_PATH_OUT_S3 = output_dir + File_name + '.csv'

    # ``fs`` is the module-level filesystem object defined elsewhere in this file.
    with fs.open(FILE_PATH_OUT_S3, 'w') as file_out:
        result_set.to_csv(file_out, index=False)
|
||||
|
||||
|
||||
def save_model_s3(File_name, type_of_activity, model, classifier):
    """Pickle ``classifier`` and write the bytes to the project's output folder.

    The destination is
    ``projet-bdc2324-team1/Output_model/<type_of_activity>/<model>/<File_name>.pkl``.

    Args:
        File_name: Base name of the pickle file, without extension.
        type_of_activity: Name of the sub-folder directly under ``Output_model``.
        model: Name of the per-model sub-folder.
        classifier: Object to serialize with ``pickle.dumps``.
    """
    # Serialize before opening the destination, so a pickling failure
    # happens before any file is opened for writing.
    serialized = pickle.dumps(classifier)

    target_dir = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/"
    target_path = target_dir + File_name + '.pkl'

    # ``fs`` is the module-level filesystem object defined elsewhere in this file.
    with fs.open(target_path, 'wb') as handle:
        handle.write(serialized)
|
||||
|
||||
|
||||
def compute_recall(group):
    """Return ``recall_score`` of a group's predictions against its labels.

    Args:
        group: Mapping-like object with ``'y_has_purchased'`` (passed as the
            first argument) and ``'prediction'`` (passed as the second)
            entries -- presumably one group of a grouped DataFrame; confirm
            against callers.

    Returns:
        Whatever ``recall_score`` returns for those two columns.
    """
    observed = group['y_has_purchased']
    predicted = group['prediction']
    return recall_score(observed, predicted)
|
||||
|
||||
|
@ -208,6 +215,7 @@ def pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result):
|
|||
draw_prob_distribution(y_pred_prob, model)
|
||||
draw_prob_distribution_companies(y_pred_prob, model)
|
||||
draw_calibration_curve(X_test, y_pred_prob, model)
|
||||
save_model_s3('LogisticRegression_Benchmark', type_of_activity, model, pipeline)
|
||||
return model_result
|
||||
|
||||
|
||||
|
@ -244,6 +252,7 @@ def pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result):
|
|||
draw_prob_distribution(y_pred_prob, model)
|
||||
draw_prob_distribution_companies(y_pred_prob, model)
|
||||
draw_calibration_curve(X_test, y_pred_prob, model)
|
||||
save_model_s3('LogisticRegression_cv', type_of_activity, model, grid_search)
|
||||
return model_result
|
||||
|
||||
|
||||
|
@ -275,6 +284,7 @@ def pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result):
|
|||
draw_prob_distribution(y_pred_prob, model)
|
||||
draw_prob_distribution_companies(y_pred_prob, model)
|
||||
draw_calibration_curve(X_test, y_pred_prob, model)
|
||||
save_model_s3('randomF_Benchmark', type_of_activity, model, pipeline)
|
||||
return model_result
|
||||
|
||||
|
||||
|
@ -315,6 +325,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
|
|||
draw_prob_distribution(y_pred_prob, model)
|
||||
draw_prob_distribution_companies(y_pred_prob, model)
|
||||
draw_calibration_curve(X_test, y_pred_prob, model)
|
||||
save_model_s3('randomF_cv', type_of_activity, model, gridsearch)
|
||||
return model_result
|
||||
|
||||
|
||||
|
@ -343,4 +354,5 @@ def pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result
|
|||
draw_roc_curve(X_test, y_pred_prob, model)
|
||||
draw_prob_distribution(y_pred_prob, model)
|
||||
draw_calibration_curve(X_test, y_pred_prob, model)
|
||||
save_model_s3('Naive_Bayes_Benchmark', type_of_activity, model, pipeline)
|
||||
return model_result
|
Loading…
Reference in New Issue
Block a user