generalization #11

Merged
arevelle-ensae merged 10 commits from generalization into main 2024-03-28 09:40:04 +01:00
2 changed files with 10 additions and 8 deletions
Showing only changes of commit 9763dfe7f9 - Show all commits

View File

@@ -89,7 +89,7 @@ print("Naive Bayes : Done")
# Logistic Regression
model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
print("Logistic : Done")
"""
model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
print("Logistic CV : Done")
@@ -98,6 +98,6 @@ model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, mode
print("Random Forest : Done")
model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
print("Random Forest CV: Done")
"""
# Save result
save_result_set_s3(model_result , "resultat", type_of_activity)

View File

@@ -78,7 +78,7 @@ def compute_recall_companies(dataset_test, y_pred, type_of_activity, model):
test = dataset_test.copy()
test['prediction'] = y_pred
test['company'] = dataset_test['customer_id'].str.split('_', expand=True)[0]
-recall_scores_by_company = dataset_test.groupby('company').apply(compute_recall).reset_index(name='recall_score')
+recall_scores_by_company = test.groupby('company').apply(compute_recall).reset_index(name='recall_score')
save_result_set_s3(recall_scores_by_company, 'recall_scores_by_company', type_of_activity, model=model, model_path=True)
@@ -207,7 +207,7 @@ def pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result):
"AUC" : [auc(fpr, tpr)]}
)
model_result = pd.concat([model_result, result])
-#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
+compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
draw_confusion_matrix(y_test, y_pred, model)
draw_roc_curve(X_test, y_pred_prob, model)
@@ -244,7 +244,7 @@ def pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result):
"AUC" : [auc(fpr, tpr)]}
)
model_result = pd.concat([model_result, result])
-#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
+compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
draw_confusion_matrix(y_test, y_pred, model)
draw_roc_curve(X_test, y_pred_prob, model)
@@ -276,7 +276,7 @@ def pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result):
"AUC" : [auc(fpr, tpr)]}
)
model_result = pd.concat([model_result, result])
-#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
+compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
draw_confusion_matrix(y_test, y_pred, model)
draw_roc_curve(X_test, y_pred_prob, model)
@@ -317,7 +317,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
"AUC" : [auc(fpr, tpr)]}
)
model_result = pd.concat([model_result, result])
-#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
+compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
draw_confusion_matrix(y_test, y_pred, model)
draw_roc_curve(X_test, y_pred_prob, model)
@@ -325,7 +325,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
draw_prob_distribution(y_pred_prob, model)
draw_prob_distribution_companies(y_pred_prob, model)
draw_calibration_curve(X_test, y_pred_prob, model)
-save_model_s3('randomF_cv', type_of_activity, model, gridsearch)
+save_model_s3('randomF_cv', type_of_activity, model, grid_search)
return model_result
@@ -350,6 +350,8 @@ def pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result
"AUC" : [auc(fpr, tpr)]}
)
model_result = pd.concat([model_result, result])
+compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
draw_confusion_matrix(y_test, y_pred, model)
draw_roc_curve(X_test, y_pred_prob, model)
draw_prob_distribution(y_pred_prob, model)