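Add steps: print progress after each model pipeline, plot the company histogram from the annotated test copy, and comment out the per-company recall calls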

2024-03-18 19:38:01 +00:00 · 2024-03-18 19:38:01 +00:00 · 9e5e364aa3
commit 9e5e364aa3
parent 969cb8ec43
2 changed files with 10 additions and 5 deletions
--- a/0_5_Machine_Learning.py
+++ b/0_5_Machine_Learning.py
@@ -84,14 +84,19 @@ model_result = pd.DataFrame(columns= ["Model", "Accuracy", "Recall", "F1_score",
 # Naive Bayes
 model_result = pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result)
 print("Naive Bayes : Done")
 # Logistic Regression
 model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
 print("Logistic : Done")
 model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
 print("Logistic CV : Done")
 # Random Forest
 model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result)
 print("Random Forest : Done")
 model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
 print("Random Forest CV: Done")
 # Save result
 save_result_set_s3(model_result , "resultat", type_of_activity)
--- a/utils_ml.py
+++ b/utils_ml.py
@@ -167,7 +167,7 @@ def draw_prob_distribution_companies(y_pred_prob, model):
    test = dataset_test.copy()
    test['probability to buy'] = y_pred_prob
    test['company'] = test['customer_id'].str.split('_', expand=True)[0]
-    sns.histplot(data=dataset_test, x='probability to buy', hue='company', element='step',
+    sns.histplot(data=test, x='probability to buy', hue='company', element='step',
             stat='count', common_norm=False, bins=10, palette='Set1', alpha=1)
    plt.xlim(0, 1)
    plt.ylim(0, None)
@@ -200,7 +200,7 @@ def pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result):
                       "AUC" : [auc(fpr, tpr)]}
                       )
    model_result = pd.concat([model_result, result])
-    compute_recall_companies(dataset_test, y_pred, model)
+    #compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
    draw_confusion_matrix(y_test, y_pred, model)
    draw_roc_curve(X_test, y_pred_prob, model)
@@ -236,7 +236,7 @@ def pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result):
                       "AUC" : [auc(fpr, tpr)]}
                       )
    model_result = pd.concat([model_result, result])
-    compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
+    #compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
    draw_confusion_matrix(y_test, y_pred, model)
    draw_roc_curve(X_test, y_pred_prob, model)
@@ -267,7 +267,7 @@ def pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result):
                       "AUC" : [auc(fpr, tpr)]}
                       )
    model_result = pd.concat([model_result, result])
-    compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
+    #compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
    draw_confusion_matrix(y_test, y_pred, model)
    draw_roc_curve(X_test, y_pred_prob, model)
@@ -308,7 +308,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
                       "AUC" : [auc(fpr, tpr)]}
                       )
    model_result = pd.concat([model_result, result])
-    compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
+    #compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
    draw_confusion_matrix(y_test, y_pred, model)
    draw_roc_curve(X_test, y_pred_prob, model)