add steps

This commit is contained in:
Alexis REVELLE 2024-03-18 19:38:01 +00:00
parent 969cb8ec43
commit 9e5e364aa3
2 changed files with 10 additions and 5 deletions

View File

@@ -84,14 +84,19 @@ model_result = pd.DataFrame(columns= ["Model", "Accuracy", "Recall", "F1_score",
# Naive Bayes # Naive Bayes
model_result = pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result) model_result = pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result)
print("Naive Bayes : Done")
# Logistic Regression # Logistic Regression
model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result) model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
print("Logistic : Done")
model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result) model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
print("Logistic CV : Done")
# Random Forest # Random Forest
model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result) model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result)
print("Random Forest : Done")
model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result) model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
print("Random Forest CV: Done")
# Save result # Save result
save_result_set_s3(model_result , "resultat", type_of_activity) save_result_set_s3(model_result , "resultat", type_of_activity)

View File

@@ -167,7 +167,7 @@ def draw_prob_distribution_companies(y_pred_prob, model):
test = dataset_test.copy() test = dataset_test.copy()
test['probability to buy'] = y_pred_prob test['probability to buy'] = y_pred_prob
test['company'] = test['customer_id'].str.split('_', expand=True)[0] test['company'] = test['customer_id'].str.split('_', expand=True)[0]
sns.histplot(data=dataset_test, x='probability to buy', hue='company', element='step', sns.histplot(data=test, x='probability to buy', hue='company', element='step',
stat='count', common_norm=False, bins=10, palette='Set1', alpha=1) stat='count', common_norm=False, bins=10, palette='Set1', alpha=1)
plt.xlim(0, 1) plt.xlim(0, 1)
plt.ylim(0, None) plt.ylim(0, None)
@@ -200,7 +200,7 @@ def pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result):
"AUC" : [auc(fpr, tpr)]} "AUC" : [auc(fpr, tpr)]}
) )
model_result = pd.concat([model_result, result]) model_result = pd.concat([model_result, result])
compute_recall_companies(dataset_test, y_pred, model) #compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
draw_confusion_matrix(y_test, y_pred, model) draw_confusion_matrix(y_test, y_pred, model)
draw_roc_curve(X_test, y_pred_prob, model) draw_roc_curve(X_test, y_pred_prob, model)
@@ -236,7 +236,7 @@ def pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result):
"AUC" : [auc(fpr, tpr)]} "AUC" : [auc(fpr, tpr)]}
) )
model_result = pd.concat([model_result, result]) model_result = pd.concat([model_result, result])
compute_recall_companies(dataset_test, y_pred, type_of_activity, model) #compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
draw_confusion_matrix(y_test, y_pred, model) draw_confusion_matrix(y_test, y_pred, model)
draw_roc_curve(X_test, y_pred_prob, model) draw_roc_curve(X_test, y_pred_prob, model)
@@ -267,7 +267,7 @@ def pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result):
"AUC" : [auc(fpr, tpr)]} "AUC" : [auc(fpr, tpr)]}
) )
model_result = pd.concat([model_result, result]) model_result = pd.concat([model_result, result])
compute_recall_companies(dataset_test, y_pred, type_of_activity, model) #compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
draw_confusion_matrix(y_test, y_pred, model) draw_confusion_matrix(y_test, y_pred, model)
draw_roc_curve(X_test, y_pred_prob, model) draw_roc_curve(X_test, y_pred_prob, model)
@@ -308,7 +308,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
"AUC" : [auc(fpr, tpr)]} "AUC" : [auc(fpr, tpr)]}
) )
model_result = pd.concat([model_result, result]) model_result = pd.concat([model_result, result])
compute_recall_companies(dataset_test, y_pred, type_of_activity, model) #compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
draw_confusion_matrix(y_test, y_pred, model) draw_confusion_matrix(y_test, y_pred, model)
draw_roc_curve(X_test, y_pred_prob, model) draw_roc_curve(X_test, y_pred_prob, model)