add steps
This commit is contained in:
parent
969cb8ec43
commit
9e5e364aa3
|
@ -84,14 +84,19 @@ model_result = pd.DataFrame(columns= ["Model", "Accuracy", "Recall", "F1_score",
|
||||||
|
|
||||||
# Naive Bayes
|
# Naive Bayes
|
||||||
model_result = pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result)
|
model_result = pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result)
|
||||||
|
print("Naive Bayes : Done")
|
||||||
|
|
||||||
# Logistic Regression
|
# Logistic Regression
|
||||||
model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
|
model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
|
||||||
|
print("Logistic : Done")
|
||||||
model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
|
model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
|
||||||
|
print("Logistic CV : Done")
|
||||||
|
|
||||||
# Random Forest
|
# Random Forest
|
||||||
model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result)
|
model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result)
|
||||||
|
print("Random Forest : Done")
|
||||||
model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
|
model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
|
||||||
|
print("Random Forest CV: Done")
|
||||||
|
|
||||||
# Save result
|
# Save result
|
||||||
save_result_set_s3(model_result , "resultat", type_of_activity)
|
save_result_set_s3(model_result , "resultat", type_of_activity)
|
10
utils_ml.py
10
utils_ml.py
|
@ -167,7 +167,7 @@ def draw_prob_distribution_companies(y_pred_prob, model):
|
||||||
test = dataset_test.copy()
|
test = dataset_test.copy()
|
||||||
test['probability to buy'] = y_pred_prob
|
test['probability to buy'] = y_pred_prob
|
||||||
test['company'] = test['customer_id'].str.split('_', expand=True)[0]
|
test['company'] = test['customer_id'].str.split('_', expand=True)[0]
|
||||||
sns.histplot(data=dataset_test, x='probability to buy', hue='company', element='step',
|
sns.histplot(data=test, x='probability to buy', hue='company', element='step',
|
||||||
stat='count', common_norm=False, bins=10, palette='Set1', alpha=1)
|
stat='count', common_norm=False, bins=10, palette='Set1', alpha=1)
|
||||||
plt.xlim(0, 1)
|
plt.xlim(0, 1)
|
||||||
plt.ylim(0, None)
|
plt.ylim(0, None)
|
||||||
|
@ -200,7 +200,7 @@ def pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result):
|
||||||
"AUC" : [auc(fpr, tpr)]}
|
"AUC" : [auc(fpr, tpr)]}
|
||||||
)
|
)
|
||||||
model_result = pd.concat([model_result, result])
|
model_result = pd.concat([model_result, result])
|
||||||
compute_recall_companies(dataset_test, y_pred, model)
|
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||||
|
|
||||||
draw_confusion_matrix(y_test, y_pred, model)
|
draw_confusion_matrix(y_test, y_pred, model)
|
||||||
draw_roc_curve(X_test, y_pred_prob, model)
|
draw_roc_curve(X_test, y_pred_prob, model)
|
||||||
|
@ -236,7 +236,7 @@ def pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result):
|
||||||
"AUC" : [auc(fpr, tpr)]}
|
"AUC" : [auc(fpr, tpr)]}
|
||||||
)
|
)
|
||||||
model_result = pd.concat([model_result, result])
|
model_result = pd.concat([model_result, result])
|
||||||
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||||
|
|
||||||
draw_confusion_matrix(y_test, y_pred, model)
|
draw_confusion_matrix(y_test, y_pred, model)
|
||||||
draw_roc_curve(X_test, y_pred_prob, model)
|
draw_roc_curve(X_test, y_pred_prob, model)
|
||||||
|
@ -267,7 +267,7 @@ def pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result):
|
||||||
"AUC" : [auc(fpr, tpr)]}
|
"AUC" : [auc(fpr, tpr)]}
|
||||||
)
|
)
|
||||||
model_result = pd.concat([model_result, result])
|
model_result = pd.concat([model_result, result])
|
||||||
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||||
|
|
||||||
draw_confusion_matrix(y_test, y_pred, model)
|
draw_confusion_matrix(y_test, y_pred, model)
|
||||||
draw_roc_curve(X_test, y_pred_prob, model)
|
draw_roc_curve(X_test, y_pred_prob, model)
|
||||||
|
@ -308,7 +308,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
|
||||||
"AUC" : [auc(fpr, tpr)]}
|
"AUC" : [auc(fpr, tpr)]}
|
||||||
)
|
)
|
||||||
model_result = pd.concat([model_result, result])
|
model_result = pd.concat([model_result, result])
|
||||||
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||||
|
|
||||||
draw_confusion_matrix(y_test, y_pred, model)
|
draw_confusion_matrix(y_test, y_pred, model)
|
||||||
draw_roc_curve(X_test, y_pred_prob, model)
|
draw_roc_curve(X_test, y_pred_prob, model)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user