generalization #11
|
@ -89,7 +89,7 @@ print("Naive Bayes : Done")
|
||||||
# Logistic Regression
|
# Logistic Regression
|
||||||
model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
|
model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
|
||||||
print("Logistic : Done")
|
print("Logistic : Done")
|
||||||
"""
|
|
||||||
model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
|
model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
|
||||||
print("Logistic CV : Done")
|
print("Logistic CV : Done")
|
||||||
|
|
||||||
|
@ -98,6 +98,6 @@ model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, mode
|
||||||
print("Random Forest : Done")
|
print("Random Forest : Done")
|
||||||
model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
|
model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
|
||||||
print("Random Forest CV: Done")
|
print("Random Forest CV: Done")
|
||||||
"""
|
|
||||||
# Save result
|
# Save result
|
||||||
save_result_set_s3(model_result , "resultat", type_of_activity)
|
save_result_set_s3(model_result , "resultat", type_of_activity)
|
14
utils_ml.py
14
utils_ml.py
|
@ -78,7 +78,7 @@ def compute_recall_companies(dataset_test, y_pred, type_of_activity, model):
|
||||||
test = dataset_test.copy()
|
test = dataset_test.copy()
|
||||||
test['prediction'] = y_pred
|
test['prediction'] = y_pred
|
||||||
test['company'] = dataset_test['customer_id'].str.split('_', expand=True)[0]
|
test['company'] = dataset_test['customer_id'].str.split('_', expand=True)[0]
|
||||||
recall_scores_by_company = dataset_test.groupby('company').apply(compute_recall).reset_index(name='recall_score')
|
recall_scores_by_company = test.groupby('company').apply(compute_recall).reset_index(name='recall_score')
|
||||||
save_result_set_s3(recall_scores_by_company, 'recall_scores_by_company', type_of_activity, model=model, model_path=True)
|
save_result_set_s3(recall_scores_by_company, 'recall_scores_by_company', type_of_activity, model=model, model_path=True)
|
||||||
|
|
||||||
|
|
||||||
|
@ -207,7 +207,7 @@ def pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result):
|
||||||
"AUC" : [auc(fpr, tpr)]}
|
"AUC" : [auc(fpr, tpr)]}
|
||||||
)
|
)
|
||||||
model_result = pd.concat([model_result, result])
|
model_result = pd.concat([model_result, result])
|
||||||
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||||
|
|
||||||
draw_confusion_matrix(y_test, y_pred, model)
|
draw_confusion_matrix(y_test, y_pred, model)
|
||||||
draw_roc_curve(X_test, y_pred_prob, model)
|
draw_roc_curve(X_test, y_pred_prob, model)
|
||||||
|
@ -244,7 +244,7 @@ def pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result):
|
||||||
"AUC" : [auc(fpr, tpr)]}
|
"AUC" : [auc(fpr, tpr)]}
|
||||||
)
|
)
|
||||||
model_result = pd.concat([model_result, result])
|
model_result = pd.concat([model_result, result])
|
||||||
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||||
|
|
||||||
draw_confusion_matrix(y_test, y_pred, model)
|
draw_confusion_matrix(y_test, y_pred, model)
|
||||||
draw_roc_curve(X_test, y_pred_prob, model)
|
draw_roc_curve(X_test, y_pred_prob, model)
|
||||||
|
@ -276,7 +276,7 @@ def pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result):
|
||||||
"AUC" : [auc(fpr, tpr)]}
|
"AUC" : [auc(fpr, tpr)]}
|
||||||
)
|
)
|
||||||
model_result = pd.concat([model_result, result])
|
model_result = pd.concat([model_result, result])
|
||||||
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||||
|
|
||||||
draw_confusion_matrix(y_test, y_pred, model)
|
draw_confusion_matrix(y_test, y_pred, model)
|
||||||
draw_roc_curve(X_test, y_pred_prob, model)
|
draw_roc_curve(X_test, y_pred_prob, model)
|
||||||
|
@ -317,7 +317,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
|
||||||
"AUC" : [auc(fpr, tpr)]}
|
"AUC" : [auc(fpr, tpr)]}
|
||||||
)
|
)
|
||||||
model_result = pd.concat([model_result, result])
|
model_result = pd.concat([model_result, result])
|
||||||
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||||
|
|
||||||
draw_confusion_matrix(y_test, y_pred, model)
|
draw_confusion_matrix(y_test, y_pred, model)
|
||||||
draw_roc_curve(X_test, y_pred_prob, model)
|
draw_roc_curve(X_test, y_pred_prob, model)
|
||||||
|
@ -325,7 +325,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
|
||||||
draw_prob_distribution(y_pred_prob, model)
|
draw_prob_distribution(y_pred_prob, model)
|
||||||
draw_prob_distribution_companies(y_pred_prob, model)
|
draw_prob_distribution_companies(y_pred_prob, model)
|
||||||
draw_calibration_curve(X_test, y_pred_prob, model)
|
draw_calibration_curve(X_test, y_pred_prob, model)
|
||||||
save_model_s3('randomF_cv', type_of_activity, model, gridsearch)
|
save_model_s3('randomF_cv', type_of_activity, model, grid_search)
|
||||||
return model_result
|
return model_result
|
||||||
|
|
||||||
|
|
||||||
|
@ -350,6 +350,8 @@ def pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result
|
||||||
"AUC" : [auc(fpr, tpr)]}
|
"AUC" : [auc(fpr, tpr)]}
|
||||||
)
|
)
|
||||||
model_result = pd.concat([model_result, result])
|
model_result = pd.concat([model_result, result])
|
||||||
|
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||||
|
|
||||||
draw_confusion_matrix(y_test, y_pred, model)
|
draw_confusion_matrix(y_test, y_pred, model)
|
||||||
draw_roc_curve(X_test, y_pred_prob, model)
|
draw_roc_curve(X_test, y_pred_prob, model)
|
||||||
draw_prob_distribution(y_pred_prob, model)
|
draw_prob_distribution(y_pred_prob, model)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user