generalization #11
|
@ -89,7 +89,7 @@ print("Naive Bayes : Done")
|
|||
# Logistic Regression
|
||||
model_result = pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result)
|
||||
print("Logistic : Done")
|
||||
"""
|
||||
|
||||
model_result = pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result)
|
||||
print("Logistic CV : Done")
|
||||
|
||||
|
@ -98,6 +98,6 @@ model_result = pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, mode
|
|||
print("Random Forest : Done")
|
||||
model_result = pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result)
|
||||
print("Random Forest CV: Done")
|
||||
"""
|
||||
|
||||
# Save result
|
||||
save_result_set_s3(model_result , "resultat", type_of_activity)
|
14
utils_ml.py
14
utils_ml.py
|
@ -78,7 +78,7 @@ def compute_recall_companies(dataset_test, y_pred, type_of_activity, model):
|
|||
test = dataset_test.copy()
|
||||
test['prediction'] = y_pred
|
||||
test['company'] = dataset_test['customer_id'].str.split('_', expand=True)[0]
|
||||
recall_scores_by_company = dataset_test.groupby('company').apply(compute_recall).reset_index(name='recall_score')
|
||||
recall_scores_by_company = test.groupby('company').apply(compute_recall).reset_index(name='recall_score')
|
||||
save_result_set_s3(recall_scores_by_company, 'recall_scores_by_company', type_of_activity, model=model, model_path=True)
|
||||
|
||||
|
||||
|
@ -207,7 +207,7 @@ def pipeline_logreg_benchmark(X_train, y_train, X_test, y_test, model_result):
|
|||
"AUC" : [auc(fpr, tpr)]}
|
||||
)
|
||||
model_result = pd.concat([model_result, result])
|
||||
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||
|
||||
draw_confusion_matrix(y_test, y_pred, model)
|
||||
draw_roc_curve(X_test, y_pred_prob, model)
|
||||
|
@ -244,7 +244,7 @@ def pipeline_logreg_cv(X_train, y_train, X_test, y_test, model_result):
|
|||
"AUC" : [auc(fpr, tpr)]}
|
||||
)
|
||||
model_result = pd.concat([model_result, result])
|
||||
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||
|
||||
draw_confusion_matrix(y_test, y_pred, model)
|
||||
draw_roc_curve(X_test, y_pred_prob, model)
|
||||
|
@ -276,7 +276,7 @@ def pipeline_randomF_benchmark(X_train, y_train, X_test, y_test, model_result):
|
|||
"AUC" : [auc(fpr, tpr)]}
|
||||
)
|
||||
model_result = pd.concat([model_result, result])
|
||||
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||
|
||||
draw_confusion_matrix(y_test, y_pred, model)
|
||||
draw_roc_curve(X_test, y_pred_prob, model)
|
||||
|
@ -317,7 +317,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
|
|||
"AUC" : [auc(fpr, tpr)]}
|
||||
)
|
||||
model_result = pd.concat([model_result, result])
|
||||
#compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||
|
||||
draw_confusion_matrix(y_test, y_pred, model)
|
||||
draw_roc_curve(X_test, y_pred_prob, model)
|
||||
|
@ -325,7 +325,7 @@ def pipeline_randomF_cv(X_train, y_train, X_test, y_test, model_result):
|
|||
draw_prob_distribution(y_pred_prob, model)
|
||||
draw_prob_distribution_companies(y_pred_prob, model)
|
||||
draw_calibration_curve(X_test, y_pred_prob, model)
|
||||
save_model_s3('randomF_cv', type_of_activity, model, gridsearch)
|
||||
save_model_s3('randomF_cv', type_of_activity, model, grid_search)
|
||||
return model_result
|
||||
|
||||
|
||||
|
@ -350,6 +350,8 @@ def pipeline_naiveBayes_benchmark(X_train, y_train, X_test, y_test, model_result
|
|||
"AUC" : [auc(fpr, tpr)]}
|
||||
)
|
||||
model_result = pd.concat([model_result, result])
|
||||
compute_recall_companies(dataset_test, y_pred, type_of_activity, model)
|
||||
|
||||
draw_confusion_matrix(y_test, y_pred, model)
|
||||
draw_roc_curve(X_test, y_pred_prob, model)
|
||||
draw_prob_distribution(y_pred_prob, model)
|
||||
|
|
Loading…
Reference in New Issue
Block a user