take new databases as input
This commit is contained in:
parent
78aab14164
commit
0a7900c07f
|
@ -27,7 +27,8 @@ type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? mu
|
||||||
PATH = f"projet-bdc2324-team1/Output_expected_CA/{type_of_activity}/"
|
PATH = f"projet-bdc2324-team1/Output_expected_CA/{type_of_activity}/"
|
||||||
|
|
||||||
# type of model for the score
|
# type of model for the score
|
||||||
type_of_model = "LogisticRegression_cv"
|
# type_of_model = "LogisticRegression_cv"
|
||||||
|
type_of_model = "LogisticRegression_Benchmark"
|
||||||
|
|
||||||
# load train and test sets
|
# load train and test sets
|
||||||
dataset_train, dataset_test = load_train_test(type_of_activity)
|
dataset_train, dataset_test = load_train_test(type_of_activity)
|
||||||
|
@ -68,6 +69,8 @@ save_file_s3_ca("hist_score_adjusted_", type_of_activity)
|
||||||
X_test_table_adjusted_scores = (100 * X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean()).round(2).reset_index()
|
X_test_table_adjusted_scores = (100 * X_test_segment.groupby("quartile")[["score","score_adjusted", "has_purchased"]].mean()).round(2).reset_index()
|
||||||
X_test_table_adjusted_scores = X_test_table_adjusted_scores.rename(columns = {col : f"{col} (%)" for col in X_test_table_adjusted_scores.columns if col in ["score","score_adjusted", "has_purchased"]})
|
X_test_table_adjusted_scores = X_test_table_adjusted_scores.rename(columns = {col : f"{col} (%)" for col in X_test_table_adjusted_scores.columns if col in ["score","score_adjusted", "has_purchased"]})
|
||||||
|
|
||||||
|
print(X_test_table_adjusted_scores)
|
||||||
|
|
||||||
# save table
|
# save table
|
||||||
file_name = "table_adjusted_score_"
|
file_name = "table_adjusted_score_"
|
||||||
FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + ".csv"
|
FILE_PATH_OUT_S3 = PATH + file_name + type_of_activity + ".csv"
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -13,7 +13,8 @@ import io
|
||||||
# functions
|
# functions
|
||||||
|
|
||||||
def load_train_test(type_of_activity):
|
def load_train_test(type_of_activity):
|
||||||
BUCKET = f"projet-bdc2324-team1/Generalization/{type_of_activity}"
|
# BUCKET = f"projet-bdc2324-team1/Generalization/{type_of_activity}"
|
||||||
|
BUCKET = f"projet-bdc2324-team1/Generalization_v2/{type_of_activity}"
|
||||||
File_path_train = BUCKET + "/Train_set.csv"
|
File_path_train = BUCKET + "/Train_set.csv"
|
||||||
File_path_test = BUCKET + "/Test_set.csv"
|
File_path_test = BUCKET + "/Test_set.csv"
|
||||||
|
|
||||||
|
@ -31,7 +32,7 @@ def load_train_test(type_of_activity):
|
||||||
def features_target_split(dataset_train, dataset_test):
|
def features_target_split(dataset_train, dataset_test):
|
||||||
|
|
||||||
features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',
|
features_l = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',
|
||||||
'time_between_purchase', 'nb_tickets_internet', 'fidelity', 'is_email_true', 'opt_in', #'is_partner',
|
'time_between_purchase', 'fidelity', 'is_email_true', 'opt_in', #'is_partner', 'nb_tickets_internet',
|
||||||
'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']
|
'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']
|
||||||
|
|
||||||
# we drop fidelity, time between purchase, and gender other (collinearity issue)
|
# we drop fidelity, time between purchase, and gender other (collinearity issue)
|
||||||
|
@ -41,17 +42,18 @@ def features_target_split(dataset_train, dataset_test):
|
||||||
'opt_in', 'gender_female', 'gender_male', 'nb_campaigns', 'nb_campaigns_opened']
|
'opt_in', 'gender_female', 'gender_male', 'nb_campaigns', 'nb_campaigns_opened']
|
||||||
"""
|
"""
|
||||||
|
|
||||||
X_train = dataset_train[features_l]
|
X_train = dataset_train # [features_l]
|
||||||
y_train = dataset_train[['y_has_purchased']]
|
y_train = dataset_train[['y_has_purchased']]
|
||||||
|
|
||||||
X_test = dataset_test[features_l]
|
X_test = dataset_test # [features_l]
|
||||||
y_test = dataset_test[['y_has_purchased']]
|
y_test = dataset_test[['y_has_purchased']]
|
||||||
|
|
||||||
return X_train, X_test, y_train, y_test
|
return X_train, X_test, y_train, y_test
|
||||||
|
|
||||||
|
|
||||||
def load_model(type_of_activity, model):
|
def load_model(type_of_activity, model):
|
||||||
BUCKET = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/"
|
# BUCKET = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/"
|
||||||
|
BUCKET = f"projet-bdc2324-team1/basique/{type_of_activity}/{model}/"
|
||||||
filename = model + '.pkl'
|
filename = model + '.pkl'
|
||||||
file_path = BUCKET + filename
|
file_path = BUCKET + filename
|
||||||
with fs.open(file_path, mode="rb") as f:
|
with fs.open(file_path, mode="rb") as f:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user