generalization #11

Merged
arevelle-ensae merged 10 commits from generalization into main 2024-03-28 09:40:04 +01:00
2 changed files with 15 additions and 4 deletions
Showing only changes of commit 122c4c1f82 - Show all commits

View File

@ -56,7 +56,7 @@ weights = class_weight.compute_class_weight(class_weight = 'balanced', classes =
weight_dict = {np.unique(y_train['y_has_purchased'])[i]: weights[i] for i in range(len(np.unique(y_train['y_has_purchased'])))}
preproc = preprocess(type_of_model)
preproc = preprocess(type_of_model, type_of_activity)
# Object for storing results
model_result = pd.DataFrame(columns= ["Model", "Accuracy", "Recall", "F1_score", "AUC"])

View File

@ -83,7 +83,14 @@ def compute_recall_companies(dataset_test, y_pred, type_of_activity, model):
def features_target_split(dataset_train, dataset_test):
features_l = [']
features_l = ['nb_campaigns', 'taux_ouverture_mail', 'prop_purchases_internet', 'nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'time_to_open',
'purchases_10_2021','purchases_10_2022', 'purchases_11_2021', 'purchases_12_2021','purchases_1_2022', 'purchases_2_2022', 'purchases_3_2022',
'purchases_4_2022', 'purchases_5_2021', 'purchases_5_2022', 'purchases_6_2021', 'purchases_6_2022', 'purchases_7_2021', 'purchases_7_2022', 'purchases_8_2021',
'purchases_8_2022','purchases_9_2021', 'purchases_9_2022', 'purchase_date_min', 'purchase_date_max', 'nb_targets', 'gender_female', 'gender_male',
'achat_internet', 'categorie_age_0_10', 'categorie_age_10_20', 'categorie_age_20_30','categorie_age_30_40',
'categorie_age_40_50', 'categorie_age_50_60', 'categorie_age_60_70', 'categorie_age_70_80', 'categorie_age_plus_80','categorie_age_inconnue',
'country_fr', 'is_profession_known', 'is_zipcode_known', 'opt_in', 'target_optin', 'target_newsletter', 'target_scolaire', 'target_entreprise', 'target_famille',
'target_jeune', 'target_abonne']
X_train = dataset_train[features_l]
y_train = dataset_train[['y_has_purchased']]
@ -97,12 +104,15 @@ def preprocess(type_of_model, type_of_activity):
numeric_features = ['nb_campaigns', 'taux_ouverture_mail', 'prop_purchases_internet', 'nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers',
'purchases_10_2021','purchases_10_2022', 'purchases_11_2021', 'purchases_12_2021','purchases_1_2022', 'purchases_2_2022', 'purchases_3_2022',
'purchases_4_2022', 'purchases_5_2021', 'purchases_5_2022', 'purchases_6_2021', 'purchases_6_2022', 'purchases_7_2021', 'purchases_7_2022', 'purchases_8_2021',
'purchases_8_2022','purchases_9_2021', 'purchases_9_2022', 'purchase_date_min', 'purchase_date_max', 'nb_targets']
'purchases_8_2022','purchases_9_2021', 'purchases_9_2022', 'purchase_date_min', 'purchase_date_max', 'nb_targets', 'time_to_open']
binary_features = ['gender_female', 'gender_male', 'country_fr', 'achat_internet', 'categorie_age_0_10', 'categorie_age_10_20', 'categorie_age_20_30','categorie_age_30_40',
binary_features = ['gender_female', 'gender_male', 'achat_internet', 'categorie_age_0_10', 'categorie_age_10_20', 'categorie_age_20_30','categorie_age_30_40',
'categorie_age_40_50', 'categorie_age_50_60', 'categorie_age_60_70', 'categorie_age_70_80', 'categorie_age_plus_80','categorie_age_inconnue',
'country_fr', 'is_profession_known', 'is_zipcode_known', 'opt_in']
if type_of_activity == 'musee':
    # 'time_to_open' is listed in numeric_features, not binary_features, and
    # list.pop() expects an integer index (a string raises TypeError), so
    # drop the column by value from the list that actually contains it.
    numeric_features.remove('time_to_open')
if type_of_model=='premium':
if type_of_activity=='musique':
binary_features.extend(['target_optin', 'target_newsletter'])
@ -113,6 +123,7 @@ def preprocess(type_of_model, type_of_activity):
numeric_transformer = Pipeline(steps=[
("imputer", SimpleImputer(strategy="constant", fill_value=0)),
("scaler", StandardScaler())
])