add kpi function for customerplus

This commit is contained in:
Antoine JOUBREL 2024-02-25 17:31:14 +00:00
parent 27e266c58e
commit fa9c1c790e
3 changed files with 30 additions and 24 deletions

View File

@ -63,7 +63,7 @@ def df_coverage_modelization(sport, coverage_train = 0.7):
def dataset_construction(min_date, end_features_date, max_date, directory_path): def dataset_construction(min_date, end_features_date, max_date, directory_path):
# Import customerplus # Import customerplus
df_customerplus_clean = display_databases(directory_path, file_name = "customerplus_cleaned") df_customerplus_clean_0 = display_databases(directory_path, file_name = "customerplus_cleaned")
df_campaigns_information = display_databases(directory_path, file_name = "campaigns_information", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at']) df_campaigns_information = display_databases(directory_path, file_name = "campaigns_information", datetime_col = ['opened_at', 'sent_at', 'campaign_sent_at'])
df_products_purchased_reduced = display_databases(directory_path, file_name = "products_purchased_reduced", datetime_col = ['purchase_date']) df_products_purchased_reduced = display_databases(directory_path, file_name = "products_purchased_reduced", datetime_col = ['purchase_date'])
@ -89,19 +89,8 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
# KPI sur le comportement d'achat # KPI sur le comportement d'achat
df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced) df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)
# KPI sur les données socio-demographique # KPI sur les données socio-démographiques
df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)
## Le genre
df_customerplus_clean["gender_label"] = df_customerplus_clean["gender"].map({
0: 'female',
1: 'male',
2: 'other'
})
gender_dummies = pd.get_dummies(df_customerplus_clean["gender_label"], prefix='gender').astype(int)
df_customerplus_clean = pd.concat([df_customerplus_clean, gender_dummies], axis=1)
## Indicatrice si individue vit en France
df_customerplus_clean["country_fr"] = df_customerplus_clean["country"].apply(lambda x : int(x=="fr") if pd.notna(x) else np.nan)
print("KPIs construction : SUCCESS") print("KPIs construction : SUCCESS")

View File

@ -93,5 +93,22 @@ def tickets_kpi_function(tickets_information = None):
return tickets_kpi return tickets_kpi
def customerplus_kpi_function(customerplus_clean = None):
    """Build socio-demographic KPI columns from the cleaned customer table.

    The input frame is left untouched; a new DataFrame is returned with the
    original columns followed by:

    - ``gender_label``: text label mapped from the ``gender`` code
      (0 -> 'female', 1 -> 'male', 2 -> 'other'; any other code gives NaN).
    - ``gender_<label>``: 0/1 indicator columns, one per label that is
      actually present in the data.
    - ``country_fr``: 1 if ``country`` equals "fr", 0 otherwise, NaN when
      ``country`` is missing.
    - ``has_tags``: 1 if ``structure_id`` is not missing, 0 otherwise.

    Parameters
    ----------
    customerplus_clean : pandas.DataFrame
        Must contain the columns ``gender``, ``country`` and
        ``structure_id``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus the KPI columns.
    """
    # Gender: map the numeric code to a readable label. `assign` returns a
    # new frame, so the caller's DataFrame is not modified in place.
    gender_labels = customerplus_clean["gender"].map({
        0: 'female',
        1: 'male',
        2: 'other'
    })
    customerplus_kpi = customerplus_clean.assign(gender_label=gender_labels)

    # One-hot encode the label. NOTE: get_dummies only creates a column for
    # labels that occur in the data, so the set of gender_* columns can vary
    # from one dataset to another.
    gender_dummies = pd.get_dummies(customerplus_kpi["gender_label"], prefix='gender').astype(int)
    customerplus_kpi = pd.concat([customerplus_kpi, gender_dummies], axis=1)

    # Indicator for customers living in France; a missing country stays
    # missing (NaN) instead of being counted as "not France".
    customerplus_kpi["country_fr"] = customerplus_kpi["country"].apply(
        lambda country: int(country == "fr") if pd.notna(country) else np.nan
    )

    # Dummy if the customer has a structure id (tags).
    customerplus_kpi['has_tags'] = customerplus_kpi['structure_id'].notna().astype(int)

    return customerplus_kpi

File diff suppressed because one or more lines are too long