Merge branch 'main' into generalization

This commit is contained in:
Alexis REVELLE 2024-03-10 08:46:23 +00:00
commit 198ef45247
4 changed files with 3780 additions and 116 deletions

View File

@@ -110,6 +110,10 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
# Fill NaN values
df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']] = df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']].fillna(0)
max_interval = (end_features_date - min_date) / np.timedelta64(1, 'D') + 1
df_customer_product[['purchase_date_max', 'purchase_date_min']] = df_customer_product[['purchase_date_max', 'purchase_date_min']].fillna(max_interval)
df_customer_product[['time_between_purchase']] = df_customer_product[['time_between_purchase']].fillna(-1)
print("Explanatory variable construction : SUCCESS")
# 2. Construction of the explained variable
@@ -126,7 +130,7 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
dataset = pd.merge(df_customer_product, y, on = ['customer_id'], how = 'left')
# 0 if there is no purchase
dataset[['y_has_purchased']].fillna(0)
dataset[['y_has_purchased']] = dataset[['y_has_purchased']].fillna(0)
# add id_company prefix to customer_id
dataset['customer_id'] = directory_path + '_' + dataset['customer_id'].astype('str')
@@ -156,9 +160,9 @@ end_of_features = "2022-11-01"
final_date = "2023-11-01"
anonymous_customer = {'1' : 1_1, '2' : 2_12184, '3' : 3_1, '4' : 4_2, '101' : 101_1,
'5' : 5_191835, '6' : 6_591412, '7' : 7_49632, '8' : 8_1942, '9' : 9_19683,
'10' : 10_19521, '11' : 11_36 , '12' : 12_1706757, '13' : 13_8422, '14' : 14_6354 }
anonymous_customer = {'1' : '1_1', '2' : '2_12184', '3' : '3_1', '4' : '4_2', '101' : '101_1',
'5' : '5_191835', '6' : '6_591412', '7' : '7_49632', '8' : '8_1942', '9' : '9_19683',
'10' : '10_19521', '11' : '11_36', '12' : '12_1706757', '13' : '13_8422', '14' : '14_6354'}
for company in list_of_comp:
dataset = dataset_construction(min_date = start_date, end_features_date = end_of_features,

View File

@@ -18,7 +18,8 @@ def campaigns_kpi_function(campaigns_information = None):
# Nombre de campagnes de mails
nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()
nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)
# Temps d'ouverture en min moyen
# Temps d'ouverture moyen (en minutes)
campaigns_information['time_to_open'] = pd.to_datetime(campaigns_information['opened_at'], utc = True, format = 'ISO8601') - pd.to_datetime(campaigns_information['delivered_at'], utc = True, format = 'ISO8601')
time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long