Merge branch 'main' into generalization
This commit is contained in:
commit
198ef45247
|
@ -110,6 +110,10 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
|
|||
# Fill NaN values
|
||||
df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']] = df_customer_product[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'nb_tickets_internet']].fillna(0)
|
||||
|
||||
max_interval = (end_features_date - min_date) / np.timedelta64(1, 'D') + 1
|
||||
df_customer_product[['purchase_date_max', 'purchase_date_min']] = df_customer_product[['purchase_date_max', 'purchase_date_min']].fillna(max_interval)
|
||||
df_customer_product[['time_between_purchase']] = df_customer_product[['time_between_purchase']].fillna(-1)
|
||||
|
||||
print("Explanatory variable construction : SUCCESS")
|
||||
|
||||
# 2. Construction of the explained variable
|
||||
|
@ -126,7 +130,7 @@ def dataset_construction(min_date, end_features_date, max_date, directory_path):
|
|||
dataset = pd.merge(df_customer_product, y, on = ['customer_id'], how = 'left')
|
||||
|
||||
# 0 if there is no purchase
|
||||
dataset[['y_has_purchased']].fillna(0)
|
||||
dataset[['y_has_purchased']] = dataset[['y_has_purchased']].fillna(0)
|
||||
|
||||
# add id_company prefix to customer_id
|
||||
dataset['customer_id'] = directory_path + '_' + dataset['customer_id'].astype('str')
|
||||
|
@ -156,9 +160,9 @@ end_of_features = "2022-11-01"
|
|||
final_date = "2023-11-01"
|
||||
|
||||
|
||||
anonymous_customer = {'1' : 1_1, '2' : 2_12184, '3' : 3_1, '4' : 4_2, '101' : 101_1,
|
||||
'5' : 5_191835, '6' : 6_591412, '7' : 7_49632, '8' : 8_1942, '9' : 9_19683,
|
||||
'10' : 10_19521, '11' : 11_36 , '12' : 12_1706757, '13' : 13_8422, '14' : 14_6354 }
|
||||
anonymous_customer = {'1' : '1_1', '2' : '2_12184', '3' : '3_1', '4' : '4_2', '101' : '101_1',
|
||||
'5' : '5_191835', '6' : '6_591412', '7' : '7_49632', '8' : '8_1942', '9' : '9_19683',
|
||||
'10' : '10_19521', '11' : '11_36', '12' : '12_1706757', '13' : '13_8422', '14' : '14_6354'}
|
||||
|
||||
for company in list_of_comp:
|
||||
dataset = dataset_construction(min_date = start_date, end_features_date = end_of_features,
|
||||
|
|
|
@ -18,7 +18,8 @@ def campaigns_kpi_function(campaigns_information = None):
|
|||
# Nombre de campagnes de mails
|
||||
nb_campaigns = campaigns_information[['customer_id', 'campaign_name']].groupby('customer_id').count().reset_index()
|
||||
nb_campaigns.rename(columns = {'campaign_name' : 'nb_campaigns'}, inplace = True)
|
||||
# Temps d'ouverture en min moyen
|
||||
|
||||
# Temps d'ouverture moyen (en minutes)
|
||||
campaigns_information['time_to_open'] = pd.to_datetime(campaigns_information['opened_at'], utc = True, format = 'ISO8601') - pd.to_datetime(campaigns_information['delivered_at'], utc = True, format = 'ISO8601')
|
||||
time_to_open = campaigns_information[['customer_id', 'time_to_open']].groupby('customer_id').mean().reset_index()
|
||||
|
||||
|
|
File diff suppressed because one or more lines are too long
2877
Musee/2_modelisation_pipeline+visu.ipynb
Normal file
2877
Musee/2_modelisation_pipeline+visu.ipynb
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user