From 54fbad0344999c9e359cad0c3c925afd24f37d76 Mon Sep 17 00:00:00 2001
From: arevelle-ensae
Date: Thu, 14 Mar 2024 21:00:14 +0000
Subject: [PATCH] identify target customer

---
 Descriptive_statistics/generate_stat_desc.py | 22 ++++++++++++--------
 Descriptive_statistics/plot.py               | 11 ++++++++---
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/Descriptive_statistics/generate_stat_desc.py b/Descriptive_statistics/generate_stat_desc.py
index 2086645..a427d8d 100644
--- a/Descriptive_statistics/generate_stat_desc.py
+++ b/Descriptive_statistics/generate_stat_desc.py
@@ -16,7 +16,7 @@ S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
 fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})
 
 companies = {'musee' : ['1', '2', '3', '4'], # , '101'
-            'sport': ['5'],
+            'sport': ['5', '6'],
             'musique' : ['10', '11', '12', '13', '14']}
 
 
@@ -30,13 +30,17 @@ customer, campaigns_kpi, campaigns_brut, tickets, products = load_files(list_of_
 outlier_list = outlier_detection(tickets, list_of_comp)
 
 # Identify valid customer (customer who bought tickets after starting date or received mails after starting date)
-customer_valid_list = valid_customer_detection(products)
+customer_valid_list = valid_customer_detection(products, campaigns_brut)
+
+# Identify customers who purchased during the target period (2022-11-01 to 2023-11-01, both inclusive)
+consumer_target_period = identify_purchase_during_target_periode(products)
 
 databases = [customer, campaigns_kpi, campaigns_brut, tickets, products]
 
 for dataset in databases:
     dataset['customer_id'] = dataset['customer_id'].apply(lambda x: remove_elements(x, outlier_list))# remove outlier
+    dataset['has_purchased_target_period'] = np.where(dataset['customer_id'].isin(consumer_target_period), 1, 0) # computed before customer_id is replaced by a boolean mask on the next line
     dataset['customer_id'] = dataset['customer_id'].isin(customer_valid_list) # keep only valid customer
     #print(f'shape of {dataset} : ', dataset.shape)
 
 # Generate graph and automatically saved them in the bucket
@@ -46,16 +50,16 @@ maximum_price_paid(customer, type_of_activity)
 
 mailing_consent(customer, type_of_activity)
 
-gender_bar(customer, type_of_activity)
+#gender_bar(customer, type_of_activity)
 
-country_bar(customer, type_of_activity)
+#country_bar(customer, type_of_activity)
 
-lazy_customer_plot(campaigns_kpi, type_of_activity)
+#lazy_customer_plot(campaigns_kpi, type_of_activity)
 
-# campaigns_effectiveness(customer, type_of_activity)
+#campaigns_effectiveness(customer, type_of_activity)
 
-sale_dynamics(products, campaigns_brut, type_of_activity)
+#sale_dynamics(products, campaigns_brut, type_of_activity)
 
-tickets_internet(tickets, type_of_activity)
+#tickets_internet(tickets, type_of_activity)
 
-box_plot_price_tickets(tickets, type_of_activity)
+#box_plot_price_tickets(tickets, type_of_activity)
diff --git a/Descriptive_statistics/plot.py b/Descriptive_statistics/plot.py
index a6be21f..cd8f82a 100644
--- a/Descriptive_statistics/plot.py
+++ b/Descriptive_statistics/plot.py
@@ -99,11 +99,16 @@ def valid_customer_detection(products, campaigns_brut):
     consumer_valid = consumer_valid_product + consumer_valid_campaigns
     return consumer_valid
 
+
+def identify_purchase_during_target_periode(products):
+    """Return the customer_id of every row of products whose purchase_date lies between 2022-11-01 and 2023-11-01 (both inclusive)."""
+    products_target_period = products[(products['purchase_date'] >= "2022-11-01") & (products['purchase_date'] <= "2023-11-01")]
+    consumer_target_period = products_target_period['customer_id'].to_list()
+    return consumer_target_period
+
+
 def remove_elements(lst, elements_to_remove):
     return ''.join([x for x in lst if x not in elements_to_remove])
-
-def keep_elements(lst, elements_to_remove):
-    return ''.join([x for x in lst if x in elements_to_remove])
 
 
 def compute_nb_clients(customer, type_of_activity):