Add per-company target-category statistics (targets dataset + target_description plot).

NOTE(review): this patch was rebuilt from a whitespace-collapsed copy. Indentation, the
position of blank context lines and the width of removed whitespace-only lines are
inferred from the hunk line counts -- verify against the target files (or apply with
`git apply --ignore-whitespace`). The `index` hashes are those of the original patch.

diff --git a/0_4_Generate_stat_desc.py b/0_4_Generate_stat_desc.py
index c0821e0..27a632a 100644
--- a/0_4_Generate_stat_desc.py
+++ b/0_4_Generate_stat_desc.py
@@ -25,7 +25,7 @@ type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? mu
 list_of_comp = companies[type_of_activity]
 
 # Load files
-customer, campaigns_kpi, campaigns_brut, tickets, products = load_files(list_of_comp)
+customer, campaigns_kpi, campaigns_brut, tickets, products, targets = load_files(list_of_comp)
 
 # Identify anonymous customer for each company and remove them from our datasets
 outlier_list = outlier_detection(tickets, list_of_comp)
@@ -67,4 +67,6 @@ tickets_internet(tickets, type_of_activity)
 
 already_bought_online(tickets, type_of_activity)
 
-box_plot_price_tickets(tickets, type_of_activity)
\ No newline at end of file
+box_plot_price_tickets(tickets, type_of_activity)
+
+target_description(targets, type_of_activity)
diff --git a/utils_stat_desc.py b/utils_stat_desc.py
index 7eedd9c..8c9e7d9 100644
--- a/utils_stat_desc.py
+++ b/utils_stat_desc.py
@@ -16,6 +16,7 @@ def load_files(nb_compagnie):
     campaigns_kpi = pd.DataFrame()
     products = pd.DataFrame()
     tickets = pd.DataFrame()
+    targets = pd.DataFrame()
 
     # début de la boucle permettant de générer des datasets agrégés pour les 5 compagnies de spectacle
     for directory_path in nb_compagnie:
@@ -27,14 +28,21 @@ def load_files(nb_compagnie):
         df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_brut, max_date=pd.Timestamp.now(tz='UTC'))
         df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)
         df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)
-        
-        
+        df_target_KPI = targets_KPI(df_target = df_target_information)
+
+        # Left-merge onto the customer list, then fill the missing target indicators with 0
+        df_target_KPI = pd.merge(df_customerplus_clean_0[['customer_id']], df_target_KPI, how = 'left', on = 'customer_id')
+        targets_columns = list(df_target_KPI.columns)
+        targets_columns.remove('customer_id')
+        df_target_KPI[targets_columns] = df_target_KPI[targets_columns].fillna(0)
+
         # creation de la colonne Number compagnie, qui permettra d'agréger les résultats
         df_tickets_kpi["number_company"]=int(directory_path)
         df_campaigns_brut["number_company"]=int(directory_path)
         df_campaigns_kpi["number_company"]=int(directory_path)
         df_customerplus_clean["number_company"]=int(directory_path)
         df_target_information["number_company"]=int(directory_path)
+        df_target_KPI["number_company"]=int(directory_path)
 
         # Traitement des index
         df_tickets_kpi["customer_id"]= directory_path + '_' + df_tickets_kpi['customer_id'].astype('str')
@@ -42,6 +50,8 @@ def load_files(nb_compagnie):
         df_campaigns_kpi["customer_id"]= directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str')
         df_customerplus_clean["customer_id"]= directory_path + '_' + df_customerplus_clean['customer_id'].astype('str')
         df_products_purchased_reduced["customer_id"]= directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str')
+        df_target_KPI["customer_id"]= directory_path + '_' + df_target_KPI['customer_id'].astype('str')
+
 
         # Concaténation
         customer = pd.concat([customer, df_customerplus_clean], ignore_index=True)
@@ -49,8 +59,9 @@ def load_files(nb_compagnie):
         campaigns_brut = pd.concat([campaigns_brut, df_campaigns_brut], ignore_index=True)
         tickets = pd.concat([tickets, df_tickets_kpi], ignore_index=True)
         products = pd.concat([products, df_products_purchased_reduced], ignore_index=True)
+        targets = pd.concat([targets, df_target_KPI], ignore_index=True)
 
-    return customer, campaigns_kpi, campaigns_brut, tickets, products
+    return customer, campaigns_kpi, campaigns_brut, tickets, products, targets
 
 
 def save_file_s3(File_name, type_of_activity):
@@ -356,3 +367,39 @@ def box_plot_price_tickets(tickets, type_of_activity):
     plt.show()
     save_file_s3("box_plot_price_tickets_", type_of_activity)
 
+
+def target_description(targets, type_of_activity):
+    """Plot, per company, the percentage of customers flagged in each target category.
+
+    Args:
+        targets: DataFrame holding a 'number_company' column and the
+            'target_*' indicator columns listed below (presumably 0/1 flags,
+            as assembled by load_files -- confirm against targets_KPI).
+        type_of_activity: label inserted in the plot title and forwarded to
+            save_file_s3.
+    """
+    target_columns = [
+        'target_jeune', 'target_scolaire', 'target_entreprise', 'target_famille',
+        'target_optin', 'target_optout', 'target_newsletter', 'target_abonne',
+    ]
+
+    # Same aggregation for every category: sum over non-null count, as a percentage.
+    describe_target = targets.groupby('number_company').agg(
+        **{f"prop_{column}": (column, lambda x: (x.sum() / x.count()) * 100)
+           for column in target_columns})
+
+    plot = describe_target.plot.bar()
+
+    # Adding a title
+    plot.set_title(f"Distribution of Targets by Category for {type_of_activity} companies")
+
+    # Adding labels for x and y axes
+    plot.set_xlabel("Company Number")
+    plot.set_ylabel("Target Proportion (%)")
+
+    plot.set_xticklabels(plot.get_xticklabels(), rotation=0, horizontalalignment='center')
+
+    # Adding a legend (labels follow the order of target_columns)
+    plot.legend(["Youth", "School", "Enterprise", "Family", "Optin", "Optout", "Newsletter", "Subscriber"], title="Target Category")
+
+    save_file_s3("target_category_proportion_", type_of_activity)