Ajout graphiques sur targets

This commit is contained in:
Antoine JOUBREL 2024-03-27 18:39:54 +00:00
parent 38c3fc3148
commit 0ffbe06b12
2 changed files with 49 additions and 5 deletions

View File

@ -25,7 +25,7 @@ type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? mu
list_of_comp = companies[type_of_activity]
# Load files
customer, campaigns_kpi, campaigns_brut, tickets, products = load_files(list_of_comp)
customer, campaigns_kpi, campaigns_brut, tickets, products, targets = load_files(list_of_comp)
# Identify anonymous customer for each company and remove them from our datasets
outlier_list = outlier_detection(tickets, list_of_comp)
@ -67,4 +67,6 @@ tickets_internet(tickets, type_of_activity)
already_bought_online(tickets, type_of_activity)
box_plot_price_tickets(tickets, type_of_activity)
box_plot_price_tickets(tickets, type_of_activity)
target_description(targets, type_of_activity)

View File

@ -16,6 +16,7 @@ def load_files(nb_compagnie):
campaigns_kpi = pd.DataFrame()
products = pd.DataFrame()
tickets = pd.DataFrame()
targets = pd.DataFrame()
# début de la boucle permettant de générer des datasets agrégés pour les 5 compagnies de spectacle
for directory_path in nb_compagnie:
@ -27,14 +28,21 @@ def load_files(nb_compagnie):
df_campaigns_kpi = campaigns_kpi_function(campaigns_information = df_campaigns_brut, max_date=pd.Timestamp.now(tz='UTC'))
df_tickets_kpi = tickets_kpi_function(tickets_information = df_products_purchased_reduced)
df_customerplus_clean = customerplus_kpi_function(customerplus_clean = df_customerplus_clean_0)
df_target_KPI = targets_KPI(df_target = df_target_information)
# Left-merge onto the customer ids so every customer has a row, then fill missing target values with 0
df_target_KPI = pd.merge(df_customerplus_clean_0[['customer_id']], df_target_KPI, how = 'left', on = 'customer_id')
targets_columns = list(df_target_KPI.columns)
targets_columns.remove('customer_id')
df_target_KPI[targets_columns] = df_target_KPI[targets_columns].fillna(0)
# creation de la colonne Number compagnie, qui permettra d'agréger les résultats
df_tickets_kpi["number_company"]=int(directory_path)
df_campaigns_brut["number_company"]=int(directory_path)
df_campaigns_kpi["number_company"]=int(directory_path)
df_customerplus_clean["number_company"]=int(directory_path)
df_target_information["number_company"]=int(directory_path)
df_target_KPI["number_company"]=int(directory_path)
# Traitement des index
df_tickets_kpi["customer_id"]= directory_path + '_' + df_tickets_kpi['customer_id'].astype('str')
@ -42,6 +50,8 @@ def load_files(nb_compagnie):
df_campaigns_kpi["customer_id"]= directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str')
df_customerplus_clean["customer_id"]= directory_path + '_' + df_customerplus_clean['customer_id'].astype('str')
df_products_purchased_reduced["customer_id"]= directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str')
df_target_KPI["customer_id"]= directory_path + '_' + df_target_KPI['customer_id'].astype('str')
# Concaténation
customer = pd.concat([customer, df_customerplus_clean], ignore_index=True)
@ -49,8 +59,9 @@ def load_files(nb_compagnie):
campaigns_brut = pd.concat([campaigns_brut, df_campaigns_brut], ignore_index=True)
tickets = pd.concat([tickets, df_tickets_kpi], ignore_index=True)
products = pd.concat([products, df_products_purchased_reduced], ignore_index=True)
targets = pd.concat([targets, df_target_KPI], ignore_index=True)
return customer, campaigns_kpi, campaigns_brut, tickets, products
return customer, campaigns_kpi, campaigns_brut, tickets, products, targets
def save_file_s3(File_name, type_of_activity):
@ -356,3 +367,34 @@ def box_plot_price_tickets(tickets, type_of_activity):
plt.show()
save_file_s3("box_plot_price_tickets_", type_of_activity)
def target_description(targets, type_of_activity):
    """Draw a grouped bar chart of target-category percentages per company.

    For each ``number_company`` group in ``targets``, every ``target_*``
    column is reduced to ``sum / count * 100`` (``count`` ignores missing
    values), and the resulting table is plotted as one bar per category.
    The figure is then handed to ``save_file_s3`` under the prefix
    ``"target_category_proportion_"``.

    Args:
        targets: DataFrame holding a ``number_company`` column and the eight
            ``target_*`` columns listed below.
            NOTE(review): the columns are presumably 0/1 flags, so the result
            reads as a percentage of customers -- confirm against the
            function that builds the target KPIs.
        type_of_activity: Activity label, used in the chart title and passed
            on to ``save_file_s3``.
    """
    def _percentage(values):
        # Share of the column total over its non-null entries, in percent.
        return (values.sum() / values.count())*100

    # Order matters: it fixes the bar order, which the legend labels follow.
    target_columns = (
        'target_jeune',
        'target_scolaire',
        'target_entreprise',
        'target_famille',
        'target_optin',
        'target_optout',
        'target_newsletter',
        'target_abonne',
    )
    named_aggregations = {
        f"prop_{column}": (column, _percentage) for column in target_columns
    }
    describe_target = targets.groupby('number_company').agg(**named_aggregations)

    axes = describe_target.plot.bar()

    # Title and axis labels
    axes.set_title(f"Distribution of Targets by Category for {type_of_activity} companies")
    axes.set_xlabel("Company Number")
    axes.set_ylabel("Target Proportion")

    # Keep the company numbers horizontal and centred under their bar groups
    current_tick_labels = axes.get_xticklabels()
    axes.set_xticklabels(current_tick_labels, rotation=0, horizontalalignment='center')

    # English legend entries, in the same order as target_columns
    legend_labels = ["Youth", "School", "Enterprise", "Family", "Optin", "Optout", "Newsletter", "Subscriber"]
    axes.legend(legend_labels, title="Target Category")

    save_file_s3("target_category_proportion_", type_of_activity)