From 089a8fd3d6b8d5f641cb988cec87acf4e7296a10 Mon Sep 17 00:00:00 2001 From: arevelle-ensae Date: Thu, 21 Mar 2024 08:16:29 +0000 Subject: [PATCH] fix labels --- utils_stat_desc.py | 88 ++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/utils_stat_desc.py b/utils_stat_desc.py index 7eedd9c..6372c63 100644 --- a/utils_stat_desc.py +++ b/utils_stat_desc.py @@ -122,7 +122,7 @@ def compute_nb_clients(customer, type_of_activity): plt.xlabel('Company') plt.ylabel("Number of clients (thousands)") - plt.title(f"Number of clients for {type_of_activity}") + plt.title(f"Number of clients Across {type_of_activity} Companies") plt.xticks(company_nb_clients["number_company"], ["{}".format(i) for i in company_nb_clients["number_company"]]) plt.show() save_file_s3("nb_clients_", type_of_activity) @@ -132,9 +132,9 @@ def maximum_price_paid(customer, type_of_activity): company_max_price = customer.groupby("number_company")["max_price"].max().reset_index() plt.bar(company_max_price["number_company"], company_max_price["max_price"]) - plt.xlabel('Company') + plt.xlabel('Company Number') plt.ylabel("Maximal price of a ticket Prix") - plt.title(f"Maximal price of a ticket for {type_of_activity}") + plt.title(f"Maximal price of a ticket Across {type_of_activity} Companies") plt.xticks(company_max_price["number_company"], ["{}".format(i) for i in company_max_price["number_company"]]) plt.show() save_file_s3("Maximal_price_", type_of_activity) @@ -142,12 +142,12 @@ def maximum_price_paid(customer, type_of_activity): def mailing_consent(customer, type_of_activity): mailing_consent = customer.groupby("number_company")["opt_in"].mean().reset_index() - + mailing_consent["opt_in"] *= 100 plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"]) - plt.xlabel('Company') - plt.ylabel('Consent') - plt.title(f'Consent of mailing for {type_of_activity}') + plt.xlabel('Company Number') + plt.ylabel('Mailing Consent (%)') + plt.title(f'Consent of mailing Across {type_of_activity} Companies') plt.xticks(mailing_consent["number_company"], ["{}".format(i) for i in mailing_consent["number_company"]]) plt.show() save_file_s3("mailing_consent_", type_of_activity) @@ -167,16 +167,16 @@ def mailing_consent_by_target(customer): label_data = df_graph[df_graph['has_purchased_target_period'] == label] values = [label_data[label_data['number_company'] == category]['opt_in'].values[0]*100 for category in categories] - label_printed = "purchased" if label else "no purchase" + label_printed = "Purchase" if label else "No purchase" ax.bar(bar_positions, values, bar_width, label=label_printed) # Mise à jour des positions des barres pour le prochain groupe bar_positions = [pos + bar_width for pos in bar_positions] # Ajout des étiquettes, de la légende, etc. - ax.set_xlabel('Company') - ax.set_ylabel('Consent') - ax.set_title(f'Consent of mailing according to target for {type_of_activity}') + ax.set_xlabel('Company Number') + ax.set_ylabel('Mailing Consent (%)') + ax.set_title(f'Consent of mailing according to target Across {type_of_activity} Companies') ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))]) ax.set_xticklabels(categories) ax.legend() @@ -189,16 +189,20 @@ def mailing_consent_by_target(customer): def gender_bar(customer, type_of_activity): company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index() - # Création du barplot - plt.bar(company_genders["number_company"], company_genders["gender_male"], label = "Homme") - plt.bar(company_genders["number_company"], company_genders["gender_female"], - bottom = company_genders["gender_male"], label = "Femme") - plt.bar(company_genders["number_company"], company_genders["gender_other"], - bottom = company_genders["gender_male"] + company_genders["gender_female"], label = "Inconnu") + company_genders["gender_male"] *= 100 + company_genders["gender_female"] *= 100 + company_genders["gender_other"] *= 100 - plt.xlabel('Company') - plt.ylabel("Gender") - plt.title(f"Gender of Customer for {type_of_activity}") + # Création du barplot + plt.bar(company_genders["number_company"], company_genders["gender_male"], label = "Male") + plt.bar(company_genders["number_company"], company_genders["gender_female"], + bottom = company_genders["gender_male"], label = "Female") + plt.bar(company_genders["number_company"], company_genders["gender_other"], + bottom = company_genders["gender_male"] + company_genders["gender_female"], label = "Unknown") + + plt.xlabel('Company Number') + plt.ylabel("Frequency (%)") + plt.title(f"Gender Distribution of Customers Across {type_of_activity} Companies") plt.legend() plt.xticks(company_genders["number_company"], ["{}".format(i) for i in company_genders["number_company"]]) plt.show() @@ -207,11 +211,12 @@ def gender_bar(customer, type_of_activity): def country_bar(customer, type_of_activity): company_country_fr = customer.groupby("number_company")["country_fr"].mean().reset_index() + company_country_fr["country_fr"] *= 100 plt.bar(company_country_fr["number_company"], company_country_fr["country_fr"]) - plt.xlabel('Company') - plt.ylabel("Share of French Customer") - plt.title(f"Share of French Customer for {type_of_activity}") + plt.xlabel('Company Number') + plt.ylabel("Share of French Customer (%)") + plt.title(f"Share of French Customer Across {type_of_activity} Companies") plt.xticks(company_country_fr["number_company"], ["{}".format(i) for i in company_country_fr["number_company"]]) plt.show() save_file_s3("country_bar_", type_of_activity) @@ -221,9 +226,8 @@ def lazy_customer_plot(campaigns_kpi, type_of_activity): company_lazy_customers = campaigns_kpi.groupby("number_company")["nb_campaigns_opened"].mean().reset_index() plt.bar(company_lazy_customers["number_company"], company_lazy_customers["nb_campaigns_opened"]) - plt.xlabel('Company') - plt.ylabel("Share of Customers who did not open mail") - plt.title(f"Share of Customers who did not open mail for {type_of_activity}") + plt.xlabel('Company Number') + plt.title(f"Share of Customers who did not Open Mail Across {type_of_activity} Companies") plt.xticks(company_lazy_customers["number_company"], ["{}".format(i) for i in company_lazy_customers["number_company"]]) plt.show() save_file_s3("lazy_customer_", type_of_activity) @@ -244,16 +248,16 @@ def campaigns_effectiveness(customer, type_of_activity): label_data = campaigns_effectiveness[campaigns_effectiveness['has_purchased_target_period'] == label] values = [label_data[label_data['number_company'] == category]['opt_in'].values[0]*100 for category in categories] - label_printed = "purchased" if label else "no purchase" + label_printed = "Purchase" if label else "No purchase" ax.bar(bar_positions, values, bar_width, label=label_printed) # Mise à jour des positions des barres pour le prochain groupe bar_positions = [pos + bar_width for pos in bar_positions] # Ajout des étiquettes, de la légende, etc. - ax.set_xlabel('Company') - ax.set_ylabel('Consent') - ax.set_title(f"Number of Customers who have consent to received mails for {type_of_activity} dependy on target") + ax.set_xlabel('Company Number') + ax.set_ylabel('Share of Consent (%)') + ax.set_title(f"Proportion of customers who have given their consent to receive emails, by customer class ({type_of_activity} companies)") ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))]) ax.set_xticklabels(categories) ax.legend() @@ -304,9 +308,9 @@ def sale_dynamics(products, campaigns_brut, type_of_activity): merged_data = pd.merge(purchases_graph_used_0, purchases_graph_used_1, on="purchase_date_month", suffixes=("_new", "_old")) - plt.bar(merged_data["purchase_date_month"], merged_data["nb_purchases_new"], width=12, label="Nouveau client") + plt.bar(merged_data["purchase_date_month"], merged_data["nb_purchases_new"], width=12, label="New Customers") plt.bar(merged_data["purchase_date_month"], merged_data["nb_purchases_old"], - bottom=merged_data["nb_purchases_new"], width=12, label="Ancien client") + bottom=merged_data["nb_purchases_new"], width=12, label="Existing Customers") # commande pr afficher slt @@ -314,7 +318,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity): plt.xlabel('Month') plt.ylabel("Number of Sales") - plt.title(f"Number of Sales for {type_of_activity}") + plt.title(f"Number of Sales Across {type_of_activity} Companies") plt.legend() plt.show() save_file_s3("sale_dynamics_", type_of_activity) @@ -322,12 +326,12 @@ def sale_dynamics(products, campaigns_brut, type_of_activity): def tickets_internet(tickets, type_of_activity): nb_tickets_internet = tickets.groupby("number_company")['prop_purchases_internet'].mean().reset_index() - + nb_tickets_internet['prop_purchases_internet'] *=100 plt.bar(nb_tickets_internet["number_company"], nb_tickets_internet["prop_purchases_internet"]) - plt.xlabel('Company') - plt.ylabel("Share of Purchases Bought Online") - plt.title(f"Share of Purchases Bought Online for {type_of_activity}") + plt.xlabel('Company Number') + plt.ylabel("Share of Purchases Bought Online (%)") + plt.title(f"Share of Online Purchases Across {type_of_activity} Companies") plt.xticks(nb_tickets_internet["number_company"], ["{}".format(i) for i in nb_tickets_internet["number_company"]]) plt.show() save_file_s3("tickets_internet_", type_of_activity) @@ -337,13 +341,13 @@ def already_bought_online(tickets, type_of_activity): nb_consumers_online = (tickets.groupby("number_company").agg({'achat_internet' : 'sum', 'customer_id' : 'nunique'} ).reset_index()) - nb_consumers_online["Share_consumers_internet"] = nb_consumers_online["achat_internet"]/ nb_consumers_online["customer_id"] + nb_consumers_online["Share_consumers_internet"] = (nb_consumers_online["achat_internet"]/ nb_consumers_online["customer_id"])*100 plt.bar(nb_consumers_online["number_company"], nb_consumers_online["Share_consumers_internet"]) - plt.xlabel('Company') - plt.ylabel("Share of Customer who Bought Online at least once") - plt.title(f"Share of Customer who Bought Online at least once for {type_of_activity}") + plt.xlabel('Company Number') + plt.ylabel("Share of Customer who Bought Online at least once (%)") + plt.title(f"Share of Customer who Bought Online at least once Across {type_of_activity} Companies") plt.xticks(nb_consumers_online["number_company"], ["{}".format(i) for i in nb_consumers_online["number_company"]]) plt.show() save_file_s3("First_buy_internet_", type_of_activity) @@ -352,7 +356,7 @@ def already_bought_online(tickets, type_of_activity): def box_plot_price_tickets(tickets, type_of_activity): price_tickets = tickets[(tickets['total_amount'] > 0)] sns.boxplot(data=price_tickets, y="total_amount", x="number_company", showfliers=False, showmeans=True) - plt.title(f"Box plot of price tickets for {type_of_activity}") + plt.title(f"Box plot of price tickets Across {type_of_activity} Companies") plt.show() save_file_s3("box_plot_price_tickets_", type_of_activity)