From 4ac11c6b37fee7e4128c4e23cbf0ef164f0c6304 Mon Sep 17 00:00:00 2001 From: arevelle-ensae Date: Thu, 14 Mar 2024 19:04:03 +0000 Subject: [PATCH] fix some plots --- Descriptive_statistics/generate_stat_desc.py | 5 +- Descriptive_statistics/plot.py | 49 ++++++-------------- 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/Descriptive_statistics/generate_stat_desc.py b/Descriptive_statistics/generate_stat_desc.py index 0f09598..e9aa4e6 100644 --- a/Descriptive_statistics/generate_stat_desc.py +++ b/Descriptive_statistics/generate_stat_desc.py @@ -37,6 +37,7 @@ databases = [customer, campaigns_kpi, campaigns_brut, tickets, products] for dataset in databases: dataset['customer_id'] = dataset['customer_id'].apply(lambda x: remove_elements(x, outlier_list))# remove outlier dataset['customer_id'] = dataset['customer_id'].isin(customer_valid_list) # keep only valid customer + #print(f'shape of {dataset} : ', dataset.shape) # Generate graph and automatically saved them in the bucket compute_nb_clients(customer, type_of_activity) @@ -51,8 +52,10 @@ country_bar(customer, type_of_activity) lazy_customer_plot(campaigns_kpi, type_of_activity) -campaigns_effectiveness(customer, type_of_activity) +# campaigns_effectiveness(customer, type_of_activity) sale_dynamics(products, campaigns_brut, type_of_activity) tickets_internet(tickets, type_of_activity) + +box_plot_price_tickets(tickets, type_of_activity) diff --git a/Descriptive_statistics/plot.py b/Descriptive_statistics/plot.py index 95ddf0e..968a0d5 100644 --- a/Descriptive_statistics/plot.py +++ b/Descriptive_statistics/plot.py @@ -54,7 +54,7 @@ def load_files(nb_compagnie): def save_file_s3(File_name, type_of_activity): FILE_PATH = f"projet-bdc2324-team1/stat_desc/{type_of_activity}/" - FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + 'png' + FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + '.png' with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out: plt.savefig(file_out) @@ -126,30 +126,14 @@ def maximum_price_paid(customer, type_of_activity): def mailing_consent(customer, type_of_activity): - df_graph = customer.groupby(["number_company", "already_purchased"])["opt_in"].mean().reset_index() - fig, ax = plt.subplots(figsize=(10, 6)) - - categories = df_graph["number_company"].unique() - bar_width = 0.35 - bar_positions = np.arange(len(categories)) - - for label in df_graph["already_purchased"].unique(): - label_data = df_graph[df_graph['already_purchased'] == label] - values = [label_data[label_data['number_company'] == category]['opt_in'].values[0]*100 for category in categories] - - label_printed = "purchased" if label else "no purchase" - ax.bar(bar_positions, values, bar_width, label=label_printed) - - bar_positions = [pos + bar_width for pos in bar_positions] - - # Ajout des étiquettes, de la légende, etc. - ax.set_xlabel('Company') - ax.set_ylabel('Consent of mailing (%)') - ax.set_title(f'Consent of mailing for {type_of_activity}') - ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))]) - ax.set_xticklabels(categories) - ax.legend() - + mailing_consent = customer.groupby("number_company")["opt_in"].mean().reset_index() + + plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"]) + + plt.xlabel('Company') + plt.ylabel('Consent of mailing (%)') + plt.title(f'Consent of mailing for {type_of_activity}') + plt.show() save_file_s3("mailing_consent_", type_of_activity) @@ -187,8 +171,8 @@ def country_bar(customer, type_of_activity): def lazy_customer_plot(campaigns_kpi, type_of_activity): - company_lazy_customers = campaigns_kpi.groupby("number_company")["no_campaign_opened"].mean().reset_index() - plt.bar(company_lazy_customers["number_company"], company_lazy_customers["no_campaign_opened"]) + company_lazy_customers = campaigns_kpi.groupby("number_company")["nb_campaigns_opened"].mean().reset_index() + plt.bar(company_lazy_customers["number_company"], company_lazy_customers["nb_campaigns_opened"]) plt.xlabel('Company') plt.ylabel("Share of Customers who did not open mail") @@ -200,14 +184,9 @@ def lazy_customer_plot(campaigns_kpi, type_of_activity): def campaigns_effectiveness(customer, type_of_activity): - customer["already_purchased"] = customer["purchase_count"]>0 + campaigns_effectiveness = customer.groupby("number_company")["opt_in"].mean().reset_index() - nb_customers_purchasing = customer_sport[customer["already_purchased"]].groupby(["number_company","already_purchased"])["customer_id"].count().reset_index() - nb_customers_no_purchase = customer_sport[~customer["already_purchased"]].groupby(["number_company","already_purchased"])["customer_id"].count().reset_index() - - plt.bar(nb_customers_purchasing["number_company"], nb_customers_purchasing["customer_id"]/1000, label = "has purchased") - plt.bar(nb_customers_no_purchase["number_company"], nb_customers_no_purchase["customer_id"]/1000, - bottom = nb_customers_purchasing["customer_id"]/1000, label = "has not purchased") + plt.bar(campaigns_effectiveness["number_company"], campaigns_effectiveness["opt_in"]) plt.xlabel('Company') plt.ylabel("Number of Customers (thousands)") @@ -266,7 +245,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity): def tickets_internet(tickets, type_of_activity): - nb_tickets_internet = products_purchased_reduced_spectacle.groupby("number_company")[["nb_tickets", "nb_tickets_internet"]].sum().reset_index() + nb_tickets_internet = tickets.groupby("number_company")[["nb_tickets", "nb_tickets_internet"]].sum().reset_index() nb_tickets_internet["Share_ticket_internet"] = nb_tickets_internet["nb_tickets_internet"]*100 / nb_tickets_internet["nb_tickets"] plt.bar(nb_tickets_internet["number_company"], nb_tickets_internet["Share_ticket_internet"])