fix some plots

This commit is contained in:
Alexis REVELLE 2024-03-14 19:04:03 +00:00
parent d42e81449a
commit 4ac11c6b37
2 changed files with 18 additions and 36 deletions

View File

@ -37,6 +37,7 @@ databases = [customer, campaigns_kpi, campaigns_brut, tickets, products]
# Clean every dataset the same way: neutralise outlier ids, then keep only the
# rows that belong to a valid customer.
for position, dataset in enumerate(databases):
    # remove_elements is defined elsewhere; it is applied to each id together
    # with outlier_list (presumably it blanks out outlier ids -- confirm).
    dataset['customer_id'] = dataset['customer_id'].apply(lambda x: remove_elements(x, outlier_list))
    # Filter ROWS with the boolean mask. Assigning the mask itself to
    # 'customer_id' would overwrite every id with True/False and drop nothing.
    databases[position] = dataset[dataset['customer_id'].isin(customer_valid_list)]
    # print(f'shape of {dataset} : ', dataset.shape)

# Row filtering creates new frames, so rebind the module-level names (same
# order as the `databases` list) for the plotting calls that follow.
customer, campaigns_kpi, campaigns_brut, tickets, products = databases
# Generate the graphs and automatically save them in the bucket
compute_nb_clients(customer, type_of_activity)
@ -51,8 +52,10 @@ country_bar(customer, type_of_activity)
lazy_customer_plot(campaigns_kpi, type_of_activity)
campaigns_effectiveness(customer, type_of_activity)
# campaigns_effectiveness(customer, type_of_activity)
sale_dynamics(products, campaigns_brut, type_of_activity)
tickets_internet(tickets, type_of_activity)
box_plot_price_tickets(tickets, type_of_activity)

View File

@ -54,7 +54,7 @@ def load_files(nb_compagnie):
def save_file_s3(File_name, type_of_activity):
    """Save the current matplotlib figure as a PNG through the module-level ``fs``.

    The destination path is
    ``projet-bdc2324-team1/stat_desc/<type_of_activity>/<File_name><type_of_activity>.png``.

    Args:
        File_name: Prefix of the output file name (e.g. ``"mailing_consent_"``).
        type_of_activity: Activity label; used both as the sub-folder and as
            the suffix of the file name.
    """
    FILE_PATH = f"projet-bdc2324-team1/stat_desc/{type_of_activity}/"
    # The extension needs its leading dot; without it the name ends in
    # "<type_of_activity>png" and is not recognised as an image file.
    FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + '.png'
    # `fs` is defined elsewhere in the module (presumably an s3fs filesystem
    # -- confirm); the figure is written straight into the opened handle.
    with fs.open(FILE_PATH_OUT_S3, 'wb') as file_out:
        plt.savefig(file_out)
@ -126,30 +126,14 @@ def maximum_price_paid(customer, type_of_activity):
def mailing_consent(customer, type_of_activity):
    """Plot, for each company, the average mailing consent and save the figure.

    Args:
        customer: DataFrame with at least the columns ``number_company`` and
            ``opt_in``.
        type_of_activity: Activity label shown in the title and forwarded to
            ``save_file_s3``.
    """
    # A single chart is drawn. A breakdown by "already_purchased" would need a
    # column this function does not create, and drawing it on the same axes as
    # the per-company bars would overplot them.
    consent_by_company = customer.groupby("number_company")["opt_in"].mean().reset_index()

    # The mean of opt_in is a fraction; scale it so the values match the
    # "(%)" axis label (assumes opt_in is boolean / 0-1 -- confirm).
    plt.bar(consent_by_company["number_company"], consent_by_company["opt_in"] * 100)

    plt.xlabel('Company')
    plt.ylabel('Consent of mailing (%)')
    plt.title(f'Consent of mailing for {type_of_activity}')

    # Save before showing: depending on the backend, plt.show() can release
    # the current figure, in which case a later savefig writes an empty image.
    save_file_s3("mailing_consent_", type_of_activity)
    plt.show()
@ -187,8 +171,8 @@ def country_bar(customer, type_of_activity):
def lazy_customer_plot(campaigns_kpi, type_of_activity):
company_lazy_customers = campaigns_kpi.groupby("number_company")["no_campaign_opened"].mean().reset_index()
plt.bar(company_lazy_customers["number_company"], company_lazy_customers["no_campaign_opened"])
company_lazy_customers = campaigns_kpi.groupby("number_company")["nb_campaigns_opened"].mean().reset_index()
plt.bar(company_lazy_customers["number_company"], company_lazy_customers["nb_campaigns_opened"])
plt.xlabel('Company')
plt.ylabel("Share of Customers who did not open mail")
@ -200,14 +184,9 @@ def lazy_customer_plot(campaigns_kpi, type_of_activity):
def campaigns_effectiveness(customer, type_of_activity):
customer["already_purchased"] = customer["purchase_count"]>0
campaigns_effectiveness = customer.groupby("number_company")["opt_in"].mean().reset_index()
nb_customers_purchasing = customer_sport[customer["already_purchased"]].groupby(["number_company","already_purchased"])["customer_id"].count().reset_index()
nb_customers_no_purchase = customer_sport[~customer["already_purchased"]].groupby(["number_company","already_purchased"])["customer_id"].count().reset_index()
plt.bar(nb_customers_purchasing["number_company"], nb_customers_purchasing["customer_id"]/1000, label = "has purchased")
plt.bar(nb_customers_no_purchase["number_company"], nb_customers_no_purchase["customer_id"]/1000,
bottom = nb_customers_purchasing["customer_id"]/1000, label = "has not purchased")
plt.bar(campaigns_effectiveness["number_company"], campaigns_effectiveness["opt_in"])
plt.xlabel('Company')
plt.ylabel("Number of Customers (thousands)")
@ -266,7 +245,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity):
def tickets_internet(tickets, type_of_activity):
nb_tickets_internet = products_purchased_reduced_spectacle.groupby("number_company")[["nb_tickets", "nb_tickets_internet"]].sum().reset_index()
nb_tickets_internet = tickets.groupby("number_company")[["nb_tickets", "nb_tickets_internet"]].sum().reset_index()
nb_tickets_internet["Share_ticket_internet"] = nb_tickets_internet["nb_tickets_internet"]*100 / nb_tickets_internet["nb_tickets"]
plt.bar(nb_tickets_internet["number_company"], nb_tickets_internet["Share_ticket_internet"])