Amélioration graphique lazy + meilleur cadrage + enlever titre

This commit is contained in:
Antoine JOUBREL 2024-04-02 21:12:07 +00:00
parent 091693c889
commit ea3dcbb015
2 changed files with 60 additions and 36 deletions

View File

@ -5,6 +5,12 @@ import io
import s3fs
import re
import warnings
from datetime import date, timedelta, datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
# Ignore warning
warnings.filterwarnings('ignore')

View File

@ -1,13 +1,3 @@
import pandas as pd
import os
import s3fs
import io
import warnings
from datetime import date, timedelta, datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
def load_files(nb_compagnie):
@ -84,7 +74,7 @@ def remove_outlier_total_amount(tickets):
def save_file_s3(File_name, type_of_activity):
image_buffer = io.BytesIO()
plt.savefig(image_buffer, format='png')
plt.savefig(image_buffer, format='png', pad_inches=1, bbox_inches="tight")
image_buffer.seek(0)
FILE_PATH = f"projet-bdc2324-team1/2_Output/2_0_Descriptive_Statistics/{type_of_activity}/"
FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + '.png'
@ -118,7 +108,7 @@ def outlier_detection(tickets, company_list, show_diagram=False):
plt.figure(figsize=(3, 3))
plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)
plt.axis('equal')
plt.title(f'Répartition des montants totaux pour la compagnie {company}')
# plt.title(f'Répartition des montants totaux pour la compagnie {company}')
plt.show()
return outlier_list
@ -147,11 +137,11 @@ def remove_elements(lst, elements_to_remove):
# Bar chart of the number of purchasing customers per company.
#
# Parameters:
#   customer: table indexed by column name; the code reads the columns
#       "purchase_count", "number_company" and "customer_id"
#       (presumably a pandas DataFrame - confirm against the caller).
#   type_of_activity: text interpolated into the title and forwarded to
#       save_file_s3.
# Returns nothing; the function only draws, shows and saves a figure.
#
# NOTE(review): this listing looks like a rendered diff whose +/- markers and
# indentation were lost, so old and new versions of a statement appear side by
# side (see the two xlabel calls and the title / commented-out title pair).
# Confirm the real function body against the repository.
def compute_nb_clients(customer, type_of_activity):
# Keep rows with at least one purchase, then count customer_id entries per company.
company_nb_clients = customer[customer["purchase_count"]>0].groupby("number_company")["customer_id"].count().reset_index()
plt.figure(figsize=(4,3))
# Counts are divided by 1000 so the y-axis reads in thousands (see the ylabel).
plt.bar(company_nb_clients["number_company"], company_nb_clients["customer_id"]/1000)
# Two consecutive xlabel calls: the second one overrides the first.
plt.xlabel('Company')
plt.xlabel('Company Number')
plt.ylabel("Number of clients (thousands)")
plt.title(f"Number of clients Across {type_of_activity} Companies")
# plt.title(f"Number of clients Across {type_of_activity} Companies")
# One tick per company, labelled with the company number rendered as text.
plt.xticks(company_nb_clients["number_company"], ["{}".format(i) for i in company_nb_clients["number_company"]])
plt.show()
# NOTE(review): save_file_s3 (as far as it is visible in this listing) calls
# plt.savefig on the current pyplot figure. It runs after plt.show(); depending
# on the backend, show() may already have closed the figure, in which case an
# empty image would be saved - confirm.
save_file_s3("nb_clients_", type_of_activity)
@ -163,7 +153,7 @@ def maximum_price_paid(customer, type_of_activity):
plt.xlabel('Company Number')
plt.ylabel("Maximal price of a ticket Prix")
plt.title(f"Maximal price of a ticket Across {type_of_activity} Companies")
# plt.title(f"Maximal price of a ticket Across {type_of_activity} Companies")
plt.xticks(company_max_price["number_company"], ["{}".format(i) for i in company_max_price["number_company"]])
plt.show()
save_file_s3("Maximal_price_", type_of_activity)
@ -176,7 +166,7 @@ def target_proportion(customer, type_of_activity):
plt.bar(df_y["number_company"], df_y["prop_has_purchased_target_period"])
plt.xlabel('Company Number')
plt.ylabel('Share (%)')
plt.title(f'Share of Customers who Bought during the Target Period Across {type_of_activity} Companies')
# plt.title(f'Share of Customers who Bought during the Target Period Across {type_of_activity} Companies')
plt.xticks(df_y["number_company"], ["{}".format(i) for i in df_y["number_company"]])
plt.show()
save_file_s3("share_target_", type_of_activity)
@ -189,7 +179,7 @@ def mailing_consent(customer, type_of_activity):
plt.xlabel('Company Number')
plt.ylabel('Mailing Consent (%)')
plt.title(f'Consent of mailing Across {type_of_activity} Companies')
# plt.title(f'Consent of mailing Across {type_of_activity} Companies')
plt.xticks(mailing_consent["number_company"], ["{}".format(i) for i in mailing_consent["number_company"]])
plt.show()
save_file_s3("mailing_consent_", type_of_activity)
@ -198,7 +188,7 @@ def mailing_consent(customer, type_of_activity):
def mailing_consent_by_target(customer):
df_graph = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
# Création du barplot groupé
fig, ax = plt.subplots(figsize=(10, 6))
fig, ax = plt.subplots(figsize=(5, 3))
categories = df_graph["number_company"].unique()
bar_width = 0.35
@ -218,7 +208,7 @@ def mailing_consent_by_target(customer):
# Ajout des étiquettes, de la légende, etc.
ax.set_xlabel('Company Number')
ax.set_ylabel('Mailing Consent (%)')
ax.set_title(f'Consent of mailing according to target Across {type_of_activity} Companies')
# ax.set_title(f'Consent of mailing according to target Across {type_of_activity} Companies')
ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])
ax.set_xticklabels(categories)
ax.legend()
@ -236,6 +226,7 @@ def gender_bar(customer, type_of_activity):
company_genders["gender_other"] *= 100
# Création du barplot
plt.figure(figsize=(4,3))
plt.bar(company_genders["number_company"], company_genders["gender_male"], label = "Male")
plt.bar(company_genders["number_company"], company_genders["gender_female"],
bottom = company_genders["gender_male"], label = "Female")
@ -244,7 +235,7 @@ def gender_bar(customer, type_of_activity):
plt.xlabel('Company Number')
plt.ylabel("Frequency (%)")
plt.title(f"Gender Distribution of Customers Across {type_of_activity} Companies")
# plt.title(f"Gender Distribution of Customers Across {type_of_activity} Companies")
plt.legend()
plt.xticks(company_genders["number_company"], ["{}".format(i) for i in company_genders["number_company"]])
plt.show()
@ -254,23 +245,46 @@ def gender_bar(customer, type_of_activity):
# Bar chart of the mean of "country_fr" per company, expressed as a percentage.
#
# Parameters:
#   customer: table indexed by column name; the code reads the columns
#       "number_company" and "country_fr" (presumably a pandas DataFrame with
#       a 0/1 country_fr indicator - confirm against the caller).
#   type_of_activity: text interpolated into the title and forwarded to
#       save_file_s3.
# Returns nothing; the function only draws, shows and saves a figure.
#
# NOTE(review): this listing looks like a rendered diff whose +/- markers and
# indentation were lost; the title call and its commented-out twin are likely
# the removed/added pair. Confirm the real function body against the repository.
def country_bar(customer, type_of_activity):
company_country_fr = customer.groupby("number_company")["country_fr"].mean().reset_index()
# Scale the per-company mean by 100 so it matches the "(%)" axis label.
company_country_fr["country_fr"] *= 100
plt.figure(figsize=(4,3))
plt.bar(company_country_fr["number_company"], company_country_fr["country_fr"])
plt.xlabel('Company Number')
plt.ylabel("Share of French Customer (%)")
plt.title(f"Share of French Customer Across {type_of_activity} Companies")
# plt.title(f"Share of French Customer Across {type_of_activity} Companies")
# One tick per company, labelled with the company number rendered as text.
plt.xticks(company_country_fr["number_company"], ["{}".format(i) for i in company_country_fr["number_company"]])
plt.show()
# NOTE(review): save_file_s3 (as far as it is visible in this listing) calls
# plt.savefig on the current pyplot figure. It runs after plt.show(); depending
# on the backend, show() may already have closed the figure - confirm.
save_file_s3("country_bar_", type_of_activity)
# Twin-axis bar chart per company: mean "nb_campaigns" on the left y-axis and
# mean "taux_ouverture_mail" (scaled by 100) on the right y-axis.
#
# Parameters:
#   campaigns_kpi: table indexed by column name; the code reads the columns
#       "number_company", "nb_campaigns_opened", "nb_campaigns" and
#       "taux_ouverture_mail" (presumably a pandas DataFrame - confirm).
#   type_of_activity: text interpolated into the title and forwarded to
#       save_file_s3.
# Returns nothing; the function only draws, shows and saves a figure.
#
# NOTE(review): this listing looks like a rendered diff whose +/- markers and
# indentation were lost, so statements from before and after the change are
# interleaved. Confirm the real function body against the repository.
def lazy_customer_plot(campaigns_kpi, type_of_activity):
# NOTE(review): the next two statements are superseded by the reassignment of
# company_lazy_customers right after them; they look like the pre-change lines.
# Read literally, the plt.bar call draws before the plt.subplots figure exists.
company_lazy_customers = campaigns_kpi.groupby("number_company")["nb_campaigns_opened"].mean().reset_index()
plt.bar(company_lazy_customers["number_company"], company_lazy_customers["nb_campaigns_opened"])
company_lazy_customers = campaigns_kpi.groupby("number_company")[["nb_campaigns", "taux_ouverture_mail"]].mean().reset_index()
# Scale the mean open rate by 100 to match the "(%)" label of the second axis.
company_lazy_customers["taux_ouverture_mail"] *= 100
# Initialize the figure
fig, ax1 = plt.subplots(figsize=(6, 3))
width = 0.4
# Bars are positioned by row index (0..n-1), not by company number.
x = range(len(company_lazy_customers))
# Plot the bars for "nb_campaigns" on the first y-axis
# Shifted left by half a bar width so both series sit side by side around each x.
ax1.bar([i - width/2 for i in x], company_lazy_customers['nb_campaigns'], width=width, align='center', label='Amount of Campaigns', color = 'steelblue')
# Set labels and title for the first y-axis
ax1.set_ylabel('Number of Mails Received', color='steelblue')
ax1.tick_params(axis='y', labelcolor='steelblue')
# Create another y-axis for "taux_ouverture_mail"
ax2 = ax1.twinx()
# Plot the bars for "taux_ouverture_mail" on the second y-axis
# Shifted right by half a bar width (mirror of the first series).
ax2.bar([i + width/2 for i in x], company_lazy_customers['taux_ouverture_mail'], width=width, align='center', label='Open Mail Rate', color = 'darkorange')
# Set labels and title for the second y-axis
ax2.set_ylabel('Open Mail Rate (%)', color='darkorange')
ax2.tick_params(axis='y', labelcolor='darkorange')
# Set x-axis ticks and labels
# Ticks go at the row-index positions and are labelled with the company numbers.
ax1.set_xticks(x)
ax1.set_xticklabels(company_lazy_customers['number_company'])
plt.xlabel('Company Number')
# NOTE(review): the title text mentions customers who did not open mail, while
# the plotted series are nb_campaigns and taux_ouverture_mail - likely a
# leftover from the previous version of the chart; confirm.
plt.title(f"Share of Customers who did not Open Mail Across {type_of_activity} Companies")
# NOTE(review): plt.xticks acts on pyplot's current axes and uses the company
# numbers as tick positions, whereas the bars above are placed at row-index
# positions; if this line is really part of the function it would conflict with
# ax1.set_xticks(x) - likely a leftover; confirm.
plt.xticks(company_lazy_customers["number_company"], ["{}".format(i) for i in company_lazy_customers["number_company"]])
plt.show()
# NOTE(review): save_file_s3 (as far as it is visible in this listing) calls
# plt.savefig on the current pyplot figure. It runs after plt.show(); depending
# on the backend, show() may already have closed the figure - confirm.
save_file_s3("lazy_customer_", type_of_activity)
@ -279,7 +293,7 @@ def campaigns_effectiveness(customer, type_of_activity):
campaigns_effectiveness = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
fig, ax = plt.subplots(figsize=(10, 6))
fig, ax = plt.subplots(figsize=(5, 3))
categories = campaigns_effectiveness["number_company"].unique()
bar_width = 0.35
@ -299,7 +313,7 @@ def campaigns_effectiveness(customer, type_of_activity):
# Ajout des étiquettes, de la légende, etc.
ax.set_xlabel('Company Number')
ax.set_ylabel('Share of Consent (%)')
ax.set_title(f"Proportion of customers who have given their consent to receive emails, by customer class ({type_of_activity} companies)")
# ax.set_title(f"Proportion of customers who have given their consent to receive emails, by customer class ({type_of_activity} companies)")
ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])
ax.set_xticklabels(categories)
ax.legend()
@ -349,6 +363,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity):
merged_data = pd.merge(purchases_graph_used_0, purchases_graph_used_1, on="purchase_date_month", suffixes=("_new", "_old"))
plt.figure(figsize=(5.5,4))
plt.bar(merged_data["purchase_date_month"], merged_data["nb_purchases_new"], width=12, label="New Customers")
plt.bar(merged_data["purchase_date_month"], merged_data["nb_purchases_old"],
@ -360,7 +375,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity):
plt.xlabel('Month')
plt.ylabel("Number of Sales")
plt.title(f"Number of Sales Across {type_of_activity} Companies")
# plt.title(f"Number of Sales Across {type_of_activity} Companies")
plt.legend()
plt.show()
save_file_s3("sale_dynamics_", type_of_activity)
@ -373,7 +388,7 @@ def tickets_internet(tickets, type_of_activity):
plt.xlabel('Company Number')
plt.ylabel("Share of Purchases Bought Online (%)")
plt.title(f"Share of Online Purchases Across {type_of_activity} Companies")
# plt.title(f"Share of Online Purchases Across {type_of_activity} Companies")
plt.xticks(nb_tickets_internet["number_company"], ["{}".format(i) for i in nb_tickets_internet["number_company"]])
plt.show()
save_file_s3("tickets_internet_", type_of_activity)
@ -389,7 +404,7 @@ def already_bought_online(tickets, type_of_activity):
plt.xlabel('Company Number')
plt.ylabel("Share of Customer who Bought Online at least once (%)")
plt.title(f"Share of Customer who Bought Online at least once Across {type_of_activity} Companies")
# plt.title(f"Share of Customer who Bought Online at least once Across {type_of_activity} Companies")
plt.xticks(nb_consumers_online["number_company"], ["{}".format(i) for i in nb_consumers_online["number_company"]])
plt.show()
save_file_s3("First_buy_internet_", type_of_activity)
@ -397,8 +412,11 @@ def already_bought_online(tickets, type_of_activity):
# Box plot of "total_amount" per company, restricted to strictly positive amounts.
#
# Parameters:
#   tickets: table indexed by column name; the code reads the columns
#       "total_amount" and "number_company" (presumably a pandas DataFrame -
#       confirm against the caller).
#   type_of_activity: text interpolated into the title and forwarded to
#       save_file_s3.
# Returns nothing; the function only draws, shows and saves a figure.
#
# NOTE(review): this listing looks like a rendered diff whose +/- markers and
# indentation were lost; the title call and its commented-out twin are likely
# the removed/added pair. Confirm the real function body against the repository.
def box_plot_price_tickets(tickets, type_of_activity):
# Drop rows whose total_amount is zero or negative before plotting.
price_tickets = tickets[(tickets['total_amount'] > 0)]
plt.figure(figsize=(4,3))
# showfliers=False hides outlier points; showmeans=True adds a marker for the mean.
sns.boxplot(data=price_tickets, y="total_amount", x="number_company", showfliers=False, showmeans=True)
plt.title(f"Box plot of price tickets Across {type_of_activity} Companies")
# plt.title(f"Box plot of price tickets Across {type_of_activity} Companies")
plt.xlabel('Company Number')
plt.ylabel("Total Amount Spent")
plt.show()
# NOTE(review): save_file_s3 (as far as it is visible in this listing) calls
# plt.savefig on the current pyplot figure. It runs after plt.show(); depending
# on the backend, show() may already have closed the figure - confirm.
save_file_s3("box_plot_price_tickets_", type_of_activity)
@ -417,7 +435,7 @@ def target_description(targets, type_of_activity):
plot = describe_target.plot.bar()
# Adding a title
plot.set_title(f"Distribution of Targets by Category for {type_of_activity} companies")
# plot.set_title(f"Distribution of Targets by Category for {type_of_activity} companies")
# Adding labels for x and y axes
plot.set_xlabel("Company Number")