Amélioration graphique lazy + meilleur cadrage + enlever titre
This commit is contained in:
parent
091693c889
commit
ea3dcbb015
|
@ -5,6 +5,12 @@ import io
|
|||
import s3fs
|
||||
import re
|
||||
import warnings
|
||||
from datetime import date, timedelta, datetime
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
import seaborn as sns
|
||||
|
||||
|
||||
|
||||
# Ignore warning
|
||||
warnings.filterwarnings('ignore')
|
||||
|
|
|
@ -1,13 +1,3 @@
|
|||
import pandas as pd
|
||||
import os
|
||||
import s3fs
|
||||
import io
|
||||
import warnings
|
||||
from datetime import date, timedelta, datetime
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.dates as mdates
|
||||
import seaborn as sns
|
||||
|
||||
|
||||
def load_files(nb_compagnie):
|
||||
|
@ -84,7 +74,7 @@ def remove_outlier_total_amount(tickets):
|
|||
|
||||
def save_file_s3(File_name, type_of_activity):
|
||||
image_buffer = io.BytesIO()
|
||||
plt.savefig(image_buffer, format='png')
|
||||
plt.savefig(image_buffer, format='png', pad_inches=1, bbox_inches="tight")
|
||||
image_buffer.seek(0)
|
||||
FILE_PATH = f"projet-bdc2324-team1/2_Output/2_0_Descriptive_Statistics/{type_of_activity}/"
|
||||
FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + '.png'
|
||||
|
@ -118,7 +108,7 @@ def outlier_detection(tickets, company_list, show_diagram=False):
|
|||
plt.figure(figsize=(3, 3))
|
||||
plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)
|
||||
plt.axis('equal')
|
||||
plt.title(f'Répartition des montants totaux pour la compagnie {company}')
|
||||
# plt.title(f'Répartition des montants totaux pour la compagnie {company}')
|
||||
plt.show()
|
||||
return outlier_list
|
||||
|
||||
|
@ -147,11 +137,11 @@ def remove_elements(lst, elements_to_remove):
|
|||
|
||||
def compute_nb_clients(customer, type_of_activity):
|
||||
company_nb_clients = customer[customer["purchase_count"]>0].groupby("number_company")["customer_id"].count().reset_index()
|
||||
plt.figure(figsize=(4,3))
|
||||
plt.bar(company_nb_clients["number_company"], company_nb_clients["customer_id"]/1000)
|
||||
|
||||
plt.xlabel('Company')
|
||||
plt.xlabel('Company Number')
|
||||
plt.ylabel("Number of clients (thousands)")
|
||||
plt.title(f"Number of clients Across {type_of_activity} Companies")
|
||||
# plt.title(f"Number of clients Across {type_of_activity} Companies")
|
||||
plt.xticks(company_nb_clients["number_company"], ["{}".format(i) for i in company_nb_clients["number_company"]])
|
||||
plt.show()
|
||||
save_file_s3("nb_clients_", type_of_activity)
|
||||
|
@ -163,7 +153,7 @@ def maximum_price_paid(customer, type_of_activity):
|
|||
|
||||
plt.xlabel('Company Number')
|
||||
plt.ylabel("Maximal price of a ticket Prix")
|
||||
plt.title(f"Maximal price of a ticket Across {type_of_activity} Companies")
|
||||
# plt.title(f"Maximal price of a ticket Across {type_of_activity} Companies")
|
||||
plt.xticks(company_max_price["number_company"], ["{}".format(i) for i in company_max_price["number_company"]])
|
||||
plt.show()
|
||||
save_file_s3("Maximal_price_", type_of_activity)
|
||||
|
@ -176,7 +166,7 @@ def target_proportion(customer, type_of_activity):
|
|||
plt.bar(df_y["number_company"], df_y["prop_has_purchased_target_period"])
|
||||
plt.xlabel('Company Number')
|
||||
plt.ylabel('Share (%)')
|
||||
plt.title(f'Share of Customers who Bought during the Target Period Across {type_of_activity} Companies')
|
||||
# plt.title(f'Share of Customers who Bought during the Target Period Across {type_of_activity} Companies')
|
||||
plt.xticks(df_y["number_company"], ["{}".format(i) for i in df_y["number_company"]])
|
||||
plt.show()
|
||||
save_file_s3("share_target_", type_of_activity)
|
||||
|
@ -189,7 +179,7 @@ def mailing_consent(customer, type_of_activity):
|
|||
|
||||
plt.xlabel('Company Number')
|
||||
plt.ylabel('Mailing Consent (%)')
|
||||
plt.title(f'Consent of mailing Across {type_of_activity} Companies')
|
||||
# plt.title(f'Consent of mailing Across {type_of_activity} Companies')
|
||||
plt.xticks(mailing_consent["number_company"], ["{}".format(i) for i in mailing_consent["number_company"]])
|
||||
plt.show()
|
||||
save_file_s3("mailing_consent_", type_of_activity)
|
||||
|
@ -198,7 +188,7 @@ def mailing_consent(customer, type_of_activity):
|
|||
def mailing_consent_by_target(customer):
|
||||
df_graph = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
|
||||
# Création du barplot groupé
|
||||
fig, ax = plt.subplots(figsize=(10, 6))
|
||||
fig, ax = plt.subplots(figsize=(5, 3))
|
||||
|
||||
categories = df_graph["number_company"].unique()
|
||||
bar_width = 0.35
|
||||
|
@ -218,7 +208,7 @@ def mailing_consent_by_target(customer):
|
|||
# Ajout des étiquettes, de la légende, etc.
|
||||
ax.set_xlabel('Company Number')
|
||||
ax.set_ylabel('Mailing Consent (%)')
|
||||
ax.set_title(f'Consent of mailing according to target Across {type_of_activity} Companies')
|
||||
# ax.set_title(f'Consent of mailing according to target Across {type_of_activity} Companies')
|
||||
ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])
|
||||
ax.set_xticklabels(categories)
|
||||
ax.legend()
|
||||
|
@ -236,6 +226,7 @@ def gender_bar(customer, type_of_activity):
|
|||
company_genders["gender_other"] *= 100
|
||||
|
||||
# Création du barplot
|
||||
plt.figure(figsize=(4,3))
|
||||
plt.bar(company_genders["number_company"], company_genders["gender_male"], label = "Male")
|
||||
plt.bar(company_genders["number_company"], company_genders["gender_female"],
|
||||
bottom = company_genders["gender_male"], label = "Female")
|
||||
|
@ -244,7 +235,7 @@ def gender_bar(customer, type_of_activity):
|
|||
|
||||
plt.xlabel('Company Number')
|
||||
plt.ylabel("Frequency (%)")
|
||||
plt.title(f"Gender Distribution of Customers Across {type_of_activity} Companies")
|
||||
# plt.title(f"Gender Distribution of Customers Across {type_of_activity} Companies")
|
||||
plt.legend()
|
||||
plt.xticks(company_genders["number_company"], ["{}".format(i) for i in company_genders["number_company"]])
|
||||
plt.show()
|
||||
|
@ -254,23 +245,46 @@ def gender_bar(customer, type_of_activity):
|
|||
def country_bar(customer, type_of_activity):
|
||||
company_country_fr = customer.groupby("number_company")["country_fr"].mean().reset_index()
|
||||
company_country_fr["country_fr"] *= 100
|
||||
plt.figure(figsize=(4,3))
|
||||
plt.bar(company_country_fr["number_company"], company_country_fr["country_fr"])
|
||||
|
||||
plt.xlabel('Company Number')
|
||||
plt.ylabel("Share of French Customer (%)")
|
||||
plt.title(f"Share of French Customer Across {type_of_activity} Companies")
|
||||
# plt.title(f"Share of French Customer Across {type_of_activity} Companies")
|
||||
plt.xticks(company_country_fr["number_company"], ["{}".format(i) for i in company_country_fr["number_company"]])
|
||||
plt.show()
|
||||
save_file_s3("country_bar_", type_of_activity)
|
||||
|
||||
|
||||
def lazy_customer_plot(campaigns_kpi, type_of_activity):
|
||||
company_lazy_customers = campaigns_kpi.groupby("number_company")["nb_campaigns_opened"].mean().reset_index()
|
||||
plt.bar(company_lazy_customers["number_company"], company_lazy_customers["nb_campaigns_opened"])
|
||||
company_lazy_customers = campaigns_kpi.groupby("number_company")[["nb_campaigns", "taux_ouverture_mail"]].mean().reset_index()
|
||||
company_lazy_customers["taux_ouverture_mail"] *= 100
|
||||
|
||||
# Initialize the figure
|
||||
fig, ax1 = plt.subplots(figsize=(6, 3))
|
||||
width = 0.4
|
||||
x = range(len(company_lazy_customers))
|
||||
|
||||
# Plot the bars for "nb_campaigns" on the first y-axis
|
||||
ax1.bar([i - width/2 for i in x], company_lazy_customers['nb_campaigns'], width=width, align='center', label='Amount of Campaigns', color = 'steelblue')
|
||||
|
||||
# Set labels and title for the first y-axis
|
||||
ax1.set_ylabel('Number of Mails Received', color='steelblue')
|
||||
ax1.tick_params(axis='y', labelcolor='steelblue')
|
||||
|
||||
# Create another y-axis for "taux_ouverture_mail"
|
||||
ax2 = ax1.twinx()
|
||||
|
||||
# Plot the bars for "taux_ouverture_mail" on the second y-axis
|
||||
ax2.bar([i + width/2 for i in x], company_lazy_customers['taux_ouverture_mail'], width=width, align='center', label='Open Mail Rate', color = 'darkorange')
|
||||
|
||||
# Set labels and title for the second y-axis
|
||||
ax2.set_ylabel('Open Mail Rate (%)', color='darkorange')
|
||||
ax2.tick_params(axis='y', labelcolor='darkorange')
|
||||
|
||||
# Set x-axis ticks and labels
|
||||
ax1.set_xticks(x)
|
||||
ax1.set_xticklabels(company_lazy_customers['number_company'])
|
||||
|
||||
plt.xlabel('Company Number')
|
||||
plt.title(f"Share of Customers who did not Open Mail Across {type_of_activity} Companies")
|
||||
plt.xticks(company_lazy_customers["number_company"], ["{}".format(i) for i in company_lazy_customers["number_company"]])
|
||||
plt.show()
|
||||
save_file_s3("lazy_customer_", type_of_activity)
|
||||
|
||||
|
@ -279,7 +293,7 @@ def campaigns_effectiveness(customer, type_of_activity):
|
|||
|
||||
campaigns_effectiveness = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
|
||||
|
||||
fig, ax = plt.subplots(figsize=(10, 6))
|
||||
fig, ax = plt.subplots(figsize=(5, 3))
|
||||
|
||||
categories = campaigns_effectiveness["number_company"].unique()
|
||||
bar_width = 0.35
|
||||
|
@ -299,7 +313,7 @@ def campaigns_effectiveness(customer, type_of_activity):
|
|||
# Ajout des étiquettes, de la légende, etc.
|
||||
ax.set_xlabel('Company Number')
|
||||
ax.set_ylabel('Share of Consent (%)')
|
||||
ax.set_title(f"Proportion of customers who have given their consent to receive emails, by customer class ({type_of_activity} companies)")
|
||||
# ax.set_title(f"Proportion of customers who have given their consent to receive emails, by customer class ({type_of_activity} companies)")
|
||||
ax.set_xticks([pos + bar_width / 2 for pos in np.arange(len(categories))])
|
||||
ax.set_xticklabels(categories)
|
||||
ax.legend()
|
||||
|
@ -349,7 +363,8 @@ def sale_dynamics(products, campaigns_brut, type_of_activity):
|
|||
|
||||
|
||||
merged_data = pd.merge(purchases_graph_used_0, purchases_graph_used_1, on="purchase_date_month", suffixes=("_new", "_old"))
|
||||
|
||||
plt.figure(figsize=(5.5,4))
|
||||
|
||||
plt.bar(merged_data["purchase_date_month"], merged_data["nb_purchases_new"], width=12, label="New Customers")
|
||||
plt.bar(merged_data["purchase_date_month"], merged_data["nb_purchases_old"],
|
||||
bottom=merged_data["nb_purchases_new"], width=12, label="Existing Customers")
|
||||
|
@ -360,7 +375,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity):
|
|||
|
||||
plt.xlabel('Month')
|
||||
plt.ylabel("Number of Sales")
|
||||
plt.title(f"Number of Sales Across {type_of_activity} Companies")
|
||||
# plt.title(f"Number of Sales Across {type_of_activity} Companies")
|
||||
plt.legend()
|
||||
plt.show()
|
||||
save_file_s3("sale_dynamics_", type_of_activity)
|
||||
|
@ -373,7 +388,7 @@ def tickets_internet(tickets, type_of_activity):
|
|||
|
||||
plt.xlabel('Company Number')
|
||||
plt.ylabel("Share of Purchases Bought Online (%)")
|
||||
plt.title(f"Share of Online Purchases Across {type_of_activity} Companies")
|
||||
# plt.title(f"Share of Online Purchases Across {type_of_activity} Companies")
|
||||
plt.xticks(nb_tickets_internet["number_company"], ["{}".format(i) for i in nb_tickets_internet["number_company"]])
|
||||
plt.show()
|
||||
save_file_s3("tickets_internet_", type_of_activity)
|
||||
|
@ -389,7 +404,7 @@ def already_bought_online(tickets, type_of_activity):
|
|||
|
||||
plt.xlabel('Company Number')
|
||||
plt.ylabel("Share of Customer who Bought Online at least once (%)")
|
||||
plt.title(f"Share of Customer who Bought Online at least once Across {type_of_activity} Companies")
|
||||
# plt.title(f"Share of Customer who Bought Online at least once Across {type_of_activity} Companies")
|
||||
plt.xticks(nb_consumers_online["number_company"], ["{}".format(i) for i in nb_consumers_online["number_company"]])
|
||||
plt.show()
|
||||
save_file_s3("First_buy_internet_", type_of_activity)
|
||||
|
@ -397,8 +412,11 @@ def already_bought_online(tickets, type_of_activity):
|
|||
|
||||
def box_plot_price_tickets(tickets, type_of_activity):
|
||||
price_tickets = tickets[(tickets['total_amount'] > 0)]
|
||||
plt.figure(figsize=(4,3))
|
||||
sns.boxplot(data=price_tickets, y="total_amount", x="number_company", showfliers=False, showmeans=True)
|
||||
plt.title(f"Box plot of price tickets Across {type_of_activity} Companies")
|
||||
# plt.title(f"Box plot of price tickets Across {type_of_activity} Companies")
|
||||
plt.xlabel('Company Number')
|
||||
plt.ylabel("Total Amount Spent")
|
||||
plt.show()
|
||||
save_file_s3("box_plot_price_tickets_", type_of_activity)
|
||||
|
||||
|
@ -417,7 +435,7 @@ def target_description(targets, type_of_activity):
|
|||
plot = describe_target.plot.bar()
|
||||
|
||||
# Adding a title
|
||||
plot.set_title(f"Distribution of Targets by Category for {type_of_activity} companies")
|
||||
# plot.set_title(f"Distribution of Targets by Category for {type_of_activity} companies")
|
||||
|
||||
# Adding labels for x and y axes
|
||||
plot.set_xlabel("Company Number")
|
||||
|
|
Loading…
Reference in New Issue
Block a user