add pipeline ML
This commit is contained in:
parent
ee86fcaf84
commit
d328caa665
68
0_4_Generate_stat_desc.py
Normal file
68
0_4_Generate_stat_desc.py
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import io
|
||||||
|
import s3fs
|
||||||
|
import re
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
# Silence library warnings so that the console output stays readable.
warnings.filterwarnings('ignore')

# The helper scripts are executed in this namespace instead of being imported:
# '0_KPI_functions.py' is not a valid module name, and the helpers in
# 'utils_stat_desc.py' read globals defined in this script (e.g. ``fs``).
# The files are opened in a ``with`` block so the handles are always closed.
for helper_script in ('0_KPI_functions.py', 'utils_stat_desc.py'):
    with open(helper_script, encoding='utf-8') as helper_source:
        exec(helper_source.read())

# Create filesystem object (used by the helpers to read from / write to S3)
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})

# Company identifiers grouped by type of activity
# (company '101' is currently left out of the 'musee' group).
companies = {
    'musee': ['1', '2', '3', '4'],
    'sport': ['5'],
    'musique': ['10', '11', '12', '13', '14'],
}

# Ask which group of companies to analyse; surrounding spaces and letter case
# are ignored, and an unknown answer stops the script with a clear message
# instead of a bare KeyError.
type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?').strip().lower()
if type_of_activity not in companies:
    raise SystemExit(
        f"Unknown type of activity {type_of_activity!r}; "
        f"expected one of: {', '.join(companies)}"
    )
list_of_comp = companies[type_of_activity]

# Load files
customer, campaigns_kpi, campaigns_brut, tickets, products = load_files(list_of_comp)

# Identify anonymous customer for each company and remove them from our datasets
outlier_list = outlier_detection(tickets, list_of_comp)

# Identify valid customer (customer who bought tickets after starting date or received mails after starting date)
customer_valid_list = valid_customer_detection(products, campaigns_brut)
|
||||||
|
|
||||||
|
# Remove the outlier accounts and keep only the valid customers.
# The frames are filtered IN PLACE: assigning the filtered frame to the loop
# variable would only rebind that variable and leave `customer`, `tickets`,
# etc. untouched, so the charts below would be drawn on unfiltered data.
outlier_ids = set(outlier_list)
valid_ids = set(customer_valid_list)
databases = [customer, campaigns_kpi, campaigns_brut, tickets, products]

for dataset in databases:
    customer_ids = dataset['customer_id']
    keep = customer_ids.isin(valid_ids) & ~customer_ids.isin(outlier_ids)
    # Dropping by label assumes a unique index, which load_files provides
    # (its frames are concatenated with ignore_index=True).
    dataset.drop(index=dataset.index[~keep], inplace=True)
|
||||||
|
|
||||||
|
# Flag the customers who bought something during the target period (the "y" period)
target_period_buyers = identify_purchase_during_target_periode(products)
bought_in_target_period = customer['customer_id'].isin(target_period_buyers)
customer['has_purchased_target_period'] = np.where(bought_in_target_period, 1, 0)

# Draw the charts; every helper called below also uploads its figure to the bucket
for draw_customer_chart in (compute_nb_clients, maximum_price_paid, mailing_consent):
    draw_customer_chart(customer, type_of_activity)

mailing_consent_by_target(customer)

for draw_customer_chart in (gender_bar, country_bar):
    draw_customer_chart(customer, type_of_activity)

lazy_customer_plot(campaigns_kpi, type_of_activity)

# campaigns_effectiveness(customer, type_of_activity) is currently disabled

sale_dynamics(products, campaigns_brut, type_of_activity)

for draw_ticket_chart in (tickets_internet, box_plot_price_tickets):
    draw_ticket_chart(tickets, type_of_activity)
|
327
utils_stat_desc.py
Normal file
327
utils_stat_desc.py
Normal file
|
@ -0,0 +1,327 @@
|
||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
import s3fs
|
||||||
|
import io
|
||||||
|
import warnings
|
||||||
|
from datetime import date, timedelta, datetime
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import matplotlib.dates as mdates
|
||||||
|
import seaborn as sns
|
||||||
|
|
||||||
|
|
||||||
|
def load_files(nb_compagnie):
    """Load the datasets of several companies and stack them into five frames.

    For every company identifier the raw tables are read with
    ``display_databases``, the KPI tables are derived from them, and each
    table is tagged so that rows from different companies can be told apart:

    * ``number_company`` (the identifier converted with ``int``) is added to
      the tickets KPI, raw campaigns, campaigns KPI and customer tables
      (the products table does not receive this column);
    * ``customer_id`` is rewritten as ``"<company>_<customer_id>"`` in all
      five tables.

    Parameters
    ----------
    nb_compagnie : iterable of str
        Company identifiers; each one is passed to ``display_databases`` as
        the directory and must be convertible with ``int``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(customer, campaigns_kpi, campaigns_brut, tickets, products)``.
        Each frame is empty when ``nb_compagnie`` is empty.
    """
    customer_parts = []
    campaigns_kpi_parts = []
    campaigns_brut_parts = []
    tickets_parts = []
    products_parts = []

    def _stack(parts):
        # One concat per table instead of one per company: avoids re-copying
        # the accumulated frame at every iteration.
        return pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()

    for directory_path in nb_compagnie:
        # Raw tables. The 'target_information' table is not read here because
        # nothing in this function uses or returns it.
        df_customerplus_clean_0 = display_databases(directory_path, file_name="customerplus_cleaned")
        df_campaigns_brut = display_databases(
            directory_path,
            file_name="campaigns_information",
            datetime_col=['opened_at', 'sent_at', 'campaign_sent_at'],
        )
        df_products_purchased_reduced = display_databases(
            directory_path,
            file_name="products_purchased_reduced",
            datetime_col=['purchase_date'],
        )

        # KPI tables, computed before the customer ids are prefixed
        df_campaigns_kpi = campaigns_kpi_function(campaigns_information=df_campaigns_brut)
        df_tickets_kpi = tickets_kpi_function(tickets_information=df_products_purchased_reduced)
        df_customerplus_clean = customerplus_kpi_function(customerplus_clean=df_customerplus_clean_0)

        # Company number column, used later to aggregate results per company
        company_number = int(directory_path)
        for frame in (df_tickets_kpi, df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean):
            frame["number_company"] = company_number

        # Make customer ids unique across companies
        for frame in (
            df_tickets_kpi,
            df_campaigns_brut,
            df_campaigns_kpi,
            df_customerplus_clean,
            df_products_purchased_reduced,
        ):
            frame["customer_id"] = directory_path + '_' + frame['customer_id'].astype('str')

        customer_parts.append(df_customerplus_clean)
        campaigns_kpi_parts.append(df_campaigns_kpi)
        campaigns_brut_parts.append(df_campaigns_brut)
        tickets_parts.append(df_tickets_kpi)
        products_parts.append(df_products_purchased_reduced)

    customer = _stack(customer_parts)
    campaigns_kpi = _stack(campaigns_kpi_parts)
    campaigns_brut = _stack(campaigns_brut_parts)
    tickets = _stack(tickets_parts)
    products = _stack(products_parts)

    return customer, campaigns_kpi, campaigns_brut, tickets, products
|
||||||
|
|
||||||
|
|
||||||
|
def save_file_s3(File_name, type_of_activity):
    """Upload the current matplotlib figure to S3 as a PNG, then close it.

    The object is written to
    ``projet-bdc2324-team1/stat_desc/<type_of_activity>/<File_name><type_of_activity>.png``
    through the module-level ``fs`` filesystem object.

    Parameters
    ----------
    File_name : str
        Prefix of the file name (the callers pass it with a trailing ``_``).
    type_of_activity : str
        Used both as sub-directory and as file name suffix.

    Notes
    -----
    The figure is closed even when rendering or uploading fails, so a failed
    upload does not leave a figure open.

    NOTE(review): the chart helpers call ``plt.show()`` right before this
    function; on backends where ``show()`` closes the figure the saved image
    would be blank - confirm with the backend used in production.
    """
    FILE_PATH = f"projet-bdc2324-team1/stat_desc/{type_of_activity}/"
    FILE_PATH_OUT_S3 = FILE_PATH + File_name + type_of_activity + '.png'

    try:
        # Render into memory first, then push the bytes to the bucket
        image_buffer = io.BytesIO()
        plt.savefig(image_buffer, format='png')
        with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:
            s3_file.write(image_buffer.getvalue())
    finally:
        plt.close()
|
||||||
|
|
||||||
|
|
||||||
|
def outlier_detection(tickets, company_list, show_diagram=False):
    """Return, for each company, the customer with the largest total amount.

    Parameters
    ----------
    tickets : pandas.DataFrame
        Must contain the columns ``number_company``, ``customer_id`` and
        ``total_amount``.
    company_list : iterable
        Company identifiers; each one is converted with ``int`` before being
        compared with ``number_company``.
    show_diagram : bool, default False
        When true, draw for each company a pie chart of the top customer's
        amount against the sum of all the other customers ('Autre').

    Returns
    -------
    list
        One ``customer_id`` per company that has at least one ticket row, in
        the order of ``company_list``. Companies without any row are skipped
        instead of raising an ``IndexError``.
    """
    outlier_list = []

    for company in company_list:
        company_tickets = tickets[tickets['number_company'] == int(company)]
        # Total amount per customer, largest first
        amount_per_customer = (
            company_tickets.groupby('customer_id')['total_amount']
            .sum()
            .sort_values(ascending=False)
        )
        if amount_per_customer.empty:
            continue

        top = amount_per_customer.iloc[:1]
        outlier_list.append(top.index[0])

        if show_diagram:
            rest_sum = amount_per_customer.iloc[1:].sum()
            new_series = pd.concat([top, pd.Series([rest_sum], index=['Autre'])])

            plt.figure(figsize=(3, 3))
            plt.pie(new_series, labels=new_series.index, autopct='%1.1f%%', startangle=140, pctdistance=0.5)
            plt.axis('equal')
            plt.title(f'Répartition des montants totaux pour la compagnie {company}')
            plt.show()

    return outlier_list
|
||||||
|
|
||||||
|
|
||||||
|
def valid_customer_detection(products, campaigns_brut, start_date="2021-05-01"):
    """List the customers who purchased or were sent a mail since ``start_date``.

    Parameters
    ----------
    products : pandas.DataFrame
        Must contain ``customer_id`` and ``purchase_date``.
    campaigns_brut : pandas.DataFrame
        Must contain ``customer_id`` and ``sent_at``.
    start_date : str, default "2021-05-01"
        Inclusive lower bound compared with ``purchase_date`` and ``sent_at``.

    Returns
    -------
    list
        Customer ids from the matching purchase rows followed by those from
        the matching campaign rows. Ids are NOT de-duplicated: a customer
        appears once per matching row.
    """
    recent_purchases = products[products['purchase_date'] >= start_date]
    recent_mails = campaigns_brut[campaigns_brut["sent_at"] >= start_date]

    return recent_purchases['customer_id'].to_list() + recent_mails['customer_id'].to_list()
|
||||||
|
|
||||||
|
|
||||||
|
def identify_purchase_during_target_periode(products, start_date="2022-11-01", end_date="2023-11-01"):
    """List the customers who purchased between ``start_date`` and ``end_date``.

    Parameters
    ----------
    products : pandas.DataFrame
        Must contain ``customer_id`` and ``purchase_date``.
    start_date : str, default "2022-11-01"
        Inclusive lower bound compared with ``purchase_date``.
    end_date : str, default "2023-11-01"
        Inclusive upper bound compared with ``purchase_date``.

    Returns
    -------
    list
        One ``customer_id`` per matching purchase row (ids are not
        de-duplicated).
    """
    purchase_dates = products['purchase_date']
    in_target_period = (purchase_dates >= start_date) & (purchase_dates <= end_date)
    return products.loc[in_target_period, 'customer_id'].to_list()
|
||||||
|
|
||||||
|
|
||||||
|
def remove_elements(lst, elements_to_remove):
    """Join the items of ``lst`` that are not in ``elements_to_remove``.

    ``lst`` is iterated item by item, so when it is a string the test is done
    on each single character. The kept items are concatenated into one string
    and must therefore be strings themselves.
    """
    kept = []
    for item in lst:
        if item in elements_to_remove:
            continue
        kept.append(item)
    return ''.join(kept)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_nb_clients(customer, type_of_activity):
    """Bar chart of the number of buying customers per company, saved to S3.

    Only the rows of ``customer`` with ``purchase_count > 0`` are counted;
    the bar heights are expressed in thousands. ``type_of_activity`` is used
    in the title and in the name of the saved file.
    """
    buyers = customer[customer["purchase_count"] > 0]
    clients_per_company = buyers.groupby("number_company")["customer_id"].count().reset_index()
    company_numbers = clients_per_company["number_company"]

    plt.bar(company_numbers, clients_per_company["customer_id"] / 1000)

    plt.xlabel('Company')
    plt.ylabel("Number of clients (thousands)")
    plt.title(f"Number of clients for {type_of_activity}")
    # One tick per company, labelled with the company number
    plt.xticks(company_numbers, [str(number) for number in company_numbers])
    plt.show()
    save_file_s3("nb_clients_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def maximum_price_paid(customer, type_of_activity):
    """Bar chart of the highest ``max_price`` per company, saved to S3.

    ``type_of_activity`` is used in the title and in the name of the saved
    file.
    """
    highest_price = customer.groupby("number_company")["max_price"].max().reset_index()
    company_numbers = highest_price["number_company"]

    plt.bar(company_numbers, highest_price["max_price"])

    plt.xlabel('Company')
    plt.ylabel("Maximal price of a ticket Prix")
    plt.title(f"Maximal price of a ticket for {type_of_activity}")
    # One tick per company, labelled with the company number
    plt.xticks(company_numbers, [str(number) for number in company_numbers])
    plt.show()
    save_file_s3("Maximal_price_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def mailing_consent(customer, type_of_activity):
    """Bar chart of the mean ``opt_in`` value per company, saved to S3.

    ``type_of_activity`` is used in the title and in the name of the saved
    file.
    """
    consent_per_company = customer.groupby("number_company")["opt_in"].mean().reset_index()
    company_numbers = consent_per_company["number_company"]

    plt.bar(company_numbers, consent_per_company["opt_in"])

    plt.xlabel('Company')
    plt.ylabel('Consent')
    plt.title(f'Consent of mailing for {type_of_activity}')
    # One tick per company, labelled with the company number
    plt.xticks(company_numbers, [str(number) for number in company_numbers])
    plt.show()
    save_file_s3("mailing_consent_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def mailing_consent_by_target(customer, type_of_activity=None):
    """Grouped bar chart of mailing consent per company, split by target flag.

    For every company the mean ``opt_in`` (in percent) is drawn separately
    for the customers with and without ``has_purchased_target_period``; the
    figure is then uploaded with ``save_file_s3``.

    Parameters
    ----------
    customer : pandas.DataFrame
        Must contain ``number_company``, ``has_purchased_target_period`` and
        ``opt_in``.
    type_of_activity : str, optional
        Used in the title and in the name of the saved file. When omitted,
        the module-level ``type_of_activity`` variable is used, which is what
        this function relied on before the parameter existed.

    Raises
    ------
    ValueError
        If ``type_of_activity`` is neither given nor defined as a global.
    """
    if type_of_activity is None:
        type_of_activity = globals().get('type_of_activity')
        if type_of_activity is None:
            raise ValueError("type_of_activity must be given or defined as a global variable")

    # Rows: companies, columns: target flag, values: consent in percent.
    # A company with no customer in one of the two groups gets 0 there
    # instead of making the lookup fail.
    consent_pct = (
        customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"]
        .mean()
        .unstack("has_purchased_target_period")
        .fillna(0)
        * 100
    )

    # Grouped bar plot
    fig, ax = plt.subplots(figsize=(10, 6))

    categories = consent_pct.index
    bar_width = 0.35
    base_positions = np.arange(len(categories))

    # One series of bars per target flag, each shifted by one bar width
    for rank, label in enumerate(consent_pct.columns):
        label_printed = "purchased" if label else "no purchase"
        ax.bar(base_positions + rank * bar_width, consent_pct[label], bar_width, label=label_printed)

    # Labels, legend, ticks centred between the two bars of each company
    ax.set_xlabel('Company')
    ax.set_ylabel('Consent')
    ax.set_title(f'Consent of mailing according to target for {type_of_activity}')
    ax.set_xticks(base_positions + bar_width / 2)
    ax.set_xticklabels(categories)
    ax.legend()

    # Display the plot
    plt.show()
    save_file_s3("mailing_consent_target_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def gender_bar(customer, type_of_activity):
    """Stacked bar chart of the mean gender indicators per company, saved to S3.

    The means of ``gender_male``, ``gender_female`` and ``gender_other`` are
    stacked in that order for every company. ``type_of_activity`` is used in
    the title and in the name of the saved file.
    """
    gender_columns = ["gender_male", "gender_female", "gender_other"]
    gender_share = customer.groupby("number_company")[gender_columns].mean().reset_index()

    company_numbers = gender_share["number_company"]
    male_share = gender_share["gender_male"]
    female_share = gender_share["gender_female"]

    # Each layer starts where the previous ones end
    plt.bar(company_numbers, male_share, label="Homme")
    plt.bar(company_numbers, female_share, bottom=male_share, label="Femme")
    plt.bar(company_numbers, gender_share["gender_other"], bottom=male_share + female_share, label="Inconnu")

    plt.xlabel('Company')
    plt.ylabel("Gender")
    plt.title(f"Gender of Customer for {type_of_activity}")
    plt.legend()
    # One tick per company, labelled with the company number
    plt.xticks(company_numbers, [str(number) for number in company_numbers])
    plt.show()
    save_file_s3("gender_bar_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def country_bar(customer, type_of_activity):
    """Bar chart of the mean ``country_fr`` value per company, saved to S3.

    ``type_of_activity`` is used in the title and in the name of the saved
    file.
    """
    french_share = customer.groupby("number_company")["country_fr"].mean().reset_index()
    company_numbers = french_share["number_company"]

    plt.bar(company_numbers, french_share["country_fr"])

    plt.xlabel('Company')
    plt.ylabel("Share of French Customer")
    plt.title(f"Share of French Customer for {type_of_activity}")
    # One tick per company, labelled with the company number
    plt.xticks(company_numbers, [str(number) for number in company_numbers])
    plt.show()
    save_file_s3("country_bar_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def lazy_customer_plot(campaigns_kpi, type_of_activity):
    """Bar chart of the mean ``nb_campaigns_opened`` per company, saved to S3.

    ``type_of_activity`` is used in the title and in the name of the saved
    file.

    NOTE(review): the axis label and title announce a "share of customers who
    did not open mail" whereas the plotted value is the mean of
    ``nb_campaigns_opened`` - confirm which one is intended.
    """
    opened_per_company = campaigns_kpi.groupby("number_company")["nb_campaigns_opened"].mean().reset_index()
    company_numbers = opened_per_company["number_company"]

    plt.bar(company_numbers, opened_per_company["nb_campaigns_opened"])

    plt.xlabel('Company')
    plt.ylabel("Share of Customers who did not open mail")
    plt.title(f"Share of Customers who did not open mail for {type_of_activity}")
    # One tick per company, labelled with the company number
    plt.xticks(company_numbers, [str(number) for number in company_numbers])
    plt.show()
    save_file_s3("lazy_customer_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def campaigns_effectiveness(customer, type_of_activity):
    """Bar chart of the mean ``opt_in`` value per company, saved to S3.

    ``type_of_activity`` is used in the title and in the name of the saved
    file.

    NOTE(review): the axis label and title talk about a number of customers
    while the plotted value is the mean of ``opt_in``, and the legend is
    requested although no bar carries a label - confirm the intended metric.
    """
    opt_in_per_company = customer.groupby("number_company")["opt_in"].mean().reset_index()
    company_numbers = opt_in_per_company["number_company"]

    plt.bar(company_numbers, opt_in_per_company["opt_in"])

    plt.xlabel('Company')
    plt.ylabel("Number of Customers (thousands)")
    plt.title(f"Number of Customers of have bought or have received mails for {type_of_activity}")
    plt.legend()
    # One tick per company, labelled with the company number
    plt.xticks(company_numbers, [str(number) for number in company_numbers])
    plt.show()
    save_file_s3("campaigns_effectiveness_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def sale_dynamics(products, campaigns_brut, type_of_activity):
    """Stacked monthly bar chart of purchases by new vs. known customers.

    A customer becomes "known" from the month of their first purchase or of
    their first received mail, whichever comes first. A purchase is counted
    as made by a known ("Ancien") customer when it happens more than one
    month after that date, otherwise by a new ("Nouveau") customer. The
    figure is uploaded with ``save_file_s3``.

    Parameters
    ----------
    products : pandas.DataFrame
        Must contain ``customer_id``, ``purchase_id`` and ``purchase_date``.
    campaigns_brut : pandas.DataFrame
        Must contain ``customer_id`` and ``sent_at``.
    type_of_activity : str
        Used in the title and in the name of the saved file.

    Notes
    -----
    Only months from March 2021 onwards are drawn, and a month is drawn only
    if it has purchases in BOTH categories (the two series are combined with
    an inner merge).
    """
    # Month of the first purchase of each customer
    purchase_min = products.groupby(['customer_id'])['purchase_date'].min().reset_index()
    purchase_min.rename(columns = {'purchase_date' : 'first_purchase_event'}, inplace = True)
    purchase_min['first_purchase_event'] = pd.to_datetime(purchase_min['first_purchase_event'])
    purchase_min['first_purchase_month'] = pd.to_datetime(purchase_min['first_purchase_event'].dt.strftime('%Y-%m'))

    # Month of the first mail received by each customer
    first_mail_received = campaigns_brut.groupby('customer_id')['sent_at'].min().reset_index()
    first_mail_received.rename(columns = {'sent_at' : 'first_email_reception'}, inplace = True)
    first_mail_received['first_email_reception'] = pd.to_datetime(first_mail_received['first_email_reception'])
    first_mail_received['first_email_month'] = pd.to_datetime(first_mail_received['first_email_reception'].dt.strftime('%Y-%m'))

    # Merge: keep customers present in either table
    known_customer = pd.merge(purchase_min[['customer_id', 'first_purchase_month']],
                              first_mail_received[['customer_id', 'first_email_month']], on = 'customer_id', how = 'outer')

    # Month from which the customer is considered as known (converted to UTC)
    known_customer['known_date'] = pd.to_datetime(known_customer[['first_email_month', 'first_purchase_month']].min(axis = 1), utc = True, format = 'ISO8601')

    # One row per purchase, with the date from which its customer is known
    purchases_count = pd.merge(products[['customer_id', 'purchase_id', 'purchase_date']].drop_duplicates(), known_customer[['customer_id', 'known_date']], on = ['customer_id'], how = 'inner')
    # NOTE(review): known_date is timezone-aware, so this comparison presumably
    # expects purchase_date to be timezone-aware as well - confirm.
    purchases_count['is_customer_known'] = purchases_count['purchase_date'] > purchases_count['known_date'] + pd.DateOffset(months=1)
    purchases_count['purchase_date_month'] = pd.to_datetime(purchases_count['purchase_date'].dt.strftime('%Y-%m'))
    # NOTE(review): the comparison is made with the integer 1; if customer_id
    # holds strings this filter removes nothing - confirm the intent.
    purchases_count = purchases_count[purchases_count['customer_id'] != 1]

    # Number of purchases per month and per type of customer
    # (the per-month count of distinct customers that used to be computed
    # here was never used and has been removed)
    nb_purchases_graph = purchases_count.groupby(['purchase_date_month', 'is_customer_known'])['purchase_id'].count().reset_index()
    nb_purchases_graph.rename(columns = {'purchase_id' : 'nb_purchases'}, inplace = True)

    # Chart of the number of purchases
    purchases_graph_used = nb_purchases_graph[nb_purchases_graph["purchase_date_month"] >= datetime(2021,3,1)]
    purchases_graph_used_0 = purchases_graph_used[purchases_graph_used["is_customer_known"]==False]
    purchases_graph_used_1 = purchases_graph_used[purchases_graph_used["is_customer_known"]==True]

    # "_new" columns: purchases by new customers, "_old": by known customers
    merged_data = pd.merge(purchases_graph_used_0, purchases_graph_used_1, on="purchase_date_month", suffixes=("_new", "_old"))

    plt.bar(merged_data["purchase_date_month"], merged_data["nb_purchases_new"], width=12, label="Nouveau client")
    plt.bar(merged_data["purchase_date_month"], merged_data["nb_purchases_old"],
            bottom=merged_data["nb_purchases_new"], width=12, label="Ancien client")

    # Show the ticks as abbreviated month + two-digit year
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))

    plt.xlabel('Month')
    plt.ylabel("Number of Sales")
    plt.title(f"Number of Sales for {type_of_activity}")
    plt.legend()
    plt.show()
    save_file_s3("sale_dynamics_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def tickets_internet(tickets, type_of_activity):
    """Bar chart of the percentage of tickets bought online per company, saved to S3.

    The percentage is the sum of ``nb_tickets_internet`` divided by the sum
    of ``nb_tickets`` for each company. ``type_of_activity`` is used in the
    title and in the name of the saved file.
    """
    ticket_columns = ["nb_tickets", "nb_tickets_internet"]
    ticket_totals = tickets.groupby("number_company")[ticket_columns].sum().reset_index()
    ticket_totals["Share_ticket_internet"] = (
        ticket_totals["nb_tickets_internet"] * 100 / ticket_totals["nb_tickets"]
    )
    company_numbers = ticket_totals["number_company"]

    plt.bar(company_numbers, ticket_totals["Share_ticket_internet"])

    plt.xlabel('Company')
    plt.ylabel("Share of Tickets Bought Online")
    plt.title(f"Share of Tickets Bought Online for {type_of_activity}")
    # One tick per company, labelled with the company number
    plt.xticks(company_numbers, [str(number) for number in company_numbers])
    plt.show()
    save_file_s3("tickets_internet_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def box_plot_price_tickets(tickets, type_of_activity):
    """Box plot of the positive ``total_amount`` values per company, saved to S3.

    Rows with ``total_amount <= 0`` are ignored; outliers are hidden and the
    mean is marked on every box. ``type_of_activity`` is used in the title
    and in the name of the saved file.
    """
    price_tickets = tickets[(tickets['total_amount'] > 0)]
    sns.boxplot(data=price_tickets, y="total_amount", x="number_company", showfliers=False, showmeans=True)

    # The tick positions and labels are left to seaborn, which places the
    # boxes on a categorical axis. Forcing ticks from the raw column (one
    # tick per row, positioned at the company number) put the labels under
    # the wrong boxes and created one tick per ticket row.
    plt.xlabel('Company')
    plt.title(f"Box plot of price tickets for {type_of_activity}")
    plt.show()
    save_file_s3("box_plot_price_tickets_", type_of_activity)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user