Add type annotations to function parameters

This commit is contained in:
Alexis REVELLE 2024-04-04 08:58:34 +00:00
parent df4c28bdd8
commit e54e6c3b10

View File

@ -65,7 +65,7 @@ def load_files(nb_compagnie):
return customer, campaigns_kpi, campaigns_brut, tickets, products, targets
def remove_outlier_total_amount(tickets):
def remove_outlier_total_amount(tickets : pd.DataFrame):
Q1 = tickets['total_amount'].quantile(0.25)
Q3 = tickets['total_amount'].quantile(0.75)
IQR = Q3 - Q1
@ -89,7 +89,7 @@ def save_file_s3(File_name, type_of_activity):
plt.close()
def outlier_detection(tickets, company_list, show_diagram=False):
def outlier_detection(tickets : pd.DataFrame, company_list, show_diagram=False):
"""
detect anonymous customers
"""
@ -121,7 +121,7 @@ def outlier_detection(tickets, company_list, show_diagram=False):
return outlier_list
def valid_customer_detection(products, campaigns_brut):
def valid_customer_detection(products : pd.DataFrame, campaigns_brut : pd.DataFrame):
"""
identify customers that fall within our time frame
"""
@ -135,7 +135,7 @@ def valid_customer_detection(products, campaigns_brut):
return consumer_valid
def identify_purchase_during_target_periode(products):
def identify_purchase_during_target_periode(products : pd.DataFrame):
"""
identify customers who purchased tickets during the target period
"""
@ -149,7 +149,7 @@ def remove_elements(lst, elements_to_remove):
return ''.join([x for x in lst if x not in elements_to_remove])
def compute_nb_clients(customer, type_of_activity):
def compute_nb_clients(customer: pd.DataFrame, type_of_activity: str):
company_nb_clients = customer[customer["purchase_count"]>0].groupby("number_company")["customer_id"].count().reset_index()
plt.figure(figsize=(4,3))
plt.bar(company_nb_clients["number_company"], company_nb_clients["customer_id"]/1000)
@ -161,7 +161,7 @@ def compute_nb_clients(customer, type_of_activity):
save_file_s3("nb_clients_", type_of_activity)
def maximum_price_paid(customer, type_of_activity):
def maximum_price_paid(customer: pd.DataFrame, type_of_activity: str):
company_max_price = customer.groupby("number_company")["max_price"].max().reset_index()
plt.bar(company_max_price["number_company"], company_max_price["max_price"])
@ -173,7 +173,7 @@ def maximum_price_paid(customer, type_of_activity):
save_file_s3("Maximal_price_", type_of_activity)
def target_proportion(customer, type_of_activity):
def target_proportion(customer: pd.DataFrame, type_of_activity: str):
df_y = customer.groupby(["number_company"]).agg({"has_purchased_target_period" : 'sum',
'customer_id' : 'nunique'}).reset_index()
df_y['prop_has_purchased_target_period'] = (df_y["has_purchased_target_period"]/df_y['customer_id'])*100
@ -186,7 +186,7 @@ def target_proportion(customer, type_of_activity):
save_file_s3("share_target_", type_of_activity)
def mailing_consent(customer, type_of_activity):
def mailing_consent(customer: pd.DataFrame, type_of_activity: str):
mailing_consent = customer.groupby("number_company")["opt_in"].mean().reset_index()
mailing_consent["opt_in"] *= 100
plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"])
@ -199,7 +199,7 @@ def mailing_consent(customer, type_of_activity):
save_file_s3("mailing_consent_", type_of_activity)
def mailing_consent_by_target(customer):
def mailing_consent_by_target(customer: pd.DataFrame, type_of_activity: str):
df_graph = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
# Create the grouped bar plot
fig, ax = plt.subplots(figsize=(5, 3))
@ -232,7 +232,7 @@ def mailing_consent_by_target(customer):
save_file_s3("mailing_consent_target_", type_of_activity)
def gender_bar(customer, type_of_activity):
def gender_bar(customer: pd.DataFrame, type_of_activity: str):
company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index()
company_genders["gender_male"] *= 100
@ -256,7 +256,7 @@ def gender_bar(customer, type_of_activity):
save_file_s3("gender_bar_", type_of_activity)
def country_bar(customer, type_of_activity):
def country_bar(customer: pd.DataFrame, type_of_activity: str):
company_country_fr = customer.groupby("number_company")["country_fr"].mean().reset_index()
company_country_fr["country_fr"] *= 100
plt.figure(figsize=(4,3))
@ -269,7 +269,7 @@ def country_bar(customer, type_of_activity):
save_file_s3("country_bar_", type_of_activity)
def lazy_customer_plot(campaigns_kpi, type_of_activity):
def lazy_customer_plot(campaigns_kpi: pd.DataFrame, type_of_activity: str):
company_lazy_customers = campaigns_kpi.groupby("number_company")[["nb_campaigns", "taux_ouverture_mail"]].mean().reset_index()
company_lazy_customers["taux_ouverture_mail"] *= 100
@ -303,7 +303,7 @@ def lazy_customer_plot(campaigns_kpi, type_of_activity):
save_file_s3("lazy_customer_", type_of_activity)
def campaigns_effectiveness(customer, type_of_activity):
def campaigns_effectiveness(customer: pd.DataFrame, type_of_activity: str):
campaigns_effectiveness = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
@ -335,7 +335,7 @@ def campaigns_effectiveness(customer, type_of_activity):
save_file_s3("campaigns_effectiveness_", type_of_activity)
def sale_dynamics(products, campaigns_brut, type_of_activity):
def sale_dynamics(products : pd.DataFrame, campaigns_brut : pd.DataFrame, type_of_activity):
purchase_min = products.groupby(['customer_id'])['purchase_date'].min().reset_index()
purchase_min.rename(columns = {'purchase_date' : 'first_purchase_event'}, inplace = True)
purchase_min['first_purchase_event'] = pd.to_datetime(purchase_min['first_purchase_event'])
@ -395,7 +395,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity):
save_file_s3("sale_dynamics_", type_of_activity)
def tickets_internet(tickets, type_of_activity):
def tickets_internet(tickets: pd.DataFrame, type_of_activity: str):
nb_tickets_internet = tickets.groupby("number_company")['prop_purchases_internet'].mean().reset_index()
nb_tickets_internet['prop_purchases_internet'] *=100
plt.bar(nb_tickets_internet["number_company"], nb_tickets_internet["prop_purchases_internet"])
@ -408,7 +408,7 @@ def tickets_internet(tickets, type_of_activity):
save_file_s3("tickets_internet_", type_of_activity)
def already_bought_online(tickets, type_of_activity):
def already_bought_online(tickets: pd.DataFrame, type_of_activity: str):
nb_consumers_online = (tickets.groupby("number_company").agg({'achat_internet' : 'sum',
'customer_id' : 'nunique'}
).reset_index())
@ -424,7 +424,7 @@ def already_bought_online(tickets, type_of_activity):
save_file_s3("First_buy_internet_", type_of_activity)
def box_plot_price_tickets(tickets, type_of_activity):
def box_plot_price_tickets(tickets: pd.DataFrame, type_of_activity: str):
price_tickets = tickets[(tickets['total_amount'] > 0)]
plt.figure(figsize=(4,3))
sns.boxplot(data=price_tickets, y="total_amount", x="number_company", showfliers=False, showmeans=True)
@ -434,7 +434,7 @@ def box_plot_price_tickets(tickets, type_of_activity):
plt.show()
save_file_s3("box_plot_price_tickets_", type_of_activity)
def target_description(targets, type_of_activity):
def target_description(targets : pd.DataFrame, type_of_activity: str):
describe_target = targets.groupby('number_company').agg(
prop_target_jeune=('target_jeune', lambda x: (x.sum() / x.count())*100),