From e54e6c3b1032162c3bb17a865b8200d227a03466 Mon Sep 17 00:00:00 2001 From: arevelle-ensae Date: Thu, 4 Apr 2024 08:58:34 +0000 Subject: [PATCH] add type of variables --- utils_stat_desc.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/utils_stat_desc.py b/utils_stat_desc.py index 90bfcf8..ee1d4f1 100644 --- a/utils_stat_desc.py +++ b/utils_stat_desc.py @@ -65,7 +65,7 @@ def load_files(nb_compagnie): return customer, campaigns_kpi, campaigns_brut, tickets, products, targets -def remove_outlier_total_amount(tickets): +def remove_outlier_total_amount(tickets : pd.DataFrame): Q1 = tickets['total_amount'].quantile(0.25) Q3 = tickets['total_amount'].quantile(0.75) IQR = Q3 - Q1 @@ -89,7 +89,7 @@ def save_file_s3(File_name, type_of_activity): plt.close() -def outlier_detection(tickets, company_list, show_diagram=False): +def outlier_detection(tickets : pd.DataFrame, company_list, show_diagram=False): """ detect anonymous customers """ @@ -121,7 +121,7 @@ def outlier_detection(tickets, company_list, show_diagram=False): return outlier_list -def valid_customer_detection(products, campaigns_brut): +def valid_customer_detection(products : pd.DataFrame, campaigns_brut : pd.DataFrame): """ identify customer that are in our time perimeter """ @@ -135,7 +135,7 @@ def valid_customer_detection(products, campaigns_brut): return consumer_valid -def identify_purchase_during_target_periode(products): +def identify_purchase_during_target_periode(products : pd.DataFrame): """ identify customer who purchased ticket during the target period """ @@ -149,7 +149,7 @@ def remove_elements(lst, elements_to_remove): return ''.join([x for x in lst if x not in elements_to_remove]) -def compute_nb_clients(customer, type_of_activity): +def compute_nb_clients(customer: pd.DataFrame, type_of_activity: str): company_nb_clients = customer[customer["purchase_count"]>0].groupby("number_company")["customer_id"].count().reset_index() plt.figure(figsize=(4,3)) plt.bar(company_nb_clients["number_company"], company_nb_clients["customer_id"]/1000) @@ -161,7 +161,7 @@ def compute_nb_clients(customer, type_of_activity): save_file_s3("nb_clients_", type_of_activity) -def maximum_price_paid(customer, type_of_activity): +def maximum_price_paid(customer: pd.DataFrame, type_of_activity: str): company_max_price = customer.groupby("number_company")["max_price"].max().reset_index() plt.bar(company_max_price["number_company"], company_max_price["max_price"]) @@ -173,7 +173,7 @@ def maximum_price_paid(customer, type_of_activity): save_file_s3("Maximal_price_", type_of_activity) -def target_proportion(customer, type_of_activity): +def target_proportion(customer: pd.DataFrame, type_of_activity: str): df_y = customer.groupby(["number_company"]).agg({"has_purchased_target_period" : 'sum', 'customer_id' : 'nunique'}).reset_index() df_y['prop_has_purchased_target_period'] = (df_y["has_purchased_target_period"]/df_y['customer_id'])*100 @@ -186,7 +186,7 @@ def target_proportion(customer, type_of_activity): save_file_s3("share_target_", type_of_activity) -def mailing_consent(customer, type_of_activity): +def mailing_consent(customer: pd.DataFrame, type_of_activity: str): mailing_consent = customer.groupby("number_company")["opt_in"].mean().reset_index() mailing_consent["opt_in"] *= 100 plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"]) @@ -199,7 +199,7 @@ def mailing_consent(customer, type_of_activity): save_file_s3("mailing_consent_", type_of_activity) -def mailing_consent_by_target(customer): +def mailing_consent_by_target(customer: pd.DataFrame, type_of_activity: str): df_graph = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index() # Création du barplot groupé fig, ax = plt.subplots(figsize=(5, 3)) @@ -232,7 +232,7 @@ def mailing_consent_by_target(customer): save_file_s3("mailing_consent_target_", type_of_activity) -def gender_bar(customer, type_of_activity): +def gender_bar(customer: pd.DataFrame, type_of_activity: str): company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index() company_genders["gender_male"] *= 100 @@ -256,7 +256,7 @@ def gender_bar(customer, type_of_activity): save_file_s3("gender_bar_", type_of_activity) -def country_bar(customer, type_of_activity): +def country_bar(customer: pd.DataFrame, type_of_activity: str): company_country_fr = customer.groupby("number_company")["country_fr"].mean().reset_index() company_country_fr["country_fr"] *= 100 plt.figure(figsize=(4,3)) @@ -269,7 +269,7 @@ def country_bar(customer, type_of_activity): save_file_s3("country_bar_", type_of_activity) -def lazy_customer_plot(campaigns_kpi, type_of_activity): +def lazy_customer_plot(campaigns_kpi: pd.DataFrame, type_of_activity: str): company_lazy_customers = campaigns_kpi.groupby("number_company")[["nb_campaigns", "taux_ouverture_mail"]].mean().reset_index() company_lazy_customers["taux_ouverture_mail"] *= 100 @@ -303,7 +303,7 @@ def lazy_customer_plot(campaigns_kpi, type_of_activity): save_file_s3("lazy_customer_", type_of_activity) -def campaigns_effectiveness(customer, type_of_activity): +def campaigns_effectiveness(customer: pd.DataFrame, type_of_activity: str): campaigns_effectiveness = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index() @@ -335,7 +335,7 @@ def campaigns_effectiveness(customer, type_of_activity): save_file_s3("campaigns_effectiveness_", type_of_activity) -def sale_dynamics(products, campaigns_brut, type_of_activity): +def sale_dynamics(products : pd.DataFrame, campaigns_brut : pd.DataFrame, type_of_activity): purchase_min = products.groupby(['customer_id'])['purchase_date'].min().reset_index() purchase_min.rename(columns = {'purchase_date' : 'first_purchase_event'}, inplace = True) purchase_min['first_purchase_event'] = pd.to_datetime(purchase_min['first_purchase_event']) @@ -395,7 +395,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity): save_file_s3("sale_dynamics_", type_of_activity) -def tickets_internet(tickets, type_of_activity): +def tickets_internet(tickets: pd.DataFrame, type_of_activity: str): nb_tickets_internet = tickets.groupby("number_company")['prop_purchases_internet'].mean().reset_index() nb_tickets_internet['prop_purchases_internet'] *=100 plt.bar(nb_tickets_internet["number_company"], nb_tickets_internet["prop_purchases_internet"]) @@ -408,7 +408,7 @@ def tickets_internet(tickets, type_of_activity): save_file_s3("tickets_internet_", type_of_activity) -def already_bought_online(tickets, type_of_activity): +def already_bought_online(tickets: pd.DataFrame, type_of_activity: str): nb_consumers_online = (tickets.groupby("number_company").agg({'achat_internet' : 'sum', 'customer_id' : 'nunique'} ).reset_index()) @@ -424,7 +424,7 @@ def already_bought_online(tickets, type_of_activity): save_file_s3("First_buy_internet_", type_of_activity) -def box_plot_price_tickets(tickets, type_of_activity): +def box_plot_price_tickets(tickets: pd.DataFrame, type_of_activity: str): price_tickets = tickets[(tickets['total_amount'] > 0)] plt.figure(figsize=(4,3)) sns.boxplot(data=price_tickets, y="total_amount", x="number_company", showfliers=False, showmeans=True) @@ -434,7 +434,7 @@ def box_plot_price_tickets(tickets, type_of_activity): plt.show() save_file_s3("box_plot_price_tickets_", type_of_activity) -def target_description(targets, type_of_activity): +def target_description(targets : pd.DataFrame, type_of_activity: str): describe_target = targets.groupby('number_company').agg( prop_target_jeune=('target_jeune', lambda x: (x.sum() / x.count())*100),