Add type annotations to function parameters
This commit is contained in:
parent
df4c28bdd8
commit
e54e6c3b10
|
@ -65,7 +65,7 @@ def load_files(nb_compagnie):
|
|||
return customer, campaigns_kpi, campaigns_brut, tickets, products, targets
|
||||
|
||||
|
||||
def remove_outlier_total_amount(tickets):
|
||||
def remove_outlier_total_amount(tickets : pd.DataFrame):
|
||||
Q1 = tickets['total_amount'].quantile(0.25)
|
||||
Q3 = tickets['total_amount'].quantile(0.75)
|
||||
IQR = Q3 - Q1
|
||||
|
@ -89,7 +89,7 @@ def save_file_s3(File_name, type_of_activity):
|
|||
plt.close()
|
||||
|
||||
|
||||
def outlier_detection(tickets, company_list, show_diagram=False):
|
||||
def outlier_detection(tickets : pd.DataFrame, company_list, show_diagram=False):
|
||||
"""
|
||||
detect anonymous customers
|
||||
"""
|
||||
|
@ -121,7 +121,7 @@ def outlier_detection(tickets, company_list, show_diagram=False):
|
|||
return outlier_list
|
||||
|
||||
|
||||
def valid_customer_detection(products, campaigns_brut):
|
||||
def valid_customer_detection(products : pd.DataFrame, campaigns_brut : pd.DataFrame):
|
||||
"""
|
||||
identify customer that are in our time perimeter
|
||||
"""
|
||||
|
@ -135,7 +135,7 @@ def valid_customer_detection(products, campaigns_brut):
|
|||
return consumer_valid
|
||||
|
||||
|
||||
def identify_purchase_during_target_periode(products):
|
||||
def identify_purchase_during_target_periode(products : pd.DataFrame):
|
||||
"""
|
||||
identify customer who purchased ticket during the target period
|
||||
"""
|
||||
|
@ -149,7 +149,7 @@ def remove_elements(lst, elements_to_remove):
|
|||
return ''.join([x for x in lst if x not in elements_to_remove])
|
||||
|
||||
|
||||
def compute_nb_clients(customer, type_of_activity):
|
||||
def compute_nb_clients(customer: pd.DataFrame, type_of_activity: str):
|
||||
company_nb_clients = customer[customer["purchase_count"]>0].groupby("number_company")["customer_id"].count().reset_index()
|
||||
plt.figure(figsize=(4,3))
|
||||
plt.bar(company_nb_clients["number_company"], company_nb_clients["customer_id"]/1000)
|
||||
|
@ -161,7 +161,7 @@ def compute_nb_clients(customer, type_of_activity):
|
|||
save_file_s3("nb_clients_", type_of_activity)
|
||||
|
||||
|
||||
def maximum_price_paid(customer, type_of_activity):
|
||||
def maximum_price_paid(customer: pd.DataFrame, type_of_activity: str):
|
||||
company_max_price = customer.groupby("number_company")["max_price"].max().reset_index()
|
||||
plt.bar(company_max_price["number_company"], company_max_price["max_price"])
|
||||
|
||||
|
@ -173,7 +173,7 @@ def maximum_price_paid(customer, type_of_activity):
|
|||
save_file_s3("Maximal_price_", type_of_activity)
|
||||
|
||||
|
||||
def target_proportion(customer, type_of_activity):
|
||||
def target_proportion(customer: pd.DataFrame, type_of_activity: str):
|
||||
df_y = customer.groupby(["number_company"]).agg({"has_purchased_target_period" : 'sum',
|
||||
'customer_id' : 'nunique'}).reset_index()
|
||||
df_y['prop_has_purchased_target_period'] = (df_y["has_purchased_target_period"]/df_y['customer_id'])*100
|
||||
|
@ -186,7 +186,7 @@ def target_proportion(customer, type_of_activity):
|
|||
save_file_s3("share_target_", type_of_activity)
|
||||
|
||||
|
||||
def mailing_consent(customer, type_of_activity):
|
||||
def mailing_consent(customer: pd.DataFrame, type_of_activity: str):
|
||||
mailing_consent = customer.groupby("number_company")["opt_in"].mean().reset_index()
|
||||
mailing_consent["opt_in"] *= 100
|
||||
plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"])
|
||||
|
@ -199,7 +199,7 @@ def mailing_consent(customer, type_of_activity):
|
|||
save_file_s3("mailing_consent_", type_of_activity)
|
||||
|
||||
|
||||
def mailing_consent_by_target(customer):
|
||||
def mailing_consent_by_target(customer: pd.DataFrame, type_of_activity: str):
|
||||
df_graph = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
|
||||
# Création du barplot groupé
|
||||
fig, ax = plt.subplots(figsize=(5, 3))
|
||||
|
@ -232,7 +232,7 @@ def mailing_consent_by_target(customer):
|
|||
save_file_s3("mailing_consent_target_", type_of_activity)
|
||||
|
||||
|
||||
def gender_bar(customer, type_of_activity):
|
||||
def gender_bar(customer: pd.DataFrame, type_of_activity: str):
|
||||
company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index()
|
||||
|
||||
company_genders["gender_male"] *= 100
|
||||
|
@ -256,7 +256,7 @@ def gender_bar(customer, type_of_activity):
|
|||
save_file_s3("gender_bar_", type_of_activity)
|
||||
|
||||
|
||||
def country_bar(customer, type_of_activity):
|
||||
def country_bar(customer: pd.DataFrame, type_of_activity: str):
|
||||
company_country_fr = customer.groupby("number_company")["country_fr"].mean().reset_index()
|
||||
company_country_fr["country_fr"] *= 100
|
||||
plt.figure(figsize=(4,3))
|
||||
|
@ -269,7 +269,7 @@ def country_bar(customer, type_of_activity):
|
|||
save_file_s3("country_bar_", type_of_activity)
|
||||
|
||||
|
||||
def lazy_customer_plot(campaigns_kpi, type_of_activity):
|
||||
def lazy_customer_plot(campaigns_kpi: pd.DataFrame, type_of_activity: str):
|
||||
company_lazy_customers = campaigns_kpi.groupby("number_company")[["nb_campaigns", "taux_ouverture_mail"]].mean().reset_index()
|
||||
company_lazy_customers["taux_ouverture_mail"] *= 100
|
||||
|
||||
|
@ -303,7 +303,7 @@ def lazy_customer_plot(campaigns_kpi, type_of_activity):
|
|||
save_file_s3("lazy_customer_", type_of_activity)
|
||||
|
||||
|
||||
def campaigns_effectiveness(customer, type_of_activity):
|
||||
def campaigns_effectiveness(customer: pd.DataFrame, type_of_activity: str):
|
||||
|
||||
campaigns_effectiveness = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
|
||||
|
||||
|
@ -335,7 +335,7 @@ def campaigns_effectiveness(customer, type_of_activity):
|
|||
save_file_s3("campaigns_effectiveness_", type_of_activity)
|
||||
|
||||
|
||||
def sale_dynamics(products, campaigns_brut, type_of_activity):
|
||||
def sale_dynamics(products : pd.DataFrame, campaigns_brut : pd.DataFrame, type_of_activity):
|
||||
purchase_min = products.groupby(['customer_id'])['purchase_date'].min().reset_index()
|
||||
purchase_min.rename(columns = {'purchase_date' : 'first_purchase_event'}, inplace = True)
|
||||
purchase_min['first_purchase_event'] = pd.to_datetime(purchase_min['first_purchase_event'])
|
||||
|
@ -395,7 +395,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity):
|
|||
save_file_s3("sale_dynamics_", type_of_activity)
|
||||
|
||||
|
||||
def tickets_internet(tickets, type_of_activity):
|
||||
def tickets_internet(tickets: pd.DataFrame, type_of_activity: str):
|
||||
nb_tickets_internet = tickets.groupby("number_company")['prop_purchases_internet'].mean().reset_index()
|
||||
nb_tickets_internet['prop_purchases_internet'] *=100
|
||||
plt.bar(nb_tickets_internet["number_company"], nb_tickets_internet["prop_purchases_internet"])
|
||||
|
@ -408,7 +408,7 @@ def tickets_internet(tickets, type_of_activity):
|
|||
save_file_s3("tickets_internet_", type_of_activity)
|
||||
|
||||
|
||||
def already_bought_online(tickets, type_of_activity):
|
||||
def already_bought_online(tickets: pd.DataFrame, type_of_activity: str):
|
||||
nb_consumers_online = (tickets.groupby("number_company").agg({'achat_internet' : 'sum',
|
||||
'customer_id' : 'nunique'}
|
||||
).reset_index())
|
||||
|
@ -424,7 +424,7 @@ def already_bought_online(tickets, type_of_activity):
|
|||
save_file_s3("First_buy_internet_", type_of_activity)
|
||||
|
||||
|
||||
def box_plot_price_tickets(tickets, type_of_activity):
|
||||
def box_plot_price_tickets(tickets: pd.DataFrame, type_of_activity: str):
|
||||
price_tickets = tickets[(tickets['total_amount'] > 0)]
|
||||
plt.figure(figsize=(4,3))
|
||||
sns.boxplot(data=price_tickets, y="total_amount", x="number_company", showfliers=False, showmeans=True)
|
||||
|
@ -434,7 +434,7 @@ def box_plot_price_tickets(tickets, type_of_activity):
|
|||
plt.show()
|
||||
save_file_s3("box_plot_price_tickets_", type_of_activity)
|
||||
|
||||
def target_description(targets, type_of_activity):
|
||||
def target_description(targets : pd.DataFrame, type_of_activity: str):
|
||||
|
||||
describe_target = targets.groupby('number_company').agg(
|
||||
prop_target_jeune=('target_jeune', lambda x: (x.sum() / x.count())*100),
|
||||
|
|
Loading…
Reference in New Issue
Block a user