add type of variables
This commit is contained in:
parent
df4c28bdd8
commit
e54e6c3b10
|
@ -65,7 +65,7 @@ def load_files(nb_compagnie):
|
||||||
return customer, campaigns_kpi, campaigns_brut, tickets, products, targets
|
return customer, campaigns_kpi, campaigns_brut, tickets, products, targets
|
||||||
|
|
||||||
|
|
||||||
def remove_outlier_total_amount(tickets):
|
def remove_outlier_total_amount(tickets : pd.DataFrame):
|
||||||
Q1 = tickets['total_amount'].quantile(0.25)
|
Q1 = tickets['total_amount'].quantile(0.25)
|
||||||
Q3 = tickets['total_amount'].quantile(0.75)
|
Q3 = tickets['total_amount'].quantile(0.75)
|
||||||
IQR = Q3 - Q1
|
IQR = Q3 - Q1
|
||||||
|
@ -89,7 +89,7 @@ def save_file_s3(File_name, type_of_activity):
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
|
|
||||||
def outlier_detection(tickets, company_list, show_diagram=False):
|
def outlier_detection(tickets : pd.DataFrame, company_list, show_diagram=False):
|
||||||
"""
|
"""
|
||||||
detect anonymous customers
|
detect anonymous customers
|
||||||
"""
|
"""
|
||||||
|
@ -121,7 +121,7 @@ def outlier_detection(tickets, company_list, show_diagram=False):
|
||||||
return outlier_list
|
return outlier_list
|
||||||
|
|
||||||
|
|
||||||
def valid_customer_detection(products, campaigns_brut):
|
def valid_customer_detection(products : pd.DataFrame, campaigns_brut : pd.DataFrame):
|
||||||
"""
|
"""
|
||||||
identify customer that are in our time perimeter
|
identify customer that are in our time perimeter
|
||||||
"""
|
"""
|
||||||
|
@ -135,7 +135,7 @@ def valid_customer_detection(products, campaigns_brut):
|
||||||
return consumer_valid
|
return consumer_valid
|
||||||
|
|
||||||
|
|
||||||
def identify_purchase_during_target_periode(products):
|
def identify_purchase_during_target_periode(products : pd.DataFrame):
|
||||||
"""
|
"""
|
||||||
identify customer who purchased ticket during the target period
|
identify customer who purchased ticket during the target period
|
||||||
"""
|
"""
|
||||||
|
@ -149,7 +149,7 @@ def remove_elements(lst, elements_to_remove):
|
||||||
return ''.join([x for x in lst if x not in elements_to_remove])
|
return ''.join([x for x in lst if x not in elements_to_remove])
|
||||||
|
|
||||||
|
|
||||||
def compute_nb_clients(customer, type_of_activity):
|
def compute_nb_clients(customer: pd.DataFrame, type_of_activity: str):
|
||||||
company_nb_clients = customer[customer["purchase_count"]>0].groupby("number_company")["customer_id"].count().reset_index()
|
company_nb_clients = customer[customer["purchase_count"]>0].groupby("number_company")["customer_id"].count().reset_index()
|
||||||
plt.figure(figsize=(4,3))
|
plt.figure(figsize=(4,3))
|
||||||
plt.bar(company_nb_clients["number_company"], company_nb_clients["customer_id"]/1000)
|
plt.bar(company_nb_clients["number_company"], company_nb_clients["customer_id"]/1000)
|
||||||
|
@ -161,7 +161,7 @@ def compute_nb_clients(customer, type_of_activity):
|
||||||
save_file_s3("nb_clients_", type_of_activity)
|
save_file_s3("nb_clients_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def maximum_price_paid(customer, type_of_activity):
|
def maximum_price_paid(customer: pd.DataFrame, type_of_activity: str):
|
||||||
company_max_price = customer.groupby("number_company")["max_price"].max().reset_index()
|
company_max_price = customer.groupby("number_company")["max_price"].max().reset_index()
|
||||||
plt.bar(company_max_price["number_company"], company_max_price["max_price"])
|
plt.bar(company_max_price["number_company"], company_max_price["max_price"])
|
||||||
|
|
||||||
|
@ -173,7 +173,7 @@ def maximum_price_paid(customer, type_of_activity):
|
||||||
save_file_s3("Maximal_price_", type_of_activity)
|
save_file_s3("Maximal_price_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def target_proportion(customer, type_of_activity):
|
def target_proportion(customer: pd.DataFrame, type_of_activity: str):
|
||||||
df_y = customer.groupby(["number_company"]).agg({"has_purchased_target_period" : 'sum',
|
df_y = customer.groupby(["number_company"]).agg({"has_purchased_target_period" : 'sum',
|
||||||
'customer_id' : 'nunique'}).reset_index()
|
'customer_id' : 'nunique'}).reset_index()
|
||||||
df_y['prop_has_purchased_target_period'] = (df_y["has_purchased_target_period"]/df_y['customer_id'])*100
|
df_y['prop_has_purchased_target_period'] = (df_y["has_purchased_target_period"]/df_y['customer_id'])*100
|
||||||
|
@ -186,7 +186,7 @@ def target_proportion(customer, type_of_activity):
|
||||||
save_file_s3("share_target_", type_of_activity)
|
save_file_s3("share_target_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def mailing_consent(customer, type_of_activity):
|
def mailing_consent(customer: pd.DataFrame, type_of_activity: str):
|
||||||
mailing_consent = customer.groupby("number_company")["opt_in"].mean().reset_index()
|
mailing_consent = customer.groupby("number_company")["opt_in"].mean().reset_index()
|
||||||
mailing_consent["opt_in"] *= 100
|
mailing_consent["opt_in"] *= 100
|
||||||
plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"])
|
plt.bar(mailing_consent["number_company"], mailing_consent["opt_in"])
|
||||||
|
@ -199,7 +199,7 @@ def mailing_consent(customer, type_of_activity):
|
||||||
save_file_s3("mailing_consent_", type_of_activity)
|
save_file_s3("mailing_consent_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def mailing_consent_by_target(customer):
|
def mailing_consent_by_target(customer: pd.DataFrame, type_of_activity: str):
|
||||||
df_graph = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
|
df_graph = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
|
||||||
# Création du barplot groupé
|
# Création du barplot groupé
|
||||||
fig, ax = plt.subplots(figsize=(5, 3))
|
fig, ax = plt.subplots(figsize=(5, 3))
|
||||||
|
@ -232,7 +232,7 @@ def mailing_consent_by_target(customer):
|
||||||
save_file_s3("mailing_consent_target_", type_of_activity)
|
save_file_s3("mailing_consent_target_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def gender_bar(customer, type_of_activity):
|
def gender_bar(customer: pd.DataFrame, type_of_activity: str):
|
||||||
company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index()
|
company_genders = customer.groupby("number_company")[["gender_male", "gender_female", "gender_other"]].mean().reset_index()
|
||||||
|
|
||||||
company_genders["gender_male"] *= 100
|
company_genders["gender_male"] *= 100
|
||||||
|
@ -256,7 +256,7 @@ def gender_bar(customer, type_of_activity):
|
||||||
save_file_s3("gender_bar_", type_of_activity)
|
save_file_s3("gender_bar_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def country_bar(customer, type_of_activity):
|
def country_bar(customer: pd.DataFrame, type_of_activity: str):
|
||||||
company_country_fr = customer.groupby("number_company")["country_fr"].mean().reset_index()
|
company_country_fr = customer.groupby("number_company")["country_fr"].mean().reset_index()
|
||||||
company_country_fr["country_fr"] *= 100
|
company_country_fr["country_fr"] *= 100
|
||||||
plt.figure(figsize=(4,3))
|
plt.figure(figsize=(4,3))
|
||||||
|
@ -269,7 +269,7 @@ def country_bar(customer, type_of_activity):
|
||||||
save_file_s3("country_bar_", type_of_activity)
|
save_file_s3("country_bar_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def lazy_customer_plot(campaigns_kpi, type_of_activity):
|
def lazy_customer_plot(campaigns_kpi: pd.DataFrame, type_of_activity: str):
|
||||||
company_lazy_customers = campaigns_kpi.groupby("number_company")[["nb_campaigns", "taux_ouverture_mail"]].mean().reset_index()
|
company_lazy_customers = campaigns_kpi.groupby("number_company")[["nb_campaigns", "taux_ouverture_mail"]].mean().reset_index()
|
||||||
company_lazy_customers["taux_ouverture_mail"] *= 100
|
company_lazy_customers["taux_ouverture_mail"] *= 100
|
||||||
|
|
||||||
|
@ -303,7 +303,7 @@ def lazy_customer_plot(campaigns_kpi, type_of_activity):
|
||||||
save_file_s3("lazy_customer_", type_of_activity)
|
save_file_s3("lazy_customer_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def campaigns_effectiveness(customer, type_of_activity):
|
def campaigns_effectiveness(customer: pd.DataFrame, type_of_activity: str):
|
||||||
|
|
||||||
campaigns_effectiveness = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
|
campaigns_effectiveness = customer.groupby(["number_company", "has_purchased_target_period"])["opt_in"].mean().reset_index()
|
||||||
|
|
||||||
|
@ -335,7 +335,7 @@ def campaigns_effectiveness(customer, type_of_activity):
|
||||||
save_file_s3("campaigns_effectiveness_", type_of_activity)
|
save_file_s3("campaigns_effectiveness_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def sale_dynamics(products, campaigns_brut, type_of_activity):
|
def sale_dynamics(products : pd.DataFrame, campaigns_brut : pd.DataFrame, type_of_activity):
|
||||||
purchase_min = products.groupby(['customer_id'])['purchase_date'].min().reset_index()
|
purchase_min = products.groupby(['customer_id'])['purchase_date'].min().reset_index()
|
||||||
purchase_min.rename(columns = {'purchase_date' : 'first_purchase_event'}, inplace = True)
|
purchase_min.rename(columns = {'purchase_date' : 'first_purchase_event'}, inplace = True)
|
||||||
purchase_min['first_purchase_event'] = pd.to_datetime(purchase_min['first_purchase_event'])
|
purchase_min['first_purchase_event'] = pd.to_datetime(purchase_min['first_purchase_event'])
|
||||||
|
@ -395,7 +395,7 @@ def sale_dynamics(products, campaigns_brut, type_of_activity):
|
||||||
save_file_s3("sale_dynamics_", type_of_activity)
|
save_file_s3("sale_dynamics_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def tickets_internet(tickets, type_of_activity):
|
def tickets_internet(tickets: pd.DataFrame, type_of_activity: str):
|
||||||
nb_tickets_internet = tickets.groupby("number_company")['prop_purchases_internet'].mean().reset_index()
|
nb_tickets_internet = tickets.groupby("number_company")['prop_purchases_internet'].mean().reset_index()
|
||||||
nb_tickets_internet['prop_purchases_internet'] *=100
|
nb_tickets_internet['prop_purchases_internet'] *=100
|
||||||
plt.bar(nb_tickets_internet["number_company"], nb_tickets_internet["prop_purchases_internet"])
|
plt.bar(nb_tickets_internet["number_company"], nb_tickets_internet["prop_purchases_internet"])
|
||||||
|
@ -408,7 +408,7 @@ def tickets_internet(tickets, type_of_activity):
|
||||||
save_file_s3("tickets_internet_", type_of_activity)
|
save_file_s3("tickets_internet_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def already_bought_online(tickets, type_of_activity):
|
def already_bought_online(tickets: pd.DataFrame, type_of_activity: str):
|
||||||
nb_consumers_online = (tickets.groupby("number_company").agg({'achat_internet' : 'sum',
|
nb_consumers_online = (tickets.groupby("number_company").agg({'achat_internet' : 'sum',
|
||||||
'customer_id' : 'nunique'}
|
'customer_id' : 'nunique'}
|
||||||
).reset_index())
|
).reset_index())
|
||||||
|
@ -424,7 +424,7 @@ def already_bought_online(tickets, type_of_activity):
|
||||||
save_file_s3("First_buy_internet_", type_of_activity)
|
save_file_s3("First_buy_internet_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def box_plot_price_tickets(tickets, type_of_activity):
|
def box_plot_price_tickets(tickets: pd.DataFrame, type_of_activity: str):
|
||||||
price_tickets = tickets[(tickets['total_amount'] > 0)]
|
price_tickets = tickets[(tickets['total_amount'] > 0)]
|
||||||
plt.figure(figsize=(4,3))
|
plt.figure(figsize=(4,3))
|
||||||
sns.boxplot(data=price_tickets, y="total_amount", x="number_company", showfliers=False, showmeans=True)
|
sns.boxplot(data=price_tickets, y="total_amount", x="number_company", showfliers=False, showmeans=True)
|
||||||
|
@ -434,7 +434,7 @@ def box_plot_price_tickets(tickets, type_of_activity):
|
||||||
plt.show()
|
plt.show()
|
||||||
save_file_s3("box_plot_price_tickets_", type_of_activity)
|
save_file_s3("box_plot_price_tickets_", type_of_activity)
|
||||||
|
|
||||||
def target_description(targets, type_of_activity):
|
def target_description(targets : pd.DataFrame, type_of_activity: str):
|
||||||
|
|
||||||
describe_target = targets.groupby('number_company').agg(
|
describe_target = targets.groupby('number_company').agg(
|
||||||
prop_target_jeune=('target_jeune', lambda x: (x.sum() / x.count())*100),
|
prop_target_jeune=('target_jeune', lambda x: (x.sum() / x.count())*100),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user