fix errors
This commit is contained in:
parent
089a8fd3d6
commit
52fd738fe5
|
@ -47,7 +47,9 @@ customer['has_purchased_target_period'] = np.where(customer['customer_id'].isin(
|
||||||
# Generate the graphs and automatically save them in the bucket
|
# Generate the graphs and automatically save them in the bucket
|
||||||
compute_nb_clients(customer, type_of_activity)
|
compute_nb_clients(customer, type_of_activity)
|
||||||
|
|
||||||
maximum_price_paid(customer, type_of_activity)
|
#maximum_price_paid(customer, type_of_activity)
|
||||||
|
|
||||||
|
target_proportion(customer, type_of_activity)
|
||||||
|
|
||||||
mailing_consent(customer, type_of_activity)
|
mailing_consent(customer, type_of_activity)
|
||||||
|
|
||||||
|
|
|
@ -55,16 +55,16 @@ weights = class_weight.compute_class_weight(class_weight = 'balanced', classes =
|
||||||
weight_dict = {np.unique(y_train['y_has_purchased'])[i]: weights[i] for i in range(len(np.unique(y_train['y_has_purchased'])))}
|
weight_dict = {np.unique(y_train['y_has_purchased'])[i]: weights[i] for i in range(len(np.unique(y_train['y_has_purchased'])))}
|
||||||
|
|
||||||
|
|
||||||
numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',
|
numeric_features = ['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max',
|
||||||
'time_between_purchase', 'nb_tickets_internet', 'is_email_true', 'opt_in', #'is_partner',
|
'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',
|
||||||
'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']
|
'nb_campaigns', 'nb_campaigns_opened']
|
||||||
|
|
||||||
numeric_transformer = Pipeline(steps=[
|
numeric_transformer = Pipeline(steps=[
|
||||||
#("imputer", SimpleImputer(strategy="mean")),
|
#("imputer", SimpleImputer(strategy="mean")),
|
||||||
("scaler", StandardScaler())
|
("scaler", StandardScaler())
|
||||||
])
|
])
|
||||||
|
|
||||||
categorical_features = ['opt_in']
|
categorical_features = ['opt_in', 'gender_male', 'gender_female']
|
||||||
|
|
||||||
# Transformer for the categorical features
|
# Transformer for the categorical features
|
||||||
categorical_transformer = Pipeline(steps=[
|
categorical_transformer = Pipeline(steps=[
|
||||||
|
|
|
@ -42,7 +42,14 @@ def load_files(nb_compagnie):
|
||||||
df_campaigns_kpi["customer_id"]= directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str')
|
df_campaigns_kpi["customer_id"]= directory_path + '_' + df_campaigns_kpi['customer_id'].astype('str')
|
||||||
df_customerplus_clean["customer_id"]= directory_path + '_' + df_customerplus_clean['customer_id'].astype('str')
|
df_customerplus_clean["customer_id"]= directory_path + '_' + df_customerplus_clean['customer_id'].astype('str')
|
||||||
df_products_purchased_reduced["customer_id"]= directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str')
|
df_products_purchased_reduced["customer_id"]= directory_path + '_' + df_products_purchased_reduced['customer_id'].astype('str')
|
||||||
|
|
||||||
|
# Remove companies' outliers
|
||||||
|
df_tickets_kpi = remove_outlier_total_amount(df_tickets_kpi)
|
||||||
|
# harmonize set of customers across databases
|
||||||
|
customer_id = df_tickets_kpi['customer_id'].to_list()
|
||||||
|
for dataset in [df_campaigns_brut, df_campaigns_kpi, df_customerplus_clean, df_target_information]:
|
||||||
|
dataset = dataset[dataset['customer_id'].isin(customer_id)]
|
||||||
|
|
||||||
# Concaténation
|
# Concaténation
|
||||||
customer = pd.concat([customer, df_customerplus_clean], ignore_index=True)
|
customer = pd.concat([customer, df_customerplus_clean], ignore_index=True)
|
||||||
campaigns_kpi = pd.concat([campaigns_kpi, df_campaigns_kpi], ignore_index=True)
|
campaigns_kpi = pd.concat([campaigns_kpi, df_campaigns_kpi], ignore_index=True)
|
||||||
|
@ -53,6 +60,16 @@ def load_files(nb_compagnie):
|
||||||
return customer, campaigns_kpi, campaigns_brut, tickets, products
|
return customer, campaigns_kpi, campaigns_brut, tickets, products
|
||||||
|
|
||||||
|
|
||||||
|
def remove_outlier_total_amount(tickets, iqr_factor=1.5):
    """Drop every customer that has at least one outlying ``total_amount``.

    The upper fence is ``Q3 + iqr_factor * (Q3 - Q1)``, computed on the
    ``total_amount`` column. Any ``customer_id`` owning a row strictly above
    that fence is removed entirely, i.e. all of that customer's rows are
    dropped, not only the offending one. There is no lower fence.

    Parameters
    ----------
    tickets : pandas.DataFrame
        Must contain the columns ``total_amount`` and ``customer_id``.
    iqr_factor : float, default 1.5
        Multiplier applied to the inter-quartile range to build the upper
        fence. The default reproduces the previously hard-coded value.

    Returns
    -------
    pandas.DataFrame
        The rows of ``tickets`` whose ``customer_id`` is not an outlier.
        The input frame is not modified.
    """
    amounts = tickets['total_amount']
    q1 = amounts.quantile(0.25)
    q3 = amounts.quantile(0.75)
    upper = q3 + iqr_factor * (q3 - q1)

    # Collect the ids once (deduplicated); `isin` accepts the array directly,
    # so no intermediate Python list is needed.
    outlier_ids = tickets.loc[amounts > upper, 'customer_id'].unique()
    return tickets[~tickets['customer_id'].isin(outlier_ids)]
|
||||||
|
|
||||||
|
|
||||||
def save_file_s3(File_name, type_of_activity):
|
def save_file_s3(File_name, type_of_activity):
|
||||||
image_buffer = io.BytesIO()
|
image_buffer = io.BytesIO()
|
||||||
plt.savefig(image_buffer, format='png')
|
plt.savefig(image_buffer, format='png')
|
||||||
|
@ -140,6 +157,19 @@ def maximum_price_paid(customer, type_of_activity):
|
||||||
save_file_s3("Maximal_price_", type_of_activity)
|
save_file_s3("Maximal_price_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
|
def target_proportion(customer, type_of_activity):
    """Plot, per company, the share of customers flagged as target-period buyers.

    For each ``number_company`` the sum of ``has_purchased_target_period`` is
    divided by the number of distinct ``customer_id`` values and expressed as
    a percentage. The result is drawn as a bar chart, shown, and then passed
    to ``save_file_s3`` with the ``"share_target_"`` file-name prefix.

    Parameters
    ----------
    customer : pandas.DataFrame
        Must contain ``number_company``, ``has_purchased_target_period`` and
        ``customer_id``.
    type_of_activity : str
        Used in the plot title and forwarded to ``save_file_s3``.
    """
    per_company = (
        customer
        .groupby(["number_company"])
        .agg({"has_purchased_target_period": 'sum', 'customer_id': 'nunique'})
        .reset_index()
    )

    buyers = per_company["has_purchased_target_period"]
    nb_customers = per_company['customer_id']
    per_company['prop_has_purchased_target_period'] = (buyers / nb_customers) * 100

    companies = per_company["number_company"]
    plt.bar(companies, per_company["prop_has_purchased_target_period"])
    plt.xlabel('Company Number')
    plt.ylabel('Share (%)')
    plt.title(f'Share of Customers who Bought during the Target Period Across {type_of_activity} Companies')
    # One tick per company, labelled with the company number itself.
    plt.xticks(companies, [f"{company}" for company in companies])
    plt.show()
    # NOTE(review): the figure is saved after plt.show(); on some backends
    # show() clears the current figure, which would save a blank image.
    # Confirm against save_file_s3 before reordering.
    save_file_s3("share_target_", type_of_activity)
|
||||||
|
|
||||||
|
|
||||||
def mailing_consent(customer, type_of_activity):
|
def mailing_consent(customer, type_of_activity):
|
||||||
mailing_consent = customer.groupby("number_company")["opt_in"].mean().reset_index()
|
mailing_consent = customer.groupby("number_company")["opt_in"].mean().reset_index()
|
||||||
mailing_consent["opt_in"] *= 100
|
mailing_consent["opt_in"] *= 100
|
||||||
|
|
Loading…
Reference in New Issue
Block a user