105 lines
3.3 KiB
Python
105 lines
3.3 KiB
Python
|
### importations ###
|
||
|
### possibly not necessary, since we exec the associated utils .py file
|
||
|
|
||
|
"""
|
||
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
import os
|
||
|
import io
|
||
|
import s3fs
|
||
|
import re
|
||
|
import pickle
|
||
|
import warnings
|
||
|
import matplotlib.pyplot as plt
|
||
|
"""
|
||
|
|
||
|
### --- beginning of the code --- ###
|
||
|
|
||
|
|
||
|
### hyperparameters of the code ###

###################################

# Type of companies for which the pipeline is run. It is forwarded as the
# `type_of_activity` keyword of `save_file_s3_mp` further down.
activity = "sport"

# Name of the model used for the segmentation; forwarded to `load_model`.
model_name = "LogisticRegression_Benchmark"

###################################
|
||
|
|
||
|
|
||
|
# Execute the utils file so the functions we need land in this namespace.
# NOTE(review): `warnings`, `os` and `s3fs` used below are not imported by this
# script itself (the import list above is disabled inside a string literal);
# presumably the exec'd utils file imports them -- confirm.
# NOTE(review): exec() runs the file content verbatim; only ever point it at a
# trusted, project-local file.
with open('utils_segmentation_2TP.py') as utils_file:
    # The context manager closes the file handle once its content is read.
    exec(utils_file.read())

# Silence every warning raised from this point on.
warnings.filterwarnings('ignore')

# Create filesystem object
# The endpoint host comes from the environment; a missing AWS_S3_ENDPOINT
# variable raises KeyError on this line.
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})
|
||
|
|
||
|
# load test set
# Pass the `activity` hyperparameter: in this script `type_of_activity` is only
# a keyword-argument name, never an assigned variable, so using it as a value
# would raise NameError (unless the exec'd utils file happens to define it).
dataset_test = load_test_file(activity)

# Load Model
# The model is selected by activity type and by the `model_name` hyperparameter.
model = load_model(activity, model_name)
|
||
|
|
||
|
|
||
|
### Preprocessing of data

# Feature columns handed to the model, and the observed target.
X_test = dataset_test[[
    'nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers',
    'vente_internet_max', 'purchase_date_min', 'purchase_date_max',
    'time_between_purchase', 'nb_tickets_internet', 'is_email_true',
    'opt_in',  # 'is_partner',
    'gender_female', 'gender_male', 'gender_other', 'nb_campaigns',
    'nb_campaigns_opened',
]]

y_test = dataset_test[['y_has_purchased']]

# Work on an independent copy. A plain `X_test_segment = X_test` makes both
# names point at the same frame, so the columns added below (including the
# target `has_purchased`) would also be added to `X_test` before it is passed
# to the model, and the writes would target a slice of `dataset_test`.
# NOTE(review): assumes the model expects exactly the feature columns selected
# above -- confirm against the training script.
X_test_segment = X_test.copy()

X_test_segment.insert(X_test_segment.shape[1], "country_fr", dataset_test["country_fr"])

# add y_has_purchased to X_test_segment
X_test_segment["has_purchased"] = y_test

# Add prediction and probability to X_test_segment
y_pred = model.predict(X_test)
X_test_segment["has_purchased_estim"] = y_pred

# Second column of predict_proba is used as the score.
y_pred_prob = model.predict_proba(X_test)[:, 1]
X_test_segment['score'] = y_pred_prob

# Bucket the score into four segments labelled '1' to '4' using the
# thresholds 0.25, 0.5 and 0.75 (a score >= 0.75 falls in segment '4').
X_test_segment["segment"] = np.where(
    X_test_segment['score'] < 0.25, '1',
    np.where(
        X_test_segment['score'] < 0.5, '2',
        np.where(X_test_segment['score'] < 0.75, '3', '4'),
    ),
)
|
||
|
|
||
|
### 1. business KPIs

# Variables summarised per segment.
business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"]
X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var)

# save histogram to Minio
# NOTE(review): presumably `hist_segment_business_KPIs` draws the figure and
# `save_file_s3_mp` uploads the current figure -- confirm in the utils file.
hist_segment_business_KPIs(
    X_test_business_fig, "segment", "size", "nb_tickets",
    "nb_purchases", "total_amount", "nb_campaigns",
)
save_file_s3_mp(File_name="segments_business_KPIs_", type_of_activity=activity)
|
||
|
|
||
|
|
||
|
### 2. description of marketing personae (spider chart)

# table summarizing variables relative to marketing personae
X_test_segment_mp = df_segment_mp(
    X_test_segment, "segment", "gender_female",
    "gender_male", "gender_other", "country_fr",
)

# table relative to purchasing behaviour
X_test_segment_pb = df_segment_pb(
    X_test_segment, "segment", "nb_tickets_internet", "nb_tickets",
    "nb_campaigns_opened", "nb_campaigns", "opt_in",
)

# concatenation of tables to prepare the plot
# Only three columns of the personae table are appended, side by side
# (axis=1), to the purchasing-behaviour table.
# NOTE(review): assumes both tables share the same row index (one row per
# segment, in the same order) -- confirm in the utils file.
X_test_segment_caract = pd.concat(
    [
        X_test_segment_pb,
        X_test_segment_mp[['share_known_gender', 'share_of_women', 'country_fr']],
    ],
    axis=1,
)

# visualization and save the graphic to the MinIo
# Every column except "segment" becomes one category passed to the plot helper.
categories = list(X_test_segment_caract.drop("segment", axis=1).columns)
radar_mp_plot_all(df=X_test_segment_caract, categories=categories)
save_file_s3_mp(File_name="spider_chart_all_", type_of_activity=activity)
|
||
|
|
||
|
|
||
|
|