87 lines
2.7 KiB
Python
87 lines
2.7 KiB
Python
|
|
# Packages
|
|
import pandas as pd
|
|
import numpy as np
|
|
import os
|
|
import io
|
|
import s3fs
|
|
import re
|
|
import pickle
|
|
import warnings
|
|
import matplotlib.pyplot as plt
|
|
from tabulate import tabulate
|
|
|
|
###################################

# Name of the fitted model used for the segmentation; it is passed to
# load_model() for every type of activity.
# Alternative value: "LogisticRegression_Benchmark"
model_name = "LogisticRegression_cv"

###################################

# Execute the helper file in this script's global namespace.
# exec() is kept (rather than an import) so the executed code shares this
# script's globals -- presumably the helpers rely on names such as `fs`
# defined below; confirm before converting to a regular import.
# The context manager guarantees the file handle is closed, and the explicit
# encoding avoids depending on the locale's default.
with open('utils_segmentation.py', encoding='utf-8') as utils_file:
    exec(utils_file.read())

# Silence every warning for the rest of the run.
warnings.filterwarnings('ignore')

# Create filesystem object.
# Raises KeyError if the AWS_S3_ENDPOINT environment variable is not set.
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})
|
|
|
|
|
|
# Run the segmentation pipeline for every type of company.
# To process a single type, restrict the list below (or read the value
# interactively with input()).
for type_of_activity in ['musee', 'sport', 'musique']:

    # load test set
    dataset_test = load_test_file(type_of_activity)

    # Load Model
    model = load_model(type_of_activity, model_name)

    ### Preprocessing of data
    X_test = dataset_test.drop(columns='y_has_purchased')
    y_test = dataset_test[['y_has_purchased']]

    # Work on an independent copy: the columns added below (true label,
    # prediction, score, segment) must not end up in X_test, otherwise the
    # model would be fed the target and its own prediction as extra columns.
    X_test_segment = X_test.copy()

    # add y_has_purchased to the segmentation table
    X_test_segment["has_purchased"] = y_test

    # Add prediction and probability to the segmentation table
    y_pred = model.predict(X_test)
    X_test_segment["has_purchased_estim"] = y_pred

    # second column of predict_proba is kept as the score
    # (presumably the probability of the positive class -- confirm)
    y_pred_prob = model.predict_proba(X_test)[:, 1]
    X_test_segment['score'] = y_pred_prob

    # Four segments from the score thresholds 0.25 / 0.5 / 0.75; the first
    # matching condition wins, and anything matching none (score >= 0.75,
    # or NaN) falls into segment '4'.
    score = X_test_segment['score']
    X_test_segment["segment"] = np.select(
        [score < 0.25, score < 0.5, score < 0.75],
        ['1', '2', '3'],
        default='4',
    )

    ### 1. business KPIs

    business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"]
    X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var)
    print(f"business figures for {type_of_activity} companies :\n")
    print(X_test_business_fig)
    print("\n")

    # save histogram to Minio
    hist_segment_business_KPIs(X_test_business_fig, "segment", "size", "nb_tickets",
                               "nb_purchases", "total_amount", "nb_campaigns", type_of_activity)
    save_file_s3_mp(File_name="segments_business_KPI_", type_of_activity=type_of_activity)

    ### 2. description of marketing personae

    ## A. Spider chart
    radar_mp_plot_all(df=X_test_segment, type_of_activity=type_of_activity)
    save_file_s3_mp(File_name="spider_chart_all_", type_of_activity=type_of_activity)

    ## B. Latex table
    known_sociodemo_caracteristics(df=X_test_segment, type_of_activity=type_of_activity)