BDC-team-1/6_Segmentation_and_Marketing_Personae.py

87 lines
2.7 KiB
Python

# Packages
import pandas as pd
import numpy as np
import os
import io
import s3fs
import re
import pickle
import warnings
import matplotlib.pyplot as plt
from tabulate import tabulate
###################################
# Choose the model used for the segmentation.
# model_name = "LogisticRegression_Benchmark"
model_name = "LogisticRegression_cv"
###################################

# Load the helper functions used below (load_test_file, load_model, ...).
# NOTE(review): exec() runs the helper file inside this module's globals, so
# the helpers can presumably see names defined here (e.g. `fs`) -- confirm
# before replacing this with a regular `import utils_segmentation`.
# The file is opened through a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open('utils_segmentation.py') as utils_file:
    exec(utils_file.read())

# Silence library warnings for the whole run.
warnings.filterwarnings('ignore')

# Create the S3 filesystem object; AWS_S3_ENDPOINT must be set in the
# environment (a KeyError is raised otherwise).
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})
# Run the segmentation pipeline for every type of company.
# To process a single type interactively, replace the list below with a
# value read through input().
for type_of_activity in ['musee', 'sport', 'musique']:
    # Load the test set and the fitted model for this type of activity.
    dataset_test = load_test_file(type_of_activity)
    model = load_model(type_of_activity, model_name)

    ### Preprocessing of data
    # Separate the features from the target column.
    X_test = dataset_test.drop(columns='y_has_purchased')
    y_test = dataset_test[['y_has_purchased']]

    # Score on the clean feature frame *before* any reporting column exists,
    # so neither the true label nor earlier predictions reach the model.
    y_pred = model.predict(X_test)
    y_pred_prob = model.predict_proba(X_test)[:, 1]

    # Build the reporting frame on a copy: assigning columns to an alias of
    # X_test would silently add them to the model input as well.
    X_test_segment = X_test.copy()
    X_test_segment["has_purchased"] = y_test
    X_test_segment["has_purchased_estim"] = y_pred
    X_test_segment['score'] = y_pred_prob

    # Four segments from the predicted probability:
    # [0, 0.25) -> '1', [0.25, 0.5) -> '2', [0.5, 0.75) -> '3', otherwise '4'.
    X_test_segment["segment"] = np.where(
        X_test_segment['score'] < 0.25, '1',
        np.where(
            X_test_segment['score'] < 0.5, '2',
            np.where(X_test_segment['score'] < 0.75, '3', '4'),
        ),
    )

    ### 1. business KPIs
    business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"]
    X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var)
    print(f"business figures for {type_of_activity} companies :\n")
    print(X_test_business_fig)
    print("\n")

    # Plot the KPI histogram and save it to Minio.
    hist_segment_business_KPIs(X_test_business_fig, "segment", "size", "nb_tickets",
                               "nb_purchases", "total_amount", "nb_campaigns",
                               type_of_activity)
    save_file_s3_mp(File_name="segments_business_KPI_", type_of_activity=type_of_activity)

    ### 2. description of marketing personae
    ## A. Spider chart
    radar_mp_plot_all(df=X_test_segment, type_of_activity=type_of_activity)
    save_file_s3_mp(File_name="spider_chart_all_", type_of_activity=type_of_activity)

    ## B. Latex table
    known_sociodemo_caracteristics(df=X_test_segment, type_of_activity=type_of_activity)