"""Segment test-set customers by predicted purchase score and describe the segments.

For each activity type in turn, the script loads the test set and the model,
scores every row, bins the scores into four segments and then calls the
helpers from ``utils_segmentation.py`` to produce the business KPIs, the
spider chart and the socio-demographic table.
"""

# Packages
# NOTE: several of these imports are not referenced directly below; they are
# kept because the helpers exec'd from utils_segmentation.py run in this
# module's namespace and may rely on them.
import io
import os
import pickle
import re
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import s3fs
from tabulate import tabulate

###################################

# choose the model we use for the segmentation
# model_name = "LogisticRegression_Benchmark"
model_name = "LogisticRegression_cv"

###################################

# execute file including functions we need
# (the context manager closes the file handle once its content has been read)
with open('utils_segmentation.py') as utils_file:
    exec(utils_file.read())

warnings.filterwarnings('ignore')

# Create filesystem object
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})

# choose the type of companies for which you want to run the pipeline
# (the loop below runs every type; an interactive prompt for a single type
# among sport / musique / musee was previously used instead)
for type_of_activity in ['musee', 'sport', 'musique']:

    # load test set
    dataset_test = load_test_file(type_of_activity)

    # Load Model
    model = load_model(type_of_activity, model_name)

    ### Preprocessing of data
    X_test = dataset_test.drop(columns='y_has_purchased')
    y_test = dataset_test[['y_has_purchased']]

    # Work on an independent copy: binding X_test_segment to X_test itself
    # would add the label and prediction columns to the frame that is handed
    # to the model a few lines below.
    X_test_segment = X_test.copy()

    # add y_has_purchased to X_test_segment
    X_test_segment["has_purchased"] = y_test

    # Add prediction and probability to X_test_segment
    y_pred = model.predict(X_test)
    X_test_segment["has_purchased_estim"] = y_pred

    # keep the second column of predict_proba as the score
    y_pred_prob = model.predict_proba(X_test)[:, 1]
    X_test_segment['score'] = y_pred_prob

    # four segments with score cut-offs at 0.25, 0.5 and 0.75
    X_test_segment["segment"] = np.where(
        X_test_segment['score'] < 0.25, '1',
        np.where(
            X_test_segment['score'] < 0.5, '2',
            np.where(X_test_segment['score'] < 0.75, '3', '4'),
        ),
    )

    ### 1. business KPIs
    business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"]
    X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var)
    print(f"business figures for {type_of_activity} companies :\n")
    print(X_test_business_fig)
    print("\n")

    # save histogram to Minio
    hist_segment_business_KPIs(
        X_test_business_fig, "segment", "size", "nb_tickets",
        "nb_purchases", "total_amount", "nb_campaigns", type_of_activity,
    )
    save_file_s3_mp(File_name="segments_business_KPI_", type_of_activity=type_of_activity)

    ### 2. description of marketing personae

    ## A. Spider chart
    radar_mp_plot_all(df=X_test_segment, type_of_activity=type_of_activity)
    save_file_s3_mp(File_name="spider_chart_all_", type_of_activity=type_of_activity)

    ## B. Latex table
    known_sociodemo_caracteristics(df=X_test_segment, type_of_activity=type_of_activity)