# BDC-team-1/6_Segmentation_and_Marketing_Personae.py


# Packages
import pandas as pd
import numpy as np
import os
import io
import s3fs
import re
import pickle
import warnings
import matplotlib.pyplot as plt
from tabulate import tabulate

###################################

# Name of the model used for the segmentation. It is passed to load_model()
# for every type of activity processed further down in this script.
# To switch models, change the value below (alternative kept for reference):
# model_name = "LogisticRegression_Benchmark"
model_name = "LogisticRegression_cv"

###################################


# Execute the helper file so that its definitions land in this script's
# namespace (presumably load_test_file, load_model, df_business_fig, ... used
# below -- confirm against utils_segmentation.py).
# The file is opened in a `with` block so the handle is closed deterministically.
# NOTE(review): exec() runs arbitrary code; this is only acceptable as long as
# utils_segmentation.py is trusted project code.
with open('utils_segmentation.py') as utils_file:
    exec(utils_file.read())

# Silence all warnings for the remainder of the run
warnings.filterwarnings('ignore')

# Build the S3 filesystem handle; the endpoint host is read from the
# AWS_S3_ENDPOINT environment variable (a KeyError is raised if it is unset)
S3_ENDPOINT_URL = f"https://{os.environ['AWS_S3_ENDPOINT']}"
fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))


# run the pipeline for each type of company in turn (the interactive choice below is disabled)
# type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?')
for type_of_activity in ['musee', 'sport', 'musique']:

    # load test set
    dataset_test = load_test_file(type_of_activity)

    # Load Model
    model = load_model(type_of_activity, model_name)

    ### Preprocessing of data
    # Model input: every column of the test set except the target
    X_test = dataset_test.drop(columns='y_has_purchased')

    y_test = dataset_test[['y_has_purchased']]

    # Work on an independent copy: the columns added below (observed target,
    # prediction, score, segment) must never end up in X_test, otherwise the
    # model would be fed the target it is supposed to predict.
    X_test_segment = X_test.copy()

    # add y_has_purchased to the segmentation frame
    X_test_segment["has_purchased"] = y_test

    # Add prediction and probability to the segmentation frame
    y_pred = model.predict(X_test)
    X_test_segment["has_purchased_estim"] = y_pred

    # keep the second column of predict_proba as the score
    y_pred_prob = model.predict_proba(X_test)[:, 1]
    X_test_segment['score'] = y_pred_prob

    # Split the score into four segments: <0.25 -> '1', <0.5 -> '2',
    # <0.75 -> '3', anything else -> '4'. The first matching condition wins.
    score = X_test_segment['score']
    X_test_segment["segment"] = np.select(
        [score < 0.25, score < 0.5, score < 0.75],
        ['1', '2', '3'],
        default='4',
    )

    ### 1. business KPIs

    business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"]
    X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var)
    print(f"business figures for {type_of_activity} companies :\n")
    print(X_test_business_fig)
    print("\n")

    # save histogram to Minio
    hist_segment_business_KPIs(X_test_business_fig, "segment", "size", "nb_tickets",
                               "nb_purchases", "total_amount", "nb_campaigns", type_of_activity)
    save_file_s3_mp(File_name="segments_business_KPI_", type_of_activity=type_of_activity)

    ### 2. description of marketing personae
    ## A. Spider chart
    radar_mp_plot_all(df=X_test_segment, type_of_activity=type_of_activity)
    save_file_s3_mp(File_name="spider_chart_all_", type_of_activity=type_of_activity)

    ## B. Latex table
    # NOTE(review): helper defined outside this file; presumably it writes the
    # table itself since nothing is returned or saved here -- confirm.
    known_sociodemo_caracteristics(df=X_test_segment, type_of_activity=type_of_activity)