### Segmentation analysis script ###
#
# Loads a test set and a fitted model, scores every row, buckets the rows
# into four score-based segments, then produces two figures (business KPIs
# histogram and marketing-personae spider chart) that are saved to S3/MinIO.
#
# NOTE: no explicit imports here. The names used below (os, warnings, np, pd,
# s3fs, ...) as well as the helper functions (load_test_file, load_model,
# df_business_fig, hist_segment_business_KPIs, save_file_s3_mp, df_segment_mp,
# df_segment_pb, radar_mp_plot_all) are expected to be brought into scope by
# the utils file executed below. Imports the utils file is presumed to
# provide (to be confirmed against that file):
#   import pandas as pd
#   import numpy as np
#   import os
#   import io
#   import s3fs
#   import re
#   import pickle
#   import warnings
#   import matplotlib.pyplot as plt

### --- beginning of the code --- ###

### hyperparameters of the code ###

###################################

# choose the type of companies for which you want to run the pipeline
type_of_activity = "sport"

# choose the model we use for the segmentation
model_name = "LogisticRegression_Benchmark"

###################################

# execute file including functions we need
# (the context manager guarantees the file handle is closed after reading)
with open('utils_segmentation_V2TP.py') as utils_file:
    exec(utils_file.read())

warnings.filterwarnings('ignore')

# Create filesystem object
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})

# load test set
dataset_test = load_test_file(type_of_activity)

# Load Model
model = load_model(type_of_activity, model_name)

### Preprocessing of data
X_test = dataset_test[[
    'nb_tickets',
    'nb_purchases',
    'total_amount',
    'nb_suppliers',
    'vente_internet_max',
    'purchase_date_min',
    'purchase_date_max',
    'time_between_purchase',
    'nb_tickets_internet',
    'is_email_true',
    'opt_in',
    # 'is_partner',
    'gender_female',
    'gender_male',
    'gender_other',
    'nb_campaigns',
    'nb_campaigns_opened',
    'country_fr',
]]

y_test = dataset_test[['y_has_purchased']]

# Score the untouched feature matrix first, so that the model never sees the
# label or the prediction columns that are added to the segment table below.
y_pred = model.predict(X_test)
y_pred_prob = model.predict_proba(X_test)[:, 1]

# Work on an independent copy: annotating an alias of X_test would add the
# extra columns to the model input as well.
X_test_segment = X_test.copy()

# add y_has_purchased to the segment table
X_test_segment["has_purchased"] = y_test

# Add prediction and probability to the segment table
X_test_segment["has_purchased_estim"] = y_pred
X_test_segment['score'] = y_pred_prob

# Four segments from the predicted probability:
# '1' if score < 0.25, '2' if < 0.5, '3' if < 0.75, otherwise '4'.
X_test_segment["segment"] = np.where(
    X_test_segment['score'] < 0.25, '1',
    np.where(
        X_test_segment['score'] < 0.5, '2',
        np.where(X_test_segment['score'] < 0.75, '3', '4'),
    ),
)

### 1. business KPIs

business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"]
X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var)

# save histogram to Minio
hist_segment_business_KPIs(
    X_test_business_fig,
    "segment",
    "size",
    "nb_tickets",
    "nb_purchases",
    "total_amount",
    "nb_campaigns",
)
save_file_s3_mp(File_name="segments_business_KPIs_", type_of_activity=type_of_activity)

### 2. description of marketing personae (spider chart)

# table summarizing variables relative to marketing personae
X_test_segment_mp = df_segment_mp(
    X_test_segment,
    "segment",
    "gender_female",
    "gender_male",
    "gender_other",
    "country_fr",
)

# table relative to purchasing behaviour
X_test_segment_pb = df_segment_pb(
    X_test_segment,
    "segment",
    "nb_tickets_internet",
    "nb_tickets",
    "nb_campaigns_opened",
    "nb_campaigns",
    "opt_in",
)

# concatenation of tables to prepare the plot
X_test_segment_caract = pd.concat(
    [
        X_test_segment_pb,
        X_test_segment_mp[['share_known_gender', 'share_of_women', 'country_fr']],
    ],
    axis=1,
)

# visualization and save the graphic to the MinIo
categories = list(X_test_segment_caract.drop("segment", axis=1).columns)
radar_mp_plot_all(df=X_test_segment_caract, categories=categories)
save_file_s3_mp(File_name="spider_chart_all_", type_of_activity=type_of_activity)