Ajout description marketing personae

2024-03-31 16:35:58 +00:00 · 2024-03-31 16:35:58 +00:00 · 8e61e9d2a4
commit 8e61e9d2a4
parent 7341752be0
5 changed files with 288 additions and 350 deletions
--- a/0_6_Segmentation.py
+++ b/0_6_Segmentation.py
@ -1,40 +0,0 @@
 import pandas as pd
 import numpy as np
 import os
 import io
 import s3fs
 import re
 import pickle
 import warnings
 # Run the helper file in this namespace so that load_test_file and load_model become
 # available. The file handle is closed explicitly once the source has been read.
 with open('utils_segmentation.py') as utils_source:
    exec(utils_source.read())
 warnings.filterwarnings('ignore')
 # Create filesystem object (the helper functions read the module-level name `fs`)
 S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
 fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})
 # choose the type of companies for which you want to run the pipeline
 type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?')
 # load test set
 dataset_test = load_test_file(type_of_activity)
 # Load Model
 model = load_model(type_of_activity, 'LogisticRegression_Benchmark')
 # Processing: explanatory variables given to the model, and observed outcome
 X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',
            'time_between_purchase', 'nb_tickets_internet',  'is_email_true', 'opt_in', #'is_partner',
            'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']]
 y_test = dataset_test[['y_has_purchased']]
 # Prediction: keep the second column of predict_proba
 # (presumably the probability of the positive class - confirm against the fitted model)
 y_pred_prob = model.predict_proba(X_test)[:, 1]
 # Add probability to dataset_test
 dataset_test['Probability_to_buy'] = y_pred_prob
 print('probability added to dataset_test')
 print(dataset_test.head())
--- a/0_6_segmentation_V2TP.py
+++ b/0_6_segmentation_V2TP.py
@ -1,99 +0,0 @@
 ### importations ###
 ### probably not necessary, as we exec the associated utils .py file, which already performs these imports
 """
 import pandas as pd
 import numpy as np
 import os
 import io
 import s3fs
 import re
 import pickle
 import warnings
 import matplotlib.pyplot as plt
 """
 ### --- beginning of the code --- ###
 ### hyperparameters of the code ###
 ###################################
 # choose the type of companies for which you want to run the pipeline
 type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?')
 # choose the model we use for the segmentation
 model_name = "LogisticRegression_Benchmark"
 ###################################
 # execute file including functions we need; the file handle is closed explicitly
 # once the source has been read
 with open('utils_segmentation_V2TP.py') as utils_source:
    exec(utils_source.read())
 warnings.filterwarnings('ignore')
 # Create filesystem object (the helper functions read the module-level name `fs`)
 S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
 fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})
 # load test set
 dataset_test = load_test_file(type_of_activity)
 # Load Model
 model = load_model(type_of_activity, model_name)
 ### Preprocessing of data
 X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max',
            'time_between_purchase', 'nb_tickets_internet',  'is_email_true', 'opt_in', #'is_partner',
            'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened', 'country_fr']]
 y_test = dataset_test[['y_has_purchased']]
 # Work on a copy: X_test is the input given to the model below, so it must keep
 # only the explanatory variables and not receive the columns added to the segment table.
 X_test_segment = X_test.copy()
 # add y_has_purchased to the segment table
 X_test_segment["has_purchased"] = y_test
 # Add prediction and probability to the segment table
 y_pred = model.predict(X_test)
 X_test_segment["has_purchased_estim"] = y_pred
 y_pred_prob = model.predict_proba(X_test)[:, 1]
 X_test_segment['score'] = y_pred_prob
 # four segments defined by the score: [0, 0.25), [0.25, 0.5), [0.5, 0.75) and 0.75 or more
 X_test_segment["segment"] = np.where(X_test_segment['score']<0.25, '1',
                   np.where(X_test_segment['score']<0.5, '2',
                   np.where(X_test_segment['score']<0.75, '3', '4')))
 ### 1. business KPIs
 business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"]
 X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var)
 # save histogram to Minio
 hist_segment_business_KPIs(X_test_business_fig, "segment", "size", "nb_tickets",
                           "nb_purchases", "total_amount", "nb_campaigns")
 save_file_s3_mp(File_name = "segments_business_KPI_", type_of_activity = type_of_activity)
 ### 2. description of marketing personae (spider chart)
 # table summarizing variables relative to marketing personae
 X_test_segment_mp = df_segment_mp(X_test_segment, "segment", "gender_female",
                                  "gender_male", "gender_other", "country_fr")
 # table relative to purchasing behaviour
 X_test_segment_pb = df_segment_pb(X_test_segment, "segment", "nb_tickets_internet", "nb_tickets",
                                  "nb_campaigns_opened", "nb_campaigns", "opt_in")
 # concatenation of tables to prepare the plot
 X_test_segment_caract = pd.concat([X_test_segment_pb, X_test_segment_mp[['share_known_gender', 'share_of_women', 'country_fr']]], axis=1)
 # visualization and save the graphic to the MinIo
 categories = list(X_test_segment_caract.drop("segment", axis=1).columns)
 radar_mp_plot_all(df=X_test_segment_caract, categories=categories)
 save_file_s3_mp(File_name = "spider_chart_all_", type_of_activity = type_of_activity)
--- a/6_Segmentation_and_Marketing_Personae.py
+++ b/6_Segmentation_and_Marketing_Personae.py
@ -0,0 +1,82 @@
 # Packages
 import pandas as pd
 import numpy as np
 import os
 import io
 import s3fs
 import re
 import pickle
 import warnings
 import matplotlib.pyplot as plt
 from tabulate import tabulate
 ###################################
 # choose the model we use for the segmentation
 model_name = "LogisticRegression_Benchmark"
 ###################################
 # execute file including functions we need; the file handle is closed explicitly
 # once the source has been read
 with open('utils_segmentation.py') as utils_source:
    exec(utils_source.read())
 warnings.filterwarnings('ignore')
 # Create filesystem object (the helper functions read the module-level name `fs`)
 S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
 fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})
 # the pipeline runs for every type of companies instead of asking the user for one
 for type_of_activity in ['musee', 'sport', 'musique'] :
    # load test set
    dataset_test = load_test_file(type_of_activity)
    # Load Model
    model = load_model(type_of_activity, model_name)
    ### Preprocessing of data
    X_test = dataset_test.drop(columns = 'y_has_purchased')
    y_test = dataset_test[['y_has_purchased']]
    # Work on a copy: X_test is the input given to the model below, so it must not
    # receive the outcome and prediction columns added to the segment table.
    X_test_segment = X_test.copy()
    # add y_has_purchased to the segment table
    X_test_segment["has_purchased"] = y_test
    # Add prediction and probability to the segment table
    y_pred = model.predict(X_test)
    X_test_segment["has_purchased_estim"] = y_pred
    y_pred_prob = model.predict_proba(X_test)[:, 1]
    X_test_segment['score'] = y_pred_prob
    # four segments defined by the score: [0, 0.25), [0.25, 0.5), [0.5, 0.75) and 0.75 or more
    X_test_segment["segment"] = np.where(X_test_segment['score']<0.25, '1',
                       np.where(X_test_segment['score']<0.5, '2',
                       np.where(X_test_segment['score']<0.75, '3', '4')))
    ### 1. business KPIs
    business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"]
    X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var)
    # save histogram to Minio
    hist_segment_business_KPIs(X_test_business_fig, "segment", "size", "nb_tickets",
                               "nb_purchases", "total_amount", "nb_campaigns", type_of_activity)
    save_file_s3_mp(File_name = "segments_business_KPI_", type_of_activity = type_of_activity)
    ### 2. description of marketing personae
    ## A. Spider chart
    radar_mp_plot_all(df = X_test_segment, type_of_activity = type_of_activity)
    save_file_s3_mp(File_name = "spider_chart_all_", type_of_activity = type_of_activity)
    ## B. Latex table
    known_sociodemo_caracteristics(df = X_test_segment, type_of_activity = type_of_activity)
--- a/utils_segmentation.py
+++ b/utils_segmentation.py
@ -1,15 +1,12 @@
-import pandas as pd
+### importations ###
 import numpy as np
 import os
 import io
 import s3fs
 import re
 import pickle
 import warnings
 ### functions for segmentation and graphics associated ###
 def load_model(type_of_activity, model):
-    BUCKET = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/"
+    BUCKET = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/standard/{type_of_activity}/{model}/"
    filename = model + '.pkl'
    file_path = BUCKET + filename
    with fs.open(file_path, mode="rb") as f:
@ -20,8 +17,207 @@ def load_model(type_of_activity, model):
 # Read the Test_set.csv file of the given type of activity through the module-level
 # filesystem object `fs` and return it as a pandas DataFrame (comma-separated file).
 def load_test_file(type_of_activity):
-    file_path_test = f"projet-bdc2324-team1/Generalization/{type_of_activity}/Test_set.csv"
+    file_path_test = f"projet-bdc2324-team1/1_Temp/1_0_Modelling_Datasets/{type_of_activity}/Test_set.csv"
    with fs.open(file_path_test, mode="rb") as file_in:
        dataset_test = pd.read_csv(file_in, sep=",")
    return dataset_test
 def save_file_s3_mp(File_name, type_of_activity):
    """Save the current matplotlib figure as a PNG file on the S3 storage, then close it.

    The file is written through the module-level filesystem object ``fs`` to
    ``projet-bdc2324-team1/2_Output/2_2_Segmentation_and_Marketing_Personae/<type_of_activity>/``
    under the name ``<File_name><type_of_activity>.png``.
    """
    # render the figure in memory first, nothing is written on the local disk
    png_buffer = io.BytesIO()
    plt.savefig(png_buffer, format='png', dpi=110)
    target_dir = f"projet-bdc2324-team1/2_Output/2_2_Segmentation_and_Marketing_Personae/{type_of_activity}/"
    target_path = target_dir + File_name + type_of_activity + '.png'
    with fs.open(target_path, 'wb') as s3_file:
        s3_file.write(png_buffer.getvalue())
    plt.close()
 def save_txt_file_s3(file_name, type_of_activity, content):
    """Write ``content`` to a text file on the S3 storage.

    The file is written through the module-level filesystem object ``fs`` to
    ``projet-bdc2324-team1/2_Output/2_2_Segmentation_and_Marketing_Personae/<type_of_activity>/``
    under the name ``<file_name><type_of_activity>.txt``.
    """
    output_path = (
        f"projet-bdc2324-team1/2_Output/2_2_Segmentation_and_Marketing_Personae/{type_of_activity}/"
        + file_name + type_of_activity + '.txt'
    )
    with fs.open(output_path, 'w') as txt_file:
        txt_file.write(content)
 def df_business_fig(df, segment, list_var) :
    """Return the share (in %) of each segment in the population and in the given KPIs.

    df : table with one row per customer
    segment : name of the column holding the segment of each customer
    list_var : list of the names of the columns to sum within each segment

    The result has one row per segment, with the columns ``segment``, ``"size"``
    (number of rows of the segment) and ``list_var``; each of these numeric columns
    is divided by its total and multiplied by 100.
    """
    grouped = df.groupby(segment)
    kpi_table = grouped[list_var].sum().reset_index()
    # number of customers per segment, placed right after the segment column
    kpi_table.insert(1, "size", grouped.size().values)
    share_columns = ["size"] + list_var
    totals = kpi_table[share_columns].sum()
    kpi_table[share_columns] = 100 * kpi_table[share_columns] / totals
    return kpi_table
 def hist_segment_business_KPIs(df, segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns, type_of_activity) :
    """Draw the stacked bar chart of the weight of each segment in the business KPIs.

    df : table with one row per segment (typically the output of df_business_fig)
    segment : name of the column holding the segment label, used in the legend
    size, nb_tickets, nb_purchases, total_amount, nb_campaigns : names of the columns
        plotted, in this order, as the five bars of the chart
    type_of_activity : type of companies, only used in the title

    The chart is drawn on a new matplotlib figure; it is neither shown nor returned.
    One layer is stacked per row of df, so the function also works when some
    segments are missing (fewer than four rows) or when the index of df is not 0..3.
    """
    plt.figure()
    df_plot = df[[segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns]]
    x = ["number of\ncustomers", "number of\ntickets", "number of\npurchases", "total\namount",
         "number of\ncampaigns"]
    # running height of the stack, one value per bar
    bottom = np.zeros(len(x))
    nb_segments = len(df_plot)
    # types of blue color, one per segment (same shades as before when there are 4 segments)
    colors = plt.cm.Blues(np.linspace(0.1, 0.9, nb_segments))
    for rank in range(nb_segments) :
        # every column after the segment label, i.e. the five values to stack
        height = list(df_plot.iloc[rank, 1:].values)
        plt.bar(x=x, height=height, label = str(df_plot[segment].iloc[rank]), bottom=bottom, color=colors[rank])
        bottom+=height
    # Adjust margins to leave room for the legend on the right
    plt.subplots_adjust(left = 0.125, right = 0.8, bottom = 0.1, top = 0.9)
    plt.legend(title = "segment", loc = "upper right", bbox_to_anchor=(1.2, 1))
    plt.ylabel("Fraction represented by the segment (%)")
    plt.title(f"Relative weight of each segment regarding business KPIs\nfor {type_of_activity} companies", size=12)
    # plt.show()
 # def df_segment_mp(df) :
 #     df_mp = df.groupby("segment")[["gender_female", "gender_male", "gender_other", "country_fr"]].mean().reset_index()
 #     df_mp.insert(3, "share_known_gender", df_mp["gender_female"]+df_mp["gender_male"])
 #     df_mp.insert(4, "share_of_women", df_mp["gender_female"]/(df_mp["share_known_gender"]))
 #     return df_mp
 # def df_segment_pb (df) :
 #     df_pb = df.groupby("segment")[["prop_purchases_internet", "taux_ouverture_mail", "opt_in"]].mean().reset_index()
 #     return df_pb
 def radar_mp_plot(df, categories, index) :
    """Draw the radar (spider) chart of one segment on a new polar figure.

    df : table with one row per segment
    categories : names of the columns of df displayed around the circle
    index : row label of the segment in df; the title shows ``index+1``

    The figure is neither shown nor returned.
    """
    segment_row = df.loc[index, categories]
    # true values, printed in parenthesis (as percentages) in the labels
    tvalues = list(segment_row)
    # true values divided by the maximum among the rows of df, category by category:
    # this puts the segment in relation with the other segments
    relative = list(segment_row / df[categories].max())
    # the relative values are then scaled by their own maximum: if this maximum is 0.8,
    # the farthest point is drawn at 8/10th of the circle radius, not at the edge
    highest = max(relative)
    scaled = [highest * elt for elt in relative]
    # one angle per category, evenly spread around the circle
    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    closed_angles = angles + angles[:1]
    # Initialize graphic
    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    # a transparent line (alpha=0) of the relative values is drawn first: it is only
    # there to adjust the radius of the circle
    ax.plot(closed_angles, relative + relative[:1], color='skyblue', alpha=0, linewidth=1.5)
    ax.plot(closed_angles, scaled + scaled[:1], color='black', alpha = 0.5, linewidth=1.2)
    # fill the sector
    ax.fill(angles, scaled, color='orange', alpha=0.4)
    # labels: name of the category followed by its true value
    ax.set_yticklabels([])
    ax.set_xticks(angles)
    ticks = [name.replace("_"," ") + f"\n({round(100 * true_value,2)}%)"
             for name, true_value in zip(categories, tvalues)]
    ax.set_xticklabels(ticks, color="black")
    ax.spines['polar'].set_visible(False)
    plt.title(f'Characteristics of the segment {index+1}\n')
    # plt.show()
 def radar_mp_plot_all(df, type_of_activity) :
    """Draw one radar (spider) chart per segment on a 2 x 2 grid of polar axes.

    df : table with one row per customer, holding the columns "segment", "gender_female",
        "gender_male", "gender_other", "country_fr", "prop_purchases_internet",
        "taux_ouverture_mail" and "opt_in"
    type_of_activity : type of companies, only used in the title of the figure

    The figure is neither shown nor returned.
    """
    by_segment = df.groupby("segment")
    # table summarizing variables relative to marketing personae
    personae = by_segment[["gender_female", "gender_male", "gender_other", "country_fr"]].mean().reset_index()
    personae["share_known_gender"] = personae["gender_female"] + personae["gender_male"]
    personae["share_of_women"] = personae["gender_female"] / personae["share_known_gender"]
    # table relative to purchasing behaviour
    behaviour = by_segment[["prop_purchases_internet", "taux_ouverture_mail", "opt_in"]].mean().reset_index()
    # concatenation of tables to prepare the plot
    df_used = pd.concat([behaviour, personae[['share_known_gender', 'share_of_women', 'country_fr']]], axis=1)
    categories = [column for column in df_used.columns if column != "segment"]
    # maximum of each category among the segments, and one angle per category
    max_values = df_used[categories].max()
    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    closed_angles = angles + angles[:1]
    # Initialize graphic
    fig, ax = plt.subplots(2,2, figsize=(25, 20), subplot_kw=dict(polar=True))
    for index in range(df_used.shape[0]) :
        # integer division gives the row of the grid, the remainder gives the column
        row, col = divmod(index, 2)
        panel = ax[row, col]
        segment_row = df_used.loc[index, categories]
        # true values, printed in parenthesis (as percentages) in the labels
        tvalues = list(segment_row)
        # true values divided by the maximum among the segments: this puts the segment
        # in relation with the other segments
        relative = list(segment_row / max_values)
        # the relative values are then scaled by their own maximum: if this maximum is 0.8,
        # the farthest point is drawn at 8/10th of the circle radius, not at the edge
        highest = max(relative)
        scaled = [highest * elt for elt in relative]
        # a transparent line (alpha=0) of the relative values is drawn first: it is only
        # there to adjust the radius of the circle
        panel.plot(closed_angles, relative + relative[:1], color='skyblue', alpha=0, linewidth=1.5)
        panel.plot(closed_angles, scaled + scaled[:1], color='black', alpha = 0.5,
                   linewidth=1.2)
        # fill the sector
        panel.fill(angles, scaled, color='orange', alpha=0.4, label = index)
        # labels: name of the category followed by its true value
        panel.set_yticklabels([])
        panel.set_xticks(angles)
        ticks = [name.replace("_"," ") + f"\n({round(100 * true_value,2)}%)"
                 for name, true_value in zip(categories, tvalues)]
        panel.set_xticklabels(ticks, color="black", size = 20)
        panel.spines['polar'].set_visible(False)
        panel.set_title(f'Segment {index+1}\n', size = 24)
    fig.suptitle(f"Characteristics of marketing personae of {type_of_activity} companies", size=32)
    # plt.show()
 def known_sociodemo_caracteristics(df, type_of_activity) :
    """Build a LaTeX table of the availability of socio-demographic data per segment and save it.

    df : table with one row per customer, holding the columns "segment",
        "is_profession_known", "is_zipcode_known", "categorie_age_inconnue" and "gender_other"
    type_of_activity : type of companies, passed to save_txt_file_s3 to build the output path

    Nothing is returned: the table is saved with save_txt_file_s3 under the file name
    "table_known_socio_demo_caracteristics".
    """
    indicator_columns = ["is_profession_known", "is_zipcode_known", "categorie_age_inconnue", "gender_other"]
    # mean of each indicator within each segment, multiplied by 100
    # (presumably 0/1 flags, which makes these means percentages - confirm against the dataset)
    shares = df.groupby("segment")[indicator_columns].mean().mul(100).reset_index()
    shares.columns = ['Segment', 'Share of Known Profession (%)', 'Share of Known Zipcode (%)', 'Share of Unknown Age (%)', 'Share of Unknown Gender (%)']
    # reshape so that the segments become the columns of the table
    shares = shares.pivot_table(index=None, columns='Segment')
    # round the values to one decimal
    shares_rounded = shares.round(1)
    # convert to LaTeX; the '%' signs are escaped afterwards
    latex_table = tabulate(shares_rounded, headers='keys', tablefmt='latex_raw', floatfmt=".1f").replace('%', '\\%')
    save_txt_file_s3("table_known_socio_demo_caracteristics", type_of_activity, latex_table)
--- a/utils_segmentation_V2TP.py
+++ b/utils_segmentation_V2TP.py
@ -1,201 +0,0 @@
 ### importations ###
 import pandas as pd
 import numpy as np
 import os
 import io
 import s3fs
 import re
 import pickle
 import warnings
 import matplotlib.pyplot as plt
 ### functions for segmentation and graphics associated ###
 def load_model(type_of_activity, model):
    """Load a pickled model from the S3 storage and return it.

    type_of_activity : type of companies, used to build the path
    model : name of the model; the file read is
        ``projet-bdc2324-team1/Output_model/<type_of_activity>/<model>/<model>.pkl``

    The file is read through the module-level filesystem object ``fs``.
    """
    pickle_path = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/" + model + '.pkl'
    with fs.open(pickle_path, mode="rb") as pickle_file:
        raw_bytes = pickle_file.read()
    # NOTE(review): unpickling executes code stored in the file - only load trusted objects
    return pickle.loads(raw_bytes)
 def load_test_file(type_of_activity):
    """Read the test set of the given type of companies from the S3 storage.

    The file ``projet-bdc2324-team1/Generalization/<type_of_activity>/Test_set.csv`` is
    opened through the module-level filesystem object ``fs`` and returned as a
    pandas DataFrame (comma-separated file).
    """
    csv_path = f"projet-bdc2324-team1/Generalization/{type_of_activity}/Test_set.csv"
    with fs.open(csv_path, mode="rb") as csv_file:
        return pd.read_csv(csv_file, sep=",")
 def save_file_s3_mp(File_name, type_of_activity):
    """Save the current matplotlib figure as a PNG file on the S3 storage, then close it.

    The file is written through the module-level filesystem object ``fs`` to
    ``projet-bdc2324-team1/Output_marketing_personae_analysis/<type_of_activity>/``
    under the name ``<File_name><type_of_activity>.png``.
    """
    # render the figure in memory first, nothing is written on the local disk
    png_buffer = io.BytesIO()
    plt.savefig(png_buffer, format='png', dpi=110)
    target_dir = f"projet-bdc2324-team1/Output_marketing_personae_analysis/{type_of_activity}/"
    target_path = target_dir + File_name + type_of_activity + '.png'
    with fs.open(target_path, 'wb') as s3_file:
        s3_file.write(png_buffer.getvalue())
    plt.close()
 def df_business_fig(df, segment, list_var) :
    """Return the share (in %) of each segment in the population and in the given KPIs.

    df : table with one row per customer
    segment : name of the column holding the segment of each customer
    list_var : list of the names of the columns to sum within each segment

    The result has one row per segment, with the columns ``segment``, ``"size"``
    (number of rows of the segment) and ``list_var``; each of these numeric columns
    is divided by its total and multiplied by 100.
    """
    kpi_sums = df.groupby(segment)[list_var].sum().reset_index()
    # number of customers per segment, placed right after the segment column
    segment_sizes = df.groupby(segment).size().values
    kpi_sums.insert(1, "size", segment_sizes)
    share_columns = ["size"] + list_var
    column_totals = kpi_sums[share_columns].sum()
    kpi_sums[share_columns] = 100 * kpi_sums[share_columns] / column_totals
    return kpi_sums
 def hist_segment_business_KPIs(df, segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns, type_of_activity=None) :
    """Draw the stacked bar chart of the weight of each segment in the business KPIs.

    df : table with one row per segment (typically the output of df_business_fig)
    segment : name of the column holding the segment label, used in the legend
    size, nb_tickets, nb_purchases, total_amount, nb_campaigns : names of the columns
        plotted, in this order, as the five bars of the chart
    type_of_activity : type of companies, only used in the title. When omitted, the
        module-level variable of the same name is used, as earlier versions did.

    The chart is drawn on a new matplotlib figure; it is neither shown nor returned.
    One layer is stacked per row of df, so the function also works when some
    segments are missing (fewer than four rows) or when the index of df is not 0..3.
    Raises NameError if type_of_activity is neither given nor defined at module level.
    """
    if type_of_activity is None :
        # backward compatibility: the title used to read a global set by the driver script
        try :
            type_of_activity = globals()["type_of_activity"]
        except KeyError :
            raise NameError("name 'type_of_activity' is not defined") from None
    plt.figure()
    df_plot = df[[segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns]]
    x = ["number of\ncustomers", "number of\ntickets", "number of\npurchases", "total\namount",
         "number of\ncampaigns"]
    # running height of the stack, one value per bar
    bottom = np.zeros(len(x))
    nb_segments = len(df_plot)
    # types of blue color, one per segment (same shades as before when there are 4 segments)
    colors = plt.cm.Blues(np.linspace(0.1, 0.9, nb_segments))
    for rank in range(nb_segments) :
        # every column after the segment label, i.e. the five values to stack
        height = list(df_plot.iloc[rank, 1:].values)
        plt.bar(x=x, height=height, label = str(df_plot[segment].iloc[rank]), bottom=bottom, color=colors[rank])
        bottom+=height
    # Adjust margins to leave room for the legend on the right
    plt.subplots_adjust(left = 0.125, right = 0.8, bottom = 0.1, top = 0.9)
    plt.legend(title = "segment", loc = "upper right", bbox_to_anchor=(1.2, 1))
    plt.ylabel("Fraction represented by the segment (%)")
    plt.title(f"Relative weight of each segment regarding business KPIs\nfor {type_of_activity} companies", size=12)
    # plt.show()
 def df_segment_mp(df, segment, gender_female, gender_male, gender_other, country_fr) :
    """Return the table of the variables relative to marketing personae, per segment.

    df : table with one row per customer
    segment : name of the column holding the segment of each customer
    gender_female, gender_male, gender_other, country_fr : names of the columns
        averaged within each segment

    The result has one row per segment. Two columns are added to the means:
    "share_known_gender" (mean of gender_female + mean of gender_male) and
    "share_of_women" (mean of gender_female divided by "share_known_gender").
    """
    averaged_columns = [gender_female, gender_male, gender_other, country_fr]
    personae = df.groupby(segment)[averaged_columns].mean().reset_index()
    known_gender = personae[gender_female] + personae[gender_male]
    personae.insert(3, "share_known_gender", known_gender)
    women_share = personae[gender_female] / personae["share_known_gender"]
    personae.insert(4, "share_of_women", women_share)
    return personae
 def df_segment_pb (df, segment, nb_tickets_internet, nb_tickets, nb_campaigns_opened, nb_campaigns, opt_in) :
    """Return the table of the variables relative to purchasing behaviour, per segment.

    df : table with one row per customer; it is left unchanged
    segment : name of the column holding the segment of each customer
    nb_tickets_internet, nb_tickets : names of the columns whose ratio gives
        "share_tickets_internet" for each customer
    nb_campaigns_opened, nb_campaigns : names of the columns whose ratio gives
        "share_campaigns_opened" for each customer
    opt_in : name of the opt-in column

    The result has one row per segment, with the mean of "share_tickets_internet",
    "share_campaigns_opened" and opt_in within the segment.
    """
    # assign returns a new table: the two ratios are not added to the table of the caller
    df_used = df.assign(
        share_tickets_internet=df[nb_tickets_internet]/df[nb_tickets],
        share_campaigns_opened=df[nb_campaigns_opened]/df[nb_campaigns],
    )
    df_pb = df_used.groupby(segment)[["share_tickets_internet", "share_campaigns_opened", opt_in]].mean().reset_index()
    return df_pb
 def radar_mp_plot(df, categories, index) :
    """Draw the radar (spider) chart of one segment on a new polar figure.

    df : table with one row per segment
    categories : names of the columns of df displayed around the circle
    index : row label of the segment in df; the title shows ``index+1``

    The figure is neither shown nor returned.
    """
    segment_row = df.loc[index, categories]
    # true values, printed in parenthesis (as percentages) in the labels
    tvalues = list(segment_row)
    # true values divided by the maximum among the rows of df, category by category:
    # this puts the segment in relation with the other segments
    relative = list(segment_row / df[categories].max())
    # the relative values are then scaled by their own maximum: if this maximum is 0.8,
    # the farthest point is drawn at 8/10th of the circle radius, not at the edge
    highest = max(relative)
    scaled = [highest * elt for elt in relative]
    # one angle per category, evenly spread around the circle
    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    closed_angles = angles + angles[:1]
    # Initialize graphic
    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    # a transparent line (alpha=0) of the relative values is drawn first: it is only
    # there to adjust the radius of the circle
    ax.plot(closed_angles, relative + relative[:1], color='skyblue', alpha=0, linewidth=1.5)
    ax.plot(closed_angles, scaled + scaled[:1], color='black', alpha = 0.5, linewidth=1.2)
    # fill the sector
    ax.fill(angles, scaled, color='orange', alpha=0.4)
    # labels: name of the category followed by its true value
    ax.set_yticklabels([])
    ax.set_xticks(angles)
    ticks = [name.replace("_"," ") + f"\n({round(100 * true_value,2)}%)"
             for name, true_value in zip(categories, tvalues)]
    ax.set_xticklabels(ticks, color="black")
    ax.spines['polar'].set_visible(False)
    plt.title(f'Characteristics of the segment {index+1}\n')
    # plt.show()
 def radar_mp_plot_all(df, categories, type_of_activity=None) :
    """Draw one radar (spider) chart per segment on a 2 x 2 grid of polar axes.

    df : table with one row per segment, indexed 0, 1, 2, ...
    categories : names of the columns of df displayed around each circle
    type_of_activity : type of companies, only used in the title of the figure. When
        omitted, the module-level variable of the same name is used, as earlier versions did.

    The figure is neither shown nor returned.
    Raises NameError if type_of_activity is neither given nor defined at module level.
    """
    if type_of_activity is None :
        # backward compatibility: the title used to read a global set by the driver script
        try :
            type_of_activity = globals()["type_of_activity"]
        except KeyError :
            raise NameError("name 'type_of_activity' is not defined") from None
    nb_segments = df.shape[0]
    # these do not depend on the segment, so they are computed once before the loop:
    # maximum of each category among the segments, and one angle per category
    max_values = df[categories].max()
    num_categories = len(categories)
    angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()
    closed_angles = angles + angles[:1]
    # Initialize graphic
    fig, ax = plt.subplots(2,2, figsize=(25, 20), subplot_kw=dict(polar=True))
    for index in range(nb_segments) :
        # integer division gives the row of the grid, the remainder gives the column
        row, col = divmod(index, 2)
        panel = ax[row, col]
        # true values are used to print the true value in parenthesis
        tvalues = list(df.loc[index,categories])
        # values are true values / max among the segments, allows to
        # put values in relation with the values for other segments
        values = list(df.loc[index,categories]/max_values)
        # values normalized are used to adjust the value around the circle
        # for instance if the maximum of values is equal to 0.8, we want the point to be
        # at 8/10th of the circle radius, not at the edge
        highest = max(values)
        values_normalized = [highest * elt for elt in values]
        # we have to draw first a transparent line (alpha=0) of values to adjust the radius of the circle
        # which is based on max(value)
        panel.plot(closed_angles, values + values[:1], color='skyblue', alpha=0, linewidth=1.5)
        panel.plot(closed_angles, values_normalized + values_normalized[:1], color='black', alpha = 0.5,
                   linewidth=1.2)
        # fill the sector
        panel.fill(angles, values_normalized, color='orange', alpha=0.4, label = index)
        # labels: name of the category followed by its true value
        panel.set_yticklabels([])
        panel.set_xticks(angles)
        ticks = [name.replace("_"," ") + f"\n({round(100 * true_value,2)}%)"
                 for name, true_value in zip(categories, tvalues)]
        panel.set_xticklabels(ticks, color="black", size = 20)
        panel.spines['polar'].set_visible(False)
        panel.set_title(f'Segment {index+1}\n', size = 24)
    fig.suptitle(f"Characteristics of marketing personae of {type_of_activity} companies", size=32)
    # plt.show()