From 8e61e9d2a4298f28ed8fcffb6622939865319535 Mon Sep 17 00:00:00 2001 From: ajoubrel-ensae Date: Sun, 31 Mar 2024 16:35:58 +0000 Subject: [PATCH] Ajout description marketing personae --- 0_6_Segmentation.py | 40 ----- 0_6_segmentation_V2TP.py | 99 ----------- 6_Segmentation_and_Marketing_Personae.py | 82 +++++++++ utils_segmentation.py | 216 +++++++++++++++++++++-- utils_segmentation_V2TP.py | 201 --------------------- 5 files changed, 288 insertions(+), 350 deletions(-) delete mode 100644 0_6_Segmentation.py delete mode 100644 0_6_segmentation_V2TP.py create mode 100644 6_Segmentation_and_Marketing_Personae.py delete mode 100644 utils_segmentation_V2TP.py diff --git a/0_6_Segmentation.py b/0_6_Segmentation.py deleted file mode 100644 index 7331442..0000000 --- a/0_6_Segmentation.py +++ /dev/null @@ -1,40 +0,0 @@ -import pandas as pd -import numpy as np -import os -import io -import s3fs -import re -import pickle -import warnings - - -exec(open('utils_segmentation.py').read()) -warnings.filterwarnings('ignore') - -# Create filesystem object -S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"] -fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL}) - -# choose the type of companies for which you want to run the pipeline -type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?') - -# load test set -dataset_test = load_test_file(type_of_activity) - -# Load Model -model = load_model(type_of_activity, 'LogisticRegression_Benchmark') - -# Processing -X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', - 'time_between_purchase', 'nb_tickets_internet', 'is_email_true', 'opt_in', #'is_partner', - 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']] - -y_test = dataset_test[['y_has_purchased']] - -# Prediction -y_pred_prob = model.predict_proba(X_test)[:, 1] - -# Add probability to dataset_test -dataset_test['Probability_to_buy'] = y_pred_prob -print('probability added to dataset_test') -print(dataset_test.head()) \ No newline at end of file diff --git a/0_6_segmentation_V2TP.py b/0_6_segmentation_V2TP.py deleted file mode 100644 index 8458e39..0000000 --- a/0_6_segmentation_V2TP.py +++ /dev/null @@ -1,99 +0,0 @@ -### importations ### -### not necesary ?? As we exec the utils .py file associated - -""" -import pandas as pd -import numpy as np -import os -import io -import s3fs -import re -import pickle -import warnings -import matplotlib.pyplot as plt -""" - -### --- beginning of the code --- ### - - -### hyperparameters of the code ### - -################################### - -# choose the type of companies for which you want to run the pipeline -type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?') - -# choose the model we use for the segmentation -model_name = "LogisticRegression_Benchmark" - -################################### - - -# execute file including functions we need -exec(open('utils_segmentation_V2TP.py').read()) - -warnings.filterwarnings('ignore') - -# Create filesystem object -S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"] -fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL}) - -# load test set -dataset_test = load_test_file(type_of_activity) - -# Load Model -model = load_model(type_of_activity, model_name) - - -### Preprocessing of data -X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', - 'time_between_purchase', 'nb_tickets_internet', 'is_email_true', 'opt_in', #'is_partner', - 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened', 'country_fr']] - -y_test = dataset_test[['y_has_purchased']] - -X_test_segment = X_test - -# add y_has_purchased to X_test -X_test_segment["has_purchased"] = y_test - -# Add prediction and probability to dataset_test -y_pred = model.predict(X_test) -X_test_segment["has_purchased_estim"] = y_pred - -y_pred_prob = model.predict_proba(X_test)[:, 1] -X_test_segment['score'] = y_pred_prob - -X_test_segment["segment"] = np.where(X_test_segment['score']<0.25, '1', - np.where(X_test_segment['score']<0.5, '2', - np.where(X_test_segment['score']<0.75, '3', '4'))) - -### 1. business KPIs - -business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"] -X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var) - -# save histogram to Minio -hist_segment_business_KPIs(X_test_business_fig, "segment", "size", "nb_tickets", - "nb_purchases", "total_amount", "nb_campaigns") -save_file_s3_mp(File_name = "segments_business_KPI_", type_of_activity = type_of_activity) - - -### 2. description of marketing personae (spider chart) - -# table summarizing variables relative to marketing personae -X_test_segment_mp = df_segment_mp(X_test_segment, "segment", "gender_female", - "gender_male", "gender_other", "country_fr") - -# table relative to purchasing behaviour -X_test_segment_pb = df_segment_pb(X_test_segment, "segment", "nb_tickets_internet", "nb_tickets", - "nb_campaigns_opened", "nb_campaigns", "opt_in") - -# concatenation of tables to prepare the plot -X_test_segment_caract = pd.concat([X_test_segment_pb, X_test_segment_mp[['share_known_gender', 'share_of_women', 'country_fr']]], axis=1) - -# visualization and save the graphic to the MinIo -categories = list(X_test_segment_caract.drop("segment", axis=1).columns) -radar_mp_plot_all(df=X_test_segment_caract, categories=categories) -save_file_s3_mp(File_name = "spider_chart_all_", type_of_activity = type_of_activity) - diff --git a/6_Segmentation_and_Marketing_Personae.py b/6_Segmentation_and_Marketing_Personae.py new file mode 100644 index 0000000..ad3c4d6 --- /dev/null +++ b/6_Segmentation_and_Marketing_Personae.py @@ -0,0 +1,82 @@ + +# Packages +import pandas as pd +import numpy as np +import os +import io +import s3fs +import re +import pickle +import warnings +import matplotlib.pyplot as plt +from tabulate import tabulate + +################################### + +# choose the model we use for the segmentation +model_name = "LogisticRegression_Benchmark" + +################################### + + +# execute file including functions we need +exec(open('utils_segmentation.py').read()) + +warnings.filterwarnings('ignore') + +# Create filesystem object +S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"] +fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL}) + + +# choose the type of companies for which you want to run the pipeline +# type_of_activity = input('Choisissez le type de compagnie : sport ? musique ? musee ?') +for type_of_activity in ['musee', 'sport', 'musique'] : + + + # load test set + dataset_test = load_test_file(type_of_activity) + + # Load Model + model = load_model(type_of_activity, model_name) + + + ### Preprocessing of data + X_test = dataset_test.drop(columns = 'y_has_purchased') + + y_test = dataset_test[['y_has_purchased']] + + X_test_segment = X_test + + # add y_has_purchased to X_test + X_test_segment["has_purchased"] = y_test + + # Add prediction and probability to dataset_test + y_pred = model.predict(X_test) + X_test_segment["has_purchased_estim"] = y_pred + + y_pred_prob = model.predict_proba(X_test)[:, 1] + X_test_segment['score'] = y_pred_prob + + X_test_segment["segment"] = np.where(X_test_segment['score']<0.25, '1', + np.where(X_test_segment['score']<0.5, '2', + np.where(X_test_segment['score']<0.75, '3', '4'))) + + ### 1. business KPIs + + business_var = ["nb_tickets", "nb_purchases", "total_amount", "nb_campaigns"] + X_test_business_fig = df_business_fig(X_test_segment, "segment", business_var) + + # save histogram to Minio + hist_segment_business_KPIs(X_test_business_fig, "segment", "size", "nb_tickets", + "nb_purchases", "total_amount", "nb_campaigns", type_of_activity) + save_file_s3_mp(File_name = "segments_business_KPI_", type_of_activity = type_of_activity) + + + ### 2. description of marketing personae + ## A. Spider chart + radar_mp_plot_all(df = X_test_segment, type_of_activity = type_of_activity) + save_file_s3_mp(File_name = "spider_chart_all_", type_of_activity = type_of_activity) + + ## B. Latex table + known_sociodemo_caracteristics(df = X_test_segment, type_of_activity = type_of_activity) diff --git a/utils_segmentation.py b/utils_segmentation.py index 42f3afb..54a89b5 100644 --- a/utils_segmentation.py +++ b/utils_segmentation.py @@ -1,15 +1,12 @@ -import pandas as pd -import numpy as np -import os -import io -import s3fs -import re -import pickle -import warnings +### importations ### + + +### functions for segmentation and graphics associated ### + def load_model(type_of_activity, model): - BUCKET = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/" + BUCKET = f"projet-bdc2324-team1/2_Output/2_1_Modeling_results/standard/{type_of_activity}/{model}/" filename = model + '.pkl' file_path = BUCKET + filename with fs.open(file_path, mode="rb") as f: @@ -20,8 +17,207 @@ def load_model(type_of_activity, model): def load_test_file(type_of_activity): - file_path_test = f"projet-bdc2324-team1/Generalization/{type_of_activity}/Test_set.csv" + file_path_test = f"projet-bdc2324-team1/1_Temp/1_0_Modelling_Datasets/{type_of_activity}/Test_set.csv" with fs.open(file_path_test, mode="rb") as file_in: dataset_test = pd.read_csv(file_in, sep=",") return dataset_test + +def save_file_s3_mp(File_name, type_of_activity): + image_buffer = io.BytesIO() + plt.savefig(image_buffer, format='png', dpi=110) + image_buffer.seek(0) + PATH = f"projet-bdc2324-team1/2_Output/2_2_Segmentation_and_Marketing_Personae/{type_of_activity}/" + FILE_PATH_OUT_S3 = PATH + File_name + type_of_activity + '.png' + with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file: + s3_file.write(image_buffer.read()) + plt.close() + +def save_txt_file_s3(file_name, type_of_activity, content): + FILE_PATH = f"projet-bdc2324-team1/2_Output/2_2_Segmentation_and_Marketing_Personae/{type_of_activity}/" + FILE_PATH_OUT_S3 = FILE_PATH + file_name + type_of_activity + '.txt' + with fs.open(FILE_PATH_OUT_S3, 'w') as s3_file: + s3_file.write(content) + +def df_business_fig(df, segment, list_var) : + df_business_kpi = df.groupby(segment)[list_var].sum().reset_index() + df_business_kpi.insert(1, "size", df.groupby(segment).size().values) + all_var = ["size"] + list_var + df_business_kpi[all_var] = 100 * df_business_kpi[all_var] / df_business_kpi[all_var].sum() + + return df_business_kpi + + +def hist_segment_business_KPIs(df, segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns, type_of_activity) : + + plt.figure() + + df_plot = df[[segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns]] + + x = ["number of\ncustomers", "number of\ntickets", "number of\npurchases", "total\namount", + "number of\ncampaigns"] + + bottom = np.zeros(5) + + # types of blue color + colors = plt.cm.Blues(np.linspace(0.1, 0.9, 4)) + + for i in range(4) : + height = list(df_plot.loc[i,size:].values) + plt.bar(x=x, height=height, label = str(df_plot[segment][i]), bottom=bottom, color=colors[i]) + bottom+=height + + # Ajust margins + plt.subplots_adjust(left = 0.125, right = 0.8, bottom = 0.1, top = 0.9) + + plt.legend(title = "segment", loc = "upper right", bbox_to_anchor=(1.2, 1)) + plt.ylabel("Fraction represented by the segment (%)") + plt.title(f"Relative weight of each segment regarding business KPIs\nfor {type_of_activity} companies", size=12) + # plt.show() + + +# def df_segment_mp(df) : +# df_mp = df.groupby("segment")[["gender_female", "gender_male", "gender_other", "country_fr"]].mean().reset_index() +# df_mp.insert(3, "share_known_gender", df_mp["gender_female"]+df_mp["gender_male"]) +# df_mp.insert(4, "share_of_women", df_mp["gender_female"]/(df_mp["share_known_gender"])) +# return df_mp + + +# def df_segment_pb (df) : +# df_pb = df.groupby("segment")[["prop_purchases_internet", "taux_ouverture_mail", "opt_in"]].mean().reset_index() +# return df_pb + + +def radar_mp_plot(df, categories, index) : + categories = categories + + # true values are used to print the true value in parenthesis + tvalues = list(df.loc[index,categories]) + + max_values = df[categories].max() + + # values are true values / max among the 4 segments, allows to + # put values in relation with the values for other segments + # if the point has a maximal abscisse it means that value is maximal for the segment considered + # , event if not equal to 1 + + values = list(df.loc[index,categories]/max_values) + + # values normalized are used to adjust the value around the circle + # for instance if the maximum of values is equal to 0.8, we want the point to be + # at 8/10th of the circle radius, not at the edge + values_normalized = [ max(values) * elt for elt in values] + + # Nb of categories + num_categories = len(categories) + + angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist() + + # Initialize graphic + fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True)) + + # we have to draw first a transparent line (alpha=0) of values to adjust the radius of the circle + # which is based on max(value) + ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5) + ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, linewidth=1.2) + + # fill the sector + ax.fill(angles, values_normalized, color='orange', alpha=0.4) + + # labels + ax.set_yticklabels([]) + ax.set_xticks(angles) + ticks = [categories[i].replace("_"," ") + f"\n({round(100 * tvalues[i],2)}%)" for i in range(len(categories))] + ax.set_xticklabels(ticks, color="black") + + ax.spines['polar'].set_visible(False) + + plt.title(f'Characteristics of the segment {index+1}\n') + + # plt.show() + + +def radar_mp_plot_all(df, type_of_activity) : + + # table summarizing variables relative to marketing personae + df_mp = df.groupby("segment")[["gender_female", "gender_male", "gender_other", "country_fr"]].mean().reset_index() + df_mp.insert(3, "share_known_gender", df_mp["gender_female"]+df_mp["gender_male"]) + df_mp.insert(4, "share_of_women", df_mp["gender_female"]/(df_mp["share_known_gender"])) + + # table relative to purchasing behaviour + df_pb = df.groupby("segment")[["prop_purchases_internet", "taux_ouverture_mail", "opt_in"]].mean().reset_index() + + # concatenation of tables to prepare the plot + df_used = pd.concat([df_pb, df_mp[['share_known_gender', 'share_of_women', 'country_fr']]], axis=1) + + # visualization + nb_segments = df_used.shape[0] + categories = list(df_used.drop("segment", axis=1).columns) + + # Initialize graphic + fig, ax = plt.subplots(2,2, figsize=(25, 20), subplot_kw=dict(polar=True)) + + for index in range(nb_segments) : + row = index // 2 # Division entière pour obtenir le numéro de ligne + col = index % 2 + + # true values are used to print the true value in parenthesis + tvalues = list(df_used.loc[index,categories]) + + max_values = df_used[categories].max() + + # values are true values / max among the 4 segments, allows to + # put values in relation with the values for other segments + # if the point has a maximal abscisse it means that value is maximal for the segment considered + # , event if not equal to 1 + + values = list(df_used.loc[index,categories]/max_values) + + # values normalized are used to adjust the value around the circle + # for instance if the maximum of values is equal to 0.8, we want the point to be + # at 8/10th of the circle radius, not at the edge + values_normalized = [ max(values) * elt for elt in values] + + # Nb of categories + num_categories = len(categories) + + angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist() + + # we have to draw first a transparent line (alpha=0) of values to adjust the radius of the circle + # which is based on max(value) + ax[row, col].plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5) + ax[row, col].plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, + linewidth=1.2) + + # fill the sector + ax[row, col].fill(angles, values_normalized, color='orange', alpha=0.4, label = index) + + # labels + ax[row, col].set_yticklabels([]) + ax[row, col].set_xticks(angles) + ticks = [categories[i].replace("_"," ") + f"\n({round(100 * tvalues[i],2)}%)" for i in range(len(categories))] + ax[row, col].set_xticklabels(ticks, color="black", size = 20) + + ax[row, col].spines['polar'].set_visible(False) + + ax[row, col].set_title(f'Segment {index+1}\n', size = 24) + + fig.suptitle(f"Characteristics of marketing personae of {type_of_activity} companies", size=32) + # plt.show() + +def known_sociodemo_caracteristics(df, type_of_activity) : + + table_share_known = df.groupby("segment")[["is_profession_known", "is_zipcode_known", "categorie_age_inconnue", "gender_other"]].mean().mul(100).reset_index() + table_share_known.columns = ['Segment', 'Share of Known Profession (%)', 'Share of Known Zipcode (%)', 'Share of Unknown Age (%)', 'Share of Unknown Gender (%)'] + table_share_known= table_share_known.pivot_table(index=None, columns='Segment') + + # Arrondir les valeurs du DataFrame à une décimale + table_share_known_rounded = table_share_known.round(1) + + # Convertir le DataFrame en format LaTeX avec les valeurs arrondies et le symbole '%' + latex_table = tabulate(table_share_known_rounded, headers='keys', tablefmt='latex_raw', floatfmt=".1f") + latex_table = latex_table.replace('%', '\\%') + + save_txt_file_s3("table_known_socio_demo_caracteristics", type_of_activity, latex_table) + + diff --git a/utils_segmentation_V2TP.py b/utils_segmentation_V2TP.py deleted file mode 100644 index 6c02c01..0000000 --- a/utils_segmentation_V2TP.py +++ /dev/null @@ -1,201 +0,0 @@ -### importations ### - -import pandas as pd -import numpy as np -import os -import io -import s3fs -import re -import pickle -import warnings -import matplotlib.pyplot as plt - - -### functions for segmentation and graphics associated ### - -def load_model(type_of_activity, model): - BUCKET = f"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/" - filename = model + '.pkl' - file_path = BUCKET + filename - with fs.open(file_path, mode="rb") as f: - model_bytes = f.read() - - model = pickle.loads(model_bytes) - return model - - -def load_test_file(type_of_activity): - file_path_test = f"projet-bdc2324-team1/Generalization/{type_of_activity}/Test_set.csv" - with fs.open(file_path_test, mode="rb") as file_in: - dataset_test = pd.read_csv(file_in, sep=",") - return dataset_test - - -def save_file_s3_mp(File_name, type_of_activity): - image_buffer = io.BytesIO() - plt.savefig(image_buffer, format='png', dpi=110) - image_buffer.seek(0) - PATH = f"projet-bdc2324-team1/Output_marketing_personae_analysis/{type_of_activity}/" - FILE_PATH_OUT_S3 = PATH + File_name + type_of_activity + '.png' - with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file: - s3_file.write(image_buffer.read()) - plt.close() - - -def df_business_fig(df, segment, list_var) : - df_business_kpi = df.groupby(segment)[list_var].sum().reset_index() - df_business_kpi.insert(1, "size", df.groupby(segment).size().values) - all_var = ["size"] + list_var - df_business_kpi[all_var] = 100 * df_business_kpi[all_var] / df_business_kpi[all_var].sum() - - return df_business_kpi - - -def hist_segment_business_KPIs(df, segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns) : - - plt.figure() - - df_plot = df[[segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns]] - - x = ["number of\ncustomers", "number of\ntickets", "number of\npurchases", "total\namount", - "number of\ncampaigns"] - - bottom = np.zeros(5) - - # types of blue color - colors = plt.cm.Blues(np.linspace(0.1, 0.9, 4)) - - for i in range(4) : - height = list(df_plot.loc[i,size:].values) - plt.bar(x=x, height=height, label = str(df_plot[segment][i]), bottom=bottom, color=colors[i]) - bottom+=height - - # Ajust margins - plt.subplots_adjust(left = 0.125, right = 0.8, bottom = 0.1, top = 0.9) - - plt.legend(title = "segment", loc = "upper right", bbox_to_anchor=(1.2, 1)) - plt.ylabel("Fraction represented by the segment (%)") - plt.title(f"Relative weight of each segment regarding business KPIs\nfor {type_of_activity} companies", size=12) - # plt.show() - - -def df_segment_mp(df, segment, gender_female, gender_male, gender_other, country_fr) : - df_mp = df.groupby(segment)[[gender_female, gender_male, gender_other, country_fr]].mean().reset_index() - df_mp.insert(3, "share_known_gender", df_mp[gender_female]+df_mp[gender_male]) - df_mp.insert(4, "share_of_women", df_mp[gender_female]/(df_mp["share_known_gender"])) - return df_mp - - -def df_segment_pb (df, segment, nb_tickets_internet, nb_tickets, nb_campaigns_opened, nb_campaigns, opt_in) : - df_used = df - df_used["share_tickets_internet"] = df_used[nb_tickets_internet]/df_used[nb_tickets] - df_used["share_campaigns_opened"] = df_used[nb_campaigns_opened]/df_used[nb_campaigns] - df_pb = df_used.groupby(segment)[["share_tickets_internet", "share_campaigns_opened", opt_in]].mean().reset_index() - return df_pb - - -def radar_mp_plot(df, categories, index) : - categories = categories - - # true values are used to print the true value in parenthesis - tvalues = list(df.loc[index,categories]) - - max_values = df[categories].max() - - # values are true values / max among the 4 segments, allows to - # put values in relation with the values for other segments - # if the point has a maximal abscisse it means that value is maximal for the segment considered - # , event if not equal to 1 - - values = list(df.loc[index,categories]/max_values) - - # values normalized are used to adjust the value around the circle - # for instance if the maximum of values is equal to 0.8, we want the point to be - # at 8/10th of the circle radius, not at the edge - values_normalized = [ max(values) * elt for elt in values] - - # Nb of categories - num_categories = len(categories) - - angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist() - - # Initialize graphic - fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True)) - - # we have to draw first a transparent line (alpha=0) of values to adjust the radius of the circle - # which is based on max(value) - ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5) - ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, linewidth=1.2) - - # fill the sector - ax.fill(angles, values_normalized, color='orange', alpha=0.4) - - # labels - ax.set_yticklabels([]) - ax.set_xticks(angles) - ticks = [categories[i].replace("_"," ") + f"\n({round(100 * tvalues[i],2)}%)" for i in range(len(categories))] - ax.set_xticklabels(ticks, color="black") - - ax.spines['polar'].set_visible(False) - - plt.title(f'Characteristics of the segment {index+1}\n') - - # plt.show() - - -def radar_mp_plot_all(df, categories) : - - nb_segments = df.shape[0] - categories = categories - - # Initialize graphic - fig, ax = plt.subplots(2,2, figsize=(25, 20), subplot_kw=dict(polar=True)) - - for index in range(nb_segments) : - row = index // 2 # Division entière pour obtenir le numéro de ligne - col = index % 2 - - # true values are used to print the true value in parenthesis - tvalues = list(df.loc[index,categories]) - - max_values = df[categories].max() - - # values are true values / max among the 4 segments, allows to - # put values in relation with the values for other segments - # if the point has a maximal abscisse it means that value is maximal for the segment considered - # , event if not equal to 1 - - values = list(df.loc[index,categories]/max_values) - - # values normalized are used to adjust the value around the circle - # for instance if the maximum of values is equal to 0.8, we want the point to be - # at 8/10th of the circle radius, not at the edge - values_normalized = [ max(values) * elt for elt in values] - - # Nb of categories - num_categories = len(categories) - - angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist() - - # we have to draw first a transparent line (alpha=0) of values to adjust the radius of the circle - # which is based on max(value) - ax[row, col].plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5) - ax[row, col].plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, - linewidth=1.2) - - # fill the sector - ax[row, col].fill(angles, values_normalized, color='orange', alpha=0.4, label = index) - - # labels - ax[row, col].set_yticklabels([]) - ax[row, col].set_xticks(angles) - ticks = [categories[i].replace("_"," ") + f"\n({round(100 * tvalues[i],2)}%)" for i in range(len(categories))] - ax[row, col].set_xticklabels(ticks, color="black", size = 20) - - ax[row, col].spines['polar'].set_visible(False) - - ax[row, col].set_title(f'Segment {index+1}\n', size = 24) - - fig.suptitle(f"Characteristics of marketing personae of {type_of_activity} companies", size=32) - # plt.show() -