BDC-team-1/Sport/Modelization/segment_analysis_sport_0_6.ipynb

2857 lines
1.2 MiB
Plaintext
Raw Normal View History

2024-03-26 12:20:03 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "c488134e-680f-44e4-8c43-40c246140519",
"metadata": {},
"source": [
"# Analysis of segments and marketing personae associated"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 5,
2024-03-26 12:20:03 +01:00
"id": "9a8b8c3a-8e74-49f3-91d1-cccfc057fdcd",
"metadata": {},
"outputs": [],
"source": [
"# importations\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import io\n",
"import s3fs\n",
"import re\n",
"import pickle\n",
2024-03-26 16:00:39 +01:00
"import warnings\n",
"import matplotlib.pyplot as plt"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 19,
2024-03-26 12:20:03 +01:00
"id": "d553c868-695f-4d57-96d6-d5c6629cefb2",
"metadata": {},
"outputs": [],
"source": [
"def load_model(type_of_activity, model):\n",
2024-03-31 19:57:10 +02:00
" #BUCKET = f\"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/\"\n",
" BUCKET = f\"projet-bdc2324-team1/2_Output/2_1_Modeling_results/standard/{type_of_activity}/{model}/\"\n",
2024-03-26 12:20:03 +01:00
" filename = model + '.pkl'\n",
" file_path = BUCKET + filename\n",
" with fs.open(file_path, mode=\"rb\") as f:\n",
" model_bytes = f.read()\n",
"\n",
" model = pickle.loads(model_bytes)\n",
" return model\n",
"\n",
"\n",
"def load_test_file(type_of_activity):\n",
2024-03-31 19:57:10 +02:00
" #file_path_test = f\"projet-bdc2324-team1/Generalization/{type_of_activity}/Test_set.csv\"\n",
" file_path_test = f\"projet-bdc2324-team1/1_Temp/1_0_Modelling_Datasets/{type_of_activity}/Test_set.csv\"\n",
2024-03-26 12:20:03 +01:00
" with fs.open(file_path_test, mode=\"rb\") as file_in:\n",
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
" return dataset_test"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 12,
2024-03-26 12:20:03 +01:00
"id": "3af80fea-a937-4ea8-bece-cfeaa89d1055",
"metadata": {},
"outputs": [],
"source": [
"# exec(open('utils_segmentation.py').read())\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"# choose the type of companies for which you want to run the pipeline\n",
"type_of_activity = \"sport\""
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 24,
2024-03-26 12:20:03 +01:00
"id": "cc6af7fa-33b2-4d58-ada4-e2ee7262bab9",
"metadata": {},
"outputs": [],
"source": [
"# load test set\n",
"dataset_test = load_test_file(type_of_activity)\n",
"\n",
"# Load Model \n",
"model = load_model(type_of_activity, 'LogisticRegression_Benchmark')"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 25,
"id": "8238ee71-47ec-4621-9813-4b5d2fd03efd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
2024-03-31 19:57:10 +02:00
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>is_email_true</th>\n",
" <th>opt_in</th>\n",
" <th>...</th>\n",
2024-03-31 19:57:10 +02:00
" <th>purchases_5_2022</th>\n",
" <th>purchases_6_2021</th>\n",
" <th>purchases_6_2022</th>\n",
" <th>purchases_7_2021</th>\n",
" <th>purchases_7_2022</th>\n",
" <th>purchases_8_2021</th>\n",
" <th>purchases_8_2022</th>\n",
" <th>purchases_9_2021</th>\n",
" <th>purchases_9_2022</th>\n",
" <th>y_has_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2024-03-31 19:57:10 +02:00
" <td>5_4317407</td>\n",
" <td>969908</td>\n",
" <td>NaN</td>\n",
" <td>6156473.0</td>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-31 19:57:10 +02:00
" <td>5_477635</td>\n",
" <td>109121</td>\n",
" <td>NaN</td>\n",
" <td>6213652.0</td>\n",
" <td>2</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-31 19:57:10 +02:00
" <td>5_411639</td>\n",
" <td>92929</td>\n",
" <td>NaN</td>\n",
" <td>6160271.0</td>\n",
" <td>4</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-31 19:57:10 +02:00
" <td>5_326623</td>\n",
" <td>79862</td>\n",
" <td>NaN</td>\n",
" <td>6140109.0</td>\n",
" <td>1</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-31 19:57:10 +02:00
" <td>5_383915</td>\n",
" <td>85421</td>\n",
" <td>NaN</td>\n",
" <td>6149409.0</td>\n",
" <td>2</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96091</th>\n",
2024-03-31 19:57:10 +02:00
" <td>9_91205</td>\n",
" <td>76215</td>\n",
" <td>NaN</td>\n",
" <td>47280.0</td>\n",
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1490</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
2024-03-31 19:57:10 +02:00
" <td>9_369887</td>\n",
" <td>815891</td>\n",
" <td>NaN</td>\n",
" <td>30764537.0</td>\n",
" <td>4</td>\n",
" <td>1490</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
" <td>9_1007562</td>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1490</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
2024-03-31 19:57:10 +02:00
" <td>9_15037</td>\n",
" <td>12992</td>\n",
" <td>NaN</td>\n",
" <td>2213448.0</td>\n",
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1490</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
" <td>9_135370</td>\n",
" <td>76215</td>\n",
" <td>NaN</td>\n",
2024-03-31 19:57:10 +02:00
" <td>2164740.0</td>\n",
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1490</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
2024-03-31 19:57:10 +02:00
" <td>True</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-03-31 19:57:10 +02:00
"<p>96096 rows × 87 columns</p>\n",
"</div>"
],
"text/plain": [
2024-03-31 19:57:10 +02:00
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 5_4317407 969908 NaN 6156473.0 1 \n",
"1 5_477635 109121 NaN 6213652.0 2 \n",
"2 5_411639 92929 NaN 6160271.0 4 \n",
"3 5_326623 79862 NaN 6140109.0 1 \n",
"4 5_383915 85421 NaN 6149409.0 2 \n",
"... ... ... ... ... ... \n",
"96091 9_91205 76215 NaN 47280.0 0 \n",
"96092 9_369887 815891 NaN 30764537.0 4 \n",
"96093 9_1007562 1 NaN NaN 0 \n",
"96094 9_15037 12992 NaN 2213448.0 0 \n",
"96095 9_135370 76215 NaN 2164740.0 0 \n",
"\n",
2024-03-31 19:57:10 +02:00
" tenant_id is_partner deleted_at is_email_true opt_in ... \\\n",
"0 1771 False NaN True 0 ... \n",
"1 1771 False NaN True 0 ... \n",
"2 1771 False NaN True 0 ... \n",
"3 1771 False NaN True 1 ... \n",
"4 1771 False NaN True 1 ... \n",
"... ... ... ... ... ... ... \n",
"96091 1490 False NaN True 1 ... \n",
"96092 1490 False NaN True 0 ... \n",
"96093 1490 False NaN True 0 ... \n",
"96094 1490 False NaN True 1 ... \n",
"96095 1490 False NaN True 1 ... \n",
"\n",
2024-03-31 19:57:10 +02:00
" purchases_5_2022 purchases_6_2021 purchases_6_2022 purchases_7_2021 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"96091 0.0 0.0 0.0 0.0 \n",
"96092 0.0 0.0 0.0 0.0 \n",
"96093 0.0 0.0 0.0 0.0 \n",
"96094 0.0 0.0 0.0 0.0 \n",
"96095 0.0 0.0 0.0 0.0 \n",
"\n",
2024-03-31 19:57:10 +02:00
" purchases_7_2022 purchases_8_2021 purchases_8_2022 purchases_9_2021 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 1.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"96091 0.0 0.0 0.0 0.0 \n",
"96092 0.0 0.0 0.0 0.0 \n",
"96093 0.0 0.0 0.0 0.0 \n",
"96094 0.0 0.0 0.0 0.0 \n",
"96095 0.0 0.0 0.0 0.0 \n",
"\n",
2024-03-31 19:57:10 +02:00
" purchases_9_2022 y_has_purchased \n",
"0 0.0 0.0 \n",
"1 0.0 0.0 \n",
"2 0.0 0.0 \n",
"3 0.0 0.0 \n",
"4 0.0 0.0 \n",
"... ... ... \n",
"96091 0.0 0.0 \n",
"96092 0.0 1.0 \n",
"96093 0.0 0.0 \n",
"96094 0.0 0.0 \n",
"96095 0.0 0.0 \n",
"\n",
2024-03-31 19:57:10 +02:00
"[96096 rows x 87 columns]"
]
},
2024-03-31 19:57:10 +02:00
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_test"
]
},
{
"cell_type": "code",
2024-03-29 13:43:36 +01:00
"execution_count": 15,
"id": "5d0d0c18-8930-4304-84df-d5885ab21b16",
"metadata": {},
"outputs": [],
"source": [
"# added : recup age\n",
"\n",
"def generate_test_set(type_of_comp):\n",
" file_path_list = fs.ls(f\"projet-bdc2324-team1/1_Temp/1_0_Modelling_Datasets/{type_of_comp}/Test_set\")\n",
" test_set = pd.DataFrame()\n",
" for file in file_path_list:\n",
" print(file)\n",
" with fs.open(file, mode=\"rb\") as file_in:\n",
" df = pd.read_csv(file_in, sep=\",\")\n",
" test_set = pd.concat([test_set, df], ignore_index = True)\n",
" return test_set\n",
"\n",
"def generate_train_set(type_of_comp):\n",
" file_path_list = fs.ls(f\"projet-bdc2324-team1/1_Temp/1_0_Modelling_Datasets/{type_of_comp}/Train_set\")\n",
" train_set = pd.DataFrame()\n",
" for file in file_path_list:\n",
" print(file)\n",
" with fs.open(file, mode=\"rb\") as file_in:\n",
" df = pd.read_csv(file_in, sep=\",\")\n",
" train_set = pd.concat([train_set, df], ignore_index = True)\n",
" return train_set\n",
"\n",
"def recup_var(df, activity, var) :\n",
" \n",
" df_test = generate_test_set(activity)\n",
" df_train = generate_train_set(activity)\n",
" df_all = pd.concat([df_train, df_test], ignore_index=True)\n",
"\n",
" df_used = df\n",
" \n",
" df_used = df_used.set_index(\"customer_id\")\n",
" df_used[var] = df_all.set_index(\"customer_id\")[var]\n",
" df_used = df_used.reset_index()\n",
"\n",
" return df_used"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 23,
2024-03-29 13:43:36 +01:00
"id": "51843556-d785-4d11-abfa-d4e603b32fe7",
"metadata": {},
"outputs": [
{
2024-03-31 19:57:10 +02:00
"data": {
"text/plain": [
"Index(['customer_id', 'street_id', 'structure_id', 'mcp_contact_id',\n",
" 'fidelity', 'tenant_id', 'is_partner', 'deleted_at', 'is_email_true',\n",
" 'opt_in', 'profession', 'last_buying_date', 'max_price', 'ticket_sum',\n",
" 'average_price', 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'total_price', 'preferred_category',\n",
" 'preferred_supplier', 'preferred_formula', 'purchase_count',\n",
" 'first_buying_date', 'last_visiting_date', 'zipcode', 'country', 'age',\n",
" 'gender_label', 'gender_female', 'gender_male', 'gender_other',\n",
" 'categorie_age_0_10', 'categorie_age_10_20', 'categorie_age_20_30',\n",
" 'categorie_age_30_40', 'categorie_age_40_50', 'categorie_age_50_60',\n",
" 'categorie_age_60_70', 'categorie_age_70_80', 'categorie_age_plus_80',\n",
" 'categorie_age_inconnue', 'country_fr', 'is_profession_known',\n",
" 'is_zipcode_known', 'nb_campaigns', 'nb_campaigns_opened',\n",
" 'time_to_open', 'taux_ouverture_mail', 'nb_targets', 'target_jeune',\n",
" 'target_optin', 'target_optout', 'target_scolaire', 'target_entreprise',\n",
" 'target_famille', 'target_newsletter', 'target_abonne', 'nb_tickets',\n",
" 'nb_purchases', 'total_amount', 'nb_suppliers', 'achat_internet',\n",
" 'purchase_date_min', 'purchase_date_max', 'time_between_purchase',\n",
" 'nb_purchases_internet', 'prop_purchases_internet', 'purchases_10_2021',\n",
" 'purchases_10_2022', 'purchases_11_2021', 'purchases_12_2021',\n",
" 'purchases_1_2022', 'purchases_2_2022', 'purchases_3_2022',\n",
" 'purchases_4_2022', 'purchases_5_2021', 'purchases_5_2022',\n",
" 'purchases_6_2021', 'purchases_6_2022', 'purchases_7_2021',\n",
" 'purchases_7_2022', 'purchases_8_2021', 'purchases_8_2022',\n",
" 'purchases_9_2021', 'purchases_9_2022', 'y_has_purchased',\n",
" 'has_purchased'],\n",
" dtype='object')"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_test = recup_var(dataset_test, type_of_activity, \"age\")\n",
"dataset_test"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "e4287c1a-eab6-4897-91d6-d21804518dc4",
"metadata": {},
"outputs": [
2024-03-29 13:43:36 +01:00
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
2024-03-31 19:57:10 +02:00
" <th>street_id</th>\n",
" <th>structure_id</th>\n",
" <th>mcp_contact_id</th>\n",
" <th>fidelity</th>\n",
" <th>tenant_id</th>\n",
" <th>is_partner</th>\n",
" <th>deleted_at</th>\n",
" <th>is_email_true</th>\n",
" <th>opt_in</th>\n",
2024-03-29 13:43:36 +01:00
" <th>...</th>\n",
2024-03-31 19:57:10 +02:00
" <th>purchases_7_2022</th>\n",
" <th>purchases_8_2021</th>\n",
" <th>purchases_8_2022</th>\n",
" <th>purchases_9_2021</th>\n",
" <th>purchases_9_2022</th>\n",
2024-03-29 13:43:36 +01:00
" <th>y_has_purchased</th>\n",
2024-03-31 19:57:10 +02:00
" <th>has_purchased</th>\n",
" <th>has_purchased_estim</th>\n",
" <th>score</th>\n",
" <th>segment</th>\n",
2024-03-29 13:43:36 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2024-03-31 19:57:10 +02:00
" <td>5_4317407</td>\n",
" <td>969908</td>\n",
" <td>NaN</td>\n",
" <td>6156473.0</td>\n",
2024-03-29 13:43:36 +01:00
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
2024-03-29 13:43:36 +01:00
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
2024-03-29 13:43:36 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.0</td>\n",
" <td>0.366661</td>\n",
" <td>2</td>\n",
2024-03-29 13:43:36 +01:00
" </tr>\n",
" <tr>\n",
2024-03-31 19:57:10 +02:00
" <th>1</th>\n",
" <td>5_477635</td>\n",
" <td>109121</td>\n",
" <td>NaN</td>\n",
" <td>6213652.0</td>\n",
" <td>2</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
2024-03-29 13:43:36 +01:00
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
2024-03-29 13:43:36 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.376898</td>\n",
" <td>2</td>\n",
2024-03-29 13:43:36 +01:00
" </tr>\n",
" <tr>\n",
2024-03-31 19:57:10 +02:00
" <th>2</th>\n",
" <td>5_411639</td>\n",
" <td>92929</td>\n",
2024-03-29 13:43:36 +01:00
" <td>NaN</td>\n",
2024-03-31 19:57:10 +02:00
" <td>6160271.0</td>\n",
" <td>4</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
2024-03-29 13:43:36 +01:00
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.971493</td>\n",
" <td>4</td>\n",
2024-03-29 13:43:36 +01:00
" </tr>\n",
" <tr>\n",
2024-03-31 19:57:10 +02:00
" <th>3</th>\n",
" <td>5_326623</td>\n",
" <td>79862</td>\n",
" <td>NaN</td>\n",
" <td>6140109.0</td>\n",
2024-03-29 13:43:36 +01:00
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1771</td>\n",
" <td>False</td>\n",
2024-03-29 13:43:36 +01:00
" <td>NaN</td>\n",
2024-03-26 12:20:03 +01:00
" <td>True</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.042499</td>\n",
2024-03-26 12:20:03 +01:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
2024-03-31 19:57:10 +02:00
" <th>4</th>\n",
" <td>5_383915</td>\n",
" <td>85421</td>\n",
" <td>NaN</td>\n",
" <td>6149409.0</td>\n",
" <td>2</td>\n",
" <td>1771</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
2024-03-26 12:20:03 +01:00
" <td>True</td>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
2024-03-26 12:20:03 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.0</td>\n",
" <td>0.351686</td>\n",
" <td>2</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96091</th>\n",
2024-03-31 19:57:10 +02:00
" <td>9_91205</td>\n",
" <td>76215</td>\n",
" <td>NaN</td>\n",
" <td>47280.0</td>\n",
2024-03-26 12:20:03 +01:00
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1490</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
2024-03-26 12:20:03 +01:00
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
2024-03-26 12:20:03 +01:00
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.004917</td>\n",
" <td>1</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
2024-03-31 19:57:10 +02:00
" <td>9_369887</td>\n",
" <td>815891</td>\n",
" <td>NaN</td>\n",
" <td>30764537.0</td>\n",
" <td>4</td>\n",
" <td>1490</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
2024-03-26 12:20:03 +01:00
" <td>True</td>\n",
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-26 12:20:03 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.797374</td>\n",
" <td>4</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
2024-03-31 19:57:10 +02:00
" <td>9_1007562</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1490</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
2024-03-26 12:20:03 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.039944</td>\n",
2024-03-26 12:20:03 +01:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
2024-03-31 19:57:10 +02:00
" <td>9_15037</td>\n",
" <td>12992</td>\n",
" <td>NaN</td>\n",
" <td>2213448.0</td>\n",
2024-03-26 12:20:03 +01:00
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1490</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>True</td>\n",
2024-03-26 12:20:03 +01:00
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>...</td>\n",
2024-03-26 12:20:03 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.049646</td>\n",
2024-03-26 12:20:03 +01:00
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
" <td>9_135370</td>\n",
" <td>76215</td>\n",
" <td>NaN</td>\n",
" <td>2164740.0</td>\n",
2024-03-26 12:20:03 +01:00
" <td>0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>1490</td>\n",
" <td>False</td>\n",
2024-03-26 12:20:03 +01:00
" <td>NaN</td>\n",
2024-03-31 19:57:10 +02:00
" <td>True</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
2024-03-26 12:20:03 +01:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.007398</td>\n",
" <td>1</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-03-31 19:57:10 +02:00
"<p>96096 rows × 91 columns</p>\n",
2024-03-26 12:20:03 +01:00
"</div>"
],
"text/plain": [
2024-03-31 19:57:10 +02:00
" customer_id street_id structure_id mcp_contact_id fidelity \\\n",
"0 5_4317407 969908 NaN 6156473.0 1 \n",
"1 5_477635 109121 NaN 6213652.0 2 \n",
"2 5_411639 92929 NaN 6160271.0 4 \n",
"3 5_326623 79862 NaN 6140109.0 1 \n",
"4 5_383915 85421 NaN 6149409.0 2 \n",
"... ... ... ... ... ... \n",
"96091 9_91205 76215 NaN 47280.0 0 \n",
"96092 9_369887 815891 NaN 30764537.0 4 \n",
"96093 9_1007562 1 NaN NaN 0 \n",
"96094 9_15037 12992 NaN 2213448.0 0 \n",
"96095 9_135370 76215 NaN 2164740.0 0 \n",
2024-03-26 12:20:03 +01:00
"\n",
2024-03-31 19:57:10 +02:00
" tenant_id is_partner deleted_at is_email_true opt_in ... \\\n",
"0 1771 False NaN True 0 ... \n",
"1 1771 False NaN True 0 ... \n",
"2 1771 False NaN True 0 ... \n",
"3 1771 False NaN True 1 ... \n",
"4 1771 False NaN True 1 ... \n",
"... ... ... ... ... ... ... \n",
"96091 1490 False NaN True 1 ... \n",
"96092 1490 False NaN True 0 ... \n",
"96093 1490 False NaN True 0 ... \n",
"96094 1490 False NaN True 1 ... \n",
"96095 1490 False NaN True 1 ... \n",
2024-03-26 12:20:03 +01:00
"\n",
2024-03-31 19:57:10 +02:00
" purchases_7_2022 purchases_8_2021 purchases_8_2022 purchases_9_2021 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 1.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"96091 0.0 0.0 0.0 0.0 \n",
"96092 0.0 0.0 0.0 0.0 \n",
"96093 0.0 0.0 0.0 0.0 \n",
"96094 0.0 0.0 0.0 0.0 \n",
"96095 0.0 0.0 0.0 0.0 \n",
2024-03-26 12:20:03 +01:00
"\n",
2024-03-31 19:57:10 +02:00
" purchases_9_2022 y_has_purchased has_purchased has_purchased_estim \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 1.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"96091 0.0 0.0 0.0 0.0 \n",
"96092 0.0 1.0 1.0 1.0 \n",
"96093 0.0 0.0 0.0 0.0 \n",
"96094 0.0 0.0 0.0 0.0 \n",
"96095 0.0 0.0 0.0 0.0 \n",
2024-03-26 12:20:03 +01:00
"\n",
" score segment \n",
2024-03-31 19:57:10 +02:00
"0 0.366661 2 \n",
"1 0.376898 2 \n",
"2 0.971493 4 \n",
"3 0.042499 1 \n",
"4 0.351686 2 \n",
2024-03-26 12:20:03 +01:00
"... ... ... \n",
2024-03-31 19:57:10 +02:00
"96091 0.004917 1 \n",
"96092 0.797374 4 \n",
"96093 0.039944 1 \n",
"96094 0.049646 1 \n",
"96095 0.007398 1 \n",
2024-03-26 12:20:03 +01:00
"\n",
2024-03-31 19:57:10 +02:00
"[96096 rows x 91 columns]"
2024-03-26 12:20:03 +01:00
]
},
2024-03-31 19:57:10 +02:00
"execution_count": 29,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Processing\n",
2024-03-31 19:57:10 +02:00
"\"\"\"\n",
2024-03-26 12:20:03 +01:00
"X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
2024-03-29 13:43:36 +01:00
" 'time_between_purchase', 'nb_tickets_internet', 'is_email_true', 'opt_in', 'age', #'is_partner',\n",
2024-03-27 16:06:31 +01:00
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened', 'country_fr']]\n",
2024-03-31 19:57:10 +02:00
"\"\"\"\n",
"\"\"\"\n",
"X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'purchase_date_min', 'purchase_date_max', \n",
" 'time_between_purchase', 'is_email_true', 'opt_in', 'age', #'is_partner',\n",
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened', 'country_fr']]\n",
"\"\"\"\n",
2024-03-26 12:20:03 +01:00
"\n",
"y_test = dataset_test[['y_has_purchased']]\n",
"\n",
"\n",
2024-03-31 19:57:10 +02:00
"# X_test_segment = X_test\n",
"X_test_segment = dataset_test\n",
2024-03-26 12:20:03 +01:00
"\n",
2024-03-27 16:06:31 +01:00
"# X_test_segment.insert(X_test.shape[1], \"country_fr\", dataset_test[\"country_fr\"])\n",
2024-03-26 12:20:03 +01:00
"\n",
"# add y_has_purchased to X_test\n",
"X_test_segment[\"has_purchased\"] = y_test\n",
"\n",
"# Add prediction and probability to dataset_test\n",
2024-03-31 19:57:10 +02:00
"# y_pred = model.predict(X_test)\n",
"y_pred = model.predict(dataset_test)\n",
"\n",
2024-03-26 12:20:03 +01:00
"X_test_segment[\"has_purchased_estim\"] = y_pred\n",
"\n",
2024-03-31 19:57:10 +02:00
"#y_pred_prob = model.predict_proba(X_test)[:, 1]\n",
"y_pred_prob = model.predict_proba(dataset_test)[:, 1]\n",
"\n",
2024-03-26 12:20:03 +01:00
"X_test_segment['score'] = y_pred_prob\n",
"\n",
"X_test_segment[\"segment\"] = np.where(X_test_segment['score']<0.25, '1',\n",
" np.where(X_test_segment['score']<0.5, '2',\n",
" np.where(X_test_segment['score']<0.75, '3', '4')))\n",
"\n",
"X_test_segment"
]
},
2024-03-31 19:57:10 +02:00
{
"cell_type": "code",
"execution_count": 28,
"id": "d0d3e25f-3f0d-40ca-adb6-6f87e24edc8f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['customer_id', 'street_id', 'structure_id', 'mcp_contact_id',\n",
" 'fidelity', 'tenant_id', 'is_partner', 'deleted_at', 'is_email_true',\n",
" 'opt_in', 'profession', 'last_buying_date', 'max_price', 'ticket_sum',\n",
" 'average_price', 'average_purchase_delay', 'average_price_basket',\n",
" 'average_ticket_basket', 'total_price', 'preferred_category',\n",
" 'preferred_supplier', 'preferred_formula', 'purchase_count',\n",
" 'first_buying_date', 'last_visiting_date', 'zipcode', 'country', 'age',\n",
" 'gender_label', 'gender_female', 'gender_male', 'gender_other',\n",
" 'categorie_age_0_10', 'categorie_age_10_20', 'categorie_age_20_30',\n",
" 'categorie_age_30_40', 'categorie_age_40_50', 'categorie_age_50_60',\n",
" 'categorie_age_60_70', 'categorie_age_70_80', 'categorie_age_plus_80',\n",
" 'categorie_age_inconnue', 'country_fr', 'is_profession_known',\n",
" 'is_zipcode_known', 'nb_campaigns', 'nb_campaigns_opened',\n",
" 'time_to_open', 'taux_ouverture_mail', 'nb_targets', 'target_jeune',\n",
" 'target_optin', 'target_optout', 'target_scolaire', 'target_entreprise',\n",
" 'target_famille', 'target_newsletter', 'target_abonne', 'nb_tickets',\n",
" 'nb_purchases', 'total_amount', 'nb_suppliers', 'achat_internet',\n",
" 'purchase_date_min', 'purchase_date_max', 'time_between_purchase',\n",
" 'nb_purchases_internet', 'prop_purchases_internet', 'purchases_10_2021',\n",
" 'purchases_10_2022', 'purchases_11_2021', 'purchases_12_2021',\n",
" 'purchases_1_2022', 'purchases_2_2022', 'purchases_3_2022',\n",
" 'purchases_4_2022', 'purchases_5_2021', 'purchases_5_2022',\n",
" 'purchases_6_2021', 'purchases_6_2022', 'purchases_7_2021',\n",
" 'purchases_7_2022', 'purchases_8_2021', 'purchases_8_2022',\n",
" 'purchases_9_2021', 'purchases_9_2022', 'y_has_purchased',\n",
" 'has_purchased', 'has_purchased_estim'],\n",
" dtype='object')"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_test.columns"
]
},
2024-03-26 12:20:03 +01:00
{
"cell_type": "markdown",
"id": "9058c3b2-8fa2-4322-a57b-395da4033eaf",
"metadata": {},
"source": [
"## 1. Business KPIs"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 31,
2024-03-26 12:20:03 +01:00
"id": "3067d919-50c9-49e9-b0a6-b676a5dbae56",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
2024-03-31 19:57:10 +02:00
" <th>nb_purchases_internet</th>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>segment</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-31 19:57:10 +02:00
" <td>34667.0</td>\n",
" <td>14116.0</td>\n",
" <td>6.772701e+05</td>\n",
" <td>5836.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-31 19:57:10 +02:00
" <td>36994.0</td>\n",
" <td>16853.0</td>\n",
" <td>1.215306e+06</td>\n",
" <td>10363.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-31 19:57:10 +02:00
" <td>40121.0</td>\n",
" <td>17157.0</td>\n",
" <td>1.059581e+06</td>\n",
" <td>10628.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-31 19:57:10 +02:00
" <td>413816.0</td>\n",
" <td>101811.0</td>\n",
" <td>1.751393e+07</td>\n",
" <td>34378.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-31 19:57:10 +02:00
" nb_tickets nb_purchases total_amount nb_purchases_internet\n",
2024-03-26 12:20:03 +01:00
"segment \n",
2024-03-31 19:57:10 +02:00
"1 34667.0 14116.0 6.772701e+05 5836.0\n",
"2 36994.0 16853.0 1.215306e+06 10363.0\n",
"3 40121.0 17157.0 1.059581e+06 10628.0\n",
"4 413816.0 101811.0 1.751393e+07 34378.0"
2024-03-26 12:20:03 +01:00
]
},
2024-03-31 19:57:10 +02:00
"execution_count": 31,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# business figures\n",
2024-03-31 19:57:10 +02:00
"X_test_segment.groupby(\"segment\")[[\"nb_tickets\", \"nb_purchases\", \"total_amount\",\n",
" \"nb_purchases_internet\"]].sum()"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 32,
"id": "5b1acd28-b346-45b1-8da2-b79ca7f4fa96",
"metadata": {},
"outputs": [],
"source": [
"def df_business_fig(df, segment, list_var) :\n",
" df_business_kpi = df.groupby(segment)[list_var].sum().reset_index()\n",
" df_business_kpi.insert(1, \"size\", df.groupby(segment).size().values)\n",
" all_var = [\"size\"] + list_var\n",
" df_business_kpi[all_var] = 100 * df_business_kpi[all_var] / df_business_kpi[all_var].sum()\n",
"\n",
" return df_business_kpi"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 33,
"id": "bd63d787-3ef8-4f23-9069-e9b16b4a0de8",
2024-03-26 12:20:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>size</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_campaigns</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>57.890027</td>\n",
" <td>6.595725</td>\n",
" <td>9.414621</td>\n",
" <td>3.309231</td>\n",
" <td>56.178807</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-31 19:57:10 +02:00
" <td>17.360764</td>\n",
" <td>7.038459</td>\n",
" <td>11.240054</td>\n",
" <td>5.938147</td>\n",
" <td>13.839223</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-31 19:57:10 +02:00
" <td>10.909923</td>\n",
" <td>7.633400</td>\n",
" <td>11.442806</td>\n",
" <td>5.177254</td>\n",
" <td>10.487089</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-31 19:57:10 +02:00
" <td>13.839286</td>\n",
" <td>78.732415</td>\n",
" <td>67.902519</td>\n",
" <td>85.575368</td>\n",
" <td>19.494881</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment size nb_tickets nb_purchases total_amount nb_campaigns\n",
2024-03-31 19:57:10 +02:00
"0 1 57.890027 6.595725 9.414621 3.309231 56.178807\n",
"1 2 17.360764 7.038459 11.240054 5.938147 13.839223\n",
"2 3 10.909923 7.633400 11.442806 5.177254 10.487089\n",
"3 4 13.839286 78.732415 67.902519 85.575368 19.494881"
2024-03-26 12:20:03 +01:00
]
},
2024-03-31 19:57:10 +02:00
"execution_count": 33,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"business_var = [\"nb_tickets\", \"nb_purchases\", \"total_amount\", \"nb_campaigns\"]\n",
"X_test_business_fig = df_business_fig(X_test_segment, \"segment\",\n",
" business_var)\n",
2024-03-26 12:20:03 +01:00
"X_test_business_fig"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 34,
2024-03-26 12:20:03 +01:00
"id": "d2f618b6-c984-4790-bd8f-29c7d01c6707",
"metadata": {},
"outputs": [],
"source": [
"def hist_segment_business_KPIs(df, segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns) :\n",
" \n",
" plt.figure()\n",
"\n",
" df_plot = df[[segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns]]\n",
" \n",
" x = [\"number of\\ncustomers\", \"number of\\ntickets\", \"number of\\npurchases\", \"total\\namount\", \n",
" \"number of\\ncampaigns\"]\n",
"\n",
" # liste_var = [size, nb_tickets, nb_purchases, total_amount]\n",
" \n",
" bottom = np.zeros(5)\n",
" \n",
" # Définir une palette de couleurs\n",
" colors = plt.cm.Blues(np.linspace(0.1, 0.9, 4))\n",
" \n",
" for i in range(4) :\n",
" # print(str(df_plot[segment][i]))\n",
" # segment = df_plot[segment][i]\n",
" height = list(df_plot.loc[i,size:].values)\n",
" \n",
" plt.bar(x=x, height=height, label = str(df_plot[segment][i]), bottom=bottom, color=colors[i])\n",
2024-03-26 12:20:03 +01:00
" \n",
" bottom+=height\n",
2024-03-26 16:00:39 +01:00
"\n",
" # Ajuster les marges\n",
" plt.subplots_adjust(left = 0.125, right = 0.8, bottom = 0.1, top = 0.9)\n",
2024-03-26 12:20:03 +01:00
" \n",
" plt.legend(title = \"segment\", loc = \"upper right\", bbox_to_anchor=(1.2, 1))\n",
" plt.ylabel(\"Fraction represented by the segment (%)\")\n",
" plt.title(f\"Relative weight of each segment regarding business KPIs\\nfor {type_of_activity} companies\", size=12)\n",
" # plt.title(\"test\")\n",
2024-03-26 16:00:39 +01:00
" # plt.show()\n",
2024-03-26 12:20:03 +01:00
" "
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 35,
"id": "14b6ae5c-d704-4f5d-9f9b-5646e29ea470",
2024-03-26 12:20:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-31 19:57:10 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAHhCAYAAAB+0voXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB6GUlEQVR4nO3dd3xO5/8/8NdBcufOHiKDyDAiSEjFSpBEELMUtUnQltrUnqG21oeiFG2Cmq1R1AglsYkRe4udNHaI7Fy/P/xyvm5JbvfNHXfo6/l45MG5zrje57rPffLOda5zjiSEECAiIiKiPBXRdwBEREREhRmTJSIiIiI1mCwRERERqcFkiYiIiEgNJktEREREajBZIiIiIlKDyRIRERGRGkyWiIiIiNRgskRERESkxkeXLEVERECSJPmnWLFicHBwQIcOHXD16tV32mZUVBQkSUJUVJTW6164cAFhYWG4efNmrnmhoaFwcXF5p5gKs/fZr7CwMEiShIcPH7512alTp2LTpk3vVI86jx8/RocOHVCiRAlIkoRWrVrpvI73kXM8/vnnn/oO5aNXUMdQYSdJEsLCwuTpnPNmXuepgqbNd74g6v1Y5XceePnyJZo0aQIDAwMsX74cQN6/F0uVKoXu3bvj3r17b90mvd1HlyzlCA8Px+HDh7F7927069cPmzdvRp06dfDkyZMPGseFCxcwceLEPE9C48aNw8aNGz9oPB/Ch9qvgvpF9/3332Pjxo343//+h8OHD2PmzJk6r4MKh/9qsvSmZs2a4fDhw3BwcNB3KB/MV199hcOHD+s7DJ169uwZGjVqhL179+LPP/9Et27dVObn/F7ctWsXvv76a6xevRp169ZFcnKyniL+dBTTdwDvqnLlyvDx8QEABAQEICsrCxMmTMCmTZvQvXt3PUf3SpkyZfQdQoH42Pfr3LlzKFOmDDp37qzvUKiQy8rKQmZmJhQKhb5DUfHy5UsYGxtrvLytrS1sbW0LMKLCp1SpUihVqpS+w9CZxMREBAcH4/r169i+fTsCAwNzLfP678XAwEBkZWXh+++/x6ZNm3i+e08fbc/Sm3IOkH///Vel/Pjx4/j8889hbW0NIyMjeHt7Y926dW/d3vHjx9GhQwe4uLhAqVTCxcUFHTt2xK1bt+RlIiIi8OWXXwJ4dWDmdIFGREQAyH25ytvbG3Xr1s1VV1ZWFkqWLInWrVvLZenp6Zg8eTIqVKgAhUIBW1tbdO/eHQ8ePFAb999//w1JkhATEyOXrV+/HpIkoVmzZirLenl5oU2bNvK0EAI///wzqlatCqVSCSsrK7Rt2xY3btxQWS+vy3BPnz5Fz549YW1tDVNTUzRr1gw3btzIdTkgx7///ouOHTvCwsICdnZ26NGjB549eybPlyQJycnJWLZsmdyuAQEBavf98ePH6NOnD0qWLAlDQ0O4ublhzJgxSEtLAwDcvHkTkiRh9+7duHjxorzdt11+Xbt2LWrXrg0TExOYmpoiODgYp06dUllGk+Mlx7179/DNN9/AyckJhoaGcHR0RNu2bXMduxkZGRgzZgwcHR1hbm6OBg0a4PLly2pjBYAHDx7I2885dvz8/LB7926V5Xbv3o2goCCYm5vD2NgYfn5++Oeff3Jt76+//oKXlxcUCgXc3Nwwd+7cPC9xSJKEfv36ITw8HO7u7lAqlfDx8cGRI0cghMCsWbPg6uoKU1NT1K9fH9euXctVlyYx5dR9/vx5nR5DOcfHzJkzMXnyZLi6ukKhUGDv3r0AND+XHDhwALVr14aRkRFKliyJcePGYenSpbkug61duxaNGjWCg4MDlEolPDw8MHLkyFy9AKGhoTA1NcXZs2fRqFEjmJmZISgoCACQlJSEr7/+GjY2NjA1NUXjxo1x5cqVXDHldRkuICAAlStXRkxMDOrWrQtjY2O4ublh+vTpyM7OVln//PnzaNSoEYyNjWFra4u+ffvK5xpNhy/cuXMHrVu3hr
m5OSwsLNClS5dc57P8zhcuLi4IDQ2Vp1++fImhQ4fC1dUVRkZGsLa2ho+PD1avXi0vk9cx6uLigubNm2PHjh347LPPoFQqUaFCBfz222+56kxISECvXr1QqlQpGBoawtXVFRMnTkRmZqbKcgsXLkSVKlVgamoKMzMzVKhQAaNHj9Yq1re5desW6tSpg7t372LPnj15Jkp5qVWrlrx+fjQ9X/zXfbQ9S2+Ki4sDAJQvX14u27t3Lxo3boyaNWti0aJFsLCwwJo1a9C+fXu8fPlS5cv3pps3b8Ld3R0dOnSAtbU14uPjsXDhQlSvXh0XLlxA8eLF0axZM0ydOhWjR4/GggUL8NlnnwHIv+ele/fuGDhwIK5evYpy5crJ5ZGRkbh//77cI5adnY2WLVti//79GD58OHx9fXHr1i1MmDABAQEBOH78OJRKZZ51+Pv7w8DAALt370b16tUBvPoFpFQqER0djYyMDBgYGCAxMRHnzp3Dt99+K6/bq1cvREREYMCAAZgxYwYeP36MSZMmwdfXF6dPn4adnV2edWZnZ6NFixY4fvw4wsLC8Nlnn+Hw4cNo3Lhxvu3bpk0btG/fHj179sTZs2cxatQoAJBPWocPH0b9+vURGBiIcePGAQDMzc3z3V5qaioCAwNx/fp1TJw4EV5eXti/fz+mTZuG2NhY/P3333BwcMDhw4fRp08fPHv2DCtXrgQAVKxYMd/tTp06FWPHjkX37t0xduxYpKenY9asWahbty6OHTsmr6vJ8QK8SpSqV6+OjIwMjB49Gl5eXnj06BF27tyJJ0+eqLTx6NGj4efnh6VLlyIpKQkjRoxAixYtcPHiRRQtWjTfmLt27YqTJ09iypQpKF++PJ4+fYqTJ0/i0aNH8jK///47unXrhpYtW2LZsmUwMDDAL7/8guDgYOzcuVP+Zbxjxw60bt0a9erVw9q1a5GZmYkffvghV2KXY+vWrTh16hSmT58OSZIwYsQINGvWDCEhIbhx4wbmz5+PZ8+eYciQIWjTpg1iY2PlX2iaxpRD18dQjp9++gnly5fHDz/8AHNzc5QrV07jc8mZM2fQsGFDlC9fHsuWLYOxsTEWLVqE33//PVc9V69eRdOmTTFo0CCYmJjg0qVLmDFjBo4dO4Y9e/aoLJueno7PP/8cvXr1wsiRI5GZmQkhBFq1aoVDhw5h/PjxqF69Og4ePIgmTZq8dR9zJCQkoHPnzvjuu+8wYcIEbNy4EaNGjYKjo6N8iSc+Ph7+/v4wMTHBwoULUaJECaxevRr9+vXTuB4A+OKLL9CuXTv07t0b58+fx7hx43DhwgUcPXoUBgYGWm1ryJAhWLFiBSZPngxvb28kJyfj3LlzKsd4fk6fPo3vvvsOI0eOhJ2dHZYuXYqePXuibNmyqFevntwuNWrUQJEiRTB+/HiUKVMGhw8fxuTJk3Hz5k2Eh4cDANasWYM+ffqgf//++OGHH1CkSBFcu3YNFy5c0EmsAHDx4kUMHjwYALBv3z54eHho3E45f5Co61XU5HxBAMRHJjw8XAAQR44cERkZGeL58+dix44dwt7eXtSrV09kZGTIy1aoUEF4e3urlAkhRPPmzYWDg4PIysoSQgixd+9eAUDs3bs333ozMzPFixcvhImJiZg7d65c/scff+S7bkhIiHB2dpanHz58KAwNDcXo0aNVlmvXrp2ws7OT41y9erUAINavX6+yXExMjAAgfv75Z7VtVKdOHVG/fn15umzZsmLYsGGiSJEiIjo6WgghxMqVKwUAceXKFSGEEIcPHxYAxI8//qiyrTt37gilUimGDx+e7379/fffAoBYuHChyrrTpk0TAMSECRPksgkTJggAYubMmSrL9unTRxgZGYns7Gy5zMTERISEhKjd1xyLFi0SAMS6detUymfMmCEAiMjISLnM399fVKpU6a3bvH37tihWrJjo37+/Svnz58+Fvb29aNeuXb7r5ne89OjRQxgYGIgLFy7ku27O8di0aV
OV8nXr1gkA4vDhw2rjNjU1FYMGDcp3fnJysrC2thYtWrRQKc/KyhJVqlQRNWrUkMuqV68unJycRFpamlz2/PlzYWN
2024-03-26 12:20:03 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", *business_var)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-26 16:00:39 +01:00
"execution_count": 13,
"id": "f358fba3-f778-4414-bf55-c830be647ddd",
2024-03-26 12:20:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-03-26 16:00:39 +01:00
"'projet-bdc2324-team1/Output_marketing_personae_analysis/sport/segments_business_KPIs_sport.csv'"
2024-03-26 12:20:03 +01:00
]
},
2024-03-26 16:00:39 +01:00
"execution_count": 13,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-26 16:00:39 +01:00
"source": [
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"segments_business_KPIs_\" + activity\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".csv\"\n",
"\n",
"FILE_PATH_OUT_S3"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "3eee7b59-f658-402d-95b2-fa051188fd10",
"metadata": {},
"outputs": [],
"source": [
"def save_file_s3_mp(File_name, type_of_activity):\n",
" image_buffer = io.BytesIO()\n",
" plt.savefig(image_buffer, format='png')\n",
" image_buffer.seek(0)\n",
" PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{type_of_activity}/\"\n",
" FILE_PATH_OUT_S3 = PATH + File_name + type_of_activity + '.png'\n",
" with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
" plt.close()"
]
},
2024-03-26 16:00:39 +01:00
{
"cell_type": "code",
"execution_count": 94,
2024-03-26 16:00:39 +01:00
"id": "1790cb81-3304-41f1-a371-d8c926d32906",
"metadata": {},
"outputs": [],
2024-03-26 12:20:03 +01:00
"source": [
"# save to Minio\n",
"\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"segments_business_KPI_\" + activity\n",
"# file_name = \"segments_business_KPIs_\" + activity\n",
2024-03-26 16:00:39 +01:00
"FILE_PATH_OUT_S3 = PATH + file_name + \".png\"\n",
2024-03-26 12:20:03 +01:00
"\n",
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")\n",
"\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png', dpi=110)\n",
2024-03-26 12:20:03 +01:00
"image_buffer.seek(0)\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "cbf2cc62-1144-48c6-90d8-e12c8e510e02",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAHhCAYAAAB+0voXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB6L0lEQVR4nO3deVxO6f8/8NdB3d3ti7SQFktC0chWqISswwdjpzAzjJ2xrzHZZ3z4YBjMFMY6Y5lhLDGUnSzZd9lrskfau35/+HW+btXtvrlzx7yej0cPznWW632u+9ynd9e5zjmSEEKAiIiIiPJVTN8BEBERERVlTJaIiIiI1GCyRERERKQGkyUiIiIiNZgsEREREanBZImIiIhIDSZLRERERGowWSIiIiJSg8kSERERkRofXbIUGRkJSZLknxIlSsDBwQGdOnXC1atX32mb0dHRkCQJ0dHRWq974cIFhIWF4ebNm3nmhYaGwsXF5Z1iKsreZ7/CwsIgSRIePnz41mWnTZuGzZs3v1M96jx+/BidOnVCqVKlIEkS2rRpo/M63kfu8fj777/rO5SPXmEdQ0WdJEkICwuTp3PPm/mdpwqbNt/5wqj3Y1XQeeDly5do1qwZDAwMsGLFCgD5/14sU6YMevbsiXv37r11m/R2H12ylCsiIgKHDx/G7t27MWDAAPz555+oV68enjx58kHjuHDhAiZPnpzvSWjChAnYtGnTB43nQ/hQ+1VYv+i+++47bNq0Cf/9739x+PBhzJo1S+d1UNHwb02W3tSiRQscPnwYDg4O+g7lg/nyyy9x+PBhfYehU8+ePUOTJk2wd+9e/P777+jRo4fK/Nzfi7t27cJXX32FNWvWoH79+khJSdFTxJ+OEvoO4F1VrVoVPj4+AICAgABkZ2dj0qRJ2Lx5M3r27Knn6F4pV66cvkMoFB/7fp07dw7lypVD165d9R0KFXHZ2dnIysqCQqHQdygqXr58CWNjY42Xt7W1ha2tbSFGVPSUKVMGZcqU0XcYOpOUlITg4GBcv34d27dvR2BgYJ5lXv+9GBgYiOzsbHz33XfYvHkzz3fv6aPtWXpT7gHyzz//qJQfP34cn3/+OaytrWFkZARvb2+sX7/+rds7fvw4OnXqBBcXFyiVSri4uKBz5864deuWvExkZCS++OILAK8OzNwu0MjISAB5L1d5e3ujfv36eerKzs5G6dKl0bZtW7ksIyMD4eHhqFSpEhQKBWxtbdGzZ088ePBAbdx//fUXJElCbGysXLZhwwZIkoQWLVqoLOvl5YV27drJ00II/Pjjj6hevTqUSiWsrKzQvn173LhxQ2W9/C7DPX36FL1794a1tTVMTU3RokUL3LhxI8/lgFz//PMPOnfuDAsLC9jZ2aFXr1549uyZPF+SJKSkpGD58uVyuwYEBKjd98ePH6Nfv34oXbo0DA0N4ebmhnHjxiE9PR0AcPPmTUiShN27d+PixYvydt92+XXdunWoW7cuTExMYGpqiuDgYJw6dUplGU2Ol1z37t3D119/DScnJxgaGsLR0RHt27fPc+xmZmZi3LhxcHR0hLm5ORo1aoTLly+rjRUAHjx4IG8/99jx8/PD7t27VZbbvXs3goKCYG5uDmNjY/j5+eHvv//Os70//vgDXl5eUCgUcHNzw7x58/K9xCFJEgYMGICIiAi4u7tDqVTCx8cHR44cgRACs2fPhqurK0xNTdGwYUNcu3YtT12axJRb9/nz53V6DOUeH7NmzUJ4eDhcXV2hUCiwd+9eAJqfSw4cOIC6devCyMgIpUuXxoQJE7Bs2bI8l8HWrVuHJk2awMHBAUqlEh4eHhg9enSeXoDQ0FCYmpri7NmzaNKkCczMzBAUFAQASE5OxldffQUbGxuYmpqiadOmuHLlSp6Y8rsMFxAQgKpVqyI2Nhb169eHsbEx3NzcMGPGDOTk5Kisf/78eTRp0gTGxsawtbVF//795XONpsMX7ty5g7Zt28
Lc3BwWFhbo1q1bnvNZQecLFxcXhIaGytMvX77E8OHD4erqCiMjI1hbW8PHxwdr1qyRl8nvGHVxcUHLli2xY8cOfPbZZ1AqlahUqRJ++eWXPHUmJiaiT58+KFOmDAwNDeHq6orJkycjKytLZblFixahWrVqMDU1hZmZGSpVqoSxY8dqFevb3Lp1C/Xq1cPdu3exZ8+efBOl/NSpU0devyCani/+7T7anqU3xcfHAwAqVqwol+3duxdNmzZF7dq1sXjxYlhYWGDt2rXo2LEjXr58qfLle9PNmzfh7u6OTp06wdraGgkJCVi0aBFq1qyJCxcuoGTJkmjRogWmTZuGsWPHYuHChfjss88AFNzz0rNnTwwePBhXr15FhQoV5PKoqCjcv39f7hHLyclB69atsX//fowcORK+vr64desWJk2ahICAABw/fhxKpTLfOvz9/WFgYIDdu3ejZs2aAF79AlIqlYiJiUFmZiYMDAyQlJSEc+fO4ZtvvpHX7dOnDyIjIzFo0CDMnDkTjx8/xpQpU+Dr64vTp0/Dzs4u3zpzcnLQqlUrHD9+HGFhYfjss89w+PBhNG3atMD2bdeuHTp27IjevXvj7NmzGDNmDADIJ63Dhw+jYcOGCAwMxIQJEwAA5ubmBW4vLS0NgYGBuH79OiZPngwvLy/s378f06dPR1xcHP766y84ODjg8OHD6NevH549e4ZVq1YBACpXrlzgdqdNm4bx48ejZ8+eGD9+PDIyMjB79mzUr18fx44dk9fV5HgBXiVKNWvWRGZmJsaOHQsvLy88evQIO3fuxJMnT1TaeOzYsfDz88OyZcuQnJyMUaNGoVWrVrh48SKKFy9eYMzdu3fHyZMnMXXqVFSsWBFPnz7FyZMn8ejRI3mZX3/9FT169EDr1q2xfPlyGBgY4KeffkJwcDB27twp/zLesWMH2rZtiwYNGmDdunXIysrC999/nyexy7V161acOnUKM2bMgCRJGDVqFFq0aIGQkBDcuHEDCxYswLNnzzBs2DC0a9cOcXFx8i80TWPKpetjKNf//vc/VKxYEd9//z3Mzc1RoUIFjc8lZ86cQePGjVGxYkUsX74cxsbGWLx4MX799dc89Vy9ehXNmzfHkCFDYGJigkuXLmHmzJk4duwY9uzZo7JsRkYGPv/8c/Tp0wejR49GVlYWhBBo06YNDh06hIkTJ6JmzZo4ePAgmjVr9tZ9zJWYmIiuXbvi22+/xaRJk7Bp0yaMGTMGjo6O8iWehIQE+Pv7w8TEBIsWLUKpUqWwZs0aDBgwQON6AOA///kPOnTogL59++L8+fOYMGECLly4gKNHj8LAwECrbQ0bNgwrV65EeHg4vL29kZKSgnPnzqkc4wU5ffo0vv32W4wePRp2dnZYtmwZevfujfLly6NBgwZyu9SqVQvFihXDxIkTUa5cORw+fBjh4eG4efMmIiIiAABr165Fv379MHDgQHz//fcoVqwYrl27hgsXLugkVgC4ePEihg4dCgDYt28fPDw8NG6n3D9I1PUqanK+IADiIxMRESEAiCNHjojMzEzx/PlzsWPHDmFvby8aNGggMjMz5WUrVaokvL29VcqEEKJly5bCwcFBZGdnCyGE2Lt3rwAg9u7dW2C9WVlZ4sWLF8LExETMmzdPLv/tt98KXDckJEQ4OzvL0w8fPhSGhoZi7NixKst16NBB2NnZyXGuWbNGABAbNmxQWS42NlYAED/++KPaNqpXr55o2LChPF2+fHkxYsQIUaxYMRETEyOEEGLVqlUCgLhy5YoQQojDhw8LAOKHH35Q2dadO3eEUqkUI0eOLHC//vrrLwFALFq0SGXd6dOnCwBi0qRJctmkSZMEADFr1iyVZfv16yeMjIxETk6OXGZiYiJCQkLU7muuxYsXCwBi/fr1KuUzZ84UAERUVJRc5u/vL6pUqfLWbd6+fVuUKFFCDBw4UKX8+fPnwt7eXnTo0KHAdQs6Xnr16iUMDAzEhQsXClw393hs3r
y5Svn69esFAHH48GG1cZuamoohQ4YUOD8lJUVYW1uLVq1aqZRnZ2eLatWqiVq1asllNWvWFE5OTiI9PV0ue/78ubC
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "7a42523d-f80f-488b-ad8f-39dd793cddd6",
"metadata": {},
"outputs": [],
"source": [
"# with function\n",
"\n",
"# activity = \"sport\"\n",
"\n",
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")\n",
"\n",
"save_file_s3_mp(File_name = \"segments_business_KPIs_\", type_of_activity = type_of_activity)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "markdown",
"id": "53d24165-6b98-4b66-9ad8-7514564689d8",
"metadata": {},
"source": [
"## 2. Spider plot summarizing sociodemographic characteristics and purchasing behaviour"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 41,
"id": "beb31e4b-a01b-4312-879a-fe5757ea061f",
"metadata": {},
"outputs": [],
"source": [
2024-03-29 13:43:36 +01:00
"def df_segment_mp(df, segment, gender_female, gender_male, gender_other, country_fr, age) :\n",
" df_mp = df.groupby(segment)[[gender_female, gender_male, gender_other, country_fr, age]].mean().reset_index()\n",
2024-03-31 19:57:10 +02:00
" # df_mp.insert(3, \"share_known_gender\", df_mp[gender_female]+df_mp[gender_male])\n",
" df_mp.insert(4, \"share_of_women\", df_mp[gender_female]/(df_mp[gender_female]+df_mp[gender_male]))\n",
" return df_mp"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 40,
2024-03-26 12:20:03 +01:00
"id": "267ebaee-eaef-4720-8ca9-e40c0cf125df",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
2024-03-31 19:57:10 +02:00
" <th>share_of_women</th>\n",
2024-03-26 12:20:03 +01:00
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.234460</td>\n",
" <td>0.419216</td>\n",
" <td>0.346324</td>\n",
" <td>0.358679</td>\n",
" <td>0.511056</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.295031</td>\n",
" <td>0.539591</td>\n",
" <td>0.165378</td>\n",
" <td>0.353490</td>\n",
" <td>0.726962</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.232354</td>\n",
" <td>0.583174</td>\n",
" <td>0.184472</td>\n",
" <td>0.284912</td>\n",
" <td>0.633363</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.200692</td>\n",
" <td>0.674637</td>\n",
" <td>0.124671</td>\n",
" <td>0.229276</td>\n",
" <td>0.678772</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-31 19:57:10 +02:00
" segment gender_female gender_male gender_other share_of_women \\\n",
"0 1 0.234460 0.419216 0.346324 0.358679 \n",
"1 2 0.295031 0.539591 0.165378 0.353490 \n",
"2 3 0.232354 0.583174 0.184472 0.284912 \n",
"3 4 0.200692 0.674637 0.124671 0.229276 \n",
2024-03-26 12:20:03 +01:00
"\n",
2024-03-31 19:57:10 +02:00
" country_fr \n",
"0 0.511056 \n",
"1 0.726962 \n",
"2 0.633363 \n",
"3 0.678772 "
2024-03-26 12:20:03 +01:00
]
},
2024-03-31 19:57:10 +02:00
"execution_count": 40,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# description of marketing personae\n",
"\n",
"X_test_segment_mp = X_test_segment.groupby(\"segment\")[['gender_female', 'gender_male', 'gender_other', 'country_fr']].mean().reset_index()\n",
2024-03-31 19:57:10 +02:00
"# X_test_segment_mp.insert(3, \"share_known_gender\", X_test_segment_mp[\"gender_female\"]+X_test_segment_mp[\"gender_male\"])\n",
"X_test_segment_mp.insert(4, \"share_of_women\", X_test_segment_mp[\"gender_female\"]/(X_test_segment_mp[\"gender_female\"]+X_test_segment_mp[\"gender_male\"]))\n",
2024-03-26 12:20:03 +01:00
"X_test_segment_mp"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 44,
"id": "5f908232-b0fe-4707-a8c5-5cadb7d8653f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
2024-03-31 19:57:10 +02:00
" <th>share_of_women</th>\n",
" <th>country_fr</th>\n",
2024-03-29 13:43:36 +01:00
" <th>age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.234460</td>\n",
" <td>0.419216</td>\n",
" <td>0.346324</td>\n",
" <td>0.358679</td>\n",
" <td>0.511056</td>\n",
" <td>40.652136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.295031</td>\n",
" <td>0.539591</td>\n",
" <td>0.165378</td>\n",
" <td>0.353490</td>\n",
" <td>0.726962</td>\n",
" <td>36.204792</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.232354</td>\n",
" <td>0.583174</td>\n",
" <td>0.184472</td>\n",
" <td>0.284912</td>\n",
" <td>0.633363</td>\n",
" <td>37.533425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.200692</td>\n",
" <td>0.674637</td>\n",
" <td>0.124671</td>\n",
" <td>0.229276</td>\n",
" <td>0.678772</td>\n",
" <td>39.665371</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-31 19:57:10 +02:00
" segment gender_female gender_male gender_other share_of_women \\\n",
"0 1 0.234460 0.419216 0.346324 0.358679 \n",
"1 2 0.295031 0.539591 0.165378 0.353490 \n",
"2 3 0.232354 0.583174 0.184472 0.284912 \n",
"3 4 0.200692 0.674637 0.124671 0.229276 \n",
"\n",
2024-03-31 19:57:10 +02:00
" country_fr age \n",
"0 0.511056 40.652136 \n",
"1 0.726962 36.204792 \n",
"2 0.633363 37.533425 \n",
"3 0.678772 39.665371 "
]
},
2024-03-31 19:57:10 +02:00
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_mp = df_segment_mp(X_test_segment, \"segment\", \"gender_female\", \n",
2024-03-29 13:43:36 +01:00
" \"gender_male\", \"gender_other\", \"country_fr\", \"age\")\n",
"X_test_segment_mp"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 46,
2024-03-26 12:20:03 +01:00
"id": "910876fe-e6df-4f8d-9978-5d6fdd893ac0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
2024-03-31 19:57:10 +02:00
" <th>prop_purchases_internet</th>\n",
2024-03-26 12:20:03 +01:00
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.090439</td>\n",
" <td>0.141985</td>\n",
" <td>0.587075</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.502232</td>\n",
" <td>0.271623</td>\n",
" <td>0.111611</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.681753</td>\n",
" <td>0.299255</td>\n",
" <td>0.122377</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.528249</td>\n",
" <td>0.349811</td>\n",
" <td>0.178660</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-31 19:57:10 +02:00
" segment prop_purchases_internet share_campaigns_opened opt_in\n",
"0 1 0.090439 0.141985 0.587075\n",
"1 2 0.502232 0.271623 0.111611\n",
"2 3 0.681753 0.299255 0.122377\n",
"3 4 0.528249 0.349811 0.178660"
2024-03-26 12:20:03 +01:00
]
},
2024-03-31 19:57:10 +02:00
"execution_count": 46,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# purchasing behaviour\n",
"\n",
2024-03-31 19:57:10 +02:00
"# X_test_segment[\"share_tickets_internet\"] = X_test_segment[\"nb_tickets_internet\"]/X_test_segment[\"nb_tickets\"]\n",
2024-03-26 12:20:03 +01:00
"X_test_segment[\"share_campaigns_opened\"] = X_test_segment[\"nb_campaigns_opened\"]/X_test_segment[\"nb_campaigns\"]\n",
2024-03-31 19:57:10 +02:00
"X_test_segment_pb = X_test_segment.groupby(\"segment\")[[\"prop_purchases_internet\", \"share_campaigns_opened\", \"opt_in\"]].mean().reset_index()\n",
2024-03-26 12:20:03 +01:00
"X_test_segment_pb"
]
},
{
"cell_type": "code",
2024-03-29 13:43:36 +01:00
"execution_count": 33,
"id": "8d3ab073-040c-4480-bd44-33fc88626707",
"metadata": {},
"outputs": [],
"source": [
2024-03-28 14:13:13 +01:00
"def df_segment_pb (df, segment, nb_tickets_internet, nb_tickets, nb_campaigns_opened, nb_campaigns, opt_in,\n",
" time_to_open) :\n",
" df_used = df\n",
" df_used[\"share_tickets_internet\"] = df_used[nb_tickets_internet]/df_used[nb_tickets]\n",
" df_used[\"share_campaigns_opened\"] = df_used[nb_campaigns_opened]/df_used[nb_campaigns]\n",
2024-03-28 14:13:13 +01:00
" df_pb = df_used.groupby(segment)[[\"share_tickets_internet\", \"share_campaigns_opened\", \n",
" opt_in, time_to_open]].mean().reset_index()\n",
" df_pb[\"time_to_open_med\"] = df_used.groupby(segment)[[time_to_open]].apply(lambda x: x.dropna().median()).values\n",
" return df_pb"
]
},
{
"cell_type": "code",
2024-03-29 13:43:36 +01:00
"execution_count": 35,
2024-03-28 14:13:13 +01:00
"id": "33a11ddf-b410-4cf1-9e6b-645de6dad604",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Durée totale en heures : 49.65333333333333\n"
]
}
],
"source": [
"# add : variable time to open\n",
"\n",
"from datetime import timedelta\n",
"\n",
"def str_duration_to_hours(duration_str):\n",
" parts = duration_str.split()\n",
" days = int(parts[0]) if len(parts) > 1 else 0\n",
" time_parts = parts[-1].split(':')\n",
" hours = int(time_parts[0])\n",
" minutes = int(time_parts[1])\n",
" seconds = int(time_parts[2].split('.')[0])\n",
" total_hours = days * 24 + hours + minutes / 60 + seconds / 3600\n",
" return total_hours\n",
"\n",
"# Exemple d'utilisation :\n",
"duration_str = '2 days 01:39:12.750000'\n",
"\n",
"hours = str_duration_to_hours(duration_str)\n",
"print(\"Durée totale en heures :\", hours)\n"
]
},
{
"cell_type": "code",
2024-03-29 13:43:36 +01:00
"execution_count": 36,
2024-03-28 14:13:13 +01:00
"id": "4760743c-1032-452a-85fa-63d1447a742c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"segment\n",
"1 6.418056\n",
"2 8.031389\n",
"3 13.037500\n",
"4 15.197500\n",
"Name: time_to_open, dtype: float64"
]
},
2024-03-29 13:43:36 +01:00
"execution_count": 36,
2024-03-28 14:13:13 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# def of the variable time_to_open\n",
"\n",
"X_test_segment[\"time_to_open\"] = dataset_test[\"time_to_open\"].apply(lambda x : np.nan if pd.isna(x) else str_duration_to_hours(x))\n",
"X_test_segment.groupby(\"segment\")[\"time_to_open\"].median()"
]
},
{
"cell_type": "code",
2024-03-29 13:43:36 +01:00
"execution_count": 37,
"id": "0cb8f47a-bf0f-4285-b2ff-d90de394c787",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
2024-03-28 14:13:13 +01:00
" <th>time_to_open</th>\n",
" <th>time_to_open_med</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.527270</td>\n",
" <td>0.136565</td>\n",
" <td>0.730064</td>\n",
2024-03-28 14:13:13 +01:00
" <td>56.785498</td>\n",
" <td>6.418056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.629648</td>\n",
" <td>0.194240</td>\n",
" <td>0.275860</td>\n",
2024-03-28 14:13:13 +01:00
" <td>56.349272</td>\n",
" <td>8.031389</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.654488</td>\n",
" <td>0.292206</td>\n",
" <td>0.054260</td>\n",
2024-03-28 14:13:13 +01:00
" <td>57.847390</td>\n",
" <td>13.037500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.606618</td>\n",
" <td>0.370733</td>\n",
" <td>0.127051</td>\n",
2024-03-28 14:13:13 +01:00
" <td>57.567684</td>\n",
" <td>15.197500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-28 14:13:13 +01:00
" segment share_tickets_internet share_campaigns_opened opt_in \\\n",
"0 1 0.527270 0.136565 0.730064 \n",
"1 2 0.629648 0.194240 0.275860 \n",
"2 3 0.654488 0.292206 0.054260 \n",
"3 4 0.606618 0.370733 0.127051 \n",
"\n",
" time_to_open time_to_open_med \n",
"0 56.785498 6.418056 \n",
"1 56.349272 8.031389 \n",
"2 57.847390 13.037500 \n",
"3 57.567684 15.197500 "
]
},
2024-03-29 13:43:36 +01:00
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_pb = df_segment_pb(X_test_segment, \"segment\", \"nb_tickets_internet\", \"nb_tickets\", \n",
2024-03-28 14:13:13 +01:00
" \"nb_campaigns_opened\", \"nb_campaigns\", \"opt_in\", \"time_to_open\")\n",
"X_test_segment_pb"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 49,
2024-03-26 12:20:03 +01:00
"id": "ba2884e3-004a-4554-ab82-6d477dcc4869",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
2024-03-31 19:57:10 +02:00
" <th>prop_purchases_internet</th>\n",
2024-03-26 12:20:03 +01:00
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" <th>share_of_women</th>\n",
2024-03-29 13:43:36 +01:00
" <th>age</th>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.090439</td>\n",
" <td>0.141985</td>\n",
" <td>0.587075</td>\n",
" <td>0.358679</td>\n",
" <td>40.652136</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.502232</td>\n",
" <td>0.271623</td>\n",
" <td>0.111611</td>\n",
" <td>0.353490</td>\n",
" <td>36.204792</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.681753</td>\n",
" <td>0.299255</td>\n",
" <td>0.122377</td>\n",
" <td>0.284912</td>\n",
" <td>37.533425</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-31 19:57:10 +02:00
" <td>0.528249</td>\n",
" <td>0.349811</td>\n",
" <td>0.178660</td>\n",
" <td>0.229276</td>\n",
" <td>39.665371</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-31 19:57:10 +02:00
" segment prop_purchases_internet share_campaigns_opened opt_in \\\n",
"0 1 0.090439 0.141985 0.587075 \n",
"1 2 0.502232 0.271623 0.111611 \n",
"2 3 0.681753 0.299255 0.122377 \n",
"3 4 0.528249 0.349811 0.178660 \n",
2024-03-26 12:20:03 +01:00
"\n",
2024-03-31 19:57:10 +02:00
" share_of_women age \n",
"0 0.358679 40.652136 \n",
"1 0.353490 36.204792 \n",
"2 0.284912 37.533425 \n",
"3 0.229276 39.665371 "
2024-03-26 12:20:03 +01:00
]
},
2024-03-31 19:57:10 +02:00
"execution_count": 49,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-31 19:57:10 +02:00
"#X_test_segment_caract = pd.concat([X_test_segment_pb.drop(\"time_to_open\", axis=1), X_test_segment_mp[['share_known_gender', 'share_of_women', 'country_fr', 'age']]], axis=1)\n",
"X_test_segment_caract = pd.concat([X_test_segment_pb, X_test_segment_mp[[ 'share_of_women', 'age']]], axis=1)\n",
2024-03-26 12:20:03 +01:00
"X_test_segment_caract"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 216,
2024-03-26 12:20:03 +01:00
"id": "23a37e9b-bb29-4122-85cb-cc15cc344ee2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-03-28 14:13:13 +01:00
"share_tickets_internet 0.654488\n",
"share_campaigns_opened 0.370733\n",
"opt_in 0.730064\n",
"time_to_open_med 15.197500\n",
"share_known_gender 0.903085\n",
"share_of_women 0.571869\n",
"country_fr 0.805862\n",
2024-03-26 12:20:03 +01:00
"dtype: float64"
]
},
2024-03-28 14:13:13 +01:00
"execution_count": 216,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_caract.loc[:,\"share_tickets_internet\":].max()"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 50,
2024-03-26 12:20:03 +01:00
"id": "0809e2ae-3487-4b24-8f60-741c683cb9af",
"metadata": {},
"outputs": [],
"source": [
"# def d'une fonction associée - KEEP THIS !!!\n",
"\n",
2024-03-29 13:43:36 +01:00
"def radar_mp_plot(df, categories, index, var_not_perc) :\n",
2024-03-26 12:20:03 +01:00
" categories = categories\n",
"\n",
" # true values are used to print the true value in parenthesis\n",
" tvalues = list(df.loc[index,categories]) \n",
"\n",
" max_values = df[categories].max()\n",
"\n",
" # values are true values / max among the 4 segments, allows to \n",
" # put values in relation with the values for other segments\n",
" # if the point has a maximal abscisse it means that value is maximal for the segment considered\n",
" # , event if not equal to 1\n",
" \n",
" values = list(df.loc[index,categories]/max_values)\n",
" \n",
" # values normalized are used to adjust the value around the circle\n",
" # for instance if the maximum of values is equal to 0.8, we want the point to be \n",
" # at 8/10th of the circle radius, not at the edge \n",
" values_normalized = [ max(values) * elt for elt in values]\n",
"\n",
" # Nb of categories\n",
" num_categories = len(categories)\n",
" \n",
" angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
" \n",
" # Initialize graphic\n",
" fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
" \n",
" # we have to draw first a transparent line (alpha=0) of values to adjust the radius of the circle\n",
" # which is based on max(value)\n",
" ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
" ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, linewidth=1.2)\n",
" \n",
" # fill the sector\n",
" ax.fill(angles, values_normalized, color='orange', alpha=0.4)\n",
" \n",
" # labels\n",
" ax.set_yticklabels([])\n",
" ax.set_xticks(angles)\n",
2024-03-28 14:13:13 +01:00
"\n",
" # define tick labels\n",
" values_printed = [str(round(tvalues[i],2)) if categories[i] in var_not_perc else f\"{round(100 * tvalues[i],2)}%\" for i in range(len(categories))]\n",
" # ticks = [categories[i].replace(\"_\",\" \") + f\"\\n({round(100 * tvalues[i],2)}%)\" for i in range(len(categories))]\n",
" ticks = [categories[i].replace(\"_\",\" \") + f\"\\n({values_printed[i]})\" for i in range(len(categories))]\n",
"\n",
2024-03-26 12:20:03 +01:00
" ax.set_xticklabels(ticks, color=\"black\")\n",
" \n",
" ax.spines['polar'].set_visible(False)\n",
" \n",
" plt.title(f'Characteristics of the segment {index+1}\\n')\n",
" \n",
" # plt.show()"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 229,
"id": "2fe80072-90d1-4e17-b8a7-ddc3e3be1b12",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['60.66%', '37.07%', '12.71%', '15.2', '20.82%', '63.9%']"
]
},
"execution_count": 229,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-29 13:43:36 +01:00
"var_not_perc = [\"time_to_open_med\", \"age\"]\n",
2024-03-28 14:13:13 +01:00
"\n",
"tvalues = list(X_test_segment_caract.loc[3,categories]) \n",
"\n",
"values_printed = [str(round(tvalues[i],2)) if categories[i] in var_not_perc else f\"{round(100 * tvalues[i],2)}%\" for i in range(len(categories))]\n",
"values_printed"
]
},
{
"cell_type": "code",
2024-03-29 13:43:36 +01:00
"execution_count": 41,
2024-03-28 14:13:13 +01:00
"id": "cd3cb227-28b2-461e-a921-cff721c356e6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['share_tickets_internet',\n",
" 'share_campaigns_opened',\n",
" 'opt_in',\n",
" 'time_to_open_med',\n",
" 'share_of_women',\n",
2024-03-29 13:43:36 +01:00
" 'country_fr',\n",
" 'age']"
2024-03-28 14:13:13 +01:00
]
},
2024-03-29 13:43:36 +01:00
"execution_count": 41,
2024-03-28 14:13:13 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-29 13:43:36 +01:00
"list(X_test_segment_caract.drop([\"segment\", \"share_known_gender\"], axis=1).columns)\n"
2024-03-28 14:13:13 +01:00
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 52,
"id": "9a550db7-ddd7-4d6f-bf98-cf0b2ea35d91",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>prop_purchases_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" <th>share_of_women</th>\n",
" <th>age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.090439</td>\n",
" <td>0.141985</td>\n",
" <td>0.587075</td>\n",
" <td>0.358679</td>\n",
" <td>40.652136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.502232</td>\n",
" <td>0.271623</td>\n",
" <td>0.111611</td>\n",
" <td>0.353490</td>\n",
" <td>36.204792</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.681753</td>\n",
" <td>0.299255</td>\n",
" <td>0.122377</td>\n",
" <td>0.284912</td>\n",
" <td>37.533425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.528249</td>\n",
" <td>0.349811</td>\n",
" <td>0.178660</td>\n",
" <td>0.229276</td>\n",
" <td>39.665371</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment prop_purchases_internet share_campaigns_opened opt_in \\\n",
"0 1 0.090439 0.141985 0.587075 \n",
"1 2 0.502232 0.271623 0.111611 \n",
"2 3 0.681753 0.299255 0.122377 \n",
"3 4 0.528249 0.349811 0.178660 \n",
"\n",
" share_of_women age \n",
"0 0.358679 40.652136 \n",
"1 0.353490 36.204792 \n",
"2 0.284912 37.533425 \n",
"3 0.229276 39.665371 "
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_caract"
]
},
{
"cell_type": "code",
"execution_count": 53,
2024-03-26 12:20:03 +01:00
"id": "56cb026b-857f-42eb-baed-0ebdf5aee447",
"metadata": {},
"outputs": [
{
"data": {
2024-03-31 19:57:10 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmMAAAI0CAYAAABRQy4yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3ib5dk34J+2bEvykmXLe8Z7r3jEI7OBhF0oXWG0fSHwtrRAB/QrlPKWvUsZYYUCYYVNSCDDdhI7tuMRx3vvPWXJ2rq/P4LVGNuJ7UjW8H0eh8CRnnHpsSxduu7FIIQQUBRFURRFURbBtHQAFEVRFEVRaxlNxiiKoiiKoiyIJmMURVEURVEWRJMxiqIoiqIoC6LJGEVRFEVRlAXRZIyiKIqiKMqCaDJGURRFURRlQTQZoyiKoiiKsiCajFEURVEURVkQTcaoRdXU1ODmm29GUFAQ+Hw+BAIBkpKS8Pjjj2N8fNy4XWBgIHbs2GHBSFfu3//+N9566y2zHZ/BYODBBx9c1j4HDhxYdJ/AwEDcdNNNlxyXqY2Pj+MnP/kJJBIJGAwGrrrqqkW3XeyaFxQUgMFg4OOPPzZfoNQc7733Hp599tkV7UsIQU5ODhgMBu68807TBkZRawzb0gFQ1mnPnj3YvXs3wsPDce+99yIqKgparRanT5/Gyy+/jJKSEnz66aeWDvOS/fvf/4ZYLDZbglNSUgJfX99l7XPgwAG8+OKLCyZkn376KUQikYmiM51//OMf+PTTT/HGG28gJCQEbm5ui25r7mtOLd17772H2tpa3HXXXcve98UXX0Rra6vpg6KoNYgmY9Q8JSUluP3227FlyxZ89tln4PF4xse2bNmCu+++GwcPHlzVmPR6PXQ63ZxYrBUhBCqVCg4ODli/fr1Jj52YmGjS45lKbW0tQkJC8LOf/czSoVCroLOzE3/5y1/w9ttv45prrrF0OBRl82gzJTXPP//5TzAYDLz66qsLJj9cLhdXXHHFvPsPHjyIpKQkODg4ICIiAm+88cacx0dGRrB7925ERUVBIBBAIpFg48aNOH78+JztOjs7wWAw8Pjjj+Phhx9GUFAQeDwejh07BpVKhbvvvhsJCQlwdnaGm5sbMjIy8Pnnn8+Lx2Aw4IUXXkBCQgIcHBzg4uKC9evX44svvgBwrsmvrq4OhYWFYDAYYDAYCAwMNO4vk8lwzz33ICgoCFwuFz4+PrjrrrugUCjmnGe2mebll19GZGQkeDwe9u7da3zs/ArXzMyM8Zh8Ph9ubm5ISUnBvn37AAA33XQTXnzxReO+s7fOzk5jzD+sKE1OTuLuu+9GcHAweDweJBIJLrvsMjQ2Nhq3eemllxAfHw+BQAChUIiIiAjcd999867ZD42Pj2P37t3w8fEBl8tFcHAw7r//fqjV6jm/q8OHD6OhocEYb0FBwYLHu9g1BwCtVov7778f3t7eEIlE2Lx5M5qamuYd6/Dhw9i0aRNEIhEcHR2RlZWFI0eOXPQ5GQwGPPzwwwgPDze+LuLi4vDcc8/N2a6lpQU//elPIZFIwOPxEBkZafzdnK+urg5bt26Fo6MjPDw8cMcdd+Drr7+edx3y8vIQExODkpISZGZmwsHBAYGBgXjzzTcBAF9//TWSkpLg6OiI2NjYBb/wLCWm2ebeffv2XfA65uXl4euvv0ZXV9ec19pS/OY3v8GWLVtw9dVXL2l7iqIujFbGqDn0ej2OHj2K5ORk+Pn5LXm/M2fO4O6778af//xneHp64rXXXsOtt96K0NBQ5OTkAICxn9kDDzwALy8vyOVyfPrpp8jLy8ORI0eQl5c355jPP/881q1bhyeffBIikQhhYWFQq9UYHx/HPffcAx8fH2g0Ghw+fBjXXHMN3nzzTfzyl7807n/TTTfhnXfewa233oqHHnoIXC4XlZWVxsTm008/xXXXXQdnZ2f8+9//BgBj8jkzM4Pc3Fz09vbivvvuQ1xcHOrq6v
C3v/0NZ8+exeHDh+d8cH322Wc4fvw4/va3v8HLywsSiWTB6/SHP/wB//nPf/Dwww8jMTERCoUCtbW1GBsbAwD8v//3/6BQKPDxxx+jpKTEuJ9UKl3weNPT08jOzkZnZyf+9Kc/IT09HXK5HEVFRRgYGEBERATef/997N69G//7v/+LJ598EkwmE62traivr7/g71SlUiE/Px9tbW34+9//jri4OBw/fhyPPPIIqqur8fXXX0MqlaKkpAS7d+/G1NQU3n33XQBAVFTUgse80DWfdd999yErKwuvvfYaZDIZ/vSnP2Hnzp1oaGgAi8UCALzzzjv45S9/iSuvvBJ79+4Fh8PBK6+8gm3btuHQoUPYtGnTos/r8ccfx4MPPoi//vWvyMnJgVarRWNjIyYnJ43b1NfXIzMzE/7+/njqqafg5eWFQ4cO4be//S1GR0fxwAMPAAAGBgaQm5sLJycnvPTSS5BIJNi3b9+ifagGBwdx8803449//CN8fX3xwgsv4JZbbkFPTw8+/vhj3HfffXB2dsZDDz2Eq666Cu3t7fD29l5WTEu9jv/+97/xm9/8Bm1tbcvqcvDaa6+hrKzsoq8fiqKWgVDUeQYHBwkA8pOf/GTJ+wQEBBA+n0+6urqM9ymVSuLm5kb+53/+Z9H9dDod0Wq1ZNOmTeTqq6823t/R0UEAkJCQEKLRaC547tlj3HrrrSQxMdF4f1FREQFA7r///gvuHx0dTXJzc+fd/8gjjxAmk0nKy8vn3P/xxx8TAOTAgQPG+wAQZ2dnMj4+Pu84AMgDDzxg/HdMTAy56qqrLhjTHXfcQRb70wwICCC7du0y/vuhhx4iAMh333236PHuvPNO4uLicsFzLuTll18mAMiHH3445/7HHnuMACDffvut8b7c3FwSHR29pOMuds2PHTtGAJDLLrtszv0ffvghAUBKSkoIIYQoFAri5uZGdu7cOWc7vV5P4uPjSVpa2gXPv2PHDpKQkHDBbbZt20Z8fX3J1NTUnPvvvPNOwufzjb/re++9lzAYDFJXVzdvfwDk2LFjxvtyc3MJAHL69GnjfWNjY4TFYhEHBwfS19dnvL+6upoAIM8///yyY1rqdSSEkMsvv5wEBARc8Fqcr7e3lzg7O5NXXnnFeB8Acscddyz5GBRFzUebKSmTSEhIgL+/v/HffD4f69atQ1dX15ztXn75ZSQlJYHP54PNZoPD4eDIkSNoaGiYd8wrrrgCHA5n3v0fffQRsrKyIBAIjMd4/fXX5xzjm2++AQDccccdK3o+X331FWJiYpCQkACdTme8bdu2bcFmuI0bN8LV1fWix01LS8M333yDP//5zygoKIBSqVxRfLO++eYbrFu3Dps3b77gOScnJ3HjjTfi888/x+jo6JKOffToUTg5OeG6666bc/9sM+lSmgRX4odN4HFxcQBgfC0VFxdjfHwcu3btmvO7MRgM+NGPfoTy8vJ5TcnnS0tLw5kzZ7B7924cOnQIMplszuMqlQpHjhzB1VdfDUdHxznnuOyyy6BSqXDq1CkAQGFhIWJiYuZVAm+88cYFzy2VSpGcnGz8t5ubGyQSCRISEowVMACIjIyc85yXE9NSr+NK3HbbbYiPj8evf/3rFR+Doqj5aDJGzSEWi+Ho6IiOjo5l7efu7j7vPh6PNyfZePrpp3H77bcjPT0d+/fvx6lTp1BeXo4f/ehHCyYlCzXNffLJJ7j++uvh4+ODd955ByUlJSgvL8ctt9wClUpl3G5kZAQsFgteXl7Leh6zhoaGUFNTAw6HM+cmFApBCJmX0CzWjPhDzz//PP70pz/hs88+Q35+Ptzc3HDVVVehpaVlRXGOjIxcdLTmL37xC7zxxhvo6urCtddeC4lEgvT0dHz33XcX3G9sbAxeXl7z+hFJJBKw2Wxj06qp/fC1NNuMOfsaGRoaAgBcd911834/jz32GAghc6Ze+aG//OUvePLJJ3Hq1Cls374d7u7u2LRpE06fPg3g3PPW6X
R44YUX5h3/sssuAwDj739sbAyenp7zzrHQfQAWHGXK5XLn3c/lcgHA+JpeTkyzLnYdl+vjjz/GwYMH8fjjj2Nqagq
2024-03-26 12:20:03 +01:00
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-31 19:57:10 +02:00
"var_not_perc = [\"age\"]\n",
2024-03-29 13:43:36 +01:00
"\n",
2024-03-31 19:57:10 +02:00
"categories = list(X_test_segment_caract.drop([\"segment\"], axis=1).columns)\n",
2024-03-26 12:20:03 +01:00
"#for i in range(4) :\n",
"# radar_mp_plot(df=X_test_segment_caract, categories=categories, index=i)\n",
2024-03-29 13:43:36 +01:00
"radar_mp_plot(df=X_test_segment_caract, categories=categories, index=3, var_not_perc=var_not_perc)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
"execution_count": 739,
"id": "5b3c4bac-396e-4117-a7d9-f39a3d8f95b4",
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (4005960846.py, line 6)",
"output_type": "error",
"traceback": [
"\u001b[0;36m Cell \u001b[0;32mIn[739], line 6\u001b[0;36m\u001b[0m\n\u001b[0;31m file_name = \"spider_chart_\" + activity + \"_sgt_\" str(index)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"# export to MinIo\n",
"\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"spider_chart_\" + activity + \"_sgt_\" + str(index)\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".csv\"\n",
"\n",
"\n",
"radar_mp_plot(df=X_test_segment_caract, categories=categories, index=3)\n",
"\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png')\n",
"image_buffer.seek(0)\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "code",
"execution_count": 740,
"id": "276de9a5-d506-4c11-a7c2-a23ebbc59fe5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'projet-bdc2324-team1/Output_marketing_personae_analysis/sport/spider_chart_sport_sgt_3.csv'"
]
},
"execution_count": 740,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"spider_chart_\" + activity + \"_sgt_\" + str(index)\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".csv\"\n",
"FILE_PATH_OUT_S3"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 54,
2024-03-26 12:20:03 +01:00
"id": "80e47dbc-3efd-4857-8055-876b308cbcb5",
"metadata": {},
"outputs": [],
"source": [
"# general function to have the 4 radars in one plot\n",
"\n",
2024-03-29 13:43:36 +01:00
"def radar_mp_plot_all(df, categories, var_not_perc) :\n",
2024-03-26 12:20:03 +01:00
" \n",
" nb_segments = df.shape[0]\n",
" categories = categories\n",
"\n",
" # Initialize graphic\n",
2024-03-29 13:43:36 +01:00
" fig, ax = plt.subplots(2,2, figsize=(20, 21), subplot_kw=dict(polar=True))\n",
2024-03-26 12:20:03 +01:00
" \n",
" for index in range(nb_segments) :\n",
" row = index // 2 # Division entière pour obtenir le numéro de ligne\n",
" col = index % 2 \n",
" \n",
2024-03-28 14:13:13 +01:00
" # df = X_test_segment_caract\n",
2024-03-26 12:20:03 +01:00
" \n",
" # true values are used to print the true value in parenthesis\n",
" tvalues = list(df.loc[index,categories]) \n",
" \n",
" max_values = df[categories].max()\n",
" \n",
" # values are true values / max among the 4 segments, allows to \n",
" # put values in relation with the values for other segments\n",
" # if the point has a maximal abscisse it means that value is maximal for the segment considered\n",
" # , event if not equal to 1\n",
" \n",
" values = list(df.loc[index,categories]/max_values)\n",
" \n",
" # values normalized are used to adjust the value around the circle\n",
" # for instance if the maximum of values is equal to 0.8, we want the point to be \n",
" # at 8/10th of the circle radius, not at the edge \n",
" values_normalized = [ max(values) * elt for elt in values]\n",
" \n",
" # Nb of categories\n",
" num_categories = len(categories)\n",
"\n",
" angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
" \n",
" # we have to draw first a transparent line (alpha=0) of values to adjust the radius of the circle\n",
" # which is based on max(value)\n",
" ax[row, col].plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
" ax[row, col].plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, linewidth=1.2)\n",
" \n",
" # fill the sector\n",
" ax[row, col].fill(angles, values_normalized, color='orange', alpha=0.4, label = index)\n",
" \n",
" # labels\n",
" ax[row, col].set_yticklabels([])\n",
" ax[row, col].set_xticks(angles)\n",
2024-03-28 14:13:13 +01:00
" \n",
" # define the ticks\n",
" values_printed = [str(round(tvalues[i],2)) if categories[i] in var_not_perc else f\"{round(100 * tvalues[i],2)}%\" for i in range(len(categories))]\n",
"\n",
" # ticks = [categories[i].replace(\"_\",\" \") + f\"\\n({round(100 * tvalues[i],2)}%)\" for i in range(len(categories))]\n",
" ticks = [categories[i].replace(\"_\",\" \") + f\"\\n({values_printed[i]})\" for i in range(len(categories))]\n",
2024-03-26 16:00:39 +01:00
" ax[row, col].set_xticklabels(ticks, color=\"black\", size = 20)\n",
2024-03-26 12:20:03 +01:00
" \n",
" ax[row, col].spines['polar'].set_visible(False)\n",
" \n",
" # plt.title(f'Characteristics of the segment {index+1}\\n')\n",
" ax[row, col].set_title(f'Segment {index+1}\\n', size = 24)\n",
" \n",
" fig.suptitle(f\"Characteristics of marketing personae of {type_of_activity} companies\", size=32)\n",
2024-03-26 12:20:03 +01:00
"\n",
2024-03-29 13:43:36 +01:00
" plt.tight_layout()\n",
"\n",
2024-03-26 16:00:39 +01:00
" # plt.show()"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-29 13:43:36 +01:00
"execution_count": 45,
"id": "67d9a15b-bd93-4e63-a193-e9760d710906",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" <th>time_to_open_med</th>\n",
" <th>share_known_gender</th>\n",
" <th>share_of_women</th>\n",
" <th>country_fr</th>\n",
" <th>age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.527270</td>\n",
" <td>0.136565</td>\n",
" <td>0.730064</td>\n",
" <td>6.418056</td>\n",
" <td>0.523129</td>\n",
" <td>0.571869</td>\n",
" <td>0.339959</td>\n",
" <td>41.298584</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.629648</td>\n",
" <td>0.194240</td>\n",
" <td>0.275860</td>\n",
" <td>8.031389</td>\n",
" <td>0.855391</td>\n",
" <td>0.182710</td>\n",
" <td>0.805862</td>\n",
" <td>39.293163</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.654488</td>\n",
" <td>0.292206</td>\n",
" <td>0.054260</td>\n",
" <td>13.037500</td>\n",
" <td>0.903085</td>\n",
" <td>0.323075</td>\n",
" <td>0.701258</td>\n",
" <td>35.176503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.606618</td>\n",
" <td>0.370733</td>\n",
" <td>0.127051</td>\n",
" <td>15.197500</td>\n",
" <td>0.864373</td>\n",
" <td>0.208231</td>\n",
" <td>0.638972</td>\n",
" <td>41.320841</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment share_tickets_internet share_campaigns_opened opt_in \\\n",
"0 1 0.527270 0.136565 0.730064 \n",
"1 2 0.629648 0.194240 0.275860 \n",
"2 3 0.654488 0.292206 0.054260 \n",
"3 4 0.606618 0.370733 0.127051 \n",
"\n",
" time_to_open_med share_known_gender share_of_women country_fr age \n",
"0 6.418056 0.523129 0.571869 0.339959 41.298584 \n",
"1 8.031389 0.855391 0.182710 0.805862 39.293163 \n",
"2 13.037500 0.903085 0.323075 0.701258 35.176503 \n",
"3 15.197500 0.864373 0.208231 0.638972 41.320841 "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_caract"
]
},
{
"cell_type": "code",
2024-03-31 19:57:10 +02:00
"execution_count": 55,
2024-03-26 12:20:03 +01:00
"id": "edf76688-1b7e-469e-873f-4884d551be66",
"metadata": {},
"outputs": [
{
"data": {
2024-03-31 19:57:10 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAB8MAAAfMCAYAAAAitTxyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xN9+M/8NfN3hFJjBihjdiCCqEIahY1a6u9yqetLqN2UW1VixodilJao/aqETMhIbbYiZUgkwyZ9/37wy/3m3P3vbnJjev1fDzu43HPOe919nmf9znvIxNCCBAREREREREREREREREREVkQK3MXgIiIiIiIiIiIiIiIiIiIyNTYGE5ERERERERERERERERERBaHjeFERERERERERERERERERGRx2BhOREREREREREREREREREQWh43hRERERERERERERERERERkcdgYTkREREREREREREREREREFoeN4UREREREREREREREREREZHHYGE5ERERERERERERERERERBaHjeFERERERERERERERERERGRx2BhORFSC3blzB7t378aKFSvw7bffYt68eVi2bBn++usvnDhxAunp6QanKZPJJD8iSzZ06FDJ9r5mzRpzF6nQWrVqJZmno0ePmrtIpCQ5ORkLFy5Ep06dULFiRTg7O0vWWZUqVcxdRNLD0aNHJeutVatW5i6S2cXExHBbJiIyUE5ODtavX4++ffvCz88P7u7usLKykhxPY2JizF1MIqISgfVdIiIqCjbmLgAREUmdOHECf/75J3bt2oUnT55oDWtlZYXatWuje/fuGDRoEPz9/YuplEREpM6WLVswfPhwpKammrsoREREZGZXrlxBz549cevWLXMXhYiIiIjotcXGcCKiEuLMmTP45JNPcPr0ab3jyOVyXL58GZcvX8bXX3+NNm3aYMGCBQgMDCzCktKrZvv27bhw4YJiuHv37qhfv77ZyvO6SklJwU8//aQYLlWqFD755BOzlYdM7+DBg+jTpw+EEOYuCpGKo0ePSt6sadWqFd92JyIqQnFxcWjTpg3i4+PNXRQqJqx3EREREZVMbAwnIjIzuVyOmTNnYt68eVobUKysrFCqVClkZ2cjLS1NbZgjR46gSZMmmDt3LqZOnVpURaZXzPbt27F27VrFcJUqVXhTxgxSUlIwe/ZsxbCvry8bwy2IEAIffvih5Dju4eGBvn37onr16nBwcFCMd3V1NUcR6TV39OhRyTEIABvDiYiK0LRp0yQN4ba2tujWrRsCAwPh5uYmCevp6VncxaMiwHoXERERUcnExnAiIjPKzc3FwIEDsWnTJpVpVapUQa9evfDuu++ibt268PT0hJWVFQAgNTUVV69exZEjR/D333/j8uXLinhCCNy8ebPY5oGIiF42NN6+fVsxXLZsWURGRsLHx8eMpSIiIiJzyMjIwMaNGxXDMpkMe/fuRdu2bc1YKiIiIiKi1xMbw4mIzGjEiBEqDeEeHh6YMWMGxo8fD1tbW7XxXF1dERQUhKCgIEydOhUhISH46quvEBYWVhzFJnplrFmzBmvWrDF3MUyqYDfHVHKcPHlSMjxy5Eg2hJPFqVKlCj8DQESkh3PnzuHFixeK4WbNmrEhnIhID6zvEhFRUbAydwGIiF5Xy5Ytw59//ikZV7FiRYSGhuKTTz7R2BCuTuvWrXHq1CksX75c0hUvEREVj+vXr0uGGzRoYKaSEBERkbnxuoCIiIiIqORgYzgRkRncv38fkyZNkozz9PTEyZMnUaNGDaPSlMlkGDduHEJDQ/k2IhFRMUtJSZEMe3h4mKcgREREZHa8LiAiIiIiKjnYTToRkRnMmjUL6enpknFLly6Fr69vodNu0KABAgICCp0OERHpLyMjQzJsZcVnTomIiF5XvC4gIiIiIio52BhORFTMnj59ivXr10vGtW7dGv379zdZHoW52SKXyxEREYFLly4hPj4eDg4O8Pb2RmBgoN
FvrReUnZ2NGzduICoqCk+ePEFqairs7e1RunRpVK5cGU2aNIGLi0uh89Hk2rVruHDhAmJjY5GZmQl3d3d06NAB/v7+WuM9ePAA165dQ0xMDJ49e4a8vDyULl0aXl5eCAwMROXKlU1e1uzsbISHh+P+/ftISEhAamoqnJ2dUa5cOdSqVQu1atWCjY35T+VpaWkICwtDbGws4uPjIYSAt7c3qlatiqZNm8LOzs7keebm5uLs2bO4evUqEhISkJubC29vb/Tt2xfu7u4myyctLQ2XLl3C9evXkZKSgoyMDDg4OMDFxQWVKlXCm2++CX9//1f2BuezZ89w+vRpPH78GAkJCcjKyoKbmxsqV66M2rVr48033zQ4Tblcjps3b+LSpUuKfdzKygpOTk4oW7Ysqlatilq1ahXpfq5NUlISwsLC8PjxY8kxzs/PD40aNYK1tbVR6ZbE7yjn5eUhIiICly9fRkJCAqytrVGpUiW0aNECFStW1Bk/MzMToaGhiIqKQkpKCtzc3ODr64s2bdoUev0JIXD37l1ERUXh/v37eP78OWQyGUqXLo2yZcuiSZMmKFu2bKHy0CY2Nhbh4eGIiYlBWloaHB0dERgYiJYtWxZZnobIX3fR0dF4+vQpMjIy4OXlBR8fH7z99tsoVaqUuYtoMkV93QEAWVlZOH78OO7du4enT5/CwcEBvr6+aNq0aYnuTef27duIiIjAw4cPkZeXh/Lly8Pf3x9NmjQx6XnnVTqPP336FBcvXsTdu3fx7NkzZGdnw8nJCaVKlYKvry/8/f1RqVKlQpUtKysLYWFhuH//PuLj4yGXy+Ht7Y0KFSrg7bffhpOTU6HS1yQlJQUnT57E7du3kZ6eDg8PD1SoUAHBwcEm2eeTkpJw7do13Lp1C8nJyXjx4gXc3d1RunRp1K1bF3Xq1IFMJiv8jBTwKhzLimp9l4TrgpSUFFy4cAG3bt3Cs2fPkJmZCUdHR8W1np+fn1HXepoUxzFLCIHz58/jxo0biI+PR3p6Ory8vFCmTBk0bdoUZcqUMUk+yjIzM3HmzBlcv34dSUlJAICyZcti8ODBBn3ezFzy8vIQGRmJO3fuICEhAc+ePYOTkxO8vLxQs2ZN1K1bF/b29kann5aWhtDQUDx69Ajx8fGwtrZGmTJl4Ovri6CgoCI5jwBATEwMzpw5gwcPHiArKwuenp5o2LAhAgMD9TqeXblyBefOncOTJ08gk8lQrlw5NGvWzKT7Rb6EhASEhobizp07ePHiBby8vODr64uWLVvC0dGx0Omnpqbi2rVruHHjBhITE5GRkQFXV1eULl0a1atXR4MGDYrs3kFxXMsZ6/HjxwgPD8fTp0+RkJAABwcHlClTBrVq1UJAQEChz3s5OTmIiorClStXFPdsbGxs4OzsjPLlyyvqvvycIRFRCSCIiKhY/fDDDwKA5Ld58+Ziy18573xZWVliwYIFoly5ciph8n/+/v7i77//NjjPu3fvigULFog2bdoIR0dHjekDEDY2NqJVq1Zi9+7dBucTHBwsSSskJEQIIUROTo5YsmSJeOONN9Tm+eOPP6qklZ6eLv7++28xcOBA4ePjo7XMAISvr6/45ptvREpKisHlVnbgwAHRqVMn4eTkpDVPNzc30aNHD7Ft2zaRk5MjSWPIkCE6y6zpN3PmTL3KuX37dtG6dWthZ2enMS0XFxcxYMAAcfPmTYOWgabtNDExUXz66aeiVKlSavM7f/681uWwevVqvfI/fvy46Natm9Z5y/+5urqKDh06iF9++UVkZGRI0omOjjZ6PWi6TNO0nesrOztb/PLLL6Jx48bC2tpaa/4VK1YUo0ePFqdPn9aZbnx8vPj8889F+fLldc6XlZWVqFevnpgyZYq4ceOGQeU31ubNm0XTpk2FlZWVxnKVLl1aDBs2TERHR+tMz9h9LDg42GTz5OvrK0k7v9yZmZli3rx5okyZMhqX/3vvvSdu376tNt2nT5+KCRMmCFdXV7Xx7e
3txfjx40VycrJB5U1OTharVq0SPXv2FJ6enjqXVc2aNcXy5cvFixcvDMpn5syZGo9pe/bsEc2aNVObX7du3STphIS
2024-03-26 12:20:03 +01:00
"text/plain": [
2024-03-29 13:43:36 +01:00
"<Figure size 2000x2100 with 4 Axes>"
2024-03-26 12:20:03 +01:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-31 19:57:10 +02:00
"categories = list(X_test_segment_caract.drop([\"segment\"], axis=1).columns)\n",
"var_not_perc = [\"age\"]\n",
2024-03-29 13:43:36 +01:00
"radar_mp_plot_all(df=X_test_segment_caract, categories=categories, var_not_perc=var_not_perc)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
"execution_count": 97,
2024-03-26 12:20:03 +01:00
"id": "c48136d1-c980-4f74-a69f-ed4304c83188",
"metadata": {},
"outputs": [],
"source": [
"# export to MinIo\n",
"\n",
2024-03-26 16:00:39 +01:00
"# activity = \"sport\"\n",
"# PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
2024-03-26 12:20:03 +01:00
"\n",
"file_name = \"spider_chart_all_\" + activity\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".png\"\n",
"\n",
"radar_mp_plot_all(df=X_test_segment_caract, categories=categories)\n",
"\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png', dpi=110)\n",
2024-03-26 12:20:03 +01:00
"image_buffer.seek(0)\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"id": "a2395680-69fe-4247-8deb-22f8ee15830b",
"metadata": {},
"source": [
"## --- end of the main part --- here are just some attempts --- ##"
]
},
{
"cell_type": "code",
"execution_count": 489,
"id": "7d9a2aca-d28d-43b3-9b72-5913b20c4f04",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAH3CAYAAABU/z5zAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3hb5fn+76M9LU9Z3nvHSeyEONtxwgoQCIW2QGgh9NeGQkMppS1QApQVVoEvhRJ2aMMsJRDKJsOJ7Ww7ieNtxzO2vGVrz/f3h2IRxxkess+R/H6uS1diWTrnPrKkc5/ned7nYQghBBQKhUKhUCjTBB7bAigUCoVCoVCmEmp+KBQKhUKhTCuo+aFQKBQKhTKtoOaHQqFQKBTKtIKaHwqFQqFQKNMKan4oFAqFQqFMK6j5oVAoFAqFMq2g5odCoVAoFMq0gpofCoVCoVAo0wpqfijn5dixY5DJZPjHP/7BtpQx8frrr0Mul2P//v1sS6GchsFgQHp6On7605/C5XKxLYdCoUxTqPmZBmzevBkMw3huAoEAERERuOGGG1BXV3fO5+n1elx//fVYv3491q9fP4WKJ8aRI0dwzz334MMPP0ReXt64tzP0ujU1NXlPnJ/y5JNP4rPPPrvg4379618jPDwcW7ZsAY83uq+fpqYmMAyDzZs3j0nT+++/jxdffPGsv2MYBo888siYtkehUPwHAdsCKFPHO++8g/T0dFgsFhQXF+OJJ57Azp07UV1djaCgoBGP/9WvfoV58+bhqaeeYkHt+BgcHMRPf/pTPP/881i1ahXbcqYNTz75JK6//nqsXr36nI955ZVXcOzYMRQXF0MsFo962xEREdi7dy+SkpLGpOn999/H8ePHcffdd4/43d69exEdHT2m7VEoFP+Bmp9pxIwZMzB37lwAwLJly+B0OvHwww/js88+w9q1a0c8/uOPP55qiRMmICDgvNGs6Y7dbvdE/6aaO++8E3feeeeoH+90OuFwOCAWizF//nyvavH29igUim9B017TmCEj1NnZOez+Q4cO4eqrr0ZwcDAkEglycnJGGCGTyYR7770XCQkJkEgkCA4Oxty5c/HBBx94HrNs2TIsW7ZsxH5vvfVWxMfHe34eSms8++yzePrppxEfHw+pVIply5ahtrYWdrsd9913HyIjI6FSqXDttdeiq6trxHY/+ugjLFiwAHK5HAqFApdddhnKyspG9Vrs27cPixYtgkQiQWRkJO6//37Y7fazPnY0+zlx4gRuuOEGREZGQiwWIzw8HCtWrMCRI0cuqGX//v1YtWoVQkJCIJFIkJSUNCx6UV9fj7Vr1yIlJQUymQxRUVFYtWoVysvLh21n165dYBgG//73v/HHP/4RUVFREIvFqK+vR3d3N+644w5kZmZCoVBArVZj+fLl2LNnzwg9VqsVjz76KDIyMiCRSBASEoKCggKUlJQAcKeQjEYj3n33XU9q9fS/u1arxbp16xAdHQ2RSISEhAT87W9/g8Ph8Dxm6D3wzDPP4PHHH0dCQgLEYjF27tx51rRXd3c3fvOb3yAmJgZisRhhYWFYtGgRfvjhBwDu996XX36J5ubmYSnfIc6W9jrbe+CNN94Ykfo8V8osPj4et95667D7RnPsAPDqq69i1qxZUCgUUCqVSE9PxwMPPDBiHxQKxTvQyM80prGxEQCQmprquW/nzp24/PLLkZeXh02bNkGlUuHDDz/Ez3/+c5hMJs+X+z333IN///vfePzxx5GTkwOj0Yjjx4+jt7d33HpeeeUVzJw5E6+88gp0Oh3++Mc/YtWqVcjLy4NQKMTbb7+N5uZm3Hvvvfh//+//Ydu2bZ7nPvnkk3jwwQexdu1aPPjgg7DZbHj22WexZMkSHDhwAJmZmefcb2VlJVasWIH4+Hhs3rwZMpkM//znP/H++++PeOxo93PFFVfA6XTimWeeQWxsLHp6elBSUgKdTnfe1+Dbb7/FqlWrkJGRge
effx6xsbFoamrCd99953lMe3s7QkJC8NRTTyEsLAx9fX149913kZeXh7KyMqSlpQ3b5v33348FCxZg06ZN4PF4UKvV6O7uBgA8/PDD0Gg0MBgM2Lp1K5YtW4bt27d7zIvD4cDKlSuxZ88e3H333Vi+fDkcDgf27duHlpYWLFy4EHv37sXy5ctRUFCADRs2AHBH4AD3yX/evHng8Xh46KGHkJSUhL179+Lxxx9HU1MT3nnnnWFaX3rpJaSmpuK5555DQEAAUlJSzvo6/eIXv0BpaSmeeOIJpKamQqfTobS01PP+++c//4nf/OY3aGhowNatW8/7mgNjew+MltEe+4cffog77rgD69evx3PPPQcej4f6+npUVlaOe98UCuUCEIrf88477xAAZN++fcRutxO9Xk+++eYbotFoyNKlS4ndbvc8Nj09neTk5Ay7jxBCrrrqKhIREUGcTichhJAZM2aQ1atXn3e/+fn5JD8/f8T9t9xyC4mLi/P83NjYSACQWbNmebZPCCEvvvgiAUCuvvrqYc+/++67CQAyMDBACCGkpaWFCAQCsn79+mGP0+v1RKPRkJ/97Gfn1fnzn/+cSKVSotVqPfc5HA6Snp5OAJDGxsYx7aenp4cAIC+++OJ593s2kpKSSFJSEjGbzaN+jsPhIDabjaSkpJA//OEPnvt37txJAJClS5eOaht2u52sWLGCXHvttZ77//WvfxEA5I033jjv8+VyObnllltG3L9u3TqiUChIc3PzsPufe+45AoBUVFQQQn58DyQlJRGbzTbssUO/e+eddzz3KRQKcvfdd59X05VXXjnsfXY6AMjDDz/s+Xm074GzPXeIuLi4Ya/BaI/9d7/7HQkMDDzvsVAoFO9C017TiPnz50MoFEKpVOLyyy9HUFAQPv/8c0/9R319Paqrq7FmzRoA7qv+odsVV1yBjo4O1NTUAADmzZuHr7/+Gvfddx927doFs9k8YX1XXHHFsBVAGRkZAIArr7xy2OOG7m9paQHgjpY4HA788pe/HKZZIpEgPz8fu3btOu9+d+7ciRUrViA8PNxzH5/Px89//vNhjxvtfoKDg5GUlIRnn30Wzz//PMrKyka1rLu2thYNDQ341a9+BYlEcs7HORwOPPnkk8jMzIRIJIJAIIBIJEJdXR2qqqpGPP66664763Y2bdqE3NxcSCQSCAQCCIVCbN++fdg2vv76a0gkEtx2220X1H82/ve//6GgoACRkZHDXrOVK1cCAAoLC4c9/uqrr4ZQKLzgdufNm4fNmzfj8ccfx759+86Zohwto30PjIXRHvu8efOg0+lw44034vPPP0dPT8+EjoVCoVwYan6mEf/6179w8OBB7NixA+vWrUNVVRVuvPFGz++Han/uvfdeCIXCYbc77rgDADxfzC+99BL+8pe/4LPPPkNBQQGCg4OxevXqCRUbBwcHD/tZJBKd936LxTJM90UXXTRC90cffXTBk0lvby80Gs2I+8+8b7T7YRgG27dvx2WXXYZnnnkGubm5CAsLw1133QW9Xn9OHUOpqAutQrrnnnuwYcMGrF69Gl988QX279+PgwcPYtasWWc1oRERESPue/755/Hb3/4WeXl5+O9//4t9+/bh4MGDuPzyy4dto7u7G5GRkaNeln4mnZ2d+OKLL0a8XllZWQAw4m9zNq1n46OPPsItt9yCN998EwsWLEBwcDB++ctfQqvVjkvnaN8DY2G0x/6LX/zCk9K97rrroFarkZeXh++//37c+6ZQKOeH1vxMIzIyMjxFzgUFBXA6nXjzzTfxySef4Prrr0doaCgAd43IT37yk7NuY6ieRC6X429/+xv+9re/obOz0xMFWrVqFaqrqwEAEokEAwMDI7bh7SvbId2ffPIJ4uLixvz8kJCQs540z7xvLPuJi4vDW2+9BcAd0fn444/xyCOPwGazYdOmTWd9TlhYGACgra3tvNvesmULfvnLX+LJJ58cdn9PTw8CAwNHPP70Qt/Tt7
Fs2TK8+uqrw+4/05yFhYWhqKgILpdrXAYoNDQUM2fOxBNPPHHW30dGRl5Q67m2++KLL+LFF19ES0sLtm3bhvvuuw9
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"colors = plt.cm.Blues(np.linspace(0.1, 0.9, 4)) \n",
"colors = [\"blue\", \"green\", \"orange\", \"red\"]\n",
"\n",
"# Initialisez le graphique en étoile\n",
"fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"for i in range(4) :\n",
"\n",
" # Caractéristiques et valeurs associées (exemple)\n",
" categories = ['share_known_gender', 'share_of_women', 'country_fr']\n",
" values = list(X_test_segment_mp.loc[i,categories]) # Exemple de valeurs, ajustez selon vos données\n",
" \n",
" values_normalized = [ max(values) * elt for elt in values]\n",
" \n",
" # Nombre de caractéristiques\n",
" num_categories = len(categories)\n",
" \n",
" # Créer un angle pour chaque axe\n",
" angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
" \n",
" \n",
" # Tracer uniquement le contour du polygone\n",
" ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
" # ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='blue', alpha = 0.3, linewidth=1.5)\n",
" \n",
" # Remplir le secteur central avec une couleur\n",
" ax.fill(angles, values_normalized, color=colors[i], alpha=0.2, label = str(i+1))\n",
"\n",
"# Étiqueter les axes\n",
"ax.set_yticklabels([])\n",
"ax.set_xticks(angles)\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Titre du graphique\n",
"plt.title('Résumé des caractéristiques')\n",
"\n",
"# Afficher le graphique\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 301,
"id": "96aa9ff5-c1ed-49eb-8fb7-2319ac0c40be",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgoAAAITCAYAAABmGDQGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3hUZfbHPzPpvVcCSQgphE4g9G4BkWJvSFkL9l3LrrprW1ddFcUuWFBUYGWxwk8BlU7oISRAQhophPRep7+/P8ZkibSUydyZyf08zzxJptz3zM299/3ec857jkIIIZCRkZGRkZGRuQBKqQ2QkZGRkZGRsVxkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEgIyMjIyMjc1FkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEgIyMjIyMjc1FkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEg02HS0tJwdXXlvffek9qUTvHxxx/j5ubGwYMHpTZF5hwaGxuJi4vjpptuwmAwSG2OjIzMRZCFQi9j9erVKBSKtoe9vT0hISHceuutZGdnX/RzDQ0N3HjjjTz88MM8/PDDZrS4exw7dozHHnuMr7/+mjFjxnR5O637LT8/33TG2SivvPIKP/zww2Xfd8899xAUFMSaNWtQKjt2KcrPz0ehULB69epO2bRu3TrefvvtC76mUCh44YUXOrU9GZnehL3UBshIw+eff05cXBwqlYqkpCRefvllduzYwalTp/Dx8Tnv/XfddReJiYm8+uqrEljbNerr67nppptYvnw5c+bMkdqcXsMrr7zCjTfeyPz58y/6ng8++IC0tDSSkpJwcnLq8LZDQkLYv38/UVFRnbJp3bp1nDhxgr/85S/nvbZ//37CwsI6tT0Zmd6ELBR6KYMHD2bUqFEATJ06Fb1ez/PPP88PP/zAkiVLznv/f//7X3Ob2G08PT0v6SXp7Wi12javkrl58MEHefDBBzv8fr1ej06nw8nJibFjx5rUFlNvT0bG1pBDDzIAbaKhrKys3fNHjhxh7ty5+Pr64uzszIgRI84TDc3NzTzxxBNERkbi7OyMr68vo0aN4j//+U/be6ZOncrUqVPPG3fx4sVERES0/d3qWl62bBmvvfYaERERuLi4MHXqVLKystBqtTz11FOEhobi5eXFddddR3l5+XnbXb9+PePGjcPNzQ13d3euvvpqUlJSOrQvDhw4wIQJE3B2diY0NJSnn34arVZ7wfd2ZJzTp09z6623EhoaipOTE0FBQcyYMYNjx45d1paDBw8yZ84c/Pz8cHZ2Jioqqt1dcU5ODkuWLCE6OhpXV1f69OnDnDlzOH78eLvt7Ny5E4VCwVdffcXjjz9Onz59cHJyIicnh4qKCh544AHi4+Nxd3cnMDCQ6dOns2fPnvPsUavVvPjiiwwcOBBnZ2f8/PyYNm0a+/btA4xu/KamJr744ou28Na5//fS0lKWLl1KWFgYjo6OREZG8s9//hOdTtf2ntZj4PXXX+ell14iMjISJycnduzYccHQQ0VFBffeey99+/bFycmJgIAAJkyYwG+//QYYj72ffvqJgoKCdmG3Vi4UerjQMfDJJ5+cF366WNgiIiKCxYsXt3uuI98dYMWKFQwbNgx3d3c8PDyIi4vj73//+3ljyMiYC9mjIANAXl4eADExMW3P7dixg5kzZzJmzBhWrlyJl5cXX3/9NbfccgvNzc1tF8LHHnuMr776ipdeeokRI0bQ1NTEiRMnqKqq6rI9H3zwAUOHDuWDDz6gtraWxx9/nDlz5jBmzBgcHBz47LPPKCgo4IknnuDuu+9m48aNbZ995ZVXeOaZZ1iyZAnPPPMMGo2GZcuWMWnSJA4dOkR8fPxFx01PT2fGjBlERESwevVqXF1d+fDDD1m3bt157+3oONdccw16vZ7XX3+dfv36UVlZyb59+6itrb3kPti6dStz5sxh4MCBLF++nH79+pGfn88vv/zS9p
7i4mL8/Px49dVXCQgIoLq6mi+++IIxY8aQkpJCbGxsu20+/fTTjBs3jpUrV6JUKgkMDKSiogKA559/nuDgYBobG/n++++ZOnUq27Zta5vodTods2bNYs+ePfzlL39h+vTp6HQ6Dhw4QGFhIePHj2f//v1Mnz6dadOm8eyzzwJGzw4YJ8rExESUSiXPPfccUVFR7N+/n5deeon8/Hw+//zzdra+++67xMTE8MYbb+Dp6Ul0dPQF99Odd97J0aNHefnll4mJiaG2tpajR4+2HX8ffvgh9957L7m5uXz//feX3OfQuWOgo3T0u3/99dc88MADPPzww7zxxhsolUpycnJIT0/v8tgyMt1GyPQqPv/8cwGIAwcOCK1WKxoaGsSWLVtEcHCwmDx5stBqtW3vjYuLEyNGjGj3nBBCXHvttSIkJETo9XohhBCDBw8W8+fPv+S4U6ZMEVOmTDnv+UWLFonw8PC2v/Py8gQghg0b1rZ9IYR4++23BSDmzp3b7vN/+ctfBCDq6uqEEEIUFhYKe3t78fDDD7d7X0NDgwgODhY333zzJe285ZZbhIuLiygtLW17TqfTibi4OAGIvLy8To1TWVkpAPH2229fctwLERUVJaKiokRLS0uHP6PT6YRGoxHR0dHi0UcfbXt+x44dAhCTJ0/u0Da0Wq2YMWOGuO6669qe//LLLwUgPvnkk0t+3s3NTSxatOi855cuXSrc3d1FQUFBu+ffeOMNAYiTJ08KIf53DERFRQmNRtPuva2vff75523Pubu7i7/85S+XtGn27NntjrNzAcTzzz/f9ndHj4ELfbaV8PDwdvugo9/9oYceEt7e3pf8LjIy5kYOPfRSxo4di4ODAx4eHsycORMfHx9+/PHHtnh1Tk4Op06d4o477gCMd5Otj2uuuYaSkhIyMzMBSExMZPPmzTz11FPs3LmTlpaWbtt3zTXXtMuEHzhwIACzZ89u977W5wsLCwHjXbhOp2PhwoXtbHZ2dmbKlCns3LnzkuPu2LGDGTNmEBQU1PacnZ0dt9xyS7v3dXQcX19foqKiWLZsGcuXLyclJaVDSwGzsrLIzc3lrrvuwtnZ+aLv0+l0vPLKK8THx+Po6Ii9vT2Ojo5kZ2eTkZFx3vtvuOGGC25n5cqVjBw5EmdnZ+zt7XFwcGDbtm3ttrF582acnZ3505/+dFn7L8T//d//MW3aNEJDQ9vts1mzZgGwa9eudu+fO3cuDg4Ol91uYmIiq1ev5qWXXuLAgQMXDRN1lI4eA52ho989MTGR2tpabrvtNn788UcqKyu79V1kZEyBLBR6KV9++SWHDx9m+/btLF26lIyMDG677ba211tzFZ544gkcHBzaPR544AGAtovYu+++y5NPPskPP/zAtGnT8PX1Zf78+d1KJPT19W33t6Oj4yWfV6lU7ewePXr0eXavX7/+shfeqqoqgoODz3v+j891dByFQsG2bdu4+uqref311xk5ciQBAQE88sgjNDQ0XNSO1nDA5bLxH3vsMZ599lnmz5/Ppk2bOHjwIIcPH2bYsGEXFGwhISHnPbd8+XLuv/9+xowZw7fffsuBAwc4fPgwM2fObLeNiooKQkNDO7yU8Y+UlZWxadOm8/bXoEGDAM7731zI1guxfv16Fi1axKeffsq4cePw9fVl4cKFlJaWdsnOjh4DnaGj3/3OO+9sC6vdcMMNBAYGMmbMGH799dcujy0j013kHIVeysCBA9sSGKdNm4Zer+fTTz/lm2++4cYbb8Tf3x8wxrSvv/76C26jNf7t5ubGP//5T/75z39SVlbW5l2YM2cOp06dAsDZ2Zm6urrztmHqO6ZWu7/55hvCw8M7/Xk/P78LTjB/fK4z44SHh7Nq1SrA6Cn473//ywsvvIBGo2HlypUX/ExAQAAARUVFl9z2mjVrWLhwIa+88kq75ysrK/H29j7v/ecm8Z27jalTp7JixYp2z/9RyAQEBLB3714MBkOXxIK/vz9Dhw7l5Zdfvu
DroaGhl7X1Ytt9++23efvttyksLGTjxo089dRTlJeXs2XLlk7b2dFjAMDJyQm1Wn3e83/Mz+nMd1+yZAlLliyhqam
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# KEEP THIS CODE !!\n",
"# Reference example: radar chart built from five hard-coded characteristics.\n",
"\n",
"# Characteristics and their associated values (example)\n",
"categories = ['Force', 'Vitesse', 'Agilité', 'Précision', 'Endurance']\n",
"values = [8, 7, 6, 9, 7]  # Example values, adjust to your data\n",
"\n",
"# Maximum value range for each characteristic\n",
"max_range = [20, 20, 20, 20, 20]\n",
"\n",
"# Rescale each value to 2 * max(values) * value / max_range\n",
"values_normalized = [2 * max(values) * x / y for x, y in zip(values, max_range)]\n",
"\n",
"# Number of characteristics\n",
"num_categories = len(categories)\n",
"\n",
"# Create one evenly spaced angle per axis\n",
"angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"# Initialise the radar (polar) chart\n",
"fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"# Draw only the polygon outlines (first point repeated to close the shape).\n",
"# The raw-value outline is fully transparent (alpha=0); presumably it is only\n",
"# there to set the radial axis range - TODO confirm.\n",
"ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='blue', linewidth=1.5)\n",
"\n",
"# Fill the inside of the rescaled polygon with a colour\n",
"ax.fill(angles, values_normalized, color='skyblue', alpha=0.4)\n",
"\n",
"# Label the axes (radial tick labels hidden, one label per characteristic)\n",
"ax.set_yticklabels([])\n",
"ax.set_xticks(angles)\n",
"ax.set_xticklabels(categories)\n",
"\n",
"# Chart title\n",
"plt.title('Résumé des caractéristiques')\n",
"\n",
"# Display the chart\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 149,
"id": "adb7ccb3-7dea-4347-9298-37311a2f1fb1",
"metadata": {},
"outputs": [],
"source": [
"def radar_chart(values, categories, segment) :\n",
" # Caractéristiques et valeurs associées (exemple)\n",
" categories = categories\n",
" values = values # Exemple de valeurs, ajustez selon vos données\n",
" \n",
" # Nombre de caractéristiques\n",
" num_categories = len(categories)\n",
" \n",
" # Créer un angle pour chaque axe\n",
" angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
" \n",
" # Répéter le premier angle pour fermer la figure\n",
" values += values[:1]\n",
" angles += angles[:1]\n",
" \n",
" # Initialisez le graphique en étoile\n",
" fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
" \n",
" # Tracer les lignes radiales\n",
" ax.fill(angles, values, color='skyblue', alpha=0.4)\n",
" \n",
" # Tracer les points sur les axes radiaux\n",
" ax.plot(angles, values, color='blue', linewidth=2, linestyle='solid')\n",
"\n",
" # Afficher les valeurs associées sous les noms de variables\n",
" \"\"\"\n",
" for i, angle in enumerate(angles[:-1]):\n",
" x = angle\n",
" y = values[i] + 0.2 # Ajustez la distance des valeurs par rapport au centre\n",
" plt.text(x, y, str(values[i]), color='black', ha='center', fontsize=10)\n",
" \"\"\"\n",
" \n",
" # Remplir le secteur central avec une couleur\n",
" # ax.fill(angles, values, color='skyblue', alpha=0.4)\n",
"\n",
" \n",
" # Étiqueter les axes\n",
" ax.set_yticklabels([])\n",
" #ax.set_xticks(angles[:-1])\n",
" #ax.set_xticklabels(categories, # fontsize=12, ha='right', rotation=45\n",
" # )\n",
" # ax.set_xticklabels(categories, fontsize=10, color='black', ha='right')\n",
"\n",
" labels = [f\"{category} = {round(100 *value,2)}%\" for category, value in zip(categories, values[:-1])]\n",
" ax.set_xticks(angles[:-1])\n",
" ax.set_xticklabels(labels, fontsize=10, color='black', ha='right')\n",
" \n",
" # Titre du graphique\n",
" plt.title(f'Caracteristics of segment {segment}')\n",
" \n",
" # Afficher le graphique\n",
" plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 150,
"id": "8793fb51-812c-4500-b252-2e2d61d6ff48",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkkAAAH2CAYAAABk9BgJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3xT1fvHPzezTZOOdO+W0gmUtoyWUqBYwAGiP5YLBRUVQURUBFyIynShqAwHOFARBQS/OJBRShkFWgqle++92+yc3x+xaSsFum/SnvfrlVfT5N5zPrkZ93Of85znMIQQAgqFQqFQKBRKOzhsC6BQKBQKhUIxRKhJolAoFAqFQukAapIoFAqFQqFQOoCaJAqFQqFQKJQOoCaJQqFQKBQKpQOoSaJQKBQKhULpAGqSKBQKhUKhUDqAmiQKhUKhUCiUDqAmiUKhUCgUCqUDqEmiUHqZq1ev4vHHH4enpydMTEwgFosREhKCLVu2oLq6mm15NyU5ORlvvfUWcnNz+6T9t956CwzDdGmf5uZmvPXWWzh16tQNz+3ZswcMw/SZ3p6wb98+DBs2DKampmAYBleuXGFbksHz+eefY8+ePZ3e/vfff8djjz2GESNGgM/nd/mzRaF0BoYuS0Kh9B5ffPEFlixZAl9fXyxZsgQBAQFQqVS4dOkSvvjiC4wcORIHDx5kW2aH/PLLL5g7dy5OnjyJyMjIXm+/sLAQhYWFCAsL6/Q+lZWVsLW1xdq1a/HWW2+1e66iogJZWVkIDg6GUCjsZbXdp6KiAs7Ozrjrrrvw0ksvQSgUIjAwECKRiG1pBs3w4cNhY2PToSHuiCeffBIxMTEIDg5GVlYWLl++DHo6o/Q2PLYFUCgDhXPnzuHZZ5/F1KlTcejQoXYn7qlTp+Kll17Cn3/+2St9NTc3G81Jt0Wri4sLXFxceq1dW1tb2Nra9lp7vUV6ejpUKhXmz5+PSZMmsS1nwPLFF1+Aw9ENhjz33HO4fPkyy4ooAxJCoVB6hRkzZhAej0fy8/M7tf1PP/1Epk6dShwcHIiJiQnx8/Mjq1atIo2Nje22W7BgATEzMyNXr14lU6dOJWKxmISFhRFCCPn777/JzJkzibOzMxEKhcTLy4s8/fTTpKKi4ob+UlJSyIMPPkjs7OyIQCAgrq6u5NFHHyVyuZzs3r2bALjhtnv3bv3+x44dI3fccQeRSCTE1NSUhIeHk3/++addH2vXriUAyOXLl8ns2bOJpaUlcXBwaPdcW44fP04mTZpEpFIpMTExIa6urmTWrFmkqamJ5OTkdKhpwYIFhBCi15yTk9OuzT/++IPccccdxNzcnJiamhI/Pz+yYcMG/fNZWVnkgQceII6OjkQgEBA7Oztyxx13kISEhNu+Z7/99hsJCwsjpqamRCwWkylTppCzZ8+2e6/+q3fSpEk3ba+pqYm89NJLxMPDgwiFQmJlZUVGjRpFfvjhh3bbXbx4kdx7773EysqKCIVCEhQURPbt23dDezExMSQsLIwIhULi5OREXn/9dfLFF1/ccJzc3d3J9OnTyZEjR0hQUJD+83fkyBH9sfXz8yMikYiMGTOGXLx48Ya+OqOp5T06ceIEWbx4MbG2tiZSqZT83//9HykqKmqn57/Hzd3d/VZvRTuWLl16w2eLQukNaCSJQukFNBoNTpw4gVGjRsHV1bVT+2RkZOCee+7BCy+8ADMzM6SmpmLz5s2Ii4vDiRMn2m2rVCoxc+ZMPPPMM1i9ejXUajUAICsrC+PGjcOiRYtgYWGB3NxcfPjhh4iIiMC1a9fA5/MBAImJiYiIiICNjQ3efvtteHt7o6SkBIcPH4ZSqcT06dOxYcMGvPrqq/jss88QEhICAPDy8gIAfP/993jsscdw33334ZtvvgGfz8fOnTtx55134q+//kJUVFQ7vbNmzcKDDz6IxYsXo6mpqcPXn5ubi+nTp2PChAn4+uuvYWlpiaKiIvz5559QKpVwdH
TEn3/+ibvuugtPPvkkFi1aBAC3jB599dVXeOqppzBp0iTs2LEDdnZ2SE9PR1JSkn6be+65BxqNBlu2bIGbmxsqKytx9uxZ1NbW3vL9+uGHH/DII49g2rRp+PHHH6FQKLBlyxZERkbi+PHjiIiIwBtvvIGxY8di6dKl2LBhAyZPngxzc/Obtvniiy/iu+++w7vvvovg4GA0NTUhKSkJVVVV+m1OnjyJu+66C6GhodixYwcsLCzw008/4YEHHkBzczMWLlwIQJcLN3XqVPj4+OCbb76BSCTCjh078P3333fYd2JiItasWYPXXnsNFhYWWLduHWbNmoU1a9bg+PHj2LBhAxiGwapVqzBjxgzk5OTA1NS0S5paWLRoEaZPn44ffvgBBQUFWLlyJebPn6//nB88eBBz5syBhYUFPv/8cwAwqCFUyiCGbZdGoQwESktLCQDy4IMPdmt/rVZLVCoViY6OJgBIYmKi/rmW6MTXX3/dqTby8vIIAPLbb7/pn7vjjjuIpaUlKS8vv+n++/fvJwDIyZMn2z3e1NREpFIpuffee9s9rtFoyMiRI8nYsWP1j7VEi958880b2v9vJOmXX34hAMiVK1duqqmiooIAIGvXrr3huf9GkhoaGoi5uTmJiIggWq22w/YqKysJALJ169ab9tkRGo2GODk5kREjRhCNRqN/vKGhgdjZ2ZHw8HD9YydPniQAyP79+2/b7vDhw8n9999/y238/PxIcHAwUalU7R6fMWMGcXR01OuZO3cuMTMzaxdF1Gg0JCAgoMNIkqmpKSksLNQ/duXKFQKAODo6kqamJv3jhw4dIgDI4cOHu6yp5T1asmRJu+22bNlCAJCSkhL9Y8OGDbtl1O1W0EgSpa+gs9soFJbIzs7Gww8/DAcHB3C5XPD5fH0OS0pKyg3bz549+4bHysvLsXjxYri6uoLH44HP58Pd3b1dG83NzYiOjsa8efO6lcNz9uxZVFdXY8GCBVCr1fqbVqvFXXfdhYsXL94QLepI638JCgqCQCDA008/jW+++QbZ2dld1vZfnfX19ViyZMlNZzpJpVJ4eXnhvffew4cffoiEhARotdrbtp2Wlobi4mI8+uij+jwYABCLxZg9ezbOnz+P5ubmLmseO3Ys/vjjD6xevRqnTp2CTCZr93xmZiZSU1PxyCOPAEC743/PPfegpKQEaWlpAIDo6GjccccdsLGx0e/P4XAwb968DvsOCgqCs7Oz/n9/f38AQGRkZLt8t5bH8/LyuqyphZkzZ7b7PzAwsF2bFIqhQk0ShdIL2NjYQCQSIScnp1PbNzY2YsKECbhw4QLeffddnDp1ChcvXsSBAwcA4IaTpUgkumHYRqvVYtq0aThw4ABeeeUVHD9+HHFxcTh//ny7NmpqaqDRaLqdNF1WVgYAmDNnDvh8frvb5s2bQQi5obSBo6Pjbdv18vLCP//8Azs7OyxduhReXl7w8vLCxx9/3C2dFRUVAHDL18kwDI4fP44777wTW7ZsQUhICGxtbfH888+joaHhpvu1DH919LqcnJyg1WpRU1PTZc2ffPIJVq1ahUOHDmHy5MmQSqW4//77kZGRAaD12L/88ss3HPslS5YA0M0AbNFob29/Qx8dPQboDGNbBALBLR+Xy+Vd1tSCtbV1u/9bhtL++zmnUAwNmpNEofQCXC4XUVFR+OOPP1BYWHhbQ3LixAkUFxfj1KlT7WZA3SwvpqPISFJSEhITE7Fnzx4sWLBA/3hmZma77aRSKbhcLgoLC7vwilppiUxs27btptP3/3si7mzNmgkTJmDChAnQaDS4dOkStm3bhhdeeAH29vZ48MEHu6SzJUp2u9fp7u6Or776CoBuJtrPP/+Mt956C0qlEjt27Ohwn5aTfElJyQ3PFRcXg8PhwMrKqkt6AcDMzAzr1q3DunXrUFZWpo8q3XvvvUhNTdUf+zVr1mDWrFkdtuHr66vX2GJg2lJaWtplXbeiK5ooFGOHRpIolF5izZo1II
TgqaeeglKpvOF5lUqFI0eOAGg1Ef9NTt25c2en++tsG6amppg0aRL2799/wxV+W252dT9+/HhYWloiOTkZo0eP7vD
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"categories= [\"share_known_gender\",\"share_of_women\",\"country_fr\"]\n",
"radar_chart(values=X_test_segment_mp.loc[0,categories].values.tolist(), categories= categories,\n",
" segment = \"1\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}