BDC-team-1/Sport/Modelization/segment_analysis_sport_0_6.ipynb

1728 lines
1.1 MiB
Plaintext
Raw Normal View History

2024-03-26 12:20:03 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "c488134e-680f-44e4-8c43-40c246140519",
"metadata": {},
"source": [
"# Analysis of segments and marketing personae associated"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9a8b8c3a-8e74-49f3-91d1-cccfc057fdcd",
"metadata": {},
"outputs": [],
"source": [
"# importations\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import io\n",
"import s3fs\n",
"import re\n",
"import pickle\n",
"import warnings"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d553c868-695f-4d57-96d6-d5c6629cefb2",
"metadata": {},
"outputs": [],
"source": [
"def load_model(type_of_activity, model):\n",
" BUCKET = f\"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/\"\n",
" filename = model + '.pkl'\n",
" file_path = BUCKET + filename\n",
" with fs.open(file_path, mode=\"rb\") as f:\n",
" model_bytes = f.read()\n",
"\n",
" model = pickle.loads(model_bytes)\n",
" return model\n",
"\n",
"\n",
"def load_test_file(type_of_activity):\n",
" file_path_test = f\"projet-bdc2324-team1/Generalization/{type_of_activity}/Test_set.csv\"\n",
" with fs.open(file_path_test, mode=\"rb\") as file_in:\n",
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
" return dataset_test"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3af80fea-a937-4ea8-bece-cfeaa89d1055",
"metadata": {},
"outputs": [],
"source": [
"# exec(open('utils_segmentation.py').read())\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"# choose the type of companies for which you want to run the pipeline\n",
"type_of_activity = \"sport\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "cc6af7fa-33b2-4d58-ada4-e2ee7262bab9",
"metadata": {},
"outputs": [],
"source": [
"# load test set\n",
"dataset_test = load_test_file(type_of_activity)\n",
"\n",
"# Load Model \n",
"model = load_model(type_of_activity, 'LogisticRegression_Benchmark')"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "e4287c1a-eab6-4897-91d6-d21804518dc4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>country_fr</th>\n",
" <th>has_purchased</th>\n",
" <th>has_purchased_estim</th>\n",
" <th>score</th>\n",
" <th>segment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>100.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.177187</td>\n",
" <td>5.177187</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.665854</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>55.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>426.265613</td>\n",
" <td>426.265613</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.257465</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>17.0</td>\n",
" <td>1.0</td>\n",
" <td>80.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>436.033437</td>\n",
" <td>436.033437</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.232284</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>120.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.196412</td>\n",
" <td>5.196412</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.665888</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>34.0</td>\n",
" <td>2.0</td>\n",
" <td>416.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>478.693148</td>\n",
" <td>115.631470</td>\n",
" <td>363.061678</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.916749</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96091</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>67.31</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>278.442257</td>\n",
" <td>278.442257</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>15.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.629333</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>61.41</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>189.207373</td>\n",
" <td>189.207373</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>12.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.687097</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>29.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.112332</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>79.43</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>279.312905</td>\n",
" <td>279.312905</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>20.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.630403</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>31.0</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.322976</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>96096 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 4.0 1.0 100.00 1.0 \n",
"1 1.0 1.0 55.00 1.0 \n",
"2 17.0 1.0 80.00 1.0 \n",
"3 4.0 1.0 120.00 1.0 \n",
"4 34.0 2.0 416.00 1.0 \n",
"... ... ... ... ... \n",
"96091 1.0 1.0 67.31 1.0 \n",
"96092 1.0 1.0 61.41 1.0 \n",
"96093 0.0 0.0 0.00 0.0 \n",
"96094 1.0 1.0 79.43 1.0 \n",
"96095 0.0 0.0 0.00 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 5.177187 5.177187 \n",
"1 0.0 426.265613 426.265613 \n",
"2 0.0 436.033437 436.033437 \n",
"3 0.0 5.196412 5.196412 \n",
"4 0.0 478.693148 115.631470 \n",
"... ... ... ... \n",
"96091 1.0 278.442257 278.442257 \n",
"96092 1.0 189.207373 189.207373 \n",
"96093 0.0 550.000000 550.000000 \n",
"96094 1.0 279.312905 279.312905 \n",
"96095 0.0 550.000000 550.000000 \n",
"\n",
" time_between_purchase nb_tickets_internet is_email_true ... \\\n",
"0 0.000000 0.0 True ... \n",
"1 0.000000 0.0 True ... \n",
"2 0.000000 0.0 True ... \n",
"3 0.000000 0.0 True ... \n",
"4 363.061678 0.0 True ... \n",
"... ... ... ... ... \n",
"96091 0.000000 1.0 True ... \n",
"96092 0.000000 1.0 True ... \n",
"96093 -1.000000 0.0 True ... \n",
"96094 0.000000 1.0 True ... \n",
"96095 -1.000000 0.0 True ... \n",
"\n",
" gender_female gender_male gender_other nb_campaigns \\\n",
"0 1 0 0 0.0 \n",
"1 0 1 0 0.0 \n",
"2 1 0 0 0.0 \n",
"3 1 0 0 0.0 \n",
"4 1 0 0 0.0 \n",
"... ... ... ... ... \n",
"96091 0 1 0 15.0 \n",
"96092 0 1 0 12.0 \n",
"96093 1 0 0 29.0 \n",
"96094 0 1 0 20.0 \n",
"96095 0 1 0 31.0 \n",
"\n",
" nb_campaigns_opened country_fr has_purchased has_purchased_estim \\\n",
"0 0.0 1.0 0.0 1.0 \n",
"1 0.0 1.0 1.0 0.0 \n",
"2 0.0 1.0 0.0 0.0 \n",
"3 0.0 1.0 0.0 1.0 \n",
"4 0.0 1.0 1.0 1.0 \n",
"... ... ... ... ... \n",
"96091 5.0 0.0 1.0 1.0 \n",
"96092 9.0 0.0 0.0 1.0 \n",
"96093 3.0 0.0 0.0 0.0 \n",
"96094 4.0 1.0 0.0 1.0 \n",
"96095 4.0 NaN 0.0 0.0 \n",
"\n",
" score segment \n",
"0 0.665854 3 \n",
"1 0.257465 2 \n",
"2 0.232284 1 \n",
"3 0.665888 3 \n",
"4 0.916749 4 \n",
"... ... ... \n",
"96091 0.629333 3 \n",
"96092 0.687097 3 \n",
"96093 0.112332 1 \n",
"96094 0.630403 3 \n",
"96095 0.322976 2 \n",
"\n",
"[96096 rows x 21 columns]"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Processing\n",
"X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
" 'time_between_purchase', 'nb_tickets_internet', 'is_email_true', 'opt_in', #'is_partner',\n",
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened']]\n",
"\n",
"y_test = dataset_test[['y_has_purchased']]\n",
"\n",
"X_test_segment = X_test\n",
"\n",
"X_test_segment.insert(X_test.shape[1], \"country_fr\", dataset_test[\"country_fr\"])\n",
"\n",
"# add y_has_purchased to X_test\n",
"X_test_segment[\"has_purchased\"] = y_test\n",
"\n",
"# Add prediction and probability to dataset_test\n",
"y_pred = model.predict(X_test)\n",
"X_test_segment[\"has_purchased_estim\"] = y_pred\n",
"\n",
"y_pred_prob = model.predict_proba(X_test)[:, 1]\n",
"X_test_segment['score'] = y_pred_prob\n",
"\n",
"X_test_segment[\"segment\"] = np.where(X_test_segment['score']<0.25, '1',\n",
" np.where(X_test_segment['score']<0.5, '2',\n",
" np.where(X_test_segment['score']<0.75, '3', '4')))\n",
"\n",
"X_test_segment"
]
},
{
"cell_type": "markdown",
"id": "9058c3b2-8fa2-4322-a57b-395da4033eaf",
"metadata": {},
"source": [
"## 1. Business KPIs"
]
},
{
"cell_type": "code",
"execution_count": 559,
"id": "3067d919-50c9-49e9-b0a6-b676a5dbae56",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>vente_internet_max</th>\n",
" <th>nb_tickets_internet</th>\n",
" </tr>\n",
" <tr>\n",
" <th>segment</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2748.0</td>\n",
" <td>1039.0</td>\n",
" <td>6.856702e+04</td>\n",
" <td>449.0</td>\n",
" <td>1071.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>40316.0</td>\n",
" <td>16931.0</td>\n",
" <td>1.085152e+06</td>\n",
" <td>8736.0</td>\n",
" <td>22196.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>62641.0</td>\n",
" <td>29061.0</td>\n",
" <td>2.323148e+06</td>\n",
" <td>13283.0</td>\n",
" <td>32902.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>393466.0</td>\n",
" <td>102685.0</td>\n",
" <td>1.646072e+07</td>\n",
" <td>6990.0</td>\n",
" <td>114070.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount vente_internet_max \\\n",
"segment \n",
"1 2748.0 1039.0 6.856702e+04 449.0 \n",
"2 40316.0 16931.0 1.085152e+06 8736.0 \n",
"3 62641.0 29061.0 2.323148e+06 13283.0 \n",
"4 393466.0 102685.0 1.646072e+07 6990.0 \n",
"\n",
" nb_tickets_internet \n",
"segment \n",
"1 1071.0 \n",
"2 22196.0 \n",
"3 32902.0 \n",
"4 114070.0 "
]
},
"execution_count": 559,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# business figures\n",
"X_test_segment.groupby(\"segment\")[[\"nb_tickets\", \"nb_purchases\", \"total_amount\", \"vente_internet_max\", \n",
" \"nb_tickets_internet\"]].sum()"
]
},
{
"cell_type": "code",
"execution_count": 583,
"id": "03b53fc6-2ec0-4467-aeda-e56507bed1d6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>size</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_campaigns</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>38.719614</td>\n",
" <td>0.550513</td>\n",
" <td>0.693981</td>\n",
" <td>0.343908</td>\n",
" <td>39.320831</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>30.736971</td>\n",
" <td>8.076591</td>\n",
" <td>11.308745</td>\n",
" <td>5.442746</td>\n",
" <td>26.855461</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>20.910340</td>\n",
" <td>12.549006</td>\n",
" <td>19.410751</td>\n",
" <td>11.652101</td>\n",
" <td>17.844514</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>9.633075</td>\n",
" <td>78.823890</td>\n",
" <td>68.586524</td>\n",
" <td>82.561245</td>\n",
" <td>15.979194</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment size nb_tickets nb_purchases total_amount nb_campaigns\n",
"0 1 38.719614 0.550513 0.693981 0.343908 39.320831\n",
"1 2 30.736971 8.076591 11.308745 5.442746 26.855461\n",
"2 3 20.910340 12.549006 19.410751 11.652101 17.844514\n",
"3 4 9.633075 78.823890 68.586524 82.561245 15.979194"
]
},
"execution_count": 583,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_business_fig = X_test_segment.groupby(\"segment\")[[\"nb_tickets\", \"nb_purchases\", \"total_amount\", \"nb_campaigns\"]].sum().reset_index()\n",
"X_test_business_fig.insert(1, \"size\", X_test_segment.groupby(\"segment\").size().values)\n",
"X_test_business_fig[[\"size\", \"nb_tickets\", \"nb_purchases\", \"total_amount\", \"nb_campaigns\"]] = 100 * X_test_business_fig[[\"size\", \"nb_tickets\", \"nb_purchases\", \"total_amount\", \"nb_campaigns\"]] / X_test_business_fig[[\"size\", \"nb_tickets\", \"nb_purchases\", \"total_amount\", \"nb_campaigns\"]].sum()\n",
"X_test_business_fig"
]
},
{
"cell_type": "code",
"execution_count": 729,
"id": "d2f618b6-c984-4790-bd8f-29c7d01c6707",
"metadata": {},
"outputs": [],
"source": [
"def hist_segment_business_KPIs(df, segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns) :\n",
" \n",
" plt.figure()\n",
"\n",
" df_plot = df[[segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns]]\n",
" \n",
" x = [\"number of\\ncustomers\", \"number of\\ntickets\", \"number of\\npurchases\", \"total\\namount\", \n",
" \"number of\\ncampaigns\"]\n",
"\n",
" # liste_var = [size, nb_tickets, nb_purchases, total_amount]\n",
" \n",
" bottom = np.zeros(5)\n",
" \n",
" # Définir une palette de couleurs\n",
" colors = plt.cm.Blues(np.linspace(0.1, 0.9, 4))\n",
" \n",
" for i in range(4) :\n",
" # print(str(df_plot[segment][i]))\n",
" # segment = df_plot[segment][i]\n",
" height = list(df_plot.loc[i,size:].values)\n",
" \n",
" plt.bar(x=x, height=height, label = str(df_plot[segment][i]), bottom=bottom, color=colors[i]#, width=0.1\n",
" )\n",
" \n",
" bottom+=height\n",
" \n",
" plt.legend(title = \"segment\", loc = \"upper right\", bbox_to_anchor=(1.2, 1))\n",
" plt.ylabel(\"Fraction represented by the segment (%)\")\n",
" plt.title(\"Relative weight of each segment regarding business KPIs\")\n",
" plt.show()\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 730,
"id": "162cc8a2-66e0-496c-a9cf-b2db513a6f14",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApgAAAHBCAYAAADNWMtrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABwdUlEQVR4nO3dd1gU1/s28HtUWJZepCoKKHZQFBuoYMUajRp7RDRGY+yxV+wtMfrVRKMmoMaaWJIYCxoBu2LvHTuIFRSVet4/fJmfK6i7OAuu3p/r4pI5U84zZ2eHxzNzZiQhhAARERERkUIK5HcARERERPRxYYJJRERERIpigklEREREimKCSURERESKYoJJRERERIpigklEREREimKCSURERESKYoJJRERERIpigklEREREimKCqaPw8HBIkiT/FCpUCM7OzujQoQMuXbqUq21GRUVBkiRERUXpvO7Zs2cRGhqKa9euZZvXrVs3uLm55SqmD9n77FdoaCgkScL9+/ffuezUqVOxcePGXNXzNg8fPkSHDh3g4OAASZLQqlUrxet4H1nH459//pnfoRg8fR1DHzpJkhAaGipPZ503czpP6Zsu33l91Guo3nQeePbsGZo0aQIjIyMsW7YMQM5/F4sWLYqQkBDcvn37ndukjxMTzFwKCwvD/v37sWPHDvTt2xd///03atWqhUePHuVpHGfPnsWECRNyPHGPHTsWGzZsyNN48kJe7Ze+koNJkyZhw4YN+PHHH7F//37MnDlT8Trow/CpJpiva9asGfbv3w9nZ+f8DiXPfPXVV9i/f39+h6GoxMRENGrUCJGRkfjzzz/RtWtXjflZfxe3b9+Onj17YtWqVahduzaSk5PzKWLKT4XyOwBDVaFCBfj6+gIAAgMDkZGRgfHjx2Pjxo0ICQnJ5+heKlGiRH6HoBeGvl+nT59GiRIl0Llz5/wOhT5wGRkZSE9Ph0qlyu9QNDx79gympqZaL29vbw97e3s9RvThKVq0KIoWLZrfYSgmISEBQUFBuHLlCrZs2YK6detmW+bVv4t169ZFRkYGJk2ahI0bN/J89wliD6ZCsr5Ud+/e1Sg/fPgwPvvsM9ja2sLExAQ+Pj5Yu3btO7d3+PBhdOjQAW5ublCr1XBzc0PHjh1x/fp1eZnw8HB88cUXAF5+mbMuT4SHhwPIfinZx8cHtWvXzlZXRkYGihQpgtatW8tlqampmDx5MsqUKQOVSgV7e3uEhITg3r17b43733//hSRJiImJkcvWrVsHSZLQrFkzjWW9vb3Rpk0beVoIgZ9//hmVKlWCWq2GjY0N2rZti6tXr2qsl9Ml8sePH6NHjx6wtbWFubk5mjVrhqtXr2a7VJfl7t276NixI6ysrODo6Iju3bsjMTFRni9JEpKTk7F06VK5XQMDA9+67w8fPkSfPn1QpEgRGBsbw8PDA6NHj0ZKSgoA4Nq1a5AkCTt27MC5c+fk7b7r1og1a9agZs2aMDMzg7m5OYKCgnDs2DGNZbQ5XrLcvn0bX3/9NVxdXWFsbAwXFxe0bds227GblpaG0aNHw8XFBZaWlmjQoAEuXLjw1lgB4N69e/L2s44df39/7NixQ2O5HTt2oH79+rC0tISpqSn8/f3x33//ZdveX3/9BW9vb6hUKnh4eGDu3Lk5Xn6UJAl9+/ZFWFgYSpcuDbVaDV9fXxw4cABCCMyaNQvu7u4wNzdHvXr1cPny5Wx1aRNTVt1nzpxR9BjKOj5mzpyJyZMnw93dHSqVCpGRkQC0P5fs2bMHNWvWhImJCYoUKYKxY8diyZIl2S5Rr1mzBo0aNYKzszPUajXKli2LESNGZOtt6tatG8zNzXHq1Ck0atQIFhYWqF+/PgAgKSkJPXv2hJ2dHczNzdG4cWNcvHgxW0w5XSIPDAxEhQoVEBMTg9q1a8PU1BQeHh6YPn06MjMzNdY/c+YMGjVqBFNTU9jb2+Pbb7+VzzXa3lp08+ZNtG7dGp
aWlrCyskKXLl2ync/edL5wc3NDt27d5Olnz55hyJAhcHd3h4mJCWxtbeHr64tVq1bJy+R0jLq5uaF58+bYunUrKleuDLVajTJlyuC3337LVmd8fDx69eqFokWLwtjYGO7u7pgwYQLS09M1lluwYAEqVqwIc3NzWFhYoEyZMhg1apROsb7L9evXUatWLdy6dQs7d+7MMbnMSY0aNeT130Tb8wUZHvZgKiQ2NhYAUKpUKbksMjISjRs3RvXq1bFw4UJYWVlh9erVaN++PZ49e6ZxwnrdtWvXULp0aXTo0AG2traIi4vDggULULVqVZw9exaFCxdGs2bNMHXqVIwaNQo//fQTKleuDODNPXwhISEYMGAALl26BE9PT7k8IiICd+7ckXteMzMz0bJlS+zevRvDhg2Dn58frl+/jvHjxyMwMBCHDx+GWq3OsY6AgAAYGRlhx44dqFq1KoCXf7TVajWio6ORlpYGIyMjJCQk4PTp0/jmm2/kdXv16oXw8HD0798fM2bMwMOHDzFx4kT4+fnhxIkTcHR0zLHOzMxMtGjRAocPH0ZoaCgqV66M/fv3o3Hjxm9s3zZt2qB9+/bo0aMHTp06hZEjRwKAfKLfv38/6tWrh7p162Ls2LEAAEtLyzdu78WLF6hbty6uXLmCCRMmwNvbG7t378a0adNw/Phx/Pvvv3B2dsb+/fvRp08fJCYmYsWKFQCAcuXKvXG7U6dOxZgxYxASEoIxY8YgNTUVs2bNQu3atXHo0CF5XW2OF+Blclm1alWkpaVh1KhR8Pb2xoMHD7Bt2zY8evRIo41HjRoFf39/LFmyBElJSRg+fDhatGiBc+fOoWDBgm+M+csvv8TRo0cxZcoUlCpVCo8fP8bRo0fx4MEDeZnff/8dXbt2RcuWLbF06VIYGRnhl19+QVBQELZt2yYnMFu3bkXr1q1Rp04drFmzBunp6fj++++zJcNZNm3ahGPHjmH69OmQJAnDhw9Hs2bNEBwcjKtXr2L+/PlITEzE4MGD0aZNGxw/flxOArSNKYvSx1CW//3vfyhVqhS+//57WFpawtPTU+tzycmTJ9GwYUOUKlUKS5cuhampKRYuXIjff/89Wz2XLl1C06ZNMXDgQJiZmeH8+fOYMWMGDh06hJ07d2osm5qais8++wy9evXCiBEjkJ6eDiEEWrVqhX379mHcuHGoWrUq9u7diyZNmrxzH7PEx8ejc+fO+O677zB+/Hhs2LABI0eOhIuLi3z5NS4uDgEBATAzM8OCBQvg4OCAVatWoW/fvlrXAwCff/452rVrh969e+PMmTMYO3Yszp49i4MHD8LIyEinbQ0ePBjLly/H5MmT4ePjg+TkZJw+fVrjGH+TEydO4LvvvsOIESPg6OiIJUuWoEePHihZsiTq1Kkjt0u1atVQoEABjBs3DiVKlMD+/fsxefJkXLt2DWFhYQCA1atXo0+fPujXrx++//57FChQAJcvX8bZs2cViRUAzp07h0GDBgEAdu3ahbJly2rdTln/iXtb77U25wsyUIJ0EhYWJgCIAwcOiLS0NPHkyROxdetW4eTkJOrUqSPS0tLkZcuUKSN8fHw0yoQQonnz5sLZ2VlkZGQIIYSIjIwUAERkZOQb601PTxdPnz4VZmZmYu7cuXL5H3/88cZ1g4ODRfHixeXp+/fvC2NjYzFq1CiN5dq1ayccHR3lOFetWiUAiHXr1mksFxMTIwCIn3/++a1tVKtWLVGvXj15umTJkmLo0KGiQIECIjo6WgghxIoVKwQAcfHiRSGEEPv37xcAxA8//KCxrZs3bwq1Wi2GDRv2xv36999/BQCxYMECjXWnTZsmAIjx48fLZePHjxcAxMyZMzWW7dOnjzAxMRGZmZlymZmZmQgODn7rvmZZuHChACDWrl2rUT5jxgwBQERERMhlAQEBonz58u/c5o0bN0ShQoVEv379NMqfPHkinJycRLt27d647puOl+7duwsjIyNx9uzZN66bdTw2bd
pUo3zt2rUCgNi/f/9b4zY3NxcDBw584/zk5GRha2srWrRooVGekZEhKlasKKpVqyaXVa1aVbi6uoqUlBS57MmTJ8L
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")"
]
},
{
"cell_type": "code",
"execution_count": 588,
"id": "1790cb81-3304-41f1-a371-d8c926d32906",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>size</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_campaigns</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>38.719614</td>\n",
" <td>0.550513</td>\n",
" <td>0.693981</td>\n",
" <td>0.343908</td>\n",
" <td>39.320831</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>30.736971</td>\n",
" <td>8.076591</td>\n",
" <td>11.308745</td>\n",
" <td>5.442746</td>\n",
" <td>26.855461</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>20.910340</td>\n",
" <td>12.549006</td>\n",
" <td>19.410751</td>\n",
" <td>11.652101</td>\n",
" <td>17.844514</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>9.633075</td>\n",
" <td>78.823890</td>\n",
" <td>68.586524</td>\n",
" <td>82.561245</td>\n",
" <td>15.979194</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment size nb_tickets nb_purchases total_amount nb_campaigns\n",
"0 1 38.719614 0.550513 0.693981 0.343908 39.320831\n",
"1 2 30.736971 8.076591 11.308745 5.442746 26.855461\n",
"2 3 20.910340 12.549006 19.410751 11.652101 17.844514\n",
"3 4 9.633075 78.823890 68.586524 82.561245 15.979194"
]
},
"execution_count": 588,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# save to Minio\n",
"\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"segments_business_KPIs_\" + activity\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".csv\"\n",
"\n",
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")\n",
"\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png')\n",
"image_buffer.seek(0)\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"id": "53d24165-6b98-4b66-9ad8-7514564689d8",
"metadata": {},
"source": [
"## 2. Spider plot summarizing sociodemographic characteristics and purchasing behaviour"
]
},
{
"cell_type": "code",
"execution_count": 458,
"id": "267ebaee-eaef-4720-8ca9-e40c0cf125df",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>share_known_gender</th>\n",
" <th>share_of_women</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.291201</td>\n",
" <td>0.227424</td>\n",
" <td>0.518625</td>\n",
" <td>0.561486</td>\n",
" <td>0.481375</td>\n",
" <td>0.330700</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.169144</td>\n",
" <td>0.685581</td>\n",
" <td>0.854725</td>\n",
" <td>0.197893</td>\n",
" <td>0.145275</td>\n",
" <td>0.803031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.287200</td>\n",
" <td>0.615159</td>\n",
" <td>0.902359</td>\n",
" <td>0.318277</td>\n",
" <td>0.097641</td>\n",
" <td>0.700363</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.184077</td>\n",
" <td>0.678406</td>\n",
" <td>0.862482</td>\n",
" <td>0.213427</td>\n",
" <td>0.137518</td>\n",
" <td>0.681425</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment gender_female gender_male share_known_gender share_of_women \\\n",
"0 1 0.291201 0.227424 0.518625 0.561486 \n",
"1 2 0.169144 0.685581 0.854725 0.197893 \n",
"2 3 0.287200 0.615159 0.902359 0.318277 \n",
"3 4 0.184077 0.678406 0.862482 0.213427 \n",
"\n",
" gender_other country_fr \n",
"0 0.481375 0.330700 \n",
"1 0.145275 0.803031 \n",
"2 0.097641 0.700363 \n",
"3 0.137518 0.681425 "
]
},
"execution_count": 458,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# description of marketing personae\n",
"\n",
"X_test_segment_mp = X_test_segment.groupby(\"segment\")[['gender_female', 'gender_male', 'gender_other', 'country_fr']].mean().reset_index()\n",
"X_test_segment_mp.insert(3, \"share_known_gender\", X_test_segment_mp[\"gender_female\"]+X_test_segment_mp[\"gender_male\"])\n",
"X_test_segment_mp.insert(4, \"share_of_women\", X_test_segment_mp[\"gender_female\"]/(X_test_segment_mp[\"share_known_gender\"]))\n",
"X_test_segment_mp"
]
},
{
"cell_type": "code",
"execution_count": 590,
"id": "910876fe-e6df-4f8d-9978-5d6fdd893ac0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.461420</td>\n",
" <td>0.136830</td>\n",
" <td>0.737906</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.626108</td>\n",
" <td>0.191367</td>\n",
" <td>0.274740</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.657410</td>\n",
" <td>0.293768</td>\n",
" <td>0.055838</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.608890</td>\n",
" <td>0.366901</td>\n",
" <td>0.132872</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment share_tickets_internet share_campaigns_opened opt_in\n",
"0 1 0.461420 0.136830 0.737906\n",
"1 2 0.626108 0.191367 0.274740\n",
"2 3 0.657410 0.293768 0.055838\n",
"3 4 0.608890 0.366901 0.132872"
]
},
"execution_count": 590,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# purchasing behaviour\n",
"\n",
"X_test_segment[\"share_tickets_internet\"] = X_test_segment[\"nb_tickets_internet\"]/X_test_segment[\"nb_tickets\"]\n",
"X_test_segment[\"share_campaigns_opened\"] = X_test_segment[\"nb_campaigns_opened\"]/X_test_segment[\"nb_campaigns\"]\n",
"X_test_segment_pb = X_test_segment.groupby(\"segment\")[[\"share_tickets_internet\", \"share_campaigns_opened\", \"opt_in\"]].mean().reset_index()\n",
"X_test_segment_pb"
]
},
{
"cell_type": "code",
"execution_count": 598,
"id": "ba2884e3-004a-4554-ab82-6d477dcc4869",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" <th>share_known_gender</th>\n",
" <th>share_of_women</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.461420</td>\n",
" <td>0.136830</td>\n",
" <td>0.737906</td>\n",
" <td>0.518625</td>\n",
" <td>0.561486</td>\n",
" <td>0.330700</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.626108</td>\n",
" <td>0.191367</td>\n",
" <td>0.274740</td>\n",
" <td>0.854725</td>\n",
" <td>0.197893</td>\n",
" <td>0.803031</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.657410</td>\n",
" <td>0.293768</td>\n",
" <td>0.055838</td>\n",
" <td>0.902359</td>\n",
" <td>0.318277</td>\n",
" <td>0.700363</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.608890</td>\n",
" <td>0.366901</td>\n",
" <td>0.132872</td>\n",
" <td>0.862482</td>\n",
" <td>0.213427</td>\n",
" <td>0.681425</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment share_tickets_internet share_campaigns_opened opt_in \\\n",
"0 1 0.461420 0.136830 0.737906 \n",
"1 2 0.626108 0.191367 0.274740 \n",
"2 3 0.657410 0.293768 0.055838 \n",
"3 4 0.608890 0.366901 0.132872 \n",
"\n",
" share_known_gender share_of_women country_fr \n",
"0 0.518625 0.561486 0.330700 \n",
"1 0.854725 0.197893 0.803031 \n",
"2 0.902359 0.318277 0.700363 \n",
"3 0.862482 0.213427 0.681425 "
]
},
"execution_count": 598,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_caract = pd.concat([X_test_segment_pb, X_test_segment_mp[['share_known_gender', 'share_of_women', 'country_fr']]], axis=1)\n",
"X_test_segment_caract"
]
},
{
"cell_type": "code",
"execution_count": 666,
"id": "23a37e9b-bb29-4122-85cb-cc15cc344ee2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"share_tickets_internet 0.657410\n",
"share_campaigns_opened 0.366901\n",
"opt_in 0.737906\n",
"share_known_gender 0.902359\n",
"share_of_women 0.561486\n",
"country_fr 0.803031\n",
"dtype: float64"
]
},
"execution_count": 666,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_caract.loc[:,\"share_tickets_internet\":].max()"
]
},
{
"cell_type": "code",
"execution_count": 733,
"id": "0809e2ae-3487-4b24-8f60-741c683cb9af",
"metadata": {},
"outputs": [],
"source": [
"# definition of the associated plotting function - KEEP THIS !!!\n",
"\n",
"def radar_mp_plot(df, categories, index, show=True) :\n",
"    \"\"\"Draw a radar (spider) chart of the characteristics of one segment.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Table holding the columns listed in `categories`; the column-wise maxima\n",
"        over all its rows are used to rescale the plotted values.\n",
"    categories : sequence of str\n",
"        Columns to plot, one radar axis per column.\n",
"    index : int\n",
"        Row label (looked up with `df.loc`) of the segment to plot; the title\n",
"        displays `index + 1`.\n",
"    show : bool, default True\n",
"        When True, `plt.show()` is called at the end (the original behaviour).\n",
"        Pass False to leave the figure open, e.g. to call `plt.savefig` afterwards.\n",
"    \"\"\"\n",
"    # true values are printed in parentheses under each axis label\n",
"    tvalues = list(df.loc[index, categories])\n",
"\n",
"    # column-wise maximum over all the rows of df\n",
"    max_values = df[categories].max()\n",
"\n",
"    # plotted values are true values / maximum of the column, which puts each value\n",
"    # in relation with the values of the other segments: a ratio of 1 means the value\n",
"    # is the largest one for that characteristic, even if the true value is not 1\n",
"    values = list(df.loc[index, categories] / max_values)\n",
"\n",
"    # the ratios are scaled once more by their own maximum: for instance if the\n",
"    # maximum of values is 0.8, the point should sit at 8/10th of the circle radius,\n",
"    # not at the edge\n",
"    values_normalized = [max(values) * elt for elt in values]\n",
"\n",
"    # one angle per category\n",
"    num_categories = len(categories)\n",
"    angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"    # initialize the figure\n",
"    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"    # a transparent line (alpha=0) through `values` is drawn first so that the radius\n",
"    # of the circle is based on max(values)\n",
"    ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"    ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, linewidth=1.2)\n",
"\n",
"    # fill the sector\n",
"    ax.fill(angles, values_normalized, color='orange', alpha=0.4)\n",
"\n",
"    # labels: category name with the true value as a percentage\n",
"    ax.set_yticklabels([])\n",
"    ax.set_xticks(angles)\n",
"    ticks = [categories[i].replace(\"_\",\" \") + f\"\\n({round(100 * tvalues[i],2)}%)\" for i in range(len(categories))]\n",
"    ax.set_xticklabels(ticks, color=\"black\")\n",
"\n",
"    ax.spines['polar'].set_visible(False)\n",
"\n",
"    plt.title(f'Characteristics of the segment {index+1}\\n')\n",
"\n",
"    if show :\n",
"        plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 734,
"id": "56cb026b-857f-42eb-baed-0ebdf5aee447",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApkAAAIICAYAAAAllfW0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1hb59k/8O/RQBJIQmyx9wZjMMMG2xiPOM4ebtK0b5s0afsmTtokTdq0Sd8mcdKsJm3T/NIMZzeN4+xlx068F8bYBszeAjMEQkJCEtp6fn+4qMYMMwRC4vlcF5eNdM557nPQkW49kyGEEFAURVEURVGUE7FcHQBFURRFURTleWiSSVEURVEURTkdTTIpiqIoiqIop6NJJkVRFEVRFOV0NMmkKIqiKIqinI4mmRRFURRFUZTT0SSToiiKoiiKcjqaZFIURVEURVFOR5NMiqIoiqIoyulokklR4zh79ix+9rOfITY2Fnw+H0KhEDk5OXjuueegUqkc28XExOCqq65yYaQz989//hPvvPPOnB2fYRg89thj09pn165dE+4TExOD2267bdZxOZtKpcIPf/hDBAcHg2EYXHfddRNuO9E1P3jwIBiGwSeffDJ3gVKjfPDBB/j73/8+o30JIVi9ejUYhsE999zj3MAoyoNwXB0ARS0027Ztw5YtW5CcnIzf/va3SEtLg8ViwalTp/Dqq6+itLQUn3/+uavDnLV//vOfCAwMnLPErbS0FBEREdPaZ9euXXj55ZfHTTQ///xziMViJ0XnPE888QQ+//xzvPXWW4iPj4e/v/+E2871Naem7oMPPkBNTQ3uu+++ae/78ssvo6WlxflBUZSHoUkmRV2gtLQUd911FzZs2IAvvvgCPB7P8dyGDRvwwAMPYPfu3fMak81mg9VqHRXLQkUIgdFohEAgwPLly5167OzsbKcez1lqamoQHx+PH//4x64OhZoHMpkMf/jDH/Dee+/hhhtucHU4FLWg0eZyirrAU089BYZh8Prrr4+b1Hl5eeGaa64Z8/ju3buRk5MDgUCAlJQUvPXWW6OeVygU2LJlC9LS0iAUChEcHIy1a9fiyJEjo7aTyWRgGAbPPfccnnzyScTGxoLH4+HAgQMwGo144IEHsHTpUvj6+sLf3x8rVqzAl19+OSYeu92Ol156CUuXLoVAIIBEIsHy5cvx1VdfATjf9FxbW4tDhw6BYRgwDIOYmBjH/kNDQ3jwwQcRGxsLLy8vhIeH47777oNerx9Vzkhz4auvvorU1FTweDy8++67jucurJEcHh52HJPP58Pf3x+5ubnYvn07AOC2227Dyy+/7Nh35EcmkzlivrgGUK1W44EHHkBcXBx4PB6Cg4NxxRVXoKGhwbHNK6+8gqysLAiFQohEIqSkpODhhx8ec80uplKpsGXLFoSHh8PLywtxcXF45JFHYDKZRv2t9u7di/r6eke8Bw8eHPd4l7rmAGCxWPDII48gLCwMYrEY69evR2Nj45hj7d27F+vWrYNYLIa3tzeKioqwb9++S56T3W7Hk08+ieTkZMfrYsmSJXjxxRdHbdfc3Iwf/ehHCA4OBo/HQ2pqquNvc6Ha2lpcdtll8Pb2RlBQEO6++27s3LlzzHVYs2YNMjIyUFpaisLCQggEAsTExODtt98GAOzcuRM5OTnw9vZGZmbmuF/kphLTSLeD7du3T3od16xZg507d6Kjo2PUa20qfvnLX2LDhg24/vrrp7Q9RS1mtCaTov7DZrNh//79WLZsGSIjI6e8X1VVFR544AH8/ve/R0hICN544w3ccccdSEhIwOrVqwHA0Y/z0UcfhVQqhU6nw+eff441a9Zg3759WLNmzahj/uMf/0BSUhKef/55iMViJCYmwmQyQaVS4cEHH0R4eDjMZjP27t2LG264AW+//TZ++tOfOva/7bbb8P777+OOO+7A1q1b4eXlhTNnzjgSts8//xybN2+Gr68v/vnPfwKAI6
keHh5GcXExurq68PDDD2PJkiWora3Fn/70J1RXV2Pv3r2jPpC/+OILHDlyBH/6058glUoRHBw87nX6zW9+g3/961948sknkZ2dDb1ej5qaGiiVSgDA//3f/0Gv1+OTTz5BaWmpY7/Q0NBxj6fVarFy5UrIZDI89NBDKCgogE6nw+HDh9Hb24uUlBR8+OGH2LJlC371q1/h+eefB4vFQktLC+rq6ib9mxqNRpSUlKC1tRWPP/44lixZgiNHjuDpp59GZWUldu7cidDQUJSWlmLLli3QaDT497//DQBIS0sb95iTXfMRDz/8MIqKivDGG29gaGgIDz30EK6++mrU19eDzWYDAN5//3389Kc/xbXXXot3330XXC4Xr732GjZu3Ig9e/Zg3bp1E57Xc889h8ceewx//OMfsXr1algsFjQ0NECtVju2qaurQ2FhIaKiovDCCy9AKpViz549+PWvf42BgQE8+uijAIDe3l4UFxfDx8cHr7zyCoKDg7F9+/YJ+yjK5XL87Gc/w+9+9ztERETgpZdewu23345z587hk08+wcMPPwxfX19s3boV1113Hdra2hAWFjatmKZ6Hf/5z3/il7/8JVpbW6fV9eWNN97AyZMnL/n6oSjqPwhFUYQQQuRyOQFAfvjDH055n+joaMLn80lHR4fjMYPBQPz9/cn//u//Trif1WolFouFrFu3jlx//fWOx9vb2wkAEh8fT8xm86RljxzjjjvuINnZ2Y7HDx8+TACQRx55ZNL909PTSXFx8ZjHn376acJisUh5efmoxz/55BMCgOzatcvxGADi6+tLVCrVmOMAII8++qjj94yMDHLddddNGtPdd99NJnpbio6OJrfeeqvj961btxIA5Pvvv5/wePfccw+RSCSTljmeV199lQAgH3300ajHn332WQKAfPfdd47HiouLSXp6+pSOO9E1P3DgAAFArrjiilGPf/TRRwQAKS0tJYQQotfrib+/P7n66qtHbWez2UhWVhbJz8+ftPyrrrqKLF26dNJtNm7cSCIiIohGoxn1+D333EP4fL7jb/3b3/6WMAxDamtrx+wPgBw4cMDxWHFxMQFATp065XhMqVQSNptNBAIB6e7udjxeWVlJAJB//OMf045pqteREEKuvPJKEh0dPem1uFBXVxfx9fUlr732muMxAOTuu++e8jEoarGhzeUUNUtLly5FVFSU43c+n4+kpCR0dHSM2u7VV19FTk4O+Hw+OBwOuFwu9u3bh/r6+jHHvOaaa8Dlcsc8/vHHH6OoqAhCodBxjDfffHPUMb799lsAwN133z2j8/nmm2+QkZGBpUuXwmq1On42btw4bnPw2rVr4efnd8nj5ufn49tvv8Xvf/97HDx4EAaDYUbxjfj222+RlJSE9evXT1qmWq3GLbfcgi+//BIDAwNTOvb+/fvh4+ODzZs3j3p8pLl+Kk3TM3FxV4wlS5YAgOO1dPz4cahUKtx6662j/jZ2ux2XX345ysvLx3RpuFB+fj6qqqqwZcsW7NmzB0NDQ6OeNxqN2LdvH66//np4e3uPKuOKK66A0WjEiRMnAACHDh1CRkbGmJrbW265ZdyyQ0NDsWzZMsfv/v7+CA4OxtKlSx01lgCQmpo66pynE9NUr+NM3HnnncjKysIvfvGLGR+DohYbmmRS1H8EBgbC29sb7e3t09ovICBgzGM8Hm9UEvXXv/4Vd911FwoKCvDpp5/ixIkTKC8vx+WXXz5usjVeE/Fnn32Gm266CeHh4Xj//fdRWlqK8vJy3H777TAajY7tFAoF2Gw2pFLptM5jRF9fH86ePQsulzvqRyQSgRAyJlGbqDn7Yv/4xz/w0EMP4YsvvkBJSQn8/f1x3XXXobm5eUZxKhSKS45e/8lPfoK33noLHR0duPHGGxEcHIyCggJ8//33k+6nVCohlUrH9NMLDg4Gh8NxNPE728WvpZHm9JHXSF9fHwBg8+bNY/4+zz77LAgho6bYutgf/vAHPP/88z
hx4gQ2bdqEgIAArFu3DqdOnQJw/rytViteeumlMce/4oorAMDx91cqlQgJCRlTxniPARh31L2Xl9eYx728vADA8Zq
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"categories = list(X_test_segment_caract.drop(\"segment\", axis=1).columns)\n",
"#for i in range(4) :\n",
"# radar_mp_plot(df=X_test_segment_caract, categories=categories, index=i)\n",
"radar_mp_plot(df=X_test_segment_caract, categories=categories, index=3)"
]
},
{
"cell_type": "code",
"execution_count": 739,
"id": "5b3c4bac-396e-4117-a7d9-f39a3d8f95b4",
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (4005960846.py, line 6)",
"output_type": "error",
"traceback": [
"\u001b[0;36m Cell \u001b[0;32mIn[739], line 6\u001b[0;36m\u001b[0m\n\u001b[0;31m file_name = \"spider_chart_\" + activity + \"_sgt_\" str(index)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"# export the radar chart of one segment to MinIO\n",
"\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"# row label of the segment to export; defined here so that the cell does not rely\n",
"# on an `index` variable left over in the kernel\n",
"index = 3\n",
"\n",
"file_name = \"spider_chart_\" + activity + \"_sgt_\" + str(index)\n",
"# the buffer below is written with format='png', so the object key ends in .png\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".png\"\n",
"\n",
"\n",
"radar_mp_plot(df=X_test_segment_caract, categories=categories, index=index)\n",
"\n",
"# NOTE(review): radar_mp_plot calls plt.show() before returning; with the inline\n",
"# backend this presumably closes the figure, in which case savefig below captures a\n",
"# new empty figure - TODO confirm the uploaded image is not blank\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png')\n",
"image_buffer.seek(0)\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
"    s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "code",
"execution_count": 740,
"id": "276de9a5-d506-4c11-a7c2-a23ebbc59fe5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'projet-bdc2324-team1/Output_marketing_personae_analysis/sport/spider_chart_sport_sgt_3.csv'"
]
},
"execution_count": 740,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"spider_chart_\" + activity + \"_sgt_\" + str(index)\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".csv\"\n",
"FILE_PATH_OUT_S3"
]
},
{
"cell_type": "code",
"execution_count": 735,
"id": "80e47dbc-3efd-4857-8055-876b308cbcb5",
"metadata": {},
"outputs": [],
"source": [
"def radar_mp_plot_all(df, categories, show=True) :\n",
"    \"\"\"Draw one radar chart per row of `df` (one row per segment) on a two-column grid.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Table with one row per segment, holding the columns listed in `categories`.\n",
"        Rows are looked up with `df.loc[0]`, `df.loc[1]`, ... - assumes a default\n",
"        integer index (TODO confirm for other callers).\n",
"    categories : sequence of str\n",
"        Columns to plot, one radar axis per column.\n",
"    show : bool, default True\n",
"        When True, `plt.show()` is called at the end (the original behaviour).\n",
"        Pass False to leave the figure open, e.g. to call `plt.savefig` afterwards.\n",
"    \"\"\"\n",
"    nb_segments = df.shape[0]\n",
"\n",
"    # two charts per grid row; with 4 segments this is the original 2 x 2 grid\n",
"    # with figsize (25, 20)\n",
"    nb_rows = max(1, (nb_segments + 1) // 2)\n",
"\n",
"    # squeeze=False keeps `ax` two-dimensional even when there is a single grid row\n",
"    fig, ax = plt.subplots(nb_rows, 2, figsize=(25, 10 * nb_rows), subplot_kw=dict(polar=True), squeeze=False)\n",
"\n",
"    # quantities shared by all the segments, computed on the `df` argument\n",
"    max_values = df[categories].max()\n",
"    num_categories = len(categories)\n",
"    angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"    for index in range(nb_segments) :\n",
"        row = index // 2  # integer division gives the grid row\n",
"        col = index % 2   # remainder gives the grid column\n",
"\n",
"        # true values are printed in parentheses under each axis label\n",
"        tvalues = list(df.loc[index,categories])\n",
"\n",
"        # plotted values are true values / maximum of the column, which puts each value\n",
"        # in relation with the values of the other segments: a ratio of 1 means the value\n",
"        # is the largest one for that characteristic, even if the true value is not 1\n",
"        values = list(df.loc[index,categories]/max_values)\n",
"\n",
"        # the ratios are scaled once more by their own maximum: for instance if the\n",
"        # maximum of values is 0.8, the point should sit at 8/10th of the circle radius,\n",
"        # not at the edge\n",
"        values_normalized = [ max(values) * elt for elt in values]\n",
"\n",
"        # a transparent line (alpha=0) through `values` is drawn first so that the radius\n",
"        # of the circle is based on max(values)\n",
"        ax[row, col].plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"        ax[row, col].plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, linewidth=1.2)\n",
"\n",
"        # fill the sector\n",
"        ax[row, col].fill(angles, values_normalized, color='orange', alpha=0.4, label = index)\n",
"\n",
"        # labels: category name with the true value as a percentage\n",
"        ax[row, col].set_yticklabels([])\n",
"        ax[row, col].set_xticks(angles)\n",
"        ticks = [categories[i].replace(\"_\",\" \") + f\"\\n({round(100 * tvalues[i],2)}%)\" for i in range(len(categories))]\n",
"        ax[row, col].set_xticklabels(ticks, color=\"black\")\n",
"\n",
"        ax[row, col].spines['polar'].set_visible(False)\n",
"\n",
"        ax[row, col].set_title(f'Characteristics of the segment {index+1}\\n', size = 15)\n",
"\n",
"    # hide the grid cells that received no segment\n",
"    for unused in range(nb_segments, 2 * nb_rows) :\n",
"        ax[unused // 2, unused % 2].set_visible(False)\n",
"\n",
"    if show :\n",
"        plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 736,
"id": "edf76688-1b7e-469e-873f-4884d551be66",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAB6gAAAZHCAYAAADZsixaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1iUV9o/8O/QBhiG3nsXUAQVC1hQMZoeo0mMaWZT3ZTNbjYxm7Krb7LZ1N3sZt9sdjebNcmb3jfFxIJgF7FL7116nWGYen5/5DezIIMUgaF8P9fFpTz1nmF4eM59n+cciRBCgIiIiIiIiIiIiIiIiIiIaIxZWToAIiIiIiIiIiIiIiIiIiKaHligJiIiIiIiIiIiIiIiIiKiccECNRERERERERERERERERERjQsWqImIiIiIiIiIiIiIiIiIaFywQE1EREREREREREREREREROOCBWoiIiIiIiIiIiIiIiIiIhoXLFATEREREREREREREREREdG4YIGaiIiIiIiIiIiIiIiIiIjGBQvUREREREREREREREREREQ0LligJqJxo1Qq8dprr2HFihXw8fGBnZ0d3NzckJycjN/97neoqqrqs/2dd94JiUSCzMxMywQ8xb3zzjuQSCTYtm3bmJ1j27ZtkEgkeOedd8bsHGPt9ddfx8yZMyGVSiGRSLB8+fJLPqZEIkFoaOglH4cmt+rqavztb3/Dpk2bEBsbCysrK0gkEhw9etTSoRERERERTXrMQUwszEEMDXMQNBYMBgMOHDiALVu2YOHChfD29oZUKkVERAQ2b96M8vJyS4dIRNMQC9RENC6OHj2KqKgoPProozh27BhmzZqFG264ASkpKSgtLcVzzz2H6Oho7Nmzx9KhWsRkbSwsX74cEokEFRUVlg5lTHz55Zd45JFHcP78eVx77bXYtGkTLr/88ovuU1FRMWqNSJr4LiWJ9cUXX+DBBx/Ee++9h4KCAgghRj9AIiIiIqJpiDmIi2MOYmJiDoIGM9IcRFlZGZYtW4ZXXnkFtbW1SElJwVVXXQW1Wo1//OMfSEhIwMGDB8cmaCKiAdhYOgAimvrOnj2LlStXQqVS4YknnsBvf/tbyGQy03qDwYCvv/4aW7ZsQU1NjQUjnV6uv/56LFq0CJ6enmN2joceegg333wz/Pz8xuwcY+nrr78GAHz++edYuXKlZYOhKSc8PBy/+tWvMH/+fMyfPx/33HMP9u3bZ+mwiIiIiIgmNeYgJibmIAbHHASNFYlEgjVr1uCpp57CsmXLTMvVajU2b96Md955B7feeitKSkpga2trwUiJaDphgZqIxpQQArfddhtUKhW2bduGrVu39tvGysoK69atQ1paGqqrqy0Q5fTk4uICFxeXMT2Hp6fnmDY+x5oxWREeHm7hSGgquvbaa3HttddaOgwiIiIioimDOYiJizmIwTEHQWMlIiICP/74Y7/lUqkUb775Jr766itUVVXh8OHDSE1NtUCERDQdcYhvIhpTO3fuxLlz5xAYGIinn376otu6uLhg1qxZZtft378fK1euhFwuh7OzM6666irk5eX12669vR1//etfsWbNGoSEhEAqlcLDwwOXX345du/ebfbYvYeI+vDDD7Fo0SLI5XK4urqatvn+++9x1113ITY2Fs7OzpDJZEhISMAf/vAHqNXqAV/TkSNHcNNNN8Hf3x9SqRQBAQFYs2YN3n//fQD/nYMJACorKyGRSExfFw7PpFAo8OyzzyI+Ph6Ojo5wdnZGamqqqYdtb72HeOrs7MSvf/1rhIWFwdbWFr/85S/7nPvC+Z+0Wi3+8Y9/YMGCBfD09ISjoyNCQ0Nx9dVX4+OPP+5zfOPTnmFhYX1iN7rY/E9arRZ/+9vfsHjxYri6usLR0RHR0dG49957kZOT02fbnTt3Ys2aNQgMDIRUKoW/vz+WLFmC//mf/xnwvTenuroa999/v+mz4e3tjXXr1iE7O7vPdsa4Mz
Iy+r2+iw2jtG3bNoSFhQEA9u3b1+c9ufPOO/ttr9fr8fLLLyM6OhpSqRRBQUF44oknBvxMDeczcDFCCHz88cdYtmwZfH19YW9vj6CgIKxatQpvvPGG2e3fffddLFu2DK6urnBwcMDs2bPx6quvQqvVmj3HqVOncMUVV5iSEGvWrEF2dvaAn7vev4effPIJ5s+fD0dHRwQEBGDLli3QaDQAgNLSUmzcuBHe3t5wdHTEypUrcfbs2QFf67fffos1a9bAw8MD9vb2iI6Oxm9/+1soFIp+2/aO4euvv8aiRYsgk8ng7u6OjRs39nu6QiKR4N133wUArFixos/Pe6oOOUdERERENJExB8EcBHMQzEEwBzE8xjgBoK6ubsTHISIaNkFENIYeeughAUD86le/Gva+mzZtEgDEo48+KqytrUVCQoJYv369iI6OFgCEh4eHOH/+fJ99fvjhBwFABAUFibS0NLFhwwaRnJwsJBKJkEgk4u233+53ntTUVAFA3HfffcLKykosXbpU3HzzzWLx4sWmbXx8fISTk5NYuHChuPHGG8WaNWuEm5ubACBWrlwpdDpdv+O+9tprQiKRCABi/vz54uabbxYrV64Unp6eIiQkRAghxIEDB0yvUyaTiU2bNpm+XnjhBdOx6uvrRVxcnAAgAgICxLXXXitWrVolZDKZANBnWyGEKC8vFwDEggULRGJionBzcxNr164V69atE9u2bRNCCLF9+3YBQGzdurXPvhs2bBAAhKenp7jmmmvEhg0bxJIlS4Szs7NITU0VQgjR1NQkNm3aJHx8fAQAsX79+j6xG23dulUAENu3b+9zDoVCIZYuXSoACCcnJ3HFFVeIm266ScyfP1/Y2Nj0ienNN98UAIRUKhWrVq0SGzduFKtWrRIBAQFiOH/Gzp49Kzw9PQUAERMTI26++WaRkpIiAAgbGxvx6aefmrb96quvBnx9+fn5A57jq6++EuvXrxcAhI+PT5/35K233jJtB0CEhISIDRs2CJlMJlasWCGuvvpq4eLiIgCIW2+9td+xh/sZuJgnnnhCABByuVxcccUVYuPGjWL58uV9PptGer1e3HjjjQKAcHZ2FmlpaeK6664Tvr6+AoC48sorhV6v77PPoUOHhIODgwAg5syZI26++WYxe/ZsYWdnJ+6//36znzvj7+Evf/lLYWNjI5KTk8XatWtNP7M77rhDFBUVCU9PTxEeHi7WrVsn4uPjBQDh7u4u6uvr+73ORx99VAAQ9vb2YtmyZWLdunUiJCREABDz5s0TCoXCbAyPP/64sLKyEgsWLBDr1q0TQUFBAoCIiooS3d3dpu03bdokIiIiBACxZs2aPj/vpqamIf88Ljz/kSNHhr0vERERERExB8EcBHMQzEEwBzFcOp1OeHt7CwBi7969Iz4OEdFwsUBNRGNq8eLFAoD4v//7v2Hva2w0WVlZiQ8//NC0XKfTmW7Af/vb3/bZp6ysTBw6dKjfsU6ePClcXV2Fs7Oz6Orq6rPOeENob28vMjMzzcby1Vdf9buR7OzsFFdffbUAIN59990+6/bt2yckEolwdnYWGRkZfdap1Wrx448/9llmbCwM5IorrhAAxJYtW4RGozEtLy0tFREREcLa2lqcOXPGtNzYOAQgkpOTRVtbW79jmmscGvebP3++UKlUfbbv7u4Whw8f7rPM+N6Vl5ebjXugxuHdd98tAIgVK1aI5ubmPutqamrE8ePHTd+HhIQIZ2fnfucwGAxDvnE2GAymhsSTTz4pDAaDad1nn30mrKyshFwu79fAGOz1mWN8D40NaXOMP5vY2Ng+xy4rKzMlHUpKSvrsM9zPwEBUKpWQSqUiNDRUtLS09Fmn1WrFvn37+ix76aWXBABx2WWXicbGRtNyhUIhrrnmGgFA/O///q9puV6vNyVwXn755T7HevbZZ02vfaDGoVwuF/v37zctP3/+vPDx8RESiUTExsaKRx991NQYNRgM4o477hAAxO9+97s+x/
vkk09MjdPe77FGoxH33XefACAee+wxszHIZDKRnp5uWq5UKk2JhAsTTMbr1IW/5yPBAjURERER0aVhDoI5COYg+mM
"text/plain": [
"<Figure size 2500x2000 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"radar_mp_plot_all(df=X_test_segment_caract, categories=X_test_segment_caract.columns[1:].values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c48136d1-c980-4f74-a69f-ed4304c83188",
"metadata": {},
"outputs": [],
"source": [
"# export the combined radar chart (all the segments) to MinIO\n",
"\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = f\"spider_chart_all_{activity}\"\n",
"FILE_PATH_OUT_S3 = f\"{PATH}{file_name}.png\"\n",
"\n",
"radar_mp_plot_all(df=X_test_segment_caract, categories=categories)\n",
"\n",
"# NOTE(review): radar_mp_plot_all calls plt.show() before returning; with the inline\n",
"# backend this presumably closes the figure, in which case savefig below captures a\n",
"# new empty figure - TODO confirm the uploaded image is not blank\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png')\n",
"\n",
"# upload the PNG bytes held in the in-memory buffer\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
"    s3_file.write(image_buffer.getvalue())\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"id": "a2395680-69fe-4247-8deb-22f8ee15830b",
"metadata": {},
"source": [
"## End of the main part: the cells below are only exploratory attempts"
]
},
{
"cell_type": "code",
"execution_count": 489,
"id": "7d9a2aca-d28d-43b3-9b72-5913b20c4f04",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAH3CAYAAABU/z5zAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3hb5fn+76M9LU9Z3nvHSeyEONtxwgoQCIW2QGgh9NeGQkMppS1QApQVVoEvhRJ2aMMsJRDKJsOJ7Ww7ieNtxzO2vGVrz/f3h2IRxxkess+R/H6uS1diWTrnPrKkc5/ned7nYQghBBQKhUKhUCjTBB7bAigUCoVCoVCmEmp+KBQKhUKhTCuo+aFQKBQKhTKtoOaHQqFQKBTKtIKaHwqFQqFQKNMKan4oFAqFQqFMK6j5oVAoFAqFMq2g5odCoVAoFMq0gpofCoVCoVAo0wpqfijn5dixY5DJZPjHP/7BtpQx8frrr0Mul2P//v1sS6GchsFgQHp6On7605/C5XKxLYdCoUxTqPmZBmzevBkMw3huAoEAERERuOGGG1BXV3fO5+n1elx//fVYv3491q9fP4WKJ8aRI0dwzz334MMPP0ReXt64tzP0ujU1NXlPnJ/y5JNP4rPPPrvg4379618jPDwcW7ZsAY83uq+fpqYmMAyDzZs3j0nT+++/jxdffPGsv2MYBo888siYtkehUPwHAdsCKFPHO++8g/T0dFgsFhQXF+OJJ57Azp07UV1djaCgoBGP/9WvfoV58+bhqaeeYkHt+BgcHMRPf/pTPP/881i1ahXbcqYNTz75JK6//nqsXr36nI955ZVXcOzYMRQXF0MsFo962xEREdi7dy+SkpLGpOn999/H8ePHcffdd4/43d69exEdHT2m7VEoFP+Bmp9pxIwZMzB37lwAwLJly+B0OvHwww/js88+w9q1a0c8/uOPP55qiRMmICDgvNGs6Y7dbvdE/6aaO++8E3feeeeoH+90OuFwOCAWizF//nyvavH29igUim9B017TmCEj1NnZOez+Q4cO4eqrr0ZwcDAkEglycnJGGCGTyYR7770XCQkJkEgkCA4Oxty5c/HBBx94HrNs2TIsW7ZsxH5vvfVWxMfHe34eSms8++yzePrppxEfHw+pVIply5ahtrYWdrsd9913HyIjI6FSqXDttdeiq6trxHY/+ugjLFiwAHK5HAqFApdddhnKyspG9Vrs27cPixYtgkQiQWRkJO6//37Y7fazPnY0+zlx4gRuuOEGREZGQiwWIzw8HCtWrMCRI0cuqGX//v1YtWoVQkJCIJFIkJSUNCx6UV9fj7Vr1yIlJQUymQxRUVFYtWoVysvLh21n165dYBgG//73v/HHP/4RUVFREIvFqK+vR3d3N+644w5kZmZCoVBArVZj+fLl2LNnzwg9VqsVjz76KDIyMiCRSBASEoKCggKUlJQAcKeQjEYj3n33XU9q9fS/u1arxbp16xAdHQ2RSISEhAT87W9/g8Ph8Dxm6D3wzDPP4PHHH0dCQgLEYjF27tx51rRXd3c3fvOb3yAmJgZisRhhYWFYtGgRfvjhBwDu996XX36J5ubmYSnfIc6W9jrbe+CNN94Ykfo8V8osPj4et95667D7RnPsAPDqq69i1qxZUCgUUCqVSE9PxwMPPDBiHxQKxTvQyM80prGxEQCQmprquW/nzp24/PLLkZeXh02bNkGlUuHDDz/Ez3/+c5hMJs+X+z333IN///vfePzxx5GTkwOj0Yjjx4+jt7d33HpeeeUVzJw5E6+88gp0Oh3++Mc/YtWqVcjLy4NQKMTbb7+N5uZm3Hvvvfh//+//Ydu2bZ7nPvnkk3jwwQexdu1aPPjgg7DZbHj22WexZMkSHDhwAJmZmefcb2VlJVasWIH4+Hhs3rwZMpkM//znP/H++++PeOxo93PFFVfA6XTimWeeQWxsLHp6elBSUgKdTnfe1+Dbb7/FqlWrkJGRge
effx6xsbFoamrCd99953lMe3s7QkJC8NRTTyEsLAx9fX149913kZeXh7KyMqSlpQ3b5v33348FCxZg06ZN4PF4UKvV6O7uBgA8/PDD0Gg0MBgM2Lp1K5YtW4bt27d7zIvD4cDKlSuxZ88e3H333Vi+fDkcDgf27duHlpYWLFy4EHv37sXy5ctRUFCADRs2AHBH4AD3yX/evHng8Xh46KGHkJSUhL179+Lxxx9HU1MT3nnnnWFaX3rpJaSmpuK5555DQEAAUlJSzvo6/eIXv0BpaSmeeOIJpKamQqfTobS01PP+++c//4nf/OY3aGhowNatW8/7mgNjew+MltEe+4cffog77rgD69evx3PPPQcej4f6+npUVlaOe98UCuUCEIrf88477xAAZN++fcRutxO9Xk+++eYbotFoyNKlS4ndbvc8Nj09neTk5Ay7jxBCrrrqKhIREUGcTichhJAZM2aQ1atXn3e/+fn5JD8/f8T9t9xyC4mLi/P83NjYSACQWbNmebZPCCEvvvgiAUCuvvrqYc+/++67CQAyMDBACCGkpaWFCAQCsn79+mGP0+v1RKPRkJ/97Gfn1fnzn/+cSKVSotVqPfc5HA6Snp5OAJDGxsYx7aenp4cAIC+++OJ593s2kpKSSFJSEjGbzaN+jsPhIDabjaSkpJA//OEPnvt37txJAJClS5eOaht2u52sWLGCXHvttZ77//WvfxEA5I033jjv8+VyObnllltG3L9u3TqiUChIc3PzsPufe+45AoBUVFQQQn58DyQlJRGbzTbssUO/e+eddzz3KRQKcvfdd59X05VXXjnsfXY6AMjDDz/s+Xm074GzPXeIuLi4Ya/BaI/9d7/7HQkMDDzvsVAoFO9C017TiPnz50MoFEKpVOLyyy9HUFAQPv/8c0/9R319Paqrq7FmzRoA7qv+odsVV1yBjo4O1NTUAADmzZuHr7/+Gvfddx927doFs9k8YX1XXHHFsBVAGRkZAIArr7xy2OOG7m9paQHgjpY4HA788pe/HKZZIpEgPz8fu3btOu9+d+7ciRUrViA8PNxzH5/Px89//vNhjxvtfoKDg5GUlIRnn30Wzz//PMrKyka1rLu2thYNDQ341a9+BYlEcs7HORwOPPnkk8jMzIRIJIJAIIBIJEJdXR2qqqpGPP66664763Y2bdqE3NxcSCQSCAQCCIVCbN++fdg2vv76a0gkEtx2220X1H82/ve//6GgoACRkZHDXrOVK1cCAAoLC4c9/uqrr4ZQKLzgdufNm4fNmzfj8ccfx759+86Zohwto30PjIXRHvu8efOg0+lw44034vPPP0dPT8+EjoVCoVwYan6mEf/6179w8OBB7NixA+vWrUNVVRVuvPFGz++Han/uvfdeCIXCYbc77rgDADxfzC+99BL+8pe/4LPPPkNBQQGCg4OxevXqCRUbBwcHD/tZJBKd936LxTJM90UXXTRC90cffXTBk0lvby80Gs2I+8+8b7T7YRgG27dvx2WXXYZnnnkGubm5CAsLw1133QW9Xn9OHUOpqAutQrrnnnuwYcMGrF69Gl988QX279+PgwcPYtasWWc1oRERESPue/755/Hb3/4WeXl5+O9//4t9+/bh4MGDuPzyy4dto7u7G5GRkaNeln4mnZ2d+OKLL0a8XllZWQAw4m9zNq1n46OPPsItt9yCN998EwsWLEBwcDB++ctfQqvVjkvnaN8DY2G0x/6LX/zCk9K97rrroFarkZeXh++//37c+6ZQKOeH1vxMIzIyMjxFzgUFBXA6nXjzzTfxySef4Prrr0doaCgAd43IT37yk7NuY6ieRC6X429/+xv+9re/obOz0xMFWrVqFaqrqwEAEokEAwMDI7bh7SvbId2ffPIJ4uLixvz8kJCQs540z7xvLPuJi4vDW2+9BcAd0fn444/xyCOPwGazYdOmTWd9TlhYGACgra3tvNvesmULfvnLX+LJJ58cdn9PTw8CAwNHPP70Qt/Tt7
Fs2TK8+uqrw+4/05yFhYWhqKgILpdrXAYoNDQUM2fOxBNPPHHW30dGRl5Q67m2++KLL+LFF19ES0sLtm3bhvvuuw9
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# one colour per segment\n",
"colors = [\"blue\", \"green\", \"orange\", \"red\"]\n",
"\n",
"# characteristics to plot and one angle per axis (identical for every segment)\n",
"categories = ['share_known_gender', 'share_of_women', 'country_fr']\n",
"num_categories = len(categories)\n",
"angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"# initialize the radar chart\n",
"fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"for i in range(4) :\n",
"\n",
"    # values of segment i for the selected characteristics\n",
"    values = list(X_test_segment_mp.loc[i,categories])\n",
"\n",
"    # values scaled by their own maximum\n",
"    values_normalized = [ max(values) * elt for elt in values]\n",
"\n",
"    # transparent outline (alpha=0) of the raw values\n",
"    ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"\n",
"    # fill the polygon of the segment with its colour\n",
"    ax.fill(angles, values_normalized, color=colors[i], alpha=0.2, label = str(i+1))\n",
"\n",
"# label the axes\n",
"ax.set_yticklabels([])\n",
"ax.set_xticks(angles)\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# title of the chart\n",
"plt.title('Résumé des caractéristiques')\n",
"\n",
"# display the chart\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 301,
"id": "96aa9ff5-c1ed-49eb-8fb7-2319ac0c40be",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgoAAAITCAYAAABmGDQGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3hUZfbHPzPpvVcCSQgphE4g9G4BkWJvSFkL9l3LrrprW1ddFcUuWFBUYGWxwk8BlU7oISRAQhophPRep7+/P8ZkibSUydyZyf08zzxJptz3zM299/3ec857jkIIIZCRkZGRkZGRuQBKqQ2QkZGRkZGRsVxkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEgIyMjIyMjc1FkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEgIyMjIyMjc1FkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEg02HS0tJwdXXlvffek9qUTvHxxx/j5ubGwYMHpTZF5hwaGxuJi4vjpptuwmAwSG2OjIzMRZCFQi9j9erVKBSKtoe9vT0hISHceuutZGdnX/RzDQ0N3HjjjTz88MM8/PDDZrS4exw7dozHHnuMr7/+mjFjxnR5O637LT8/33TG2SivvPIKP/zww2Xfd8899xAUFMSaNWtQKjt2KcrPz0ehULB69epO2bRu3TrefvvtC76mUCh44YUXOrU9GZnehL3UBshIw+eff05cXBwqlYqkpCRefvllduzYwalTp/Dx8Tnv/XfddReJiYm8+uqrEljbNerr67nppptYvnw5c+bMkdqcXsMrr7zCjTfeyPz58y/6ng8++IC0tDSSkpJwcnLq8LZDQkLYv38/UVFRnbJp3bp1nDhxgr/85S/nvbZ//37CwsI6tT0Zmd6ELBR6KYMHD2bUqFEATJ06Fb1ez/PPP88PP/zAkiVLznv/f//7X3Ob2G08PT0v6SXp7Wi12javkrl58MEHefDBBzv8fr1ej06nw8nJibFjx5rUFlNvT0bG1pBDDzIAbaKhrKys3fNHjhxh7ty5+Pr64uzszIgRI84TDc3NzTzxxBNERkbi7OyMr68vo0aN4j//+U/be6ZOncrUqVPPG3fx4sVERES0/d3qWl62bBmvvfYaERERuLi4MHXqVLKystBqtTz11FOEhobi5eXFddddR3l5+XnbXb9+PePGjcPNzQ13d3euvvpqUlJSOrQvDhw4wIQJE3B2diY0NJSnn34arVZ7wfd2ZJzTp09z6623EhoaipOTE0FBQcyYMYNjx45d1paDBw8yZ84c/Pz8cHZ2Jioqqt1dcU5ODkuWLCE6OhpXV1f69OnDnDlzOH78eLvt7Ny5E4VCwVdffcXjjz9Onz59cHJyIicnh4qKCh544AHi4+Nxd3cnMDCQ6dOns2fPnvPsUavVvPjiiwwcOBBnZ2f8/PyYNm0a+/btA4xu/KamJr744ou28Na5//fS0lKWLl1KWFgYjo6OREZG8s9//hOdTtf2ntZj4PXXX+ell14iMjISJycnduzYccHQQ0VFBffeey99+/bFycmJgIAAJkyYwG+//QYYj72ffvqJgoKCdmG3Vi4UerjQMfDJJ5+cF366WNgiIiKCxYsXt3uuI98dYMWKFQwbNgx3d3c8PDyIi4vj73//+3ljyMiYC9mjIANAXl4eADExMW3P7dixg5kzZzJmzBhWrlyJl5cXX3/9NbfccgvNzc1tF8LHHnuMr776ipdeeokRI0bQ1NTEiRMnqKqq6rI9H3zwAUOHDuWDDz6gtraWxx9/nDlz5jBmzBgcHBz47LPPKCgo4IknnuDuu+9m48aNbZ995ZVXeOaZZ1iyZAnPPPMMGo2GZcuWMWnSJA4dOkR8fPxFx01PT2fGjBlERESwevVqXF1d+fDDD1m3bt157+3oONdccw16vZ7XX3+dfv36UVlZyb59+6itrb3kPti6dStz5sxh4MCBLF++nH79+pGfn88vv/zS9p
7i4mL8/Px49dVXCQgIoLq6mi+++IIxY8aQkpJCbGxsu20+/fTTjBs3jpUrV6JUKgkMDKSiogKA559/nuDgYBobG/n++++ZOnUq27Zta5vodTods2bNYs+ePfzlL39h+vTp6HQ6Dhw4QGFhIePHj2f//v1Mnz6dadOm8eyzzwJGzw4YJ8rExESUSiXPPfccUVFR7N+/n5deeon8/Hw+//zzdra+++67xMTE8MYbb+Dp6Ul0dPQF99Odd97J0aNHefnll4mJiaG2tpajR4+2HX8ffvgh9957L7m5uXz//feX3OfQuWOgo3T0u3/99dc88MADPPzww7zxxhsolUpycnJIT0/v8tgyMt1GyPQqPv/8cwGIAwcOCK1WKxoaGsSWLVtEcHCwmDx5stBqtW3vjYuLEyNGjGj3nBBCXHvttSIkJETo9XohhBCDBw8W8+fPv+S4U6ZMEVOmTDnv+UWLFonw8PC2v/Py8gQghg0b1rZ9IYR4++23BSDmzp3b7vN/+ctfBCDq6uqEEEIUFhYKe3t78fDDD7d7X0NDgwgODhY333zzJe285ZZbhIuLiygtLW17TqfTibi4OAGIvLy8To1TWVkpAPH2229fctwLERUVJaKiokRLS0uHP6PT6YRGoxHR0dHi0UcfbXt+x44dAhCTJ0/u0Da0Wq2YMWOGuO6669qe//LLLwUgPvnkk0t+3s3NTSxatOi855cuXSrc3d1FQUFBu+ffeOMNAYiTJ08KIf53DERFRQmNRtPuva2vff75523Pubu7i7/85S+XtGn27NntjrNzAcTzzz/f9ndHj4ELfbaV8PDwdvugo9/9oYceEt7e3pf8LjIy5kYOPfRSxo4di4ODAx4eHsycORMfHx9+/PHHtnh1Tk4Op06d4o477gCMd5Otj2uuuYaSkhIyMzMBSExMZPPmzTz11FPs3LmTlpaWbtt3zTXXtMuEHzhwIACzZ89u977W5wsLCwHjXbhOp2PhwoXtbHZ2dmbKlCns3LnzkuPu2LGDGTNmEBQU1PacnZ0dt9xyS7v3dXQcX19foqKiWLZsGcuXLyclJaVDSwGzsrLIzc3lrrvuwtnZ+aLv0+l0vPLKK8THx+Po6Ii9vT2Ojo5kZ2eTkZFx3vtvuOGGC25n5cqVjBw5EmdnZ+zt7XFwcGDbtm3ttrF582acnZ3505/+dFn7L8T//d//MW3aNEJDQ9vts1mzZgGwa9eudu+fO3cuDg4Ol91uYmIiq1ev5qWXXuLAgQMXDRN1lI4eA52ho989MTGR2tpabrvtNn788UcqKyu79V1kZEyBLBR6KV9++SWHDx9m+/btLF26lIyMDG677ba211tzFZ544gkcHBzaPR544AGAtovYu+++y5NPPskPP/zAtGnT8PX1Zf78+d1KJPT19W33t6Oj4yWfV6lU7ewePXr0eXavX7/+shfeqqoqgoODz3v+j891dByFQsG2bdu4+uqref311xk5ciQBAQE88sgjNDQ0XNSO1nDA5bLxH3vsMZ599lnmz5/Ppk2bOHjwIIcPH2bYsGEXFGwhISHnPbd8+XLuv/9+xowZw7fffsuBAwc4fPgwM2fObLeNiooKQkNDO7yU8Y+UlZWxadOm8/bXoEGDAM7731zI1guxfv16Fi1axKeffsq4cePw9fVl4cKFlJaWdsnOjh4DnaGj3/3OO+9sC6vdcMMNBAYGMmbMGH799dcujy0j013kHIVeysCBA9sSGKdNm4Zer+fTTz/lm2++4cYbb8Tf3x8wxrSvv/76C26jNf7t5ubGP//5T/75z39SVlbW5l2YM2cOp06dAsDZ2Zm6urrztmHqO6ZWu7/55hvCw8M7/Xk/P78LTjB/fK4z44SHh7Nq1SrA6Cn473//ywsvvIBGo2HlypUX/ExAQAAARUVFl9z2mjVrWLhwIa+88kq75ysrK/H29j7v/ecm8Z27jalTp7JixYp2z/9RyAQEBLB3714MBkOXxIK/vz9Dhw7l5Zdfvu
DroaGhl7X1Ytt9++23efvttyksLGTjxo089dRTlJeXs2XLlk7b2dFjAMDJyQm1Wn3e83/Mz+nMd1+yZAlLliyhqam
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# KEEP THIS CODE !!\n",
"\n",
"# example characteristics and their values\n",
"categories = ['Force', 'Vitesse', 'Agilité', 'Précision', 'Endurance']\n",
"values = [8, 7, 6, 9, 7] # example values, adjust to your data\n",
"\n",
"# maximal value allowed for each characteristic\n",
"max_range = [20, 20, 20, 20, 20]\n",
"\n",
"# each value relative to its range, scaled by twice the largest raw value\n",
"values_normalized = []\n",
"for x, y in zip(values, max_range):\n",
"    values_normalized.append(2 * max(values) * x / y)\n",
"\n",
"# number of characteristics\n",
"num_categories = len(categories)\n",
"\n",
"# one angle per axis\n",
"angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"# initialize the radar chart\n",
"fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"# transparent outline (alpha=0) of the raw values, then visible outline of the scaled values;\n",
"# the first point is repeated to close each polygon\n",
"ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='blue', linewidth=1.5)\n",
"\n",
"# fill the polygon\n",
"ax.fill(angles, values_normalized, color='skyblue', alpha=0.4)\n",
"\n",
"# label the axes\n",
"ax.set_yticklabels([])\n",
"ax.set_xticks(angles)\n",
"ax.set_xticklabels(categories)\n",
"\n",
"# title of the chart\n",
"plt.title('Résumé des caractéristiques')\n",
"\n",
"# display the chart\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 149,
"id": "adb7ccb3-7dea-4347-9298-37311a2f1fb1",
"metadata": {},
"outputs": [],
"source": [
"def radar_chart(values, categories, segment) :\n",
"    \"\"\"Draw a filled radar chart of `values`, one axis per entry of `categories`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    values : sequence of float\n",
"        One value per category; each one is displayed as a percentage\n",
"        (value * 100, rounded to 2 decimals) in its axis label.\n",
"    categories : sequence of str\n",
"        Axis names, in the same order as `values`.\n",
"    segment :\n",
"        Identifier inserted in the figure title.\n",
"    \"\"\"\n",
"    # number of characteristics\n",
"    num_categories = len(categories)\n",
"\n",
"    # one angle per axis\n",
"    angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"    # repeat the first point to close the polygon; this works on copies so that the\n",
"    # `values` object passed by the caller is not modified\n",
"    closed_values = list(values)\n",
"    closed_values += closed_values[:1]\n",
"    closed_angles = angles + angles[:1]\n",
"\n",
"    # initialize the radar chart\n",
"    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"    # filled polygon\n",
"    ax.fill(closed_angles, closed_values, color='skyblue', alpha=0.4)\n",
"\n",
"    # outline of the polygon\n",
"    ax.plot(closed_angles, closed_values, color='blue', linewidth=2, linestyle='solid')\n",
"\n",
"    # label the axes with the category name and its value as a percentage\n",
"    ax.set_yticklabels([])\n",
"    labels = [f\"{category} = {round(100 *value,2)}%\" for category, value in zip(categories, closed_values[:-1])]\n",
"    ax.set_xticks(angles)\n",
"    ax.set_xticklabels(labels, fontsize=10, color='black', ha='right')\n",
"\n",
"    # title of the chart\n",
"    plt.title(f'Caracteristics of segment {segment}')\n",
"\n",
"    # display the chart\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 150,
"id": "8793fb51-812c-4500-b252-2e2d61d6ff48",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkkAAAH2CAYAAABk9BgJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3xT1fvHPzezTZOOdO+W0gmUtoyWUqBYwAGiP5YLBRUVQURUBFyIynShqAwHOFARBQS/OJBRShkFWgqle++92+yc3x+xaSsFum/SnvfrlVfT5N5zPrkZ93Of85znMIQQAgqFQqFQKBRKOzhsC6BQKBQKhUIxRKhJolAoFAqFQukAapIoFAqFQqFQOoCaJAqFQqFQKJQOoCaJQqFQKBQKpQOoSaJQKBQKhULpAGqSKBQKhUKhUDqAmiQKhUKhUCiUDqAmiUKhUCgUCqUDqEmiUHqZq1ev4vHHH4enpydMTEwgFosREhKCLVu2oLq6mm15NyU5ORlvvfUWcnNz+6T9t956CwzDdGmf5uZmvPXWWzh16tQNz+3ZswcMw/SZ3p6wb98+DBs2DKampmAYBleuXGFbksHz+eefY8+ePZ3e/vfff8djjz2GESNGgM/nd/mzRaF0BoYuS0Kh9B5ffPEFlixZAl9fXyxZsgQBAQFQqVS4dOkSvvjiC4wcORIHDx5kW2aH/PLLL5g7dy5OnjyJyMjIXm+/sLAQhYWFCAsL6/Q+lZWVsLW1xdq1a/HWW2+1e66iogJZWVkIDg6GUCjsZbXdp6KiAs7Ozrjrrrvw0ksvQSgUIjAwECKRiG1pBs3w4cNhY2PToSHuiCeffBIxMTEIDg5GVlYWLl++DHo6o/Q2PLYFUCgDhXPnzuHZZ5/F1KlTcejQoXYn7qlTp+Kll17Cn3/+2St9NTc3G81Jt0Wri4sLXFxceq1dW1tb2Nra9lp7vUV6ejpUKhXmz5+PSZMmsS1nwPLFF1+Aw9ENhjz33HO4fPkyy4ooAxJCoVB6hRkzZhAej0fy8/M7tf1PP/1Epk6dShwcHIiJiQnx8/Mjq1atIo2Nje22W7BgATEzMyNXr14lU6dOJWKxmISFhRFCCPn777/JzJkzibOzMxEKhcTLy4s8/fTTpKKi4ob+UlJSyIMPPkjs7OyIQCAgrq6u5NFHHyVyuZzs3r2bALjhtnv3bv3+x44dI3fccQeRSCTE1NSUhIeHk3/++addH2vXriUAyOXLl8ns2bOJpaUlcXBwaPdcW44fP04mTZpEpFIpMTExIa6urmTWrFmkqamJ5OTkdKhpwYIFhBCi15yTk9OuzT/++IPccccdxNzcnJiamhI/Pz+yYcMG/fNZWVnkgQceII6OjkQgEBA7Oztyxx13kISEhNu+Z7/99hsJCwsjpqamRCwWkylTppCzZ8+2e6/+q3fSpEk3ba+pqYm89NJLxMPDgwiFQmJlZUVGjRpFfvjhh3bbXbx4kdx7773EysqKCIVCEhQURPbt23dDezExMSQsLIwIhULi5OREXn/9dfLFF1/ccJzc3d3J9OnTyZEjR0hQUJD+83fkyBH9sfXz8yMikYiMGTOGXLx48Ya+OqOp5T06ceIEWbx4MbG2tiZSqZT83//9HykqKmqn57/Hzd3d/VZvRTuWLl16w2eLQukNaCSJQukFNBoNTpw4gVGjRsHV1bVT+2RkZOCee+7BCy+8ADMzM6SmpmLz5s2Ii4vDiRMn2m2rVCoxc+ZMPPPMM1i9ejXUajUAICsrC+PGjcOiRYtgYWGB3NxcfPjhh4iIiMC1a9fA5/MBAImJiYiIiICNjQ3efvtteHt7o6SkBIcPH4ZSqcT06dOxYcMGvPrqq/jss88QEhICAPDy8gIAfP/993jsscdw33334ZtvvgGfz8fOnTtx55134q+//kJUVFQ7vbNmzcKDDz6IxYsXo6mpqcPXn5ubi+nTp2PChAn4+uuvYWlpiaKiIvz5559QKpVwdH
TEn3/+ibvuugtPPvkkFi1aBAC3jB599dVXeOqppzBp0iTs2LEDdnZ2SE9PR1JSkn6be+65BxqNBlu2bIGbmxsqKytx9uxZ1NbW3vL9+uGHH/DII49g2rRp+PHHH6FQKLBlyxZERkbi+PHjiIiIwBtvvIGxY8di6dKl2LBhAyZPngxzc/Obtvniiy/iu+++w7vvvovg4GA0NTUhKSkJVVVV+m1OnjyJu+66C6GhodixYwcsLCzw008/4YEHHkBzczMWLlwIQJcLN3XqVPj4+OCbb76BSCTCjh078P3333fYd2JiItasWYPXXnsNFhYWWLduHWbNmoU1a9bg+PHj2LBhAxiGwapVqzBjxgzk5OTA1NS0S5paWLRoEaZPn44ffvgBBQUFWLlyJebPn6//nB88eBBz5syBhYUFPv/8cwAwqCFUyiCGbZdGoQwESktLCQDy4IMPdmt/rVZLVCoViY6OJgBIYmKi/rmW6MTXX3/dqTby8vIIAPLbb7/pn7vjjjuIpaUlKS8vv+n++/fvJwDIyZMn2z3e1NREpFIpuffee9s9rtFoyMiRI8nYsWP1j7VEi958880b2v9vJOmXX34hAMiVK1duqqmiooIAIGvXrr3huf9GkhoaGoi5uTmJiIggWq22w/YqKysJALJ169ab9tkRGo2GODk5kREjRhCNRqN/vKGhgdjZ2ZHw8HD9YydPniQAyP79+2/b7vDhw8n9999/y238/PxIcHAwUalU7R6fMWMGcXR01OuZO3cuMTMzaxdF1Gg0JCAgoMNIkqmpKSksLNQ/duXKFQKAODo6kqamJv3jhw4dIgDI4cOHu6yp5T1asmRJu+22bNlCAJCSkhL9Y8OGDbtl1O1W0EgSpa+gs9soFJbIzs7Gww8/DAcHB3C5XPD5fH0OS0pKyg3bz549+4bHysvLsXjxYri6uoLH44HP58Pd3b1dG83NzYiOjsa8efO6lcNz9uxZVFdXY8GCBVCr1fqbVqvFXXfdhYsXL94QLepI638JCgqCQCDA008/jW+++QbZ2dld1vZfnfX19ViyZMlNZzpJpVJ4eXnhvffew4cffoiEhARotdrbtp2Wlobi4mI8+uij+jwYABCLxZg9ezbOnz+P5ubmLmseO3Ys/vjjD6xevRqnTp2CTCZr93xmZiZSU1PxyCOPAEC743/PPfegpKQEaWlpAIDo6GjccccdsLGx0e/P4XAwb968DvsOCgqCs7Oz/n9/f38AQGRkZLt8t5bH8/LyuqyphZkzZ7b7PzAwsF2bFIqhQk0ShdIL2NjYQCQSIScnp1PbNzY2YsKECbhw4QLeffddnDp1ChcvXsSBAwcA4IaTpUgkumHYRqvVYtq0aThw4ABeeeUVHD9+HHFxcTh//ny7NmpqaqDRaLqdNF1WVgYAmDNnDvh8frvb5s2bQQi5obSBo6Pjbdv18vLCP//8Azs7OyxduhReXl7w8vLCxx9/3C2dFRUVAHDL18kwDI4fP44777wTW7ZsQUhICGxtbfH888+joaHhpvu1DH919LqcnJyg1WpRU1PTZc2ffPIJVq1ahUOHDmHy5MmQSqW4//77kZGRAaD12L/88ss3HPslS5YA0M0AbNFob29/Qx8dPQboDGNbBALBLR+Xy+Vd1tSCtbV1u/9bhtL++zmnUAwNmpNEofQCXC4XUVFR+OOPP1BYWHhbQ3LixAkUFxfj1KlT7WZA3SwvpqPISFJSEhITE7Fnzx4sWLBA/3hmZma77aRSKbhcLgoLC7vwilppiUxs27btptP3/3si7mzNmgkTJmDChAnQaDS4dOkStm3bhhdeeAH29vZ48MEHu6SzJUp2u9fp7u6Or776CoBuJtrPP/+Mt956C0qlEjt27Ohwn5aTfElJyQ3PFRcXg8PhwMrKqkt6AcDMzAzr1q3DunXrUFZWpo8q3XvvvUhNTdUf+zVr1mDWrFkdtuHr66vX2GJg2lJaWtplXbeiK5ooFGOHRpIolF5izZo1II
TgqaeeglKpvOF5lUqFI0eOAGg1Ef9NTt25c2en++tsG6amppg0aRL2799/wxV+W252dT9+/HhYWloiOTkZo0eP7vD
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"categories= [\"share_known_gender\",\"share_of_women\",\"country_fr\"]\n",
"radar_chart(values=X_test_segment_mp.loc[0,categories].values.tolist(), categories= categories,\n",
" segment = \"1\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}