2024-03-26 12:20:03 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "c488134e-680f-44e4-8c43-40c246140519",
"metadata": {},
"source": [
"# Analysis of segments and associated marketing personae"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 99,
2024-03-26 12:20:03 +01:00
"id": "9a8b8c3a-8e74-49f3-91d1-cccfc057fdcd",
"metadata": {},
"outputs": [],
"source": [
"# importations\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import io\n",
"import s3fs\n",
"import re\n",
"import pickle\n",
2024-03-26 16:00:39 +01:00
"import warnings\n",
"import matplotlib.pyplot as plt"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 100,
2024-03-26 12:20:03 +01:00
"id": "d553c868-695f-4d57-96d6-d5c6629cefb2",
"metadata": {},
"outputs": [],
"source": [
"def load_model(type_of_activity, model):\n",
"    \"\"\"Fetch a pickled model from the project's S3 bucket and unpickle it.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    type_of_activity : str\n",
"        Name of the sub-folder of `Output_model` to read from.\n",
"    model : str\n",
"        Model name; used both as folder name and as stem of the `.pkl` file.\n",
"\n",
"    Returns\n",
"    -------\n",
"    object\n",
"        Whatever object the pickle file contains.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Uses the notebook-level `fs` filesystem object, which must exist when the\n",
"    function is called. Unpickling can execute arbitrary code, so only files\n",
"    from a trusted bucket should be loaded this way.\n",
"    \"\"\"\n",
"    model_dir = f\"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/\"\n",
"    pickle_path = model_dir + model + '.pkl'\n",
"    with fs.open(pickle_path, mode=\"rb\") as pickle_file:\n",
"        return pickle.loads(pickle_file.read())\n",
"\n",
"\n",
"def load_test_file(type_of_activity):\n",
"    \"\"\"Read the generalization test set of one activity type from S3.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    type_of_activity : str\n",
"        Name of the sub-folder of `Generalization` that holds `Test_set.csv`.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Content of the comma-separated file.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Uses the notebook-level `fs` filesystem object.\n",
"    \"\"\"\n",
"    test_set_path = f\"projet-bdc2324-team1/Generalization/{type_of_activity}/Test_set.csv\"\n",
"    with fs.open(test_set_path, mode=\"rb\") as test_file:\n",
"        return pd.read_csv(test_file, sep=\",\")"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 101,
2024-03-26 12:20:03 +01:00
"id": "3af80fea-a937-4ea8-bece-cfeaa89d1055",
"metadata": {},
"outputs": [],
"source": [
"# Silence every warning for the rest of the session (pandas warnings included)\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"# S3 filesystem handle; the endpoint host comes from the AWS_S3_ENDPOINT environment variable\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))\n",
"\n",
"# type of companies for which the pipeline is run\n",
"type_of_activity = \"sport\""
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 102,
2024-03-26 12:20:03 +01:00
"id": "cc6af7fa-33b2-4d58-ada4-e2ee7262bab9",
"metadata": {},
"outputs": [],
"source": [
"# test set of the selected activity type\n",
"dataset_test = load_test_file(type_of_activity=type_of_activity)\n",
"\n",
"# pickled benchmark model stored for the same activity type\n",
"model = load_model(type_of_activity=type_of_activity, model=\"LogisticRegression_Benchmark\")"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 103,
2024-03-27 15:59:33 +01:00
"id": "8238ee71-47ec-4621-9813-4b5d2fd03efd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5_476624</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>100.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.177187</td>\n",
" <td>5.177187</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5_183496</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>55.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>426.265613</td>\n",
" <td>426.265613</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5_248456</td>\n",
" <td>17.0</td>\n",
" <td>1.0</td>\n",
" <td>80.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>436.033437</td>\n",
" <td>436.033437</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5_474758</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>120.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.196412</td>\n",
" <td>5.196412</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5_192421</td>\n",
" <td>34.0</td>\n",
" <td>2.0</td>\n",
" <td>416.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>478.693148</td>\n",
" <td>115.631470</td>\n",
" <td>363.061678</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96091</th>\n",
" <td>9_761001</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>67.31</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>278.442257</td>\n",
" <td>278.442257</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>ch</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>15.0</td>\n",
" <td>5.0</td>\n",
" <td>0 days 00:35:22.600000</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
" <td>9_951910</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>61.41</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>189.207373</td>\n",
" <td>189.207373</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>ch</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>12.0</td>\n",
" <td>9.0</td>\n",
" <td>0 days 10:39:13.333333333</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
" <td>9_54095</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>ch</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>29.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 13:44:43.333333333</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
" <td>9_755241</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>79.43</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>279.312905</td>\n",
" <td>279.312905</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>20.0</td>\n",
" <td>4.0</td>\n",
" <td>0 days 01:12:12.500000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
" <td>9_2712</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>31.0</td>\n",
" <td>4.0</td>\n",
" <td>2 days 01:39:12.750000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>96096 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 5_476624 4.0 1.0 100.00 1.0 \n",
"1 5_183496 1.0 1.0 55.00 1.0 \n",
"2 5_248456 17.0 1.0 80.00 1.0 \n",
"3 5_474758 4.0 1.0 120.00 1.0 \n",
"4 5_192421 34.0 2.0 416.00 1.0 \n",
"... ... ... ... ... ... \n",
"96091 9_761001 1.0 1.0 67.31 1.0 \n",
"96092 9_951910 1.0 1.0 61.41 1.0 \n",
"96093 9_54095 0.0 0.0 0.00 0.0 \n",
"96094 9_755241 1.0 1.0 79.43 1.0 \n",
"96095 9_2712 0.0 0.0 0.00 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 5.177187 5.177187 \n",
"1 0.0 426.265613 426.265613 \n",
"2 0.0 436.033437 436.033437 \n",
"3 0.0 5.196412 5.196412 \n",
"4 0.0 478.693148 115.631470 \n",
"... ... ... ... \n",
"96091 1.0 278.442257 278.442257 \n",
"96092 1.0 189.207373 189.207373 \n",
"96093 0.0 550.000000 550.000000 \n",
"96094 1.0 279.312905 279.312905 \n",
"96095 0.0 550.000000 550.000000 \n",
"\n",
" time_between_purchase nb_tickets_internet ... country gender_label \\\n",
"0 0.000000 0.0 ... fr female \n",
"1 0.000000 0.0 ... fr male \n",
"2 0.000000 0.0 ... fr female \n",
"3 0.000000 0.0 ... fr female \n",
"4 363.061678 0.0 ... fr female \n",
"... ... ... ... ... ... \n",
"96091 0.000000 1.0 ... ch male \n",
"96092 0.000000 1.0 ... ch male \n",
"96093 -1.000000 0.0 ... ch female \n",
"96094 0.000000 1.0 ... fr male \n",
"96095 -1.000000 0.0 ... NaN male \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 1 0 0 1.0 0.0 \n",
"1 0 1 0 1.0 0.0 \n",
"2 1 0 0 1.0 0.0 \n",
"3 1 0 0 1.0 0.0 \n",
"4 1 0 0 1.0 0.0 \n",
"... ... ... ... ... ... \n",
"96091 0 1 0 0.0 15.0 \n",
"96092 0 1 0 0.0 12.0 \n",
"96093 1 0 0 0.0 29.0 \n",
"96094 0 1 0 1.0 20.0 \n",
"96095 0 1 0 NaN 31.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \n",
"0 0.0 NaN 0.0 \n",
"1 0.0 NaN 1.0 \n",
"2 0.0 NaN 0.0 \n",
"3 0.0 NaN 0.0 \n",
"4 0.0 NaN 1.0 \n",
"... ... ... ... \n",
"96091 5.0 0 days 00:35:22.600000 1.0 \n",
"96092 9.0 0 days 10:39:13.333333333 0.0 \n",
"96093 3.0 0 days 13:44:43.333333333 0.0 \n",
"96094 4.0 0 days 01:12:12.500000 0.0 \n",
"96095 4.0 2 days 01:39:12.750000 0.0 \n",
"\n",
"[96096 rows x 40 columns]"
]
},
2024-03-28 14:13:13 +01:00
"execution_count": 103,
2024-03-27 15:59:33 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_test"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 104,
2024-03-26 12:20:03 +01:00
"id": "e4287c1a-eab6-4897-91d6-d21804518dc4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>country_fr</th>\n",
" <th>has_purchased</th>\n",
" <th>has_purchased_estim</th>\n",
" <th>score</th>\n",
" <th>segment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>100.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.177187</td>\n",
" <td>5.177187</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.660083</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>55.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>426.265613</td>\n",
" <td>426.265613</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.255226</td>\n",
" <td>2</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>17.0</td>\n",
" <td>1.0</td>\n",
" <td>80.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>436.033437</td>\n",
" <td>436.033437</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.227317</td>\n",
2024-03-26 12:20:03 +01:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>120.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.196412</td>\n",
" <td>5.196412</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.660104</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>34.0</td>\n",
" <td>2.0</td>\n",
" <td>416.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>478.693148</td>\n",
" <td>115.631470</td>\n",
" <td>363.061678</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.914463</td>\n",
2024-03-26 12:20:03 +01:00
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96091</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>67.31</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>278.442257</td>\n",
" <td>278.442257</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>15.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.661143</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>61.41</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>189.207373</td>\n",
" <td>189.207373</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>12.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.715921</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>29.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.123331</td>\n",
2024-03-26 12:20:03 +01:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>79.43</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>279.312905</td>\n",
" <td>279.312905</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>20.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.617434</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>31.0</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.308419</td>\n",
2024-03-26 12:20:03 +01:00
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>96096 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 4.0 1.0 100.00 1.0 \n",
"1 1.0 1.0 55.00 1.0 \n",
"2 17.0 1.0 80.00 1.0 \n",
"3 4.0 1.0 120.00 1.0 \n",
"4 34.0 2.0 416.00 1.0 \n",
"... ... ... ... ... \n",
"96091 1.0 1.0 67.31 1.0 \n",
"96092 1.0 1.0 61.41 1.0 \n",
"96093 0.0 0.0 0.00 0.0 \n",
"96094 1.0 1.0 79.43 1.0 \n",
"96095 0.0 0.0 0.00 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 5.177187 5.177187 \n",
"1 0.0 426.265613 426.265613 \n",
"2 0.0 436.033437 436.033437 \n",
"3 0.0 5.196412 5.196412 \n",
"4 0.0 478.693148 115.631470 \n",
"... ... ... ... \n",
"96091 1.0 278.442257 278.442257 \n",
"96092 1.0 189.207373 189.207373 \n",
"96093 0.0 550.000000 550.000000 \n",
"96094 1.0 279.312905 279.312905 \n",
"96095 0.0 550.000000 550.000000 \n",
"\n",
" time_between_purchase nb_tickets_internet is_email_true ... \\\n",
"0 0.000000 0.0 True ... \n",
"1 0.000000 0.0 True ... \n",
"2 0.000000 0.0 True ... \n",
"3 0.000000 0.0 True ... \n",
"4 363.061678 0.0 True ... \n",
"... ... ... ... ... \n",
"96091 0.000000 1.0 True ... \n",
"96092 0.000000 1.0 True ... \n",
"96093 -1.000000 0.0 True ... \n",
"96094 0.000000 1.0 True ... \n",
"96095 -1.000000 0.0 True ... \n",
"\n",
" gender_female gender_male gender_other nb_campaigns \\\n",
"0 1 0 0 0.0 \n",
"1 0 1 0 0.0 \n",
"2 1 0 0 0.0 \n",
"3 1 0 0 0.0 \n",
"4 1 0 0 0.0 \n",
"... ... ... ... ... \n",
"96091 0 1 0 15.0 \n",
"96092 0 1 0 12.0 \n",
"96093 1 0 0 29.0 \n",
"96094 0 1 0 20.0 \n",
"96095 0 1 0 31.0 \n",
"\n",
" nb_campaigns_opened country_fr has_purchased has_purchased_estim \\\n",
"0 0.0 1.0 0.0 1.0 \n",
"1 0.0 1.0 1.0 0.0 \n",
"2 0.0 1.0 0.0 0.0 \n",
"3 0.0 1.0 0.0 1.0 \n",
"4 0.0 1.0 1.0 1.0 \n",
"... ... ... ... ... \n",
"96091 5.0 0.0 1.0 1.0 \n",
"96092 9.0 0.0 0.0 1.0 \n",
"96093 3.0 0.0 0.0 0.0 \n",
"96094 4.0 1.0 0.0 1.0 \n",
"96095 4.0 NaN 0.0 0.0 \n",
"\n",
" score segment \n",
2024-03-28 10:27:29 +01:00
"0 0.660083 3 \n",
"1 0.255226 2 \n",
"2 0.227317 1 \n",
"3 0.660104 3 \n",
"4 0.914463 4 \n",
2024-03-26 12:20:03 +01:00
"... ... ... \n",
2024-03-28 10:27:29 +01:00
"96091 0.661143 3 \n",
"96092 0.715921 3 \n",
"96093 0.123331 1 \n",
"96094 0.617434 3 \n",
"96095 0.308419 2 \n",
2024-03-26 12:20:03 +01:00
"\n",
"[96096 rows x 21 columns]"
]
},
2024-03-28 14:13:13 +01:00
"execution_count": 104,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Processing: select the columns that are passed to the model\n",
"X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max',\n",
"                       'purchase_date_min', 'purchase_date_max', 'time_between_purchase', 'nb_tickets_internet',\n",
"                       'is_email_true', 'opt_in',  # 'is_partner',\n",
"                       'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened',\n",
"                       'country_fr']]\n",
"\n",
"y_test = dataset_test[['y_has_purchased']]\n",
"\n",
"# Independent copy: the columns added below must stay out of X_test, which is the\n",
"# frame handed to the model. A plain `X_test_segment = X_test` would only alias it,\n",
"# so the observed target and the predictions would be written into the model input.\n",
"X_test_segment = X_test.copy()\n",
"\n",
"# observed outcome\n",
"X_test_segment[\"has_purchased\"] = y_test\n",
"\n",
"# predicted class\n",
"y_pred = model.predict(X_test)\n",
"X_test_segment[\"has_purchased_estim\"] = y_pred\n",
"\n",
"# second column of predict_proba (presumably the probability of a purchase - confirm with model.classes_)\n",
"y_pred_prob = model.predict_proba(X_test)[:, 1]\n",
"X_test_segment['score'] = y_pred_prob\n",
"\n",
"# four segments by score band: '1' for score < 0.25, '2' for < 0.5, '3' for < 0.75, '4' otherwise\n",
"X_test_segment[\"segment\"] = np.where(X_test_segment['score']<0.25, '1',\n",
"                                     np.where(X_test_segment['score']<0.5, '2',\n",
"                                              np.where(X_test_segment['score']<0.75, '3', '4')))\n",
"\n",
"X_test_segment"
]
},
{
"cell_type": "markdown",
"id": "9058c3b2-8fa2-4322-a57b-395da4033eaf",
"metadata": {},
"source": [
"## 1. Business KPIs"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 105,
2024-03-26 12:20:03 +01:00
"id": "3067d919-50c9-49e9-b0a6-b676a5dbae56",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>vente_internet_max</th>\n",
" <th>nb_tickets_internet</th>\n",
" </tr>\n",
" <tr>\n",
" <th>segment</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
2024-03-28 10:27:29 +01:00
" <td>3139.0</td>\n",
" <td>1236.0</td>\n",
" <td>7.365005e+04</td>\n",
" <td>607.0</td>\n",
" <td>1416.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-03-28 10:27:29 +01:00
" <td>40943.0</td>\n",
" <td>17163.0</td>\n",
" <td>1.103652e+06</td>\n",
" <td>8835.0</td>\n",
" <td>22476.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-03-28 10:27:29 +01:00
" <td>61528.0</td>\n",
" <td>28513.0</td>\n",
" <td>2.244750e+06</td>\n",
" <td>12936.0</td>\n",
" <td>32533.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-03-28 10:27:29 +01:00
" <td>393561.0</td>\n",
" <td>102804.0</td>\n",
" <td>1.651554e+07</td>\n",
" <td>7080.0</td>\n",
" <td>113814.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount vente_internet_max \\\n",
"segment \n",
2024-03-28 10:27:29 +01:00
"1 3139.0 1236.0 7.365005e+04 607.0 \n",
"2 40943.0 17163.0 1.103652e+06 8835.0 \n",
"3 61528.0 28513.0 2.244750e+06 12936.0 \n",
"4 393561.0 102804.0 1.651554e+07 7080.0 \n",
2024-03-26 12:20:03 +01:00
"\n",
" nb_tickets_internet \n",
"segment \n",
2024-03-28 10:27:29 +01:00
"1 1416.0 \n",
"2 22476.0 \n",
"3 32533.0 \n",
"4 113814.0 "
2024-03-26 12:20:03 +01:00
]
},
2024-03-28 14:13:13 +01:00
"execution_count": 105,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# business figures: totals of the volume indicators within each segment\n",
"X_test_segment[[\"segment\", \"nb_tickets\", \"nb_purchases\", \"total_amount\", \"vente_internet_max\",\n",
"                \"nb_tickets_internet\"]].groupby(\"segment\").sum()"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 106,
2024-03-27 15:59:33 +01:00
"id": "5b1acd28-b346-45b1-8da2-b79ca7f4fa96",
"metadata": {},
"outputs": [],
"source": [
"def df_business_fig(df, segment, list_var):\n",
"    \"\"\"Compute, per segment, the percentage share of rows and of each listed variable.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Table holding the segment column and the variables to aggregate.\n",
"    segment : str\n",
"        Name of the column to group by.\n",
"    list_var : list of str\n",
"        Columns summed within each segment (must be a list: it is concatenated\n",
"        with a list below).\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        One row per segment with the segment column, a 'size' column (share of\n",
"        rows) and one column per variable, each expressed as a percentage of\n",
"        its column total.\n",
"    \"\"\"\n",
"    grouped = df.groupby(segment)\n",
"    df_business_kpi = grouped[list_var].sum().reset_index()\n",
"    # number of rows per segment, placed right after the segment column\n",
"    df_business_kpi.insert(1, \"size\", grouped.size().values)\n",
"\n",
"    share_columns = [\"size\"] + list_var\n",
"    column_totals = df_business_kpi[share_columns].sum()\n",
"    df_business_kpi[share_columns] = 100 * df_business_kpi[share_columns] / column_totals\n",
"\n",
"    return df_business_kpi"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 107,
2024-03-27 15:59:33 +01:00
"id": "bd63d787-3ef8-4f23-9069-e9b16b4a0de8",
2024-03-26 12:20:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>size</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_campaigns</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-28 10:27:29 +01:00
" <td>39.345030</td>\n",
" <td>0.628843</td>\n",
" <td>0.825563</td>\n",
" <td>0.369403</td>\n",
" <td>40.762379</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-28 10:27:29 +01:00
" <td>30.382118</td>\n",
" <td>8.202199</td>\n",
" <td>11.463705</td>\n",
" <td>5.535536</td>\n",
" <td>25.555636</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-28 10:27:29 +01:00
" <td>20.444139</td>\n",
" <td>12.326037</td>\n",
" <td>19.044725</td>\n",
" <td>11.258886</td>\n",
" <td>17.630574</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-28 10:27:29 +01:00
" <td>9.828713</td>\n",
" <td>78.842922</td>\n",
" <td>68.666008</td>\n",
" <td>82.836175</td>\n",
" <td>16.051411</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment size nb_tickets nb_purchases total_amount nb_campaigns\n",
2024-03-28 10:27:29 +01:00
"0 1 39.345030 0.628843 0.825563 0.369403 40.762379\n",
"1 2 30.382118 8.202199 11.463705 5.535536 25.555636\n",
"2 3 20.444139 12.326037 19.044725 11.258886 17.630574\n",
"3 4 9.828713 78.842922 68.666008 82.836175 16.051411"
2024-03-26 12:20:03 +01:00
]
},
2024-03-28 14:13:13 +01:00
"execution_count": 107,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-27 15:59:33 +01:00
"business_var = [\"nb_tickets\", \"nb_purchases\", \"total_amount\", \"nb_campaigns\"]\n",
"X_test_business_fig = df_business_fig(X_test_segment, \"segment\",\n",
" business_var)\n",
2024-03-26 12:20:03 +01:00
"X_test_business_fig"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 108,
2024-03-26 12:20:03 +01:00
"id": "d2f618b6-c984-4790-bd8f-29c7d01c6707",
"metadata": {},
"outputs": [],
"source": [
"def hist_segment_business_KPIs(df, segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns):\n",
"    \"\"\"Draw a stacked bar chart of each segment's share of five business KPIs.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        One row per segment; the KPI columns hold the values to stack (the\n",
"        y-axis label says percent, so presumably the output of\n",
"        `df_business_fig` - confirm with the caller).\n",
"    segment : str\n",
"        Column holding the segment labels shown in the legend.\n",
"    size, nb_tickets, nb_purchases, total_amount, nb_campaigns : str\n",
"        Columns plotted, in this order, as the five bars.\n",
"\n",
"    Notes\n",
"    -----\n",
"    A new figure is created and left open (nothing is shown or returned).\n",
"    The title reads the notebook-level variable `type_of_activity`.\n",
"    The number of stacked layers follows the number of rows of `df`, and rows\n",
"    are read by position, so a missing segment or a non-default index no\n",
"    longer raises a KeyError.\n",
"    \"\"\"\n",
"    plt.figure()\n",
"\n",
"    kpi_columns = [size, nb_tickets, nb_purchases, total_amount, nb_campaigns]\n",
"    x = [\"number of\\ncustomers\", \"number of\\ntickets\", \"number of\\npurchases\", \"total\\namount\",\n",
"         \"number of\\ncampaigns\"]\n",
"\n",
"    segment_labels = df[segment].tolist()\n",
"    kpi_values = df[kpi_columns].to_numpy(dtype=float)\n",
"\n",
"    # running top of the stack, one entry per bar\n",
"    bottom = np.zeros(len(x))\n",
"\n",
"    # one shade of blue per segment\n",
"    colors = plt.cm.Blues(np.linspace(0.1, 0.9, len(segment_labels)))\n",
"\n",
"    for i, (label, height) in enumerate(zip(segment_labels, kpi_values)):\n",
"        plt.bar(x=x, height=height, label=str(label), bottom=bottom, color=colors[i])\n",
"        bottom += height\n",
"\n",
"    # adjust the margins so that the legend fits to the right of the axes\n",
"    plt.subplots_adjust(left=0.125, right=0.8, bottom=0.1, top=0.9)\n",
"\n",
"    plt.legend(title=\"segment\", loc=\"upper right\", bbox_to_anchor=(1.2, 1))\n",
"    plt.ylabel(\"Fraction represented by the segment (%)\")\n",
"    plt.title(f\"Relative weight of each segment regarding business KPIs\\nfor {type_of_activity} companies\", size=12)"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 109,
2024-03-27 15:59:33 +01:00
"id": "14b6ae5c-d704-4f5d-9f9b-5646e29ea470",
2024-03-26 12:20:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-28 10:27:29 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAHhCAYAAAB+0voXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB6L0lEQVR4nO3deVxO6f8/8NdB3d3ti7SQFktC0chWqISswwdjpzAzjJ2xrzHZZ3z4YBjMFMY6Y5lhLDGUnSzZd9lrskfau35/+HW+btXtvrlzx7yej0cPznWW632u+9ynd9e5zjmSEEKAiIiIiPJVTN8BEBERERVlTJaIiIiI1GCyRERERKQGkyUiIiIiNZgsEREREanBZImIiIhIDSZLRERERGowWSIiIiJSg8kSERERkRofXbIUGRkJSZLknxIlSsDBwQGdOnXC1atX32mb0dHRkCQJ0dHRWq974cIFhIWF4ebNm3nmhYaGwsXF5Z1iKsreZ7/CwsIgSRIePnz41mWnTZuGzZs3v1M96jx+/BidOnVCqVKlIEkS2rRpo/M63kfu8fj777/rO5SPXmEdQ0WdJEkICwuTp3PPm/mdpwqbNt/5wqj3Y1XQeeDly5do1qwZDAwMsGLFCgD5/14sU6YMevbsiXv37r11m/R2H12ylCsiIgKHDx/G7t27MWDAAPz555+oV68enjx58kHjuHDhAiZPnpzvSWjChAnYtGnTB43nQ/hQ+1VYv+i+++47bNq0Cf/9739x+PBhzJo1S+d1UNHwb02W3tSiRQscPnwYDg4O+g7lg/nyyy9x+PBhfYehU8+ePUOTJk2wd+9e/P777+jRo4fK/Nzfi7t27cJXX32FNWvWoH79+khJSdFTxJ+OEvoO4F1VrVoVPj4+AICAgABkZ2dj0qRJ2Lx5M3r27Knn6F4pV66cvkMoFB/7fp07dw7lypVD165d9R0KFXHZ2dnIysqCQqHQdygqXr58CWNjY42Xt7W1ha2tbSFGVPSUKVMGZcqU0XcYOpOUlITg4GBcv34d27dvR2BgYJ5lXv+9GBgYiOzsbHz33XfYvHkzz3fv6aPtWXpT7gHyzz//qJQfP34cn3/+OaytrWFkZARvb2+sX7/+rds7fvw4OnXqBBcXFyiVSri4uKBz5864deuWvExkZCS++OILAK8OzNwu0MjISAB5L1d5e3ujfv36eerKzs5G6dKl0bZtW7ksIyMD4eHhqFSpEhQKBWxtbdGzZ088ePBAbdx//fUXJElCbGysXLZhwwZIkoQWLVqoLOvl5YV27drJ00II/Pjjj6hevTqUSiWsrKzQvn173LhxQ2W9/C7DPX36FL1794a1tTVMTU3RokUL3LhxI8/lgFz//PMPOnfuDAsLC9jZ2aFXr1549uyZPF+SJKSkpGD58uVyuwYEBKjd98ePH6Nfv34oXbo0DA0N4ebmhnHjxiE9PR0AcPPmTUiShN27d+PixYvydt92+XXdunWoW7cuTExMYGpqiuDgYJw6dUplGU2Ol1z37t3D119/DScnJxgaGsLR0RHt27fPc+xmZmZi3LhxcHR0hLm5ORo1aoTLly+rjRUAHjx4IG8/99jx8/PD7t27VZbbvXs3goKCYG5uDmNjY/j5+eHvv//Os70//vgDXl5eUCgUcHNzw7x58/K9xCFJEgYMGICIiAi4u7tDqVTCx8cHR44cgRACs2fPhqurK0xNTdGwYUNcu3YtT12axJRb9/nz53V6DOUeH7NmzUJ4eDhcXV2hUCiwd+9eAJqfSw4cOIC6devCyMgIpUuXxoQJE7Bs2bI8l8HWrVuHJk2awMHBAUqlEh4eHhg9enSeXoDQ0FCYmpri7NmzaNKkCczMzBAUFAQASE5OxldffQUbGxuYmpqiadOmuHLlSp6Y8rsMFxAQgKpVqyI2Nhb169eHsbEx3NzcMGPGDOTk5Kisf/78eTRp0gTGxsawtbVF//795XONpsMX7ty5g7Zt28
Lc3BwWFhbo1q1bnvNZQecLFxcXhIaGytMvX77E8OHD4erqCiMjI1hbW8PHxwdr1qyRl8nvGHVxcUHLli2xY8cOfPbZZ1AqlahUqRJ++eWXPHUmJiaiT58+KFOmDAwNDeHq6orJkycjKytLZblFixahWrVqMDU1hZmZGSpVqoSxY8dqFevb3Lp1C/Xq1cPdu3exZ8+efBOl/NSpU0devyCani/+7T7anqU3xcfHAwAqVqwol+3duxdNmzZF7dq1sXjxYlhYWGDt2rXo2LEjXr58qfLle9PNmzfh7u6OTp06wdraGgkJCVi0aBFq1qyJCxcuoGTJkmjRogWmTZuGsWPHYuHChfjss88AFNzz0rNnTwwePBhXr15FhQoV5PKoqCjcv39f7hHLyclB69atsX//fowcORK+vr64desWJk2ahICAABw/fhxKpTLfOvz9/WFgYIDdu3ejZs2aAF79AlIqlYiJiUFmZiYMDAyQlJSEc+fO4ZtvvpHX7dOnDyIjIzFo0CDMnDkTjx8/xpQpU+Dr64vTp0/Dzs4u3zpzcnLQqlUrHD9+HGFhYfjss89w+PBhNG3atMD2bdeuHTp27IjevXvj7NmzGDNmDADIJ63Dhw+jYcOGCAwMxIQJEwAA5ubmBW4vLS0NgYGBuH79OiZPngwvLy/s378f06dPR1xcHP766y84ODjg8OHD6NevH549e4ZVq1YBACpXrlzgdqdNm4bx48ejZ8+eGD9+PDIyMjB79mzUr18fx44dk9fV5HgBXiVKNWvWRGZmJsaOHQsvLy88evQIO3fuxJMnT1TaeOzYsfDz88OyZcuQnJyMUaNGoVWrVrh48SKKFy9eYMzdu3fHyZMnMXXqVFSsWBFPnz7FyZMn8ejRI3mZX3/9FT169EDr1q2xfPlyGBgY4KeffkJwcDB27twp/zLesWMH2rZtiwYNGmDdunXIysrC999/nyexy7V161acOnUKM2bMgCRJGDVqFFq0aIGQkBDcuHEDCxYswLNnzzBs2DC0a9cOcXFx8i80TWPKpetjKNf//vc/VKxYEd9//z3Mzc1RoUIFjc8lZ86cQePGjVGxYkUsX74cxsbGWLx4MX799dc89Vy9ehXNmzfHkCFDYGJigkuXLmHmzJk4duwY9uzZo7JsRkYGPv/8c/Tp0wejR49GVlYWhBBo06YNDh06hIkTJ6JmzZo4ePAgmjVr9tZ9zJWYmIiuXbvi22+/xaRJk7Bp0yaMGTMGjo6O8iWehIQE+Pv7w8TEBIsWLUKpUqWwZs0aDBgwQON6AOA///kPOnTogL59++L8+fOYMGECLly4gKNHj8LAwECrbQ0bNgwrV65EeHg4vL29kZKSgnPnzqkc4wU5ffo0vv32W4wePRp2dnZYtmwZevfujfLly6NBgwZyu9SqVQvFihXDxIkTUa5cORw+fBjh4eG4efMmIiIiAABr165Fv379MHDgQHz//fcoVqwYrl27hgsXLugkVgC4ePEihg4dCgDYt28fPDw8NG6n3D9I1PUqanK+IADiIxMRESEAiCNHjojMzEzx/PlzsWPHDmFvby8aNGggMjMz5WUrVaokvL29VcqEEKJly5bCwcFBZGdnCyGE2Lt3rwAg9u7dW2C9WVlZ4sWLF8LExETMmzdPLv/tt98KXDckJEQ4OzvL0w8fPhSGhoZi7NixKst16NBB2NnZyXGuWbNGABAbNmxQWS42NlYAED/++KPaNqpXr55o2LChPF2+fHkxYsQIUaxYMRETEyOEEGLVqlUCgLhy5YoQQojDhw8LAOKHH35Q2dadO3eEUqkUI0eOLHC//vrrLwFALFq0SGXd6dOnCwBi0qRJctmkSZMEADFr1iyVZfv16yeMjIxETk6OXGZiYiJCQkLU7muuxYsXCwBi/fr1KuUzZ84UAERUVJRc5u/vL6pUqfLWbd6+fVuUKFFCDBw4UKX8+fPnwt7eXnTo0KHAdQs6Xnr16iUMDAzEhQsXClw393hs3r
y5Svn69esFAHH48GG1cZuamoohQ4YUOD8lJUVYW1uLVq1aqZRnZ2eLatWqiVq1asllNWvWFE5OTiI9PV0ue/78ubC
2024-03-26 12:20:03 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-27 15:59:33 +01:00
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", *business_var)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-26 16:00:39 +01:00
"execution_count": 13,
"id": "f358fba3-f778-4414-bf55-c830be647ddd",
2024-03-26 12:20:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-03-26 16:00:39 +01:00
"'projet-bdc2324-team1/Output_marketing_personae_analysis/sport/segments_business_KPIs_sport.csv'"
2024-03-26 12:20:03 +01:00
]
},
2024-03-26 16:00:39 +01:00
"execution_count": 13,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-26 16:00:39 +01:00
"source": [
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"segments_business_KPIs_\" + activity\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".csv\"\n",
"\n",
"FILE_PATH_OUT_S3"
]
},
2024-03-27 15:59:33 +01:00
{
"cell_type": "code",
2024-03-28 10:27:29 +01:00
"execution_count": 24,
2024-03-27 15:59:33 +01:00
"id": "3eee7b59-f658-402d-95b2-fa051188fd10",
"metadata": {},
"outputs": [],
"source": [
"def save_file_s3_mp(File_name, type_of_activity):\n",
" image_buffer = io.BytesIO()\n",
" plt.savefig(image_buffer, format='png')\n",
" image_buffer.seek(0)\n",
" PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{type_of_activity}/\"\n",
" FILE_PATH_OUT_S3 = PATH + File_name + type_of_activity + '.png'\n",
" with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
" plt.close()"
]
},
2024-03-26 16:00:39 +01:00
{
"cell_type": "code",
2024-03-28 10:27:29 +01:00
"execution_count": 94,
2024-03-26 16:00:39 +01:00
"id": "1790cb81-3304-41f1-a371-d8c926d32906",
"metadata": {},
"outputs": [],
2024-03-26 12:20:03 +01:00
"source": [
"# save to Minio\n",
"\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
2024-03-28 10:27:29 +01:00
"file_name = \"segments_business_KPI_\" + activity\n",
"# file_name = \"segments_business_KPIs_\" + activity\n",
2024-03-26 16:00:39 +01:00
"FILE_PATH_OUT_S3 = PATH + file_name + \".png\"\n",
2024-03-26 12:20:03 +01:00
"\n",
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")\n",
"\n",
"image_buffer = io.BytesIO()\n",
2024-03-28 10:27:29 +01:00
"plt.savefig(image_buffer, format='png', dpi=110)\n",
2024-03-26 12:20:03 +01:00
"image_buffer.seek(0)\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
2024-03-28 10:27:29 +01:00
"plt.close()"
2024-03-27 15:59:33 +01:00
]
},
{
"cell_type": "code",
2024-03-28 10:27:29 +01:00
"execution_count": 91,
"id": "cbf2cc62-1144-48c6-90d8-e12c8e510e02",
2024-03-27 15:59:33 +01:00
"metadata": {},
"outputs": [
{
"data": {
2024-03-28 10:27:29 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAHhCAYAAAB+0voXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB6L0lEQVR4nO3deVxO6f8/8NdB3d3ti7SQFktC0chWqISswwdjpzAzjJ2xrzHZZ3z4YBjMFMY6Y5lhLDGUnSzZd9lrskfau35/+HW+btXtvrlzx7yej0cPznWW632u+9ynd9e5zjmSEEKAiIiIiPJVTN8BEBERERVlTJaIiIiI1GCyRERERKQGkyUiIiIiNZgsEREREanBZImIiIhIDSZLRERERGowWSIiIiJSg8kSERERkRofXbIUGRkJSZLknxIlSsDBwQGdOnXC1atX32mb0dHRkCQJ0dHRWq974cIFhIWF4ebNm3nmhYaGwsXF5Z1iKsreZ7/CwsIgSRIePnz41mWnTZuGzZs3v1M96jx+/BidOnVCqVKlIEkS2rRpo/M63kfu8fj777/rO5SPXmEdQ0WdJEkICwuTp3PPm/mdpwqbNt/5wqj3Y1XQeeDly5do1qwZDAwMsGLFCgD5/14sU6YMevbsiXv37r11m/R2H12ylCsiIgKHDx/G7t27MWDAAPz555+oV68enjx58kHjuHDhAiZPnpzvSWjChAnYtGnTB43nQ/hQ+1VYv+i+++47bNq0Cf/9739x+PBhzJo1S+d1UNHwb02W3tSiRQscPnwYDg4O+g7lg/nyyy9x+PBhfYehU8+ePUOTJk2wd+9e/P777+jRo4fK/Nzfi7t27cJXX32FNWvWoH79+khJSdFTxJ+OEvoO4F1VrVoVPj4+AICAgABkZ2dj0qRJ2Lx5M3r27Knn6F4pV66cvkMoFB/7fp07dw7lypVD165d9R0KFXHZ2dnIysqCQqHQdygqXr58CWNjY42Xt7W1ha2tbSFGVPSUKVMGZcqU0XcYOpOUlITg4GBcv34d27dvR2BgYJ5lXv+9GBgYiOzsbHz33XfYvHkzz3fv6aPtWXpT7gHyzz//qJQfP34cn3/+OaytrWFkZARvb2+sX7/+rds7fvw4OnXqBBcXFyiVSri4uKBz5864deuWvExkZCS++OILAK8OzNwu0MjISAB5L1d5e3ujfv36eerKzs5G6dKl0bZtW7ksIyMD4eHhqFSpEhQKBWxtbdGzZ088ePBAbdx//fUXJElCbGysXLZhwwZIkoQWLVqoLOvl5YV27drJ00II/Pjjj6hevTqUSiWsrKzQvn173LhxQ2W9/C7DPX36FL1794a1tTVMTU3RokUL3LhxI8/lgFz//PMPOnfuDAsLC9jZ2aFXr1549uyZPF+SJKSkpGD58uVyuwYEBKjd98ePH6Nfv34oXbo0DA0N4ebmhnHjxiE9PR0AcPPmTUiShN27d+PixYvydt92+XXdunWoW7cuTExMYGpqiuDgYJw6dUplGU2Ol1z37t3D119/DScnJxgaGsLR0RHt27fPc+xmZmZi3LhxcHR0hLm5ORo1aoTLly+rjRUAHjx4IG8/99jx8/PD7t27VZbbvXs3goKCYG5uDmNjY/j5+eHvv//Os70//vgDXl5eUCgUcHNzw7x58/K9xCFJEgYMGICIiAi4u7tDqVTCx8cHR44cgRACs2fPhqurK0xNTdGwYUNcu3YtT12axJRb9/nz53V6DOUeH7NmzUJ4eDhcXV2hUCiwd+9eAJqfSw4cOIC6devCyMgIpUuXxoQJE7Bs2bI8l8HWrVuHJk2awMHBAUqlEh4eHhg9enSeXoDQ0FCYmpri7NmzaNKkCczMzBAUFAQASE5OxldffQUbGxuYmpqiadOmuHLlSp6Y8rsMFxAQgKpVqyI2Nhb169eHsbEx3NzcMGPGDOTk5Kisf/78eTRp0gTGxsawtbVF//795XONpsMX7ty5g7Zt28
Lc3BwWFhbo1q1bnvNZQecLFxcXhIaGytMvX77E8OHD4erqCiMjI1hbW8PHxwdr1qyRl8nvGHVxcUHLli2xY8cOfPbZZ1AqlahUqRJ++eWXPHUmJiaiT58+KFOmDAwNDeHq6orJkycjKytLZblFixahWrVqMDU1hZmZGSpVqoSxY8dqFevb3Lp1C/Xq1cPdu3exZ8+efBOl/NSpU0devyCani/+7T7anqU3xcfHAwAqVqwol+3duxdNmzZF7dq1sXjxYlhYWGDt2rXo2LEjXr58qfLle9PNmzfh7u6OTp06wdraGgkJCVi0aBFq1qyJCxcuoGTJkmjRogWmTZuGsWPHYuHChfjss88AFNzz0rNnTwwePBhXr15FhQoV5PKoqCjcv39f7hHLyclB69atsX//fowcORK+vr64desWJk2ahICAABw/fhxKpTLfOvz9/WFgYIDdu3ejZs2aAF79AlIqlYiJiUFmZiYMDAyQlJSEc+fO4ZtvvpHX7dOnDyIjIzFo0CDMnDkTjx8/xpQpU+Dr64vTp0/Dzs4u3zpzcnLQqlUrHD9+HGFhYfjss89w+PBhNG3atMD2bdeuHTp27IjevXvj7NmzGDNmDADIJ63Dhw+jYcOGCAwMxIQJEwAA5ubmBW4vLS0NgYGBuH79OiZPngwvLy/s378f06dPR1xcHP766y84ODjg8OHD6NevH549e4ZVq1YBACpXrlzgdqdNm4bx48ejZ8+eGD9+PDIyMjB79mzUr18fx44dk9fV5HgBXiVKNWvWRGZmJsaOHQsvLy88evQIO3fuxJMnT1TaeOzYsfDz88OyZcuQnJyMUaNGoVWrVrh48SKKFy9eYMzdu3fHyZMnMXXqVFSsWBFPnz7FyZMn8ejRI3mZX3/9FT169EDr1q2xfPlyGBgY4KeffkJwcDB27twp/zLesWMH2rZtiwYNGmDdunXIysrC999/nyexy7V161acOnUKM2bMgCRJGDVqFFq0aIGQkBDcuHEDCxYswLNnzzBs2DC0a9cOcXFx8i80TWPKpetjKNf//vc/VKxYEd9//z3Mzc1RoUIFjc8lZ86cQePGjVGxYkUsX74cxsbGWLx4MX799dc89Vy9ehXNmzfHkCFDYGJigkuXLmHmzJk4duwY9uzZo7JsRkYGPv/8c/Tp0wejR49GVlYWhBBo06YNDh06hIkTJ6JmzZo4ePAgmjVr9tZ9zJWYmIiuXbvi22+/xaRJk7Bp0yaMGTMGjo6O8iWehIQE+Pv7w8TEBIsWLUKpUqWwZs0aDBgwQON6AOA///kPOnTogL59++L8+fOYMGECLly4gKNHj8LAwECrbQ0bNgwrV65EeHg4vL29kZKSgnPnzqkc4wU5ffo0vv32W4wePRp2dnZYtmwZevfujfLly6NBgwZyu9SqVQvFihXDxIkTUa5cORw+fBjh4eG4efMmIiIiAABr165Fv379MHDgQHz//fcoVqwYrl27hgsXLugkVgC4ePEihg4dCgDYt28fPDw8NG6n3D9I1PUqanK+IADiIxMRESEAiCNHjojMzEzx/PlzsWPHDmFvby8aNGggMjMz5WUrVaokvL29VcqEEKJly5bCwcFBZGdnCyGE2Lt3rwAg9u7dW2C9WVlZ4sWLF8LExETMmzdPLv/tt98KXDckJEQ4OzvL0w8fPhSGhoZi7NixKst16NBB2NnZyXGuWbNGABAbNmxQWS42NlYAED/++KPaNqpXr55o2LChPF2+fHkxYsQIUaxYMRETEyOEEGLVqlUCgLhy5YoQQojDhw8LAOKHH35Q2dadO3eEUqkUI0eOLHC//vrrLwFALFq0SGXd6dOnCwBi0qRJctmkSZMEADFr1iyVZfv16yeMjIxETk6OXGZiYiJCQkLU7muuxYsXCwBi/fr1KuUzZ84UAERUVJRc5u/vL6pUqfLWbd6+fVuUKFFCDBw4UKX8+fPnwt7eXnTo0KHAdQs6Xnr16iUMDAzEhQsXClw393hs3r
y5Svn69esFAHH48GG1cZuamoohQ4YUOD8lJUVYW1uLVq1aqZRnZ2eLatWqiVq1asllNWvWFE5OTiI9PV0ue/78ubC
2024-03-27 15:59:33 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2024-03-28 10:27:29 +01:00
"source": [
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "7a42523d-f80f-488b-ad8f-39dd793cddd6",
"metadata": {},
"outputs": [],
2024-03-27 15:59:33 +01:00
"source": [
"# with function\n",
"\n",
2024-03-28 10:27:29 +01:00
"# activity = \"sport\"\n",
2024-03-27 15:59:33 +01:00
"\n",
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")\n",
"\n",
2024-03-28 10:27:29 +01:00
"save_file_s3_mp(File_name = \"segments_business_KPIs_\", type_of_activity = type_of_activity)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "markdown",
"id": "53d24165-6b98-4b66-9ad8-7514564689d8",
"metadata": {},
"source": [
"## 2. Spider plot summarizing sociodemographic characteristics and purchasing behaviour"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 110,
2024-03-27 15:59:33 +01:00
"id": "beb31e4b-a01b-4312-879a-fe5757ea061f",
"metadata": {},
"outputs": [],
"source": [
"def df_segment_mp(df, segment, gender_female, gender_male, gender_other, country_fr) :\n",
" df_mp = df.groupby(segment)[[gender_female, gender_male, gender_other, country_fr]].mean().reset_index()\n",
" df_mp.insert(3, \"share_known_gender\", X_test_segment_mp[gender_female]+X_test_segment_mp[gender_male])\n",
" df_mp.insert(4, \"share_of_women\", X_test_segment_mp[gender_female]/(X_test_segment_mp[\"share_known_gender\"]))\n",
" return df_mp"
]
},
{
"cell_type": "code",
2024-03-28 10:27:29 +01:00
"execution_count": 45,
2024-03-26 12:20:03 +01:00
"id": "267ebaee-eaef-4720-8ca9-e40c0cf125df",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>share_known_gender</th>\n",
" <th>share_of_women</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.299162</td>\n",
" <td>0.223968</td>\n",
" <td>0.523129</td>\n",
" <td>0.571869</td>\n",
" <td>0.476871</td>\n",
" <td>0.339959</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.156289</td>\n",
" <td>0.699103</td>\n",
" <td>0.855391</td>\n",
" <td>0.182710</td>\n",
" <td>0.144609</td>\n",
" <td>0.805862</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.291764</td>\n",
" <td>0.611320</td>\n",
" <td>0.903085</td>\n",
" <td>0.323075</td>\n",
" <td>0.096915</td>\n",
" <td>0.701258</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.179989</td>\n",
" <td>0.684383</td>\n",
" <td>0.864373</td>\n",
" <td>0.208231</td>\n",
" <td>0.135627</td>\n",
" <td>0.638972</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment gender_female gender_male share_known_gender share_of_women \\\n",
2024-03-28 10:27:29 +01:00
"0 1 0.299162 0.223968 0.523129 0.571869 \n",
"1 2 0.156289 0.699103 0.855391 0.182710 \n",
"2 3 0.291764 0.611320 0.903085 0.323075 \n",
"3 4 0.179989 0.684383 0.864373 0.208231 \n",
2024-03-26 12:20:03 +01:00
"\n",
" gender_other country_fr \n",
2024-03-28 10:27:29 +01:00
"0 0.476871 0.339959 \n",
"1 0.144609 0.805862 \n",
"2 0.096915 0.701258 \n",
"3 0.135627 0.638972 "
2024-03-26 12:20:03 +01:00
]
},
2024-03-28 10:27:29 +01:00
"execution_count": 45,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# description of marketing personae\n",
"\n",
"X_test_segment_mp = X_test_segment.groupby(\"segment\")[['gender_female', 'gender_male', 'gender_other', 'country_fr']].mean().reset_index()\n",
"X_test_segment_mp.insert(3, \"share_known_gender\", X_test_segment_mp[\"gender_female\"]+X_test_segment_mp[\"gender_male\"])\n",
"X_test_segment_mp.insert(4, \"share_of_women\", X_test_segment_mp[\"gender_female\"]/(X_test_segment_mp[\"share_known_gender\"]))\n",
"X_test_segment_mp"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 111,
2024-03-27 15:59:33 +01:00
"id": "5f908232-b0fe-4707-a8c5-5cadb7d8653f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>share_known_gender</th>\n",
" <th>share_of_women</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.299162</td>\n",
" <td>0.223968</td>\n",
" <td>0.523129</td>\n",
" <td>0.571869</td>\n",
" <td>0.476871</td>\n",
" <td>0.339959</td>\n",
2024-03-27 15:59:33 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.156289</td>\n",
" <td>0.699103</td>\n",
" <td>0.855391</td>\n",
" <td>0.182710</td>\n",
" <td>0.144609</td>\n",
" <td>0.805862</td>\n",
2024-03-27 15:59:33 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.291764</td>\n",
" <td>0.611320</td>\n",
" <td>0.903085</td>\n",
" <td>0.323075</td>\n",
" <td>0.096915</td>\n",
" <td>0.701258</td>\n",
2024-03-27 15:59:33 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.179989</td>\n",
" <td>0.684383</td>\n",
" <td>0.864373</td>\n",
" <td>0.208231</td>\n",
" <td>0.135627</td>\n",
" <td>0.638972</td>\n",
2024-03-27 15:59:33 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment gender_female gender_male share_known_gender share_of_women \\\n",
2024-03-28 10:27:29 +01:00
"0 1 0.299162 0.223968 0.523129 0.571869 \n",
"1 2 0.156289 0.699103 0.855391 0.182710 \n",
"2 3 0.291764 0.611320 0.903085 0.323075 \n",
"3 4 0.179989 0.684383 0.864373 0.208231 \n",
2024-03-27 15:59:33 +01:00
"\n",
" gender_other country_fr \n",
2024-03-28 10:27:29 +01:00
"0 0.476871 0.339959 \n",
"1 0.144609 0.805862 \n",
"2 0.096915 0.701258 \n",
"3 0.135627 0.638972 "
2024-03-27 15:59:33 +01:00
]
},
2024-03-28 14:13:13 +01:00
"execution_count": 111,
2024-03-27 15:59:33 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_mp = df_segment_mp(X_test_segment, \"segment\", \"gender_female\", \n",
" \"gender_male\", \"gender_other\", \"country_fr\")\n",
"X_test_segment_mp"
]
},
{
"cell_type": "code",
"execution_count": 150,
2024-03-26 12:20:03 +01:00
"id": "910876fe-e6df-4f8d-9978-5d6fdd893ac0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-27 15:59:33 +01:00
" <td>0.247851</td>\n",
" <td>0.136462</td>\n",
" <td>0.595110</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-27 15:59:33 +01:00
" <td>0.630889</td>\n",
" <td>0.233739</td>\n",
" <td>0.411281</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-27 15:59:33 +01:00
" <td>0.679076</td>\n",
" <td>0.297721</td>\n",
" <td>0.045523</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-27 15:59:33 +01:00
" <td>0.588024</td>\n",
" <td>0.366443</td>\n",
" <td>0.134395</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment share_tickets_internet share_campaigns_opened opt_in\n",
2024-03-27 15:59:33 +01:00
"0 1 0.247851 0.136462 0.595110\n",
"1 2 0.630889 0.233739 0.411281\n",
"2 3 0.679076 0.297721 0.045523\n",
"3 4 0.588024 0.366443 0.134395"
2024-03-26 12:20:03 +01:00
]
},
2024-03-27 15:59:33 +01:00
"execution_count": 150,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# purchasing behaviour\n",
"\n",
"X_test_segment[\"share_tickets_internet\"] = X_test_segment[\"nb_tickets_internet\"]/X_test_segment[\"nb_tickets\"]\n",
"X_test_segment[\"share_campaigns_opened\"] = X_test_segment[\"nb_campaigns_opened\"]/X_test_segment[\"nb_campaigns\"]\n",
"X_test_segment_pb = X_test_segment.groupby(\"segment\")[[\"share_tickets_internet\", \"share_campaigns_opened\", \"opt_in\"]].mean().reset_index()\n",
"X_test_segment_pb"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 213,
2024-03-27 15:59:33 +01:00
"id": "8d3ab073-040c-4480-bd44-33fc88626707",
"metadata": {},
"outputs": [],
"source": [
2024-03-28 14:13:13 +01:00
"def df_segment_pb (df, segment, nb_tickets_internet, nb_tickets, nb_campaigns_opened, nb_campaigns, opt_in,\n",
" time_to_open) :\n",
2024-03-27 15:59:33 +01:00
" df_used = df\n",
" df_used[\"share_tickets_internet\"] = df_used[nb_tickets_internet]/df_used[nb_tickets]\n",
" df_used[\"share_campaigns_opened\"] = df_used[nb_campaigns_opened]/df_used[nb_campaigns]\n",
2024-03-28 14:13:13 +01:00
" df_pb = df_used.groupby(segment)[[\"share_tickets_internet\", \"share_campaigns_opened\", \n",
" opt_in, time_to_open]].mean().reset_index()\n",
" df_pb[\"time_to_open_med\"] = df_used.groupby(segment)[[time_to_open]].apply(lambda x: x.dropna().median()).values\n",
2024-03-27 15:59:33 +01:00
" return df_pb"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 185,
"id": "33a11ddf-b410-4cf1-9e6b-645de6dad604",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Durée totale en heures : 49.65333333333333\n"
]
}
],
"source": [
"# add : variable time to open\n",
"\n",
"from datetime import timedelta\n",
"\n",
"def str_duration_to_hours(duration_str):\n",
" parts = duration_str.split()\n",
" days = int(parts[0]) if len(parts) > 1 else 0\n",
" time_parts = parts[-1].split(':')\n",
" hours = int(time_parts[0])\n",
" minutes = int(time_parts[1])\n",
" seconds = int(time_parts[2].split('.')[0])\n",
" total_hours = days * 24 + hours + minutes / 60 + seconds / 3600\n",
" return total_hours\n",
"\n",
"# Exemple d'utilisation :\n",
"duration_str = '2 days 01:39:12.750000'\n",
"\n",
"hours = str_duration_to_hours(duration_str)\n",
"print(\"Durée totale en heures :\", hours)\n"
]
},
{
"cell_type": "code",
"execution_count": 196,
"id": "4760743c-1032-452a-85fa-63d1447a742c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"segment\n",
"1 6.418056\n",
"2 8.031389\n",
"3 13.037500\n",
"4 15.197500\n",
"Name: time_to_open, dtype: float64"
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# def of the variable time_to_open\n",
"\n",
"X_test_segment[\"time_to_open\"] = dataset_test[\"time_to_open\"].apply(lambda x : np.nan if pd.isna(x) else str_duration_to_hours(x))\n",
"X_test_segment.groupby(\"segment\")[\"time_to_open\"].median()"
]
},
{
"cell_type": "code",
"execution_count": 214,
2024-03-27 15:59:33 +01:00
"id": "0cb8f47a-bf0f-4285-b2ff-d90de394c787",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
2024-03-28 14:13:13 +01:00
" <th>time_to_open</th>\n",
" <th>time_to_open_med</th>\n",
2024-03-27 15:59:33 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.527270</td>\n",
" <td>0.136565</td>\n",
" <td>0.730064</td>\n",
2024-03-28 14:13:13 +01:00
" <td>56.785498</td>\n",
" <td>6.418056</td>\n",
2024-03-27 15:59:33 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.629648</td>\n",
" <td>0.194240</td>\n",
" <td>0.275860</td>\n",
2024-03-28 14:13:13 +01:00
" <td>56.349272</td>\n",
" <td>8.031389</td>\n",
2024-03-27 15:59:33 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.654488</td>\n",
" <td>0.292206</td>\n",
" <td>0.054260</td>\n",
2024-03-28 14:13:13 +01:00
" <td>57.847390</td>\n",
" <td>13.037500</td>\n",
2024-03-27 15:59:33 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.606618</td>\n",
" <td>0.370733</td>\n",
" <td>0.127051</td>\n",
2024-03-28 14:13:13 +01:00
" <td>57.567684</td>\n",
" <td>15.197500</td>\n",
2024-03-27 15:59:33 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2024-03-28 14:13:13 +01:00
" segment share_tickets_internet share_campaigns_opened opt_in \\\n",
"0 1 0.527270 0.136565 0.730064 \n",
"1 2 0.629648 0.194240 0.275860 \n",
"2 3 0.654488 0.292206 0.054260 \n",
"3 4 0.606618 0.370733 0.127051 \n",
"\n",
" time_to_open time_to_open_med \n",
"0 56.785498 6.418056 \n",
"1 56.349272 8.031389 \n",
"2 57.847390 13.037500 \n",
"3 57.567684 15.197500 "
2024-03-27 15:59:33 +01:00
]
},
2024-03-28 14:13:13 +01:00
"execution_count": 214,
2024-03-27 15:59:33 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_pb = df_segment_pb(X_test_segment, \"segment\", \"nb_tickets_internet\", \"nb_tickets\", \n",
2024-03-28 14:13:13 +01:00
" \"nb_campaigns_opened\", \"nb_campaigns\", \"opt_in\", \"time_to_open\")\n",
2024-03-27 15:59:33 +01:00
"X_test_segment_pb"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 215,
2024-03-26 12:20:03 +01:00
"id": "ba2884e3-004a-4554-ab82-6d477dcc4869",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
2024-03-28 14:13:13 +01:00
" <th>time_to_open_med</th>\n",
2024-03-26 12:20:03 +01:00
" <th>share_known_gender</th>\n",
" <th>share_of_women</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.527270</td>\n",
" <td>0.136565</td>\n",
" <td>0.730064</td>\n",
2024-03-28 14:13:13 +01:00
" <td>6.418056</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.523129</td>\n",
" <td>0.571869</td>\n",
" <td>0.339959</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.629648</td>\n",
" <td>0.194240</td>\n",
" <td>0.275860</td>\n",
2024-03-28 14:13:13 +01:00
" <td>8.031389</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.855391</td>\n",
" <td>0.182710</td>\n",
" <td>0.805862</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.654488</td>\n",
" <td>0.292206</td>\n",
" <td>0.054260</td>\n",
2024-03-28 14:13:13 +01:00
" <td>13.037500</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.903085</td>\n",
" <td>0.323075</td>\n",
" <td>0.701258</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.606618</td>\n",
" <td>0.370733</td>\n",
" <td>0.127051</td>\n",
2024-03-28 14:13:13 +01:00
" <td>15.197500</td>\n",
2024-03-28 10:27:29 +01:00
" <td>0.864373</td>\n",
" <td>0.208231</td>\n",
" <td>0.638972</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment share_tickets_internet share_campaigns_opened opt_in \\\n",
2024-03-28 10:27:29 +01:00
"0 1 0.527270 0.136565 0.730064 \n",
"1 2 0.629648 0.194240 0.275860 \n",
"2 3 0.654488 0.292206 0.054260 \n",
"3 4 0.606618 0.370733 0.127051 \n",
2024-03-26 12:20:03 +01:00
"\n",
2024-03-28 14:13:13 +01:00
" time_to_open_med share_known_gender share_of_women country_fr \n",
"0 6.418056 0.523129 0.571869 0.339959 \n",
"1 8.031389 0.855391 0.182710 0.805862 \n",
"2 13.037500 0.903085 0.323075 0.701258 \n",
"3 15.197500 0.864373 0.208231 0.638972 "
2024-03-26 12:20:03 +01:00
]
},
2024-03-28 14:13:13 +01:00
"execution_count": 215,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-03-28 14:13:13 +01:00
"X_test_segment_caract = pd.concat([X_test_segment_pb.drop(\"time_to_open\", axis=1), X_test_segment_mp[['share_known_gender', 'share_of_women', 'country_fr']]], axis=1)\n",
2024-03-26 12:20:03 +01:00
"X_test_segment_caract"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 216,
2024-03-26 12:20:03 +01:00
"id": "23a37e9b-bb29-4122-85cb-cc15cc344ee2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-03-28 14:13:13 +01:00
"share_tickets_internet 0.654488\n",
"share_campaigns_opened 0.370733\n",
"opt_in 0.730064\n",
"time_to_open_med 15.197500\n",
"share_known_gender 0.903085\n",
"share_of_women 0.571869\n",
"country_fr 0.805862\n",
2024-03-26 12:20:03 +01:00
"dtype: float64"
]
},
2024-03-28 14:13:13 +01:00
"execution_count": 216,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_caract.loc[:,\"share_tickets_internet\":].max()"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 230,
2024-03-26 12:20:03 +01:00
"id": "0809e2ae-3487-4b24-8f60-741c683cb9af",
"metadata": {},
"outputs": [],
"source": [
"# def d'une fonction associée - KEEP THIS !!!\n",
"\n",
"def radar_mp_plot(df, categories, index) :\n",
" categories = categories\n",
"\n",
" # true values are used to print the true value in parenthesis\n",
" tvalues = list(df.loc[index,categories]) \n",
"\n",
" max_values = df[categories].max()\n",
"\n",
" # values are true values / max among the 4 segments, allows to \n",
" # put values in relation with the values for other segments\n",
" # if the point has a maximal abscisse it means that value is maximal for the segment considered\n",
" # , event if not equal to 1\n",
" \n",
" values = list(df.loc[index,categories]/max_values)\n",
" \n",
" # values normalized are used to adjust the value around the circle\n",
" # for instance if the maximum of values is equal to 0.8, we want the point to be \n",
" # at 8/10th of the circle radius, not at the edge \n",
" values_normalized = [ max(values) * elt for elt in values]\n",
"\n",
" # Nb of categories\n",
" num_categories = len(categories)\n",
" \n",
" angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
" \n",
" # Initialize graphic\n",
" fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
" \n",
" # we have to draw first a transparent line (alpha=0) of values to adjust the radius of the circle\n",
" # which is based on max(value)\n",
" ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
" ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, linewidth=1.2)\n",
" \n",
" # fill the sector\n",
" ax.fill(angles, values_normalized, color='orange', alpha=0.4)\n",
" \n",
" # labels\n",
" ax.set_yticklabels([])\n",
" ax.set_xticks(angles)\n",
2024-03-28 14:13:13 +01:00
"\n",
" # define tick labels\n",
" values_printed = [str(round(tvalues[i],2)) if categories[i] in var_not_perc else f\"{round(100 * tvalues[i],2)}%\" for i in range(len(categories))]\n",
" # ticks = [categories[i].replace(\"_\",\" \") + f\"\\n({round(100 * tvalues[i],2)}%)\" for i in range(len(categories))]\n",
" ticks = [categories[i].replace(\"_\",\" \") + f\"\\n({values_printed[i]})\" for i in range(len(categories))]\n",
"\n",
2024-03-26 12:20:03 +01:00
" ax.set_xticklabels(ticks, color=\"black\")\n",
" \n",
" ax.spines['polar'].set_visible(False)\n",
" \n",
" plt.title(f'Characteristics of the segment {index+1}\\n')\n",
" \n",
2024-03-27 15:59:33 +01:00
" # plt.show()"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 229,
"id": "2fe80072-90d1-4e17-b8a7-ddc3e3be1b12",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['60.66%', '37.07%', '12.71%', '15.2', '20.82%', '63.9%']"
]
},
"execution_count": 229,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"var_not_perc = [\"time_to_open_med\"]\n",
"\n",
"tvalues = list(X_test_segment_caract.loc[3,categories]) \n",
"\n",
"values_printed = [str(round(tvalues[i],2)) if categories[i] in var_not_perc else f\"{round(100 * tvalues[i],2)}%\" for i in range(len(categories))]\n",
"values_printed"
]
},
{
"cell_type": "code",
"execution_count": 227,
"id": "cd3cb227-28b2-461e-a921-cff721c356e6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['share_tickets_internet',\n",
" 'share_campaigns_opened',\n",
" 'opt_in',\n",
" 'time_to_open_med',\n",
" 'share_of_women',\n",
" 'country_fr']"
]
},
"execution_count": 227,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"categories"
]
},
{
"cell_type": "code",
"execution_count": 233,
2024-03-26 12:20:03 +01:00
"id": "56cb026b-857f-42eb-baed-0ebdf5aee447",
"metadata": {},
"outputs": [
{
"data": {
2024-03-28 14:13:13 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApAAAAIICAYAAADZgN5+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hb5dk/8K8ka1nT8pL33jOOY8dxljMJJEAhQIHSsNoXAi8thZYW2qaFtqxQ1o+GQtlhjzATAnHsLDuOk9iJZ7y35a1paz+/P/JajWM7sRPb0pGfz3XpSnx0xn2OdKRbz2QRQggoiqIoiqIoaorYzg6AoiiKoiiKYhaaQFIURVEURVHTQhNIiqIoiqIoalpoAklRFEVRFEVNC00gKYqiKIqiqGmhCSRFURRFURQ1LTSBpCiKoiiKoqaFJpAURVEURVHUtNAEkqIoiqIoipoWmkBS887p06dxxx13ICIiAgKBAGKxGBkZGXjmmWcwODjoWC88PBwbN250YqSX7l//+hfefvvtWds/i8XCX/7yl2lts3v37km3CQ8Px+23337Zcc20wcFB/PSnP4Wfnx9YLBauvfbaSded7JoXFhaCxWLhs88+m71AqTE++OADvPDCC1Ne/6WXXsLixYvh4+MDPp+P0NBQ/PSnP0VVVdXsBUlRDOfh7AAoai69/vrr2Lp1K+Li4vDb3/4WiYmJsFgsOH78OF599VUUFxdj165dzg7zsv3rX/+Cj4/PrCVlxcXFCA4OntY2u3fvxiuvvDJhErlr1y5IpdIZim7mPPHEE9i1axfefPNNREVFQaFQTLrubF9zauo++OADVFZW4te//vWU1h8YGMCGDRuQlpYGLy8vNDU14amnnkJ2djZOnDiBuLi42Q2YohiIJpDUvFFcXIx7770Xa9euxZdffgk+n+94bu3atXjooYfw/fffz2lMNpsNVqt1TCyuihACo9EIoVCIxYsXz+i+FyxYMKP7mymVlZWIiorCrbfe6uxQqFn017/+dczfK1aswOLFi5GYmIj3338fjz/+uJMioyjXRauwqXnjH//4B1gsFl577bUJEzYej4err7563PLvv/8eGRkZEAqFiI+Px5tvvjnm+b6+PmzduhWJiYkQi8Xw8/PDqlWrcOjQoTHrtbS0gMVi4ZlnnsHf/vY3REREgM/no6CgAEajEQ899BDS09Mhk8mgUCiQk5ODr776alw8drsdL7/8MtLT0yEUCiGXy7F48WJ8/fXXAM5WB1dVVeHAgQNgsVhgsVgIDw93bK/VavHwww8jIiICPB4PQUFB+PWvfw2DwTDmOCwWC/fffz9effVVJCQkgM/n45133nE8d25J4vDwsGOfAoEACoUCmZmZ+PDDDwEAt99+O1555RXHtqOPlpYWR8znl9yp1Wo89NBDiIyMBJ/Ph5+fH6688krU1tY61tmxYwfS0tIgFoshkUgQHx+PRx99dNw1O9/g4CC2bt2KoKAg8Hg8REZG4rHHHoPJZBrzWu3btw81NTWOeAsLCyfc38WuOQBYLBY89thjCAwMhFQqxZo1a3DmzJlx+9q3bx9Wr14NqVQKT09P5ObmIj8//6LnZLfb8be//Q1xcXGO90VqaipefPHFMevV19fjlltugZ+fH/h8PhISEhyvzbmqqqqwbt06eHp6wtfXF/fddx++++67cddh5cqVSE5ORnFxMZYsWQKhUIjw8HC89dZbAIDvvvsOGRkZ8PT0REpKyoQ/0qYS02hTgA8//PCC13HlypX47rvv0NraOua9Nl2+vr4AAA8PWs5CUROhdwY1L9hsNuzfvx8LFy5ESEjIlLc7deoUHnroIfz+97+Hv78//vOf/+Cuu+5CdHQ0li9fDgCOdpPbtm2DUqmEXq/Hrl27sHLlSuTn52PlypVj9vnSSy8hNjYW27dvh1QqRUxMDEwmEwYHB/Hwww8jKCgIZrMZ+/btw3XXXYe33noLP//5zx3b33777d
i5cyfuuusuPP744+DxeDh58qQjGdu1axc2b94MmUyGf/3rXwDgSJiHh4exYsUKdHR04NFHH0Vqaiqqqqrw5z//GRUVFdi3b9+YL9svv/wShw4dwp///GcolUr4+flNeJ1+85vf4L333sPf/vY3LFiwAAaDAZWVlRgYGAAA/OlPf4LBYMBnn32G4uJix3YBAQET7k+n02Hp0qVoaWnBI488guzsbOj1ehw8eBDd3d2Ij4/HRx99hK1bt+J///d/sX37drDZbDQ0NKC6uvqCr6nRaEReXh4aGxvx17/+FampqTh06BCefPJJlJeX47vvvkNAQACKi4uxdetWaDQavP/++wCAxMTECfd5oWs+6tFHH0Vubi7+85//QKvV4pFHHsGmTZtQU1MDDocDANi5cyd+/vOf45prrsE777wDLpeLf//731i/fj327t2L1atXT3pezzzzDP7yl7/gj3/8I5YvXw6LxYLa2lqo1WrHOtXV1ViyZAlCQ0Px3HPPQalUYu/evXjggQfQ39+Pbdu2AQC6u7uxYsUKiEQi7NixA35+fvjwww9x//33T3hslUqFO+64A7/73e8QHByMl19+GXfeeSfa29vx2Wef4dFHH4VMJsPjjz+Oa6+9Fk1NTQgMDJxWTFO9jv/617/wy1/+Eo2NjdNujjJaI9Dc3Izf//738PPzwx133DGtfVDUvEEoah5QqVQEAPnpT3865W3CwsKIQCAgra2tjmUjIyNEoVCQ//mf/5l0O6vVSiwWC1m9ejX5yU9+4lje3NxMAJCoqChiNpsveOzRfdx1111kwYIFjuUHDx4kAMhjjz12we2TkpLIihUrxi1/8sknCZvNJqWlpWOWf/bZZwQA2b17t2MZACKTycjg4OC4/QAg27Ztc/ydnJxMrr322gvGdN9995HJPnLCwsLIli1bHH8//vjjBAD58ccfJ93f/fffT+Ry+QWPOZFXX32VACCffPLJmOVPP/00AUB++OEHx7IVK1aQpKSkKe13smteUFBAAJArr7xyzPJPPvmEACDFxcWEEEIMBgNRKBRk06ZNY9az2WwkLS2NZGVlXfD4GzduJOnp6RdcZ/369SQ4OJhoNJoxy++//34iEAgcr/Vvf/tbwmKxSFVV1bjtAZCCggLHshUrVhAA5Pjx445lAwMDhMPhEKFQSDo7Ox3Ly8vLCQDy0ksvTTumqV5HQgi56qqrSFhY2AWvxUT4fD4BQACQ2NhYUl1dPe19UNR8QauwKeoC0tPTERoa6vhbIBAgNjYWra2tY9Z79dVXkZGRAYFAAA8PD3C5XOTn56OmpmbcPq+++mpwudxxyz/99FPk5uZCLBY79vHGG2+M2ceePXsAAPfdd98lnc+3336L5ORkpKenw2q1Oh7r16+fsIp21apV8PLyuuh+s7KysGfPHvz+979HYWEhRkZGLim+UXv27EFsbCzWrFlzwWOq1WrcfPPN+Oqrr9Df3z+lfe/fvx8ikQibN28es3y0Cn0q1cWX4vzmEampqQDgeC8VFRVhcHAQW7ZsGfPa2O12XHHFFSgtLR3XzOBcWVlZOHXqFLZu3Yq9e/dCq9WOed5oNCI/Px8/+clP4OnpOeYYV155JYxGI44ePQoAOHDgAJKTk8eVuN58880THjsgIAALFy50/K1QKODn54f09HRHSSMAJCQkjDnn6cQ01et4OYqKilBcXIydO3dCIpEgLy+P9sSmqEnQBJKaF3x8fODp6Ynm5uZpbeft7T1uGZ/PH5Mg/fOf/8S9996L7OxsfP755zh69ChKS0txxRVXTJhITVRt+8UXX+DGG29EUFAQdu7cieLiYpSWluLOO++E0Wh0rNfX1wcOhwOlUjmt8xjV09OD06dPg8vljnlIJBIQQsYlYZNVMZ/vpZdewiOPPIIvv/wSeXl5UCgUuPbaa1FfX39Jcfb19V20l/dtt92GN998E62trbj++uvh5+eH7Oxs/PjjjxfcbmBgAEqlcly7OD8/P3h4eD
iq3Wfa+e+l0Sru0fdIT08PAGDz5s3jXp+nn34ahJAxw0yd7w9/+AO2b9+Oo0ePYsOGDfD29sbq1atx/PhxAGfP22q
2024-03-26 12:20:03 +01:00
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-28 14:13:13 +01:00
"# Radar axes: every column of X_test_segment_caract except `segment` and `share_known_gender`.\n",
"categories = X_test_segment_caract.drop(columns=[\"segment\", \"share_known_gender\"]).columns.tolist()\n",
"\n",
"# Plot a single segment (index=2). To plot every segment, repeat this call\n",
"# once per segment index instead.\n",
"radar_mp_plot(df=X_test_segment_caract, categories=categories, index=2)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
"execution_count": 739,
"id": "5b3c4bac-396e-4117-a7d9-f39a3d8f95b4",
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (4005960846.py, line 6)",
"output_type": "error",
"traceback": [
"\u001b[0;36m Cell \u001b[0;32mIn[739], line 6\u001b[0;36m\u001b[0m\n\u001b[0;31m file_name = \"spider_chart_\" + activity + \"_sgt_\" str(index)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"# Export the radar chart of one segment to MinIO through the s3fs filesystem `fs`.\n",
"\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"# Segment to export. It is assigned here so that the object name and the plotted\n",
"# segment always agree, instead of relying on a leftover `index` in the kernel.\n",
"index = 3\n",
"\n",
"file_name = f\"spider_chart_{activity}_sgt_{index}\"\n",
"# The payload written below is a PNG image, so the object key ends in .png\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".png\"\n",
"\n",
"radar_mp_plot(df=X_test_segment_caract, categories=categories, index=index)\n",
"\n",
"# Render the current figure into an in-memory buffer.\n",
"# NOTE(review): assumes radar_mp_plot leaves its figure open as the current figure - confirm.\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png')\n",
"image_buffer.seek(0)\n",
"\n",
"# Upload the PNG bytes, then close the figure\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
"    s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "code",
"execution_count": 740,
"id": "276de9a5-d506-4c11-a7c2-a23ebbc59fe5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'projet-bdc2324-team1/Output_marketing_personae_analysis/sport/spider_chart_sport_sgt_3.csv'"
]
},
"execution_count": 740,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rebuild the S3 object key of the single-segment radar chart and display it.\n",
"# NOTE(review): `index` is not assigned in this cell; it has to exist in the kernel already.\n",
"# NOTE(review): the key ends in .csv although the chart is saved with format='png' - confirm the intended extension.\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = f\"spider_chart_{activity}_sgt_{index}\"\n",
"FILE_PATH_OUT_S3 = f\"{PATH}{file_name}.csv\"\n",
"FILE_PATH_OUT_S3"
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 234,
2024-03-26 12:20:03 +01:00
"id": "80e47dbc-3efd-4857-8055-876b308cbcb5",
"metadata": {},
"outputs": [],
"source": [
2024-03-27 15:59:33 +01:00
"# general function to have the 4 radars in one plot\n",
"\n",
2024-03-26 12:20:03 +01:00
"def radar_mp_plot_all(df, categories) :\n",
"    \"\"\"Draw the radar (spider) chart of every segment on one shared figure.\n",
"\n",
"    One polar subplot is drawn per row of `df`, on a grid with two columns\n",
"    and as many rows as needed (2 x 2 for four segments). Unused grid cells\n",
"    are hidden.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        One row per segment. Rows are read with df.loc[i, categories] for\n",
"        i in range(df.shape[0]), so the index is expected to be 0..n-1.\n",
"    categories : list of str\n",
"        Columns of `df` used as radar axes.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Reads two notebook-level variables: `var_not_perc` (categories whose\n",
"    value is printed as a plain number instead of a percentage) and\n",
"    `type_of_activity` (inserted in the figure title).\n",
"    The figure is neither shown nor closed, so the caller can save it.\n",
"    \"\"\"\n",
"    nb_segments = df.shape[0]\n",
"    num_categories = len(categories)\n",
"\n",
"    # Grid layout: ceil(nb_segments / nb_cols) rows, at least one\n",
"    nb_cols = 2\n",
"    nb_rows = max(1, -(-nb_segments // nb_cols))\n",
"\n",
"    # squeeze=False keeps `ax` two-dimensional even when there is a single row\n",
"    fig, ax = plt.subplots(nb_rows, nb_cols, figsize=(25, 10 * nb_rows),\n",
"                           subplot_kw=dict(polar=True), squeeze=False)\n",
"\n",
"    # Loop-invariant: per-category maximum over all segments, and the axis angles\n",
"    max_values = df[categories].max()\n",
"    angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"    for index in range(nb_segments) :\n",
"        row, col = divmod(index, nb_cols)\n",
"        axis = ax[row, col]\n",
"\n",
"        # true values are printed in parentheses under each category name\n",
"        tvalues = list(df.loc[index, categories])\n",
"\n",
"        # values are true values / max among the segments: this puts each value\n",
"        # in relation with the values of the other segments. A point at the\n",
"        # largest radius means the value is maximal for the segment considered,\n",
"        # even if it is not equal to 1\n",
"        values = list(df.loc[index, categories] / max_values)\n",
"\n",
"        # normalized values adjust the points around the circle: if the maximum\n",
"        # of values is 0.8, the point must sit at 8/10th of the circle radius,\n",
"        # not at the edge\n",
"        values_normalized = [max(values) * elt for elt in values]\n",
"\n",
"        # a transparent line (alpha=0) of `values` is drawn first to set the\n",
"        # radius of the circle, which is based on max(values)\n",
"        axis.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"        axis.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, linewidth=1.2)\n",
"\n",
"        # fill the sector\n",
"        axis.fill(angles, values_normalized, color='orange', alpha=0.4, label = index)\n",
"\n",
"        # one tick per category, no radial labels\n",
"        axis.set_yticklabels([])\n",
"        axis.set_xticks(angles)\n",
"\n",
"        # printed value: plain number for var_not_perc categories, percentage otherwise\n",
"        values_printed = [\n",
"            str(round(tvalue, 2)) if category in var_not_perc else f\"{round(100 * tvalue, 2)}%\"\n",
"            for category, tvalue in zip(categories, tvalues)\n",
"        ]\n",
"        ticks = [\n",
"            category.replace(\"_\", \" \") + f\"\\n({printed})\"\n",
"            for category, printed in zip(categories, values_printed)\n",
"        ]\n",
"        axis.set_xticklabels(ticks, color=\"black\", size = 20)\n",
"\n",
"        axis.spines['polar'].set_visible(False)\n",
"        axis.set_title(f'Segment {index+1}\\n', size = 24)\n",
"\n",
"    # hide the grid cells that received no segment\n",
"    for unused_axis in ax.ravel()[nb_segments:] :\n",
"        unused_axis.set_visible(False)\n",
"\n",
"    fig.suptitle(f\"Characteristics of marketing personae of {type_of_activity} companies\", size=32)\n",
"\n",
"    # no plt.show() here: the figure stays open for the caller"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-28 14:13:13 +01:00
"execution_count": 235,
2024-03-26 12:20:03 +01:00
"id": "edf76688-1b7e-469e-873f-4884d551be66",
"metadata": {},
"outputs": [
{
"data": {
2024-03-28 14:13:13 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAACCgAAAbgCAYAAACoVvDLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVf///1caCSmEkIQOAaWLIChVpAkCglIVUZAiKNxy3+ptAxtYUD72ClgQVARvUelYqFJC70iRFnpLaCmkn98f/LLfbM1uyCYBn4/r2iuZ2TNzzrQzM2ffc8bHGGMEAAAAAAAAAAAAAADgRb5FXQAAAAAAAAAAAAAAAHD9I0ABAAAAAAAAAAAAAAB4HQEKAAAAAAAAAAAAAADA6whQAAAAAAAAAAAAAAAAXkeAAgAAAAAAAAAAAAAA8DoCFAAAAAAAAAAAAAAAgNcRoAAAAAAAAAAAAAAAALyOAAUAAAAAAAAAAAAAAOB1BCgAAAAAAAAAAAAAAACvI0ABAK5hBw4c0Pz58zVx4kT93//9n8aNG6fPPvtM33//vVauXKnk5GSP5+nj42P1Aa5ngwYNstrfp06dWtRFumpt27a1Wqbly5cXdZFg4/z583r33XfVpUsXVa5cWSEhIVbbrFq1akVdRLhh+fLlVtutbdu2RV2kIhcXF8e+DAAeysjI0LRp09S3b1/VqFFD4eHh8vX1tapP4+LiirqYAFAscL8LAMD1wb+oCwAA8MzKlSv17bffat68eTp9+rTLtL6+vrrpppvUo0cP9e/fX7Vq1SqkUgIAHPnpp580ZMgQJSYmFnVRAABAEdu5c6d69eqlffv2FXVRAAAAAKDQEKAAANeIdevW6cknn9TatWvdniY7O1s7duzQjh079Prrr6t9+/YaP368mjRp4sWS4loze/Zsbd261TLco0cP3XLLLUVWnn+qCxcu6MMPP7QMly5dWk8++WSRlQcFb9GiRbr//vtljCnqogB2li9fbvUEWtu2bekVAgC86OTJk2rfvr3Onj1b1EVBIeG+CwAAALiCAAUAKOays7M1ZswYjRs3zuWPWr6+vipdurTS09OVlJTkMM3SpUvVrFkzvfHGG3rhhRe8VWRcY2bPnq1vvvnGMlytWjUayorAhQsX9Oqrr1qGY2JiCFC4jhhj9K9//cuqHo+IiFDfvn1Vu3ZtBQUFWcaHhYUVRRHxD7d8+XKrOkgSAQoA4EUvvfSSVXBCQECAunfvriZNmqhUqVJWaSMjIwu7ePAC7rsAAACAKwhQAIBiLDMzUw899JB+/PFHu++qVaum3r176+6779bNN9+syMhI+fr6SpISExP1119/aenSpfrhhx+0Y8cOy3TGGP3999+FtgwAgCs//u7fv98yXK5cOW3evFkVK1YswlIBAICikJKSohkzZliGfXx8tHDhQnXo0KEISwUAAAAAhYMABQAoxh555BG74ISIiAi98sorevzxxxUQEOBwurCwMDVv3lzNmzfXCy+8oGXLlunFF1/UmjVrCqPYwDVj6tSpmjp1alEXo0Dl7qIdxceqVaushocOHUpwAq471apV4xUmAOCGTZs26fLly5bhli1bEpwAAG7gfhcAgOuDb1EXAADg2GeffaZvv/3WalzlypUVGxurJ5980mlwgiPt2rXT6tWrNWHCBKtuxAEAhWPPnj1Ww40aNSqikgAAgKLGdQEAAACAfzICFACgGDpy5Iief/55q3GRkZFatWqV6tSpk695+vj4aMSIEYqNjeWpXQAoZBcuXLAajoiIKJqCAACAIsd1AQAAAIB/Ml7xAADF0NixY5WcnGw17pNPPlFMTMxVz7tRo0Zq2LDhVc8HAOC+lJQUq2FfX+KEAQD4p+K6AAAAAMA/GQEKAFDMnDlzRtOmTbMa165dO/Xr16/A8riaBrDs7Gxt2LBB27dv19mzZxUUFKTo6Gg1adIk37075Jaenq69e/dq9+7dOn36tBITEx
UYGKgyZcqoatWqatasmUJDQ686H2d27dqlrVu36sSJE0pNTVV4eLg6deqkWrVquZzu6NGj2rVrl+Li4nTx4kVlZWWpTJkyioqKUpMmTVS1atUCL2t6errWr1+vI0eOKD4+XomJiQoJCVH58uVVr1491atXT/7+RX+qT0pK0po1a3TixAmdPXtWxhhFR0erevXqatGihUqUKFHgeWZmZmrjxo3666+/FB8fr8zMTEVHR6tv374KDw8vsHySkpK0fft27dmzRxcuXFBKSoqCgoIUGhqqKlWq6MYbb1StWrWu2Ubnixcvau3atTp16pTi4+OVlpamUqVKqWrVqrrpppt04403ejzP7Oxs/f3339q+fbvlGPf19VVwcLDKlSun6tWrq169el49zl05d+6c1qxZo1OnTlnVcTVq1NBtt90mPz+/fM3XGFPAJb16WVlZ2rBhg3bs2KH4+Hj5+fmpSpUquuOOO1S5cuU8p09NTVVsbKx2796tCxcuqFSpUoqJiVH79u2vevsZY3Tw4EHt3r1bR44c0aVLl+Tj46MyZcqoXLlyatasmcqVK3dVebhy4sQJrV+/XnFxcUpKSlLJkiXVpEkTtW7d2mt5eiJn2x06dEhnzpxRSkqKoqKiVLFiRd1+++0qXbp0URexwHj7ukOS0tLStGLFCh0+fFhnzpxRUFCQYmJi1KJFi2Ld69T+/fu1YcMGHTt2TFlZWapQoYJq1aqlZs2aFeh551o6j585c0bbtm3TwYMHdfHiRaWnpys4OFilS5dWTEyMatWqpSpVqlxV2dLS0rRmzRodOXJEZ8+eVXZ2tqKjo1WpUiXdfvvtCg4Ovqr5O3PhwgWtWrVK+/fvV3JysiIiIlSpUiW1adOmQI75c+fOadeuXdq3b5/Onz+vy5cvKzw8XGXKlNHNN9+s+vXry8fH5+oXJJdroS7z1vYuDtcFFy5c0NatW7Vv3z5dvHhRqampKlmypOVar0aNGvm61nOmMOosY4y2bNmivXv36uzZs0pOTlZUVJTKli2rFi1aqGzZsgWSj63U1FStW7dOe/bs0blz5yRJ5cqV04ABAzx6NWNRycrK0ubNm3XgwAHFx8fr4sWLCg4OVlRUlOrWraubb75ZgYGB+Z5/UlKSYmNjdfz4cZ09e1Z+fn4qW7asYmJi1Lx5c6+cRyQpLi5O69at09GjR5WWlqbIyEg1btxYTZo0cas+27lzpzZt2qTTp0/Lx8dH5cuXV8uWLQv0uMgRHx+v2NhYHThwQJcvX1ZUVJRiYmLUunVrlSxZ8qrnn5iYqF27dmnv3r1KSEhQSkqKwsLCVKZMGdWuXVuNGjXyWttBYVzL5depU6e0fv16nTlzRvHx8QoKClLZsmVVr149NWzY8KrPexkZGdq9e7d27txpabPx9/dXSEiIKlSoYLn35VWsAIB/JAMAKFbee+89I8nqM3PmzELL3zbvHGlpaWb8+PGmfPnydmlyPrVq1TI//PCDx3kePHjQjB8/3rRv396ULFnS6fwlGX9/f9O2bVszf/58j/Np06aN1byWLVtmjDEmIyPDfPzxx+aGG25wmOcHH3xgN6/k5GTzww8/mIceeshUrFjRZZklmZiYGPPWW2+ZCxcueFxuW7///rvp0qWLCQ4OdplnqVKlTM+ePc2sWbNMRkaG1TwGDhyYZ5mdfcaMGeNWOWfPnm3atWtnSpQo4XReoaGh5sEHHzR///23R+vA2X6akJBg/vvf/5rSpUs7zG/Lli0u18OUKVPcyn/FihWme/fuLpct5xMWFmY6depkPv/8c5OSkmI1n0OHDuV7Ozi7jHO2n7srPT3dfP7556Zp06bGz8/PZf6VK1c2jz76qFm7dm2e8z179qx55plnTIUKFfJcLl9fX9OgQQMzevRos3fvXo/Kn18zZ840LVq0ML6+vk7LVaZMGTN48GBz6NChPOeX32OsTZs2BbZMMTExVvPOKXdqaqoZN26cKVu2rNP1f++995r9+/c7nO
+ZM2fMyJEjTVhYmMPpAwMDzeOPP27Onz/vUXnPnz9vJk+ebHr16mUiIyPzXFd169Y1EyZMMJcvX/YonzFjxjit0xY
2024-03-26 12:20:03 +01:00
"text/plain": [
"<Figure size 2500x2000 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-03-28 14:13:13 +01:00
"# Radar axes: every column of X_test_segment_caract except `segment` and `share_known_gender`.\n",
"categories = X_test_segment_caract.drop(columns=[\"segment\", \"share_known_gender\"]).columns.tolist()\n",
"\n",
"# One figure holding the radar chart of every segment\n",
"radar_mp_plot_all(df=X_test_segment_caract, categories=categories)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-28 10:27:29 +01:00
"execution_count": 97,
2024-03-26 12:20:03 +01:00
"id": "c48136d1-c980-4f74-a69f-ed4304c83188",
"metadata": {},
"outputs": [],
"source": [
"# Upload the all-segments radar figure to MinIO as a PNG object.\n",
"# `activity` and `PATH` are not assigned in this cell: they are reused from the\n",
"# kernel state (e.g. from the single-segment export cell).\n",
"\n",
"file_name = f\"spider_chart_all_{activity}\"\n",
"FILE_PATH_OUT_S3 = f\"{PATH}{file_name}.png\"\n",
"\n",
"# Draw the figure; it is left open, so savefig below captures it\n",
"radar_mp_plot_all(df=X_test_segment_caract, categories=categories)\n",
"\n",
"# Serialise the current figure into an in-memory buffer and rewind it\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png', dpi=110)\n",
"image_buffer.seek(0)\n",
"\n",
"# Write the PNG bytes to the bucket, then close the figure\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
"    s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"id": "a2395680-69fe-4247-8deb-22f8ee15830b",
"metadata": {},
"source": [
"## End of the main part: the cells below are exploratory attempts"
]
},
{
"cell_type": "code",
"execution_count": 489,
"id": "7d9a2aca-d28d-43b3-9b72-5913b20c4f04",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAH3CAYAAABU/z5zAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3hb5fn+76M9LU9Z3nvHSeyEONtxwgoQCIW2QGgh9NeGQkMppS1QApQVVoEvhRJ2aMMsJRDKJsOJ7Ww7ieNtxzO2vGVrz/f3h2IRxxkess+R/H6uS1diWTrnPrKkc5/ned7nYQghBBQKhUKhUCjTBB7bAigUCoVCoVCmEmp+KBQKhUKhTCuo+aFQKBQKhTKtoOaHQqFQKBTKtIKaHwqFQqFQKNMKan4oFAqFQqFMK6j5oVAoFAqFMq2g5odCoVAoFMq0gpofCoVCoVAo0wpqfijn5dixY5DJZPjHP/7BtpQx8frrr0Mul2P//v1sS6GchsFgQHp6On7605/C5XKxLYdCoUxTqPmZBmzevBkMw3huAoEAERERuOGGG1BXV3fO5+n1elx//fVYv3491q9fP4WKJ8aRI0dwzz334MMPP0ReXt64tzP0ujU1NXlPnJ/y5JNP4rPPPrvg4379618jPDwcW7ZsAY83uq+fpqYmMAyDzZs3j0nT+++/jxdffPGsv2MYBo888siYtkehUPwHAdsCKFPHO++8g/T0dFgsFhQXF+OJJ57Azp07UV1djaCgoBGP/9WvfoV58+bhqaeeYkHt+BgcHMRPf/pTPP/881i1ahXbcqYNTz75JK6//nqsXr36nI955ZVXcOzYMRQXF0MsFo962xEREdi7dy+SkpLGpOn999/H8ePHcffdd4/43d69exEdHT2m7VEoFP+Bmp9pxIwZMzB37lwAwLJly+B0OvHwww/js88+w9q1a0c8/uOPP55qiRMmICDgvNGs6Y7dbvdE/6aaO++8E3feeeeoH+90OuFwOCAWizF//nyvavH29igUim9B017TmCEj1NnZOez+Q4cO4eqrr0ZwcDAkEglycnJGGCGTyYR7770XCQkJkEgkCA4Oxty5c/HBBx94HrNs2TIsW7ZsxH5vvfVWxMfHe34eSms8++yzePrppxEfHw+pVIply5ahtrYWdrsd9913HyIjI6FSqXDttdeiq6trxHY/+ugjLFiwAHK5HAqFApdddhnKyspG9Vrs27cPixYtgkQiQWRkJO6//37Y7fazPnY0+zlx4gRuuOEGREZGQiwWIzw8HCtWrMCRI0cuqGX//v1YtWoVQkJCIJFIkJSUNCx6UV9fj7Vr1yIlJQUymQxRUVFYtWoVysvLh21n165dYBgG//73v/HHP/4RUVFREIvFqK+vR3d3N+644w5kZmZCoVBArVZj+fLl2LNnzwg9VqsVjz76KDIyMiCRSBASEoKCggKUlJQAcKeQjEYj3n33XU9q9fS/u1arxbp16xAdHQ2RSISEhAT87W9/g8Ph8Dxm6D3wzDPP4PHHH0dCQgLEYjF27tx51rRXd3c3fvOb3yAmJgZisRhhYWFYtGgRfvjhBwDu996XX36J5ubmYSnfIc6W9jrbe+CNN94Ykfo8V8osPj4et95667D7RnPsAPDqq69i1qxZUCgUUCqVSE9PxwMPPDBiHxQKxTvQyM80prGxEQCQmprquW/nzp24/PLLkZeXh02bNkGlUuHDDz/Ez3/+c5hMJs+X+z333IN///vfePzxx5GTkwOj0Yjjx4+jt7d33HpeeeUVzJw5E6+88gp0Oh3++Mc/YtWqVcjLy4NQKMTbb7+N5uZm3Hvvvfh//+//Ydu2bZ7nPvnkk3jwwQexdu1aPPjgg7DZbHj22WexZMkSHDhwAJmZmefcb2VlJVasWIH4+Hhs3rwZMpkM//znP/H++++PeOxo93PFFVfA6XTimWeeQWxsLHp6elBSUgKdTnfe1+Dbb7/FqlWrkJGRge
effx6xsbFoamrCd99953lMe3s7QkJC8NRTTyEsLAx9fX149913kZeXh7KyMqSlpQ3b5v33348FCxZg06ZN4PF4UKvV6O7uBgA8/PDD0Gg0MBgM2Lp1K5YtW4bt27d7zIvD4cDKlSuxZ88e3H333Vi+fDkcDgf27duHlpYWLFy4EHv37sXy5ctRUFCADRs2AHBH4AD3yX/evHng8Xh46KGHkJSUhL179+Lxxx9HU1MT3nnnnWFaX3rpJaSmpuK5555DQEAAUlJSzvo6/eIXv0BpaSmeeOIJpKamQqfTobS01PP+++c//4nf/OY3aGhowNatW8/7mgNjew+MltEe+4cffog77rgD69evx3PPPQcej4f6+npUVlaOe98UCuUCEIrf88477xAAZN++fcRutxO9Xk+++eYbotFoyNKlS4ndbvc8Nj09neTk5Ay7jxBCrrrqKhIREUGcTichhJAZM2aQ1atXn3e/+fn5JD8/f8T9t9xyC4mLi/P83NjYSACQWbNmebZPCCEvvvgiAUCuvvrqYc+/++67CQAyMDBACCGkpaWFCAQCsn79+mGP0+v1RKPRkJ/97Gfn1fnzn/+cSKVSotVqPfc5HA6Snp5OAJDGxsYx7aenp4cAIC+++OJ593s2kpKSSFJSEjGbzaN+jsPhIDabjaSkpJA//OEPnvt37txJAJClS5eOaht2u52sWLGCXHvttZ77//WvfxEA5I033jjv8+VyObnllltG3L9u3TqiUChIc3PzsPufe+45AoBUVFQQQn58DyQlJRGbzTbssUO/e+eddzz3KRQKcvfdd59X05VXXjnsfXY6AMjDDz/s+Xm074GzPXeIuLi4Ya/BaI/9d7/7HQkMDDzvsVAoFO9C017TiPnz50MoFEKpVOLyyy9HUFAQPv/8c0/9R319Paqrq7FmzRoA7qv+odsVV1yBjo4O1NTUAADmzZuHr7/+Gvfddx927doFs9k8YX1XXHHFsBVAGRkZAIArr7xy2OOG7m9paQHgjpY4HA788pe/HKZZIpEgPz8fu3btOu9+d+7ciRUrViA8PNxzH5/Px89//vNhjxvtfoKDg5GUlIRnn30Wzz//PMrKyka1rLu2thYNDQ341a9+BYlEcs7HORwOPPnkk8jMzIRIJIJAIIBIJEJdXR2qqqpGPP66664763Y2bdqE3NxcSCQSCAQCCIVCbN++fdg2vv76a0gkEtx2220X1H82/ve//6GgoACRkZHDXrOVK1cCAAoLC4c9/uqrr4ZQKLzgdufNm4fNmzfj8ccfx759+86Zohwto30PjIXRHvu8efOg0+lw44034vPPP0dPT8+EjoVCoVwYan6mEf/6179w8OBB7NixA+vWrUNVVRVuvPFGz++Han/uvfdeCIXCYbc77rgDADxfzC+99BL+8pe/4LPPPkNBQQGCg4OxevXqCRUbBwcHD/tZJBKd936LxTJM90UXXTRC90cffXTBk0lvby80Gs2I+8+8b7T7YRgG27dvx2WXXYZnnnkGubm5CAsLw1133QW9Xn9OHUOpqAutQrrnnnuwYcMGrF69Gl988QX279+PgwcPYtasWWc1oRERESPue/755/Hb3/4WeXl5+O9//4t9+/bh4MGDuPzyy4dto7u7G5GRkaNeln4mnZ2d+OKLL0a8XllZWQAw4m9zNq1n46OPPsItt9yCN998EwsWLEBwcDB++ctfQqvVjkvnaN8DY2G0x/6LX/zCk9K97rrroFarkZeXh++//37c+6ZQKOeH1vxMIzIyMjxFzgUFBXA6nXjzzTfxySef4Prrr0doaCgAd43IT37yk7NuY6ieRC6X429/+xv+9re/obOz0xMFWrVqFaqrqwEAEokEAwMDI7bh7SvbId2ffPIJ4uLixvz8kJCQs540z7xvLPuJi4vDW2+9BcAd0fn444/xyCOPwGazYdOmTWd9TlhYGACgra3tvNvesmULfvnLX+LJJ58cdn9PTw8CAwNHPP70Qt/Tt7
Fs2TK8+uqrw+4/05yFhYWhqKgILpdrXAYoNDQUM2fOxBNPPHHW30dGRl5Q67m2++KLL+LFF19ES0sLtm3bhvvuuw9
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Attempt: overlay the radar polygons of the four segments on a single chart.\n",
"# NOTE(review): this cell rebinds the notebook-level `categories`.\n",
"colors = [\"blue\", \"green\", \"orange\", \"red\"]\n",
"\n",
"# Radar axes and their angles are the same for every segment\n",
"categories = ['share_known_gender', 'share_of_women', 'country_fr']\n",
"num_categories = len(categories)\n",
"angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"# Initialise the polar figure\n",
"fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"# One polygon per colour, read from rows 0..len(colors)-1 of X_test_segment_mp\n",
"for i in range(len(colors)) :\n",
"    values = list(X_test_segment_mp.loc[i,categories])\n",
"\n",
"    # each value is scaled by the largest value of the row\n",
"    values_normalized = [ max(values) * elt for elt in values]\n",
"\n",
"    # invisible outline (alpha=0) of the raw values\n",
"    ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"\n",
"    # filled polygon, labelled with the 1-based segment number\n",
"    ax.fill(angles, values_normalized, color=colors[i], alpha=0.2, label = str(i+1))\n",
"\n",
"# Axis labels and legend\n",
"ax.set_yticklabels([])\n",
"ax.set_xticks(angles)\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Chart title\n",
"plt.title('Résumé des caractéristiques')\n",
"\n",
"# Display the chart\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 301,
"id": "96aa9ff5-c1ed-49eb-8fb7-2319ac0c40be",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgoAAAITCAYAAABmGDQGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3hUZfbHPzPpvVcCSQgphE4g9G4BkWJvSFkL9l3LrrprW1ddFcUuWFBUYGWxwk8BlU7oISRAQhophPRep7+/P8ZkibSUydyZyf08zzxJptz3zM299/3ec857jkIIIZCRkZGRkZGRuQBKqQ2QkZGRkZGRsVxkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEgIyMjIyMjc1FkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEgIyMjIyMjc1FkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEg02HS0tJwdXXlvffek9qUTvHxxx/j5ubGwYMHpTZF5hwaGxuJi4vjpptuwmAwSG2OjIzMRZCFQi9j9erVKBSKtoe9vT0hISHceuutZGdnX/RzDQ0N3HjjjTz88MM8/PDDZrS4exw7dozHHnuMr7/+mjFjxnR5O637LT8/33TG2SivvPIKP/zww2Xfd8899xAUFMSaNWtQKjt2KcrPz0ehULB69epO2bRu3TrefvvtC76mUCh44YUXOrU9GZnehL3UBshIw+eff05cXBwqlYqkpCRefvllduzYwalTp/Dx8Tnv/XfddReJiYm8+uqrEljbNerr67nppptYvnw5c+bMkdqcXsMrr7zCjTfeyPz58y/6ng8++IC0tDSSkpJwcnLq8LZDQkLYv38/UVFRnbJp3bp1nDhxgr/85S/nvbZ//37CwsI6tT0Zmd6ELBR6KYMHD2bUqFEATJ06Fb1ez/PPP88PP/zAkiVLznv/f//7X3Ob2G08PT0v6SXp7Wi12javkrl58MEHefDBBzv8fr1ej06nw8nJibFjx5rUFlNvT0bG1pBDDzIAbaKhrKys3fNHjhxh7ty5+Pr64uzszIgRI84TDc3NzTzxxBNERkbi7OyMr68vo0aN4j//+U/be6ZOncrUqVPPG3fx4sVERES0/d3qWl62bBmvvfYaERERuLi4MHXqVLKystBqtTz11FOEhobi5eXFddddR3l5+XnbXb9+PePGjcPNzQ13d3euvvpqUlJSOrQvDhw4wIQJE3B2diY0NJSnn34arVZ7wfd2ZJzTp09z6623EhoaipOTE0FBQcyYMYNjx45d1paDBw8yZ84c/Pz8cHZ2Jioqqt1dcU5ODkuWLCE6OhpXV1f69OnDnDlzOH78eLvt7Ny5E4VCwVdffcXjjz9Onz59cHJyIicnh4qKCh544AHi4+Nxd3cnMDCQ6dOns2fPnvPsUavVvPjiiwwcOBBnZ2f8/PyYNm0a+/btA4xu/KamJr744ou28Na5//fS0lKWLl1KWFgYjo6OREZG8s9//hOdTtf2ntZj4PXXX+ell14iMjISJycnduzYccHQQ0VFBffeey99+/bFycmJgIAAJkyYwG+//QYYj72ffvqJgoKCdmG3Vi4UerjQMfDJJ5+cF366WNgiIiKCxYsXt3uuI98dYMWKFQwbNgx3d3c8PDyIi4vj73//+3ljyMiYC9mjIANAXl4eADExMW3P7dixg5kzZzJmzBhWrlyJl5cXX3/9NbfccgvNzc1tF8LHHnuMr776ipdeeokRI0bQ1NTEiRMnqKqq6rI9H3zwAUOHDuWDDz6gtraWxx9/nDlz5jBmzBgcHBz47LPPKCgo4IknnuDuu+9m48aNbZ995ZVXeOaZZ1iyZAnPPPMMGo2GZcuWMWnSJA4dOkR8fPxFx01PT2fGjBlERESwevVqXF1d+fDDD1m3bt157+3oONdccw16vZ7XX3+dfv36UVlZyb59+6itrb3kPti6dStz5sxh4MCBLF++nH79+pGfn88vv/zS9p
7i4mL8/Px49dVXCQgIoLq6mi+++IIxY8aQkpJCbGxsu20+/fTTjBs3jpUrV6JUKgkMDKSiogKA559/nuDgYBobG/n++++ZOnUq27Zta5vodTods2bNYs+ePfzlL39h+vTp6HQ6Dhw4QGFhIePHj2f//v1Mnz6dadOm8eyzzwJGzw4YJ8rExESUSiXPPfccUVFR7N+/n5deeon8/Hw+//zzdra+++67xMTE8MYbb+Dp6Ul0dPQF99Odd97J0aNHefnll4mJiaG2tpajR4+2HX8ffvgh9957L7m5uXz//feX3OfQuWOgo3T0u3/99dc88MADPPzww7zxxhsolUpycnJIT0/v8tgyMt1GyPQqPv/8cwGIAwcOCK1WKxoaGsSWLVtEcHCwmDx5stBqtW3vjYuLEyNGjGj3nBBCXHvttSIkJETo9XohhBCDBw8W8+fPv+S4U6ZMEVOmTDnv+UWLFonw8PC2v/Py8gQghg0b1rZ9IYR4++23BSDmzp3b7vN/+ctfBCDq6uqEEEIUFhYKe3t78fDDD7d7X0NDgwgODhY333zzJe285ZZbhIuLiygtLW17TqfTibi4OAGIvLy8To1TWVkpAPH2229fctwLERUVJaKiokRLS0uHP6PT6YRGoxHR0dHi0UcfbXt+x44dAhCTJ0/u0Da0Wq2YMWOGuO6669qe//LLLwUgPvnkk0t+3s3NTSxatOi855cuXSrc3d1FQUFBu+ffeOMNAYiTJ08KIf53DERFRQmNRtPuva2vff75523Pubu7i7/85S+XtGn27NntjrNzAcTzzz/f9ndHj4ELfbaV8PDwdvugo9/9oYceEt7e3pf8LjIy5kYOPfRSxo4di4ODAx4eHsycORMfHx9+/PHHtnh1Tk4Op06d4o477gCMd5Otj2uuuYaSkhIyMzMBSExMZPPmzTz11FPs3LmTlpaWbtt3zTXXtMuEHzhwIACzZ89u977W5wsLCwHjXbhOp2PhwoXtbHZ2dmbKlCns3LnzkuPu2LGDGTNmEBQU1PacnZ0dt9xyS7v3dXQcX19foqKiWLZsGcuXLyclJaVDSwGzsrLIzc3lrrvuwtnZ+aLv0+l0vPLKK8THx+Po6Ii9vT2Ojo5kZ2eTkZFx3vtvuOGGC25n5cqVjBw5EmdnZ+zt7XFwcGDbtm3ttrF582acnZ3505/+dFn7L8T//d//MW3aNEJDQ9vts1mzZgGwa9eudu+fO3cuDg4Ol91uYmIiq1ev5qWXXuLAgQMXDRN1lI4eA52ho989MTGR2tpabrvtNn788UcqKyu79V1kZEyBLBR6KV9++SWHDx9m+/btLF26lIyMDG677ba211tzFZ544gkcHBzaPR544AGAtovYu+++y5NPPskPP/zAtGnT8PX1Zf78+d1KJPT19W33t6Oj4yWfV6lU7ewePXr0eXavX7/+shfeqqoqgoODz3v+j891dByFQsG2bdu4+uqref311xk5ciQBAQE88sgjNDQ0XNSO1nDA5bLxH3vsMZ599lnmz5/Ppk2bOHjwIIcPH2bYsGEXFGwhISHnPbd8+XLuv/9+xowZw7fffsuBAwc4fPgwM2fObLeNiooKQkNDO7yU8Y+UlZWxadOm8/bXoEGDAM7731zI1guxfv16Fi1axKeffsq4cePw9fVl4cKFlJaWdsnOjh4DnaGj3/3OO+9sC6vdcMMNBAYGMmbMGH799dcujy0j013kHIVeysCBA9sSGKdNm4Zer+fTTz/lm2++4cYbb8Tf3x8wxrSvv/76C26jNf7t5ubGP//5T/75z39SVlbW5l2YM2cOp06dAsDZ2Zm6urrztmHqO6ZWu7/55hvCw8M7/Xk/P78LTjB/fK4z44SHh7Nq1SrA6Cn473//ywsvvIBGo2HlypUX/ExAQAAARUVFl9z2mjVrWLhwIa+88kq75ysrK/H29j7v/ecm8Z27jalTp7JixYp2z/9RyAQEBLB3714MBkOXxIK/vz9Dhw7l5Zdfvu
DroaGhl7X1Ytt9++23efvttyksLGTjxo089dRTlJeXs2XLlk7b2dFjAMDJyQm1Wn3e83/Mz+nMd1+yZAlLliyhqam
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# KEEP THIS CODE !!\n",
"# Self-contained radar chart demo on made-up data.\n",
"\n",
"# Example axes and their example scores (adjust to your data)\n",
"categories = ['Force', 'Vitesse', 'Agilité', 'Précision', 'Endurance']\n",
"values = [8, 7, 6, 9, 7]\n",
"\n",
"# Upper bound of the scale of each axis\n",
"max_range = [20] * 5\n",
"\n",
"# Score / upper bound, scaled by twice the largest score\n",
"values_normalized = [\n",
"    2 * max(values) * values[k] / max_range[k]\n",
"    for k in range(len(values))\n",
"]\n",
"\n",
"# One evenly spaced angle per axis\n",
"num_categories = len(categories)\n",
"angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"# Polar figure\n",
"fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"# Closed outlines: the raw scores are drawn fully transparent (alpha=0),\n",
"# the normalized scores are drawn in blue\n",
"ax.plot([*angles, angles[0]], [*values, values[0]], color='skyblue', alpha=0, linewidth=1.5)\n",
"ax.plot([*angles, angles[0]], [*values_normalized, values_normalized[0]], color='blue', linewidth=1.5)\n",
"\n",
"# Filled polygon of the normalized scores\n",
"ax.fill(angles, values_normalized, color='skyblue', alpha=0.4)\n",
"\n",
"# No radial labels; one named tick per axis\n",
"ax.set_yticklabels([])\n",
"ax.set_xticks(angles)\n",
"ax.set_xticklabels(categories)\n",
"\n",
"# Title, then display\n",
"ax.set_title('Résumé des caractéristiques')\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 149,
"id": "adb7ccb3-7dea-4347-9298-37311a2f1fb1",
"metadata": {},
"outputs": [],
"source": [
"def radar_chart(values, categories, segment) :\n",
"    \"\"\"Draw and show a filled radar chart for one segment.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    values : sequence of numbers\n",
"        One value per category. Each value is also printed on its axis label\n",
"        as a percentage (value * 100, rounded to 2 decimals).\n",
"    categories : sequence of str\n",
"        Axis names, paired with `values` in order.\n",
"    segment : object\n",
"        Identifier inserted in the chart title.\n",
"\n",
"    The inputs are left unmodified.\n",
"    \"\"\"\n",
"    num_categories = len(categories)\n",
"\n",
"    # one evenly spaced angle per category\n",
"    base_angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"    # Close the polygon on copies: an in-place `values += values[:1]` would\n",
"    # also extend the list owned by the caller\n",
"    values = list(values)\n",
"    closed_values = values + values[:1]\n",
"    closed_angles = base_angles + base_angles[:1]\n",
"\n",
"    # polar figure\n",
"    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"    # filled area, then its outline\n",
"    ax.fill(closed_angles, closed_values, color='skyblue', alpha=0.4)\n",
"    ax.plot(closed_angles, closed_values, color='blue', linewidth=2, linestyle='solid')\n",
"\n",
"    # no radial labels; each axis label shows the category and its value in percent\n",
"    ax.set_yticklabels([])\n",
"    labels = [f\"{category} = {round(100 *value,2)}%\" for category, value in zip(categories, values)]\n",
"    ax.set_xticks(base_angles)\n",
"    ax.set_xticklabels(labels, fontsize=10, color='black', ha='right')\n",
"\n",
"    # chart title\n",
"    plt.title(f'Caracteristics of segment {segment}')\n",
"\n",
"    # display the chart\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 150,
"id": "8793fb51-812c-4500-b252-2e2d61d6ff48",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkkAAAH2CAYAAABk9BgJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3xT1fvHPzezTZOOdO+W0gmUtoyWUqBYwAGiP5YLBRUVQURUBFyIynShqAwHOFARBQS/OJBRShkFWgqle++92+yc3x+xaSsFum/SnvfrlVfT5N5zPrkZ93Of85znMIQQAgqFQqFQKBRKOzhsC6BQKBQKhUIxRKhJolAoFAqFQukAapIoFAqFQqFQOoCaJAqFQqFQKJQOoCaJQqFQKBQKpQOoSaJQKBQKhULpAGqSKBQKhUKhUDqAmiQKhUKhUCiUDqAmiUKhUCgUCqUDqEmiUHqZq1ev4vHHH4enpydMTEwgFosREhKCLVu2oLq6mm15NyU5ORlvvfUWcnNz+6T9t956CwzDdGmf5uZmvPXWWzh16tQNz+3ZswcMw/SZ3p6wb98+DBs2DKampmAYBleuXGFbksHz+eefY8+ePZ3e/vfff8djjz2GESNGgM/nd/mzRaF0BoYuS0Kh9B5ffPEFlixZAl9fXyxZsgQBAQFQqVS4dOkSvvjiC4wcORIHDx5kW2aH/PLLL5g7dy5OnjyJyMjIXm+/sLAQhYWFCAsL6/Q+lZWVsLW1xdq1a/HWW2+1e66iogJZWVkIDg6GUCjsZbXdp6KiAs7Ozrjrrrvw0ksvQSgUIjAwECKRiG1pBs3w4cNhY2PToSHuiCeffBIxMTEIDg5GVlYWLl++DHo6o/Q2PLYFUCgDhXPnzuHZZ5/F1KlTcejQoXYn7qlTp+Kll17Cn3/+2St9NTc3G81Jt0Wri4sLXFxceq1dW1tb2Nra9lp7vUV6ejpUKhXmz5+PSZMmsS1nwPLFF1+Aw9ENhjz33HO4fPkyy4ooAxJCoVB6hRkzZhAej0fy8/M7tf1PP/1Epk6dShwcHIiJiQnx8/Mjq1atIo2Nje22W7BgATEzMyNXr14lU6dOJWKxmISFhRFCCPn777/JzJkzibOzMxEKhcTLy4s8/fTTpKKi4ob+UlJSyIMPPkjs7OyIQCAgrq6u5NFHHyVyuZzs3r2bALjhtnv3bv3+x44dI3fccQeRSCTE1NSUhIeHk3/++addH2vXriUAyOXLl8ns2bOJpaUlcXBwaPdcW44fP04mTZpEpFIpMTExIa6urmTWrFmkqamJ5OTkdKhpwYIFhBCi15yTk9OuzT/++IPccccdxNzcnJiamhI/Pz+yYcMG/fNZWVnkgQceII6OjkQgEBA7Oztyxx13kISEhNu+Z7/99hsJCwsjpqamRCwWkylTppCzZ8+2e6/+q3fSpEk3ba+pqYm89NJLxMPDgwiFQmJlZUVGjRpFfvjhh3bbXbx4kdx7773EysqKCIVCEhQURPbt23dDezExMSQsLIwIhULi5OREXn/9dfLFF1/ccJzc3d3J9OnTyZEjR0hQUJD+83fkyBH9sfXz8yMikYiMGTOGXLx48Ya+OqOp5T06ceIEWbx4MbG2tiZSqZT83//9HykqKmqn57/Hzd3d/VZvRTuWLl16w2eLQukNaCSJQukFNBoNTpw4gVGjRsHV1bVT+2RkZOCee+7BCy+8ADMzM6SmpmLz5s2Ii4vDiRMn2m2rVCoxc+ZMPPPMM1i9ejXUajUAICsrC+PGjcOiRYtgYWGB3NxcfPjhh4iIiMC1a9fA5/MBAImJiYiIiICNjQ3efvtteHt7o6SkBIcPH4ZSqcT06dOxYcMGvPrqq/jss88QEhICAPDy8gIAfP/993jsscdw33334ZtvvgGfz8fOnTtx55134q+//kJUVFQ7vbNmzcKDDz6IxYsXo6mpqcPXn5ubi+nTp2PChAn4+uuvYWlpiaKiIvz5559QKpVwdH
TEn3/+ibvuugtPPvkkFi1aBAC3jB599dVXeOqppzBp0iTs2LEDdnZ2SE9PR1JSkn6be+65BxqNBlu2bIGbmxsqKytx9uxZ1NbW3vL9+uGHH/DII49g2rRp+PHHH6FQKLBlyxZERkbi+PHjiIiIwBtvvIGxY8di6dKl2LBhAyZPngxzc/Obtvniiy/iu+++w7vvvovg4GA0NTUhKSkJVVVV+m1OnjyJu+66C6GhodixYwcsLCzw008/4YEHHkBzczMWLlwIQJcLN3XqVPj4+OCbb76BSCTCjh078P3333fYd2JiItasWYPXXnsNFhYWWLduHWbNmoU1a9bg+PHj2LBhAxiGwapVqzBjxgzk5OTA1NS0S5paWLRoEaZPn44ffvgBBQUFWLlyJebPn6//nB88eBBz5syBhYUFPv/8cwAwqCFUyiCGbZdGoQwESktLCQDy4IMPdmt/rVZLVCoViY6OJgBIYmKi/rmW6MTXX3/dqTby8vIIAPLbb7/pn7vjjjuIpaUlKS8vv+n++/fvJwDIyZMn2z3e1NREpFIpuffee9s9rtFoyMiRI8nYsWP1j7VEi958880b2v9vJOmXX34hAMiVK1duqqmiooIAIGvXrr3huf9GkhoaGoi5uTmJiIggWq22w/YqKysJALJ169ab9tkRGo2GODk5kREjRhCNRqN/vKGhgdjZ2ZHw8HD9YydPniQAyP79+2/b7vDhw8n9999/y238/PxIcHAwUalU7R6fMWMGcXR01OuZO3cuMTMzaxdF1Gg0JCAgoMNIkqmpKSksLNQ/duXKFQKAODo6kqamJv3jhw4dIgDI4cOHu6yp5T1asmRJu+22bNlCAJCSkhL9Y8OGDbtl1O1W0EgSpa+gs9soFJbIzs7Gww8/DAcHB3C5XPD5fH0OS0pKyg3bz549+4bHysvLsXjxYri6uoLH44HP58Pd3b1dG83NzYiOjsa8efO6lcNz9uxZVFdXY8GCBVCr1fqbVqvFXXfdhYsXL94QLepI638JCgqCQCDA008/jW+++QbZ2dld1vZfnfX19ViyZMlNZzpJpVJ4eXnhvffew4cffoiEhARotdrbtp2Wlobi4mI8+uij+jwYABCLxZg9ezbOnz+P5ubmLmseO3Ys/vjjD6xevRqnTp2CTCZr93xmZiZSU1PxyCOPAEC743/PPfegpKQEaWlpAIDo6GjccccdsLGx0e/P4XAwb968DvsOCgqCs7Oz/n9/f38AQGRkZLt8t5bH8/LyuqyphZkzZ7b7PzAwsF2bFIqhQk0ShdIL2NjYQCQSIScnp1PbNzY2YsKECbhw4QLeffddnDp1ChcvXsSBAwcA4IaTpUgkumHYRqvVYtq0aThw4ABeeeUVHD9+HHFxcTh//ny7NmpqaqDRaLqdNF1WVgYAmDNnDvh8frvb5s2bQQi5obSBo6Pjbdv18vLCP//8Azs7OyxduhReXl7w8vLCxx9/3C2dFRUVAHDL18kwDI4fP44777wTW7ZsQUhICGxtbfH888+joaHhpvu1DH919LqcnJyg1WpRU1PTZc2ffPIJVq1ahUOHDmHy5MmQSqW4//77kZGRAaD12L/88ss3HPslS5YA0M0AbNFob29/Qx8dPQboDGNbBALBLR+Xy+Vd1tSCtbV1u/9bhtL++zmnUAwNmpNEofQCXC4XUVFR+OOPP1BYWHhbQ3LixAkUFxfj1KlT7WZA3SwvpqPISFJSEhITE7Fnzx4sWLBA/3hmZma77aRSKbhcLgoLC7vwilppiUxs27btptP3/3si7mzNmgkTJmDChAnQaDS4dOkStm3bhhdeeAH29vZ48MEHu6SzJUp2u9fp7u6Or776CoBuJtrPP/+Mt956C0qlEjt27Ohwn5aTfElJyQ3PFRcXg8PhwMrKqkt6AcDMzAzr1q3DunXrUFZWpo8q3XvvvUhNTdUf+zVr1mDWrFkdtuHr66vX2GJg2lJaWtplXbeiK5ooFGOHRpIolF5izZo1II
TgqaeeglKpvOF5lUqFI0eOAGg1Ef9NTt25c2en++tsG6amppg0aRL2799/wxV+W252dT9+/HhYWloiOTkZo0eP7vD
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Radar chart of the row labelled 0 of X_test_segment_mp, on three columns\n",
"categories = [\"share_known_gender\", \"share_of_women\", \"country_fr\"]\n",
"radar_chart(\n",
"    values=X_test_segment_mp.loc[0, categories].values.tolist(),\n",
"    categories=categories,\n",
"    segment=\"1\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}