BDC-team-1/Sport/Modelization/segment_analysis_sport_0_6.ipynb

2384 lines
1.2 MiB
Plaintext
Raw Normal View History

2024-03-26 12:20:03 +01:00
{
"cells": [
{
"cell_type": "markdown",
"id": "c488134e-680f-44e4-8c43-40c246140519",
"metadata": {},
"source": [
"# Analysis of segments and marketing personae associated"
]
},
{
"cell_type": "code",
"execution_count": 82,
2024-03-26 12:20:03 +01:00
"id": "9a8b8c3a-8e74-49f3-91d1-cccfc057fdcd",
"metadata": {},
"outputs": [],
"source": [
"# importations\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import io\n",
"import s3fs\n",
"import re\n",
"import pickle\n",
2024-03-26 16:00:39 +01:00
"import warnings\n",
"import matplotlib.pyplot as plt"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
"execution_count": 83,
2024-03-26 12:20:03 +01:00
"id": "d553c868-695f-4d57-96d6-d5c6629cefb2",
"metadata": {},
"outputs": [],
"source": [
"def load_model(type_of_activity, model):\n",
" BUCKET = f\"projet-bdc2324-team1/Output_model/{type_of_activity}/{model}/\"\n",
" filename = model + '.pkl'\n",
" file_path = BUCKET + filename\n",
" with fs.open(file_path, mode=\"rb\") as f:\n",
" model_bytes = f.read()\n",
"\n",
" model = pickle.loads(model_bytes)\n",
" return model\n",
"\n",
"\n",
"def load_test_file(type_of_activity):\n",
" file_path_test = f\"projet-bdc2324-team1/Generalization/{type_of_activity}/Test_set.csv\"\n",
" with fs.open(file_path_test, mode=\"rb\") as file_in:\n",
" dataset_test = pd.read_csv(file_in, sep=\",\")\n",
" return dataset_test"
]
},
{
"cell_type": "code",
"execution_count": 84,
2024-03-26 12:20:03 +01:00
"id": "3af80fea-a937-4ea8-bece-cfeaa89d1055",
"metadata": {},
"outputs": [],
"source": [
"# exec(open('utils_segmentation.py').read())\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"# Create filesystem object\n",
"S3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})\n",
"\n",
"# choose the type of companies for which you want to run the pipeline\n",
"type_of_activity = \"sport\""
]
},
{
"cell_type": "code",
"execution_count": 85,
2024-03-26 12:20:03 +01:00
"id": "cc6af7fa-33b2-4d58-ada4-e2ee7262bab9",
"metadata": {},
"outputs": [],
"source": [
"# load test set\n",
"dataset_test = load_test_file(type_of_activity)\n",
"\n",
"# Load Model \n",
"model = load_model(type_of_activity, 'LogisticRegression_Benchmark')"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "8238ee71-47ec-4621-9813-4b5d2fd03efd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>customer_id</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>...</th>\n",
" <th>country</th>\n",
" <th>gender_label</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>time_to_open</th>\n",
" <th>y_has_purchased</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5_476624</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>100.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.177187</td>\n",
" <td>5.177187</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5_183496</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>55.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>426.265613</td>\n",
" <td>426.265613</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5_248456</td>\n",
" <td>17.0</td>\n",
" <td>1.0</td>\n",
" <td>80.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>436.033437</td>\n",
" <td>436.033437</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5_474758</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>120.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.196412</td>\n",
" <td>5.196412</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5_192421</td>\n",
" <td>34.0</td>\n",
" <td>2.0</td>\n",
" <td>416.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>478.693148</td>\n",
" <td>115.631470</td>\n",
" <td>363.061678</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96091</th>\n",
" <td>9_761001</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>67.31</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>278.442257</td>\n",
" <td>278.442257</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>ch</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>15.0</td>\n",
" <td>5.0</td>\n",
" <td>0 days 00:35:22.600000</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
" <td>9_951910</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>61.41</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>189.207373</td>\n",
" <td>189.207373</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>ch</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>12.0</td>\n",
" <td>9.0</td>\n",
" <td>0 days 10:39:13.333333333</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
" <td>9_54095</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>ch</td>\n",
" <td>female</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>29.0</td>\n",
" <td>3.0</td>\n",
" <td>0 days 13:44:43.333333333</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
" <td>9_755241</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>79.43</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>279.312905</td>\n",
" <td>279.312905</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>fr</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>20.0</td>\n",
" <td>4.0</td>\n",
" <td>0 days 01:12:12.500000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
" <td>9_2712</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>male</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>31.0</td>\n",
" <td>4.0</td>\n",
" <td>2 days 01:39:12.750000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>96096 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" customer_id nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 5_476624 4.0 1.0 100.00 1.0 \n",
"1 5_183496 1.0 1.0 55.00 1.0 \n",
"2 5_248456 17.0 1.0 80.00 1.0 \n",
"3 5_474758 4.0 1.0 120.00 1.0 \n",
"4 5_192421 34.0 2.0 416.00 1.0 \n",
"... ... ... ... ... ... \n",
"96091 9_761001 1.0 1.0 67.31 1.0 \n",
"96092 9_951910 1.0 1.0 61.41 1.0 \n",
"96093 9_54095 0.0 0.0 0.00 0.0 \n",
"96094 9_755241 1.0 1.0 79.43 1.0 \n",
"96095 9_2712 0.0 0.0 0.00 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 5.177187 5.177187 \n",
"1 0.0 426.265613 426.265613 \n",
"2 0.0 436.033437 436.033437 \n",
"3 0.0 5.196412 5.196412 \n",
"4 0.0 478.693148 115.631470 \n",
"... ... ... ... \n",
"96091 1.0 278.442257 278.442257 \n",
"96092 1.0 189.207373 189.207373 \n",
"96093 0.0 550.000000 550.000000 \n",
"96094 1.0 279.312905 279.312905 \n",
"96095 0.0 550.000000 550.000000 \n",
"\n",
" time_between_purchase nb_tickets_internet ... country gender_label \\\n",
"0 0.000000 0.0 ... fr female \n",
"1 0.000000 0.0 ... fr male \n",
"2 0.000000 0.0 ... fr female \n",
"3 0.000000 0.0 ... fr female \n",
"4 363.061678 0.0 ... fr female \n",
"... ... ... ... ... ... \n",
"96091 0.000000 1.0 ... ch male \n",
"96092 0.000000 1.0 ... ch male \n",
"96093 -1.000000 0.0 ... ch female \n",
"96094 0.000000 1.0 ... fr male \n",
"96095 -1.000000 0.0 ... NaN male \n",
"\n",
" gender_female gender_male gender_other country_fr nb_campaigns \\\n",
"0 1 0 0 1.0 0.0 \n",
"1 0 1 0 1.0 0.0 \n",
"2 1 0 0 1.0 0.0 \n",
"3 1 0 0 1.0 0.0 \n",
"4 1 0 0 1.0 0.0 \n",
"... ... ... ... ... ... \n",
"96091 0 1 0 0.0 15.0 \n",
"96092 0 1 0 0.0 12.0 \n",
"96093 1 0 0 0.0 29.0 \n",
"96094 0 1 0 1.0 20.0 \n",
"96095 0 1 0 NaN 31.0 \n",
"\n",
" nb_campaigns_opened time_to_open y_has_purchased \n",
"0 0.0 NaN 0.0 \n",
"1 0.0 NaN 1.0 \n",
"2 0.0 NaN 0.0 \n",
"3 0.0 NaN 0.0 \n",
"4 0.0 NaN 1.0 \n",
"... ... ... ... \n",
"96091 5.0 0 days 00:35:22.600000 1.0 \n",
"96092 9.0 0 days 10:39:13.333333333 0.0 \n",
"96093 3.0 0 days 13:44:43.333333333 0.0 \n",
"96094 4.0 0 days 01:12:12.500000 0.0 \n",
"96095 4.0 2 days 01:39:12.750000 0.0 \n",
"\n",
"[96096 rows x 40 columns]"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset_test"
]
},
{
"cell_type": "code",
2024-03-27 16:06:31 +01:00
"execution_count": 180,
2024-03-26 12:20:03 +01:00
"id": "e4287c1a-eab6-4897-91d6-d21804518dc4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_suppliers</th>\n",
" <th>vente_internet_max</th>\n",
" <th>purchase_date_min</th>\n",
" <th>purchase_date_max</th>\n",
" <th>time_between_purchase</th>\n",
" <th>nb_tickets_internet</th>\n",
" <th>is_email_true</th>\n",
" <th>...</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>gender_other</th>\n",
" <th>nb_campaigns</th>\n",
" <th>nb_campaigns_opened</th>\n",
" <th>country_fr</th>\n",
" <th>has_purchased</th>\n",
" <th>has_purchased_estim</th>\n",
" <th>score</th>\n",
" <th>segment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>100.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.177187</td>\n",
" <td>5.177187</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.700113</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>55.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>426.265613</td>\n",
" <td>426.265613</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.223569</td>\n",
" <td>1</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>17.0</td>\n",
" <td>1.0</td>\n",
" <td>80.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>436.033437</td>\n",
" <td>436.033437</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.246124</td>\n",
2024-03-26 12:20:03 +01:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>120.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>5.196412</td>\n",
" <td>5.196412</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.700106</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>34.0</td>\n",
" <td>2.0</td>\n",
" <td>416.00</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>478.693148</td>\n",
" <td>115.631470</td>\n",
" <td>363.061678</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.922115</td>\n",
2024-03-26 12:20:03 +01:00
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96091</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>67.31</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>278.442257</td>\n",
" <td>278.442257</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>15.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.571135</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96092</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>61.41</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>189.207373</td>\n",
" <td>189.207373</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>12.0</td>\n",
" <td>9.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.630924</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96093</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>29.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.123418</td>\n",
2024-03-26 12:20:03 +01:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96094</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>79.43</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>279.312905</td>\n",
" <td>279.312905</td>\n",
" <td>0.000000</td>\n",
" <td>1.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>20.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.578971</td>\n",
2024-03-26 12:20:03 +01:00
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96095</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>550.000000</td>\n",
" <td>550.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.0</td>\n",
" <td>True</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>31.0</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.295349</td>\n",
2024-03-26 12:20:03 +01:00
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>96096 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount nb_suppliers \\\n",
"0 4.0 1.0 100.00 1.0 \n",
"1 1.0 1.0 55.00 1.0 \n",
"2 17.0 1.0 80.00 1.0 \n",
"3 4.0 1.0 120.00 1.0 \n",
"4 34.0 2.0 416.00 1.0 \n",
"... ... ... ... ... \n",
"96091 1.0 1.0 67.31 1.0 \n",
"96092 1.0 1.0 61.41 1.0 \n",
"96093 0.0 0.0 0.00 0.0 \n",
"96094 1.0 1.0 79.43 1.0 \n",
"96095 0.0 0.0 0.00 0.0 \n",
"\n",
" vente_internet_max purchase_date_min purchase_date_max \\\n",
"0 0.0 5.177187 5.177187 \n",
"1 0.0 426.265613 426.265613 \n",
"2 0.0 436.033437 436.033437 \n",
"3 0.0 5.196412 5.196412 \n",
"4 0.0 478.693148 115.631470 \n",
"... ... ... ... \n",
"96091 1.0 278.442257 278.442257 \n",
"96092 1.0 189.207373 189.207373 \n",
"96093 0.0 550.000000 550.000000 \n",
"96094 1.0 279.312905 279.312905 \n",
"96095 0.0 550.000000 550.000000 \n",
"\n",
" time_between_purchase nb_tickets_internet is_email_true ... \\\n",
"0 0.000000 0.0 True ... \n",
"1 0.000000 0.0 True ... \n",
"2 0.000000 0.0 True ... \n",
"3 0.000000 0.0 True ... \n",
"4 363.061678 0.0 True ... \n",
"... ... ... ... ... \n",
"96091 0.000000 1.0 True ... \n",
"96092 0.000000 1.0 True ... \n",
"96093 -1.000000 0.0 True ... \n",
"96094 0.000000 1.0 True ... \n",
"96095 -1.000000 0.0 True ... \n",
"\n",
" gender_female gender_male gender_other nb_campaigns \\\n",
"0 1 0 0 0.0 \n",
"1 0 1 0 0.0 \n",
"2 1 0 0 0.0 \n",
"3 1 0 0 0.0 \n",
"4 1 0 0 0.0 \n",
"... ... ... ... ... \n",
"96091 0 1 0 15.0 \n",
"96092 0 1 0 12.0 \n",
"96093 1 0 0 29.0 \n",
"96094 0 1 0 20.0 \n",
"96095 0 1 0 31.0 \n",
"\n",
" nb_campaigns_opened country_fr has_purchased has_purchased_estim \\\n",
"0 0.0 1.0 0.0 1.0 \n",
"1 0.0 1.0 1.0 0.0 \n",
"2 0.0 1.0 0.0 0.0 \n",
"3 0.0 1.0 0.0 1.0 \n",
"4 0.0 1.0 1.0 1.0 \n",
"... ... ... ... ... \n",
"96091 5.0 0.0 1.0 1.0 \n",
"96092 9.0 0.0 0.0 1.0 \n",
"96093 3.0 0.0 0.0 0.0 \n",
"96094 4.0 1.0 0.0 1.0 \n",
"96095 4.0 NaN 0.0 0.0 \n",
"\n",
" score segment \n",
"0 0.700113 3 \n",
"1 0.223569 1 \n",
"2 0.246124 1 \n",
"3 0.700106 3 \n",
"4 0.922115 4 \n",
2024-03-26 12:20:03 +01:00
"... ... ... \n",
"96091 0.571135 3 \n",
"96092 0.630924 3 \n",
"96093 0.123418 1 \n",
"96094 0.578971 3 \n",
"96095 0.295349 2 \n",
2024-03-26 12:20:03 +01:00
"\n",
"[96096 rows x 21 columns]"
]
},
2024-03-27 16:06:31 +01:00
"execution_count": 180,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Processing\n",
"X_test = dataset_test[['nb_tickets', 'nb_purchases', 'total_amount', 'nb_suppliers', 'vente_internet_max', 'purchase_date_min', 'purchase_date_max', \n",
" 'time_between_purchase', 'nb_tickets_internet', 'is_email_true', 'opt_in', #'is_partner',\n",
2024-03-27 16:06:31 +01:00
" 'gender_female', 'gender_male', 'gender_other', 'nb_campaigns', 'nb_campaigns_opened', 'country_fr']]\n",
2024-03-26 12:20:03 +01:00
"\n",
"y_test = dataset_test[['y_has_purchased']]\n",
"\n",
"\n",
2024-03-26 12:20:03 +01:00
"X_test_segment = X_test\n",
"\n",
2024-03-27 16:06:31 +01:00
"# X_test_segment.insert(X_test.shape[1], \"country_fr\", dataset_test[\"country_fr\"])\n",
2024-03-26 12:20:03 +01:00
"\n",
"# add y_has_purchased to X_test\n",
"X_test_segment[\"has_purchased\"] = y_test\n",
"\n",
"# Add prediction and probability to dataset_test\n",
"y_pred = model.predict(X_test)\n",
"X_test_segment[\"has_purchased_estim\"] = y_pred\n",
"\n",
"y_pred_prob = model.predict_proba(X_test)[:, 1]\n",
"X_test_segment['score'] = y_pred_prob\n",
"\n",
"X_test_segment[\"segment\"] = np.where(X_test_segment['score']<0.25, '1',\n",
" np.where(X_test_segment['score']<0.5, '2',\n",
" np.where(X_test_segment['score']<0.75, '3', '4')))\n",
"\n",
"X_test_segment"
]
},
{
"cell_type": "markdown",
"id": "9058c3b2-8fa2-4322-a57b-395da4033eaf",
"metadata": {},
"source": [
"## 1. Business KPIs"
]
},
{
"cell_type": "code",
2024-03-27 16:06:31 +01:00
"execution_count": 181,
2024-03-26 12:20:03 +01:00
"id": "3067d919-50c9-49e9-b0a6-b676a5dbae56",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>vente_internet_max</th>\n",
" <th>nb_tickets_internet</th>\n",
" </tr>\n",
" <tr>\n",
" <th>segment</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4324.0</td>\n",
" <td>1446.0</td>\n",
" <td>8.973214e+04</td>\n",
" <td>346.0</td>\n",
" <td>870.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>36094.0</td>\n",
" <td>15451.0</td>\n",
" <td>1.138007e+06</td>\n",
" <td>8046.0</td>\n",
" <td>19591.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>65817.0</td>\n",
" <td>30677.0</td>\n",
" <td>2.209794e+06</td>\n",
" <td>14407.0</td>\n",
" <td>37260.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>392936.0</td>\n",
" <td>102142.0</td>\n",
" <td>1.650005e+07</td>\n",
" <td>6659.0</td>\n",
" <td>112518.0</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nb_tickets nb_purchases total_amount vente_internet_max \\\n",
"segment \n",
"1 4324.0 1446.0 8.973214e+04 346.0 \n",
"2 36094.0 15451.0 1.138007e+06 8046.0 \n",
"3 65817.0 30677.0 2.209794e+06 14407.0 \n",
"4 392936.0 102142.0 1.650005e+07 6659.0 \n",
2024-03-26 12:20:03 +01:00
"\n",
" nb_tickets_internet \n",
"segment \n",
"1 870.0 \n",
"2 19591.0 \n",
"3 37260.0 \n",
"4 112518.0 "
2024-03-26 12:20:03 +01:00
]
},
2024-03-27 16:06:31 +01:00
"execution_count": 181,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# business figures\n",
"X_test_segment.groupby(\"segment\")[[\"nb_tickets\", \"nb_purchases\", \"total_amount\", \"vente_internet_max\", \n",
" \"nb_tickets_internet\"]].sum()"
]
},
{
"cell_type": "code",
"execution_count": 171,
"id": "5b1acd28-b346-45b1-8da2-b79ca7f4fa96",
"metadata": {},
"outputs": [],
"source": [
"def df_business_fig(df, segment, list_var) :\n",
" df_business_kpi = df.groupby(segment)[list_var].sum().reset_index()\n",
" df_business_kpi.insert(1, \"size\", df.groupby(segment).size().values)\n",
" all_var = [\"size\"] + list_var\n",
" df_business_kpi[all_var] = 100 * df_business_kpi[all_var] / df_business_kpi[all_var].sum()\n",
"\n",
" return df_business_kpi"
]
},
{
"cell_type": "code",
2024-03-27 16:06:31 +01:00
"execution_count": 182,
"id": "bd63d787-3ef8-4f23-9069-e9b16b4a0de8",
2024-03-26 12:20:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>size</th>\n",
" <th>nb_tickets</th>\n",
" <th>nb_purchases</th>\n",
" <th>total_amount</th>\n",
" <th>nb_campaigns</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>48.776224</td>\n",
" <td>0.866236</td>\n",
" <td>0.965829</td>\n",
" <td>0.450065</td>\n",
" <td>41.498188</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>19.887404</td>\n",
" <td>7.230789</td>\n",
" <td>10.320206</td>\n",
" <td>5.707847</td>\n",
" <td>24.826893</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>21.990509</td>\n",
" <td>13.185261</td>\n",
" <td>20.490128</td>\n",
" <td>11.083558</td>\n",
" <td>17.737544</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>9.345862</td>\n",
" <td>78.717714</td>\n",
" <td>68.223837</td>\n",
" <td>82.758529</td>\n",
" <td>15.937376</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment size nb_tickets nb_purchases total_amount nb_campaigns\n",
"0 1 48.776224 0.866236 0.965829 0.450065 41.498188\n",
"1 2 19.887404 7.230789 10.320206 5.707847 24.826893\n",
"2 3 21.990509 13.185261 20.490128 11.083558 17.737544\n",
"3 4 9.345862 78.717714 68.223837 82.758529 15.937376"
2024-03-26 12:20:03 +01:00
]
},
2024-03-27 16:06:31 +01:00
"execution_count": 182,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"business_var = [\"nb_tickets\", \"nb_purchases\", \"total_amount\", \"nb_campaigns\"]\n",
"X_test_business_fig = df_business_fig(X_test_segment, \"segment\",\n",
" business_var)\n",
2024-03-26 12:20:03 +01:00
"X_test_business_fig"
]
},
{
"cell_type": "code",
2024-03-26 16:00:39 +01:00
"execution_count": 57,
2024-03-26 12:20:03 +01:00
"id": "d2f618b6-c984-4790-bd8f-29c7d01c6707",
"metadata": {},
"outputs": [],
"source": [
"def hist_segment_business_KPIs(df, segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns) :\n",
" \n",
" plt.figure()\n",
"\n",
" df_plot = df[[segment, size, nb_tickets, nb_purchases, total_amount, nb_campaigns]]\n",
" \n",
" x = [\"number of\\ncustomers\", \"number of\\ntickets\", \"number of\\npurchases\", \"total\\namount\", \n",
" \"number of\\ncampaigns\"]\n",
"\n",
" # liste_var = [size, nb_tickets, nb_purchases, total_amount]\n",
" \n",
" bottom = np.zeros(5)\n",
" \n",
" # Définir une palette de couleurs\n",
" colors = plt.cm.Blues(np.linspace(0.1, 0.9, 4))\n",
" \n",
" for i in range(4) :\n",
" # print(str(df_plot[segment][i]))\n",
" # segment = df_plot[segment][i]\n",
" height = list(df_plot.loc[i,size:].values)\n",
" \n",
" plt.bar(x=x, height=height, label = str(df_plot[segment][i]), bottom=bottom, color=colors[i]#, width=0.1\n",
" )\n",
" \n",
" bottom+=height\n",
2024-03-26 16:00:39 +01:00
"\n",
" # Ajuster les marges\n",
" plt.subplots_adjust(left = 0.125, right = 0.8,\n",
" bottom = 0.1, top = 0.9\n",
" )\n",
"\n",
2024-03-26 12:20:03 +01:00
" \n",
" plt.legend(title = \"segment\", loc = \"upper right\", bbox_to_anchor=(1.2, 1))\n",
" plt.ylabel(\"Fraction represented by the segment (%)\")\n",
" plt.title(\"Relative weight of each segment regarding business KPIs\")\n",
2024-03-26 16:00:39 +01:00
" # plt.show()\n",
2024-03-26 12:20:03 +01:00
" "
]
},
{
"cell_type": "code",
"execution_count": 146,
"id": "14b6ae5c-d704-4f5d-9f9b-5646e29ea470",
2024-03-26 12:20:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAHQCAYAAACx9C4fAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABv3UlEQVR4nO3deVyN6f8/8NdBnU77ghZClBJliyiUNcb6wdgpzGCyG7KTfZsxDDOMZbKMfSwzYywxKrJmyZZ9DSVrkbRevz/8ur+OcpzDOU54PR+PHtzXvVzv+zr3uXt33dd93zIhhAARERER5auQvgMgIiIiKsiYLBERERGpwGSJiIiISAUmS0REREQqMFkiIiIiUoHJEhEREZEKTJaIiIiIVGCyRERERKQCkyUiIiIiFT65ZGnFihWQyWTST5EiRWBvb49OnTrhypUr77XNyMhIyGQyREZGarxuXFwcQkNDcfPmzTzzgoKCUKZMmfeKqSD7kP0KDQ2FTCbDw4cP37ns9OnTsW3btveqR5XHjx+jU6dOKF68OGQyGdq0aaP1Oj5E7vH4559/6juUT56ujqGCTiaTITQ0VJrOPW/md57SNU2+87qo91P1tvPAixcv0KxZMxgYGGDVqlUA8v+9WLJkSfTs2RN379595zbp3T65ZClXWFgYDh8+jL1792LAgAH4+++/UadOHTx58uSjxhEXF4dJkyblexIaP348tm7d+lHj+Rg+1n7p6hfdlClTsHXrVvz00084fPgwZs+erfU6qGD4UpOlNzVv3hyHDx+Gvb29vkP5aL755hscPnxY32FoVXJyMpo0aYKIiAj8+eef6NGjh9L83N+Le/bswbfffot169ahbt26SE1N1VPEn48i+g7gfVWqVAleXl4AAH9/f2RnZ2PixInYtm0bevbsqefoXilXrpy+Q9CJT32/zp07h3LlyqFr1676DoUKuOzsbGRlZUEul+s7FCUvXryAsbGx2ssXK1YMxYoV02FEBU/JkiVRsmRJfYehNUlJSQgICMC1a9ewc+dO1K9fP88yr/9erF+/PrKzszFlyhRs27aN57sP9Mn2LL0p9wC5f/++Uvnx48fRqlUrWFtbw8jICFWrVsXGjRvfub3jx4+jU6dOKFOmDBQKBcqUKYPOnTvj1q1b0jIrVqzA119/DeDVgZnbBbpixQoAeS9XVa1aFXXr1s1TV3Z2NkqUKIG2bdtKZRkZGZg6dSrc3Nwgl8tRrFgx9OzZEw8ePFAZ97///guZTIaYmBipbPPmzZDJZGjevLnSsp6enmjXrp00LYTAr7/+iipVqkChUMDKygrt27fH9evXldbL7zLc06dP0bt3b1hbW8PU1BTNmzfH9evX81wOyHX//n107twZFhYWsLW1Ra9evZCcnCzNl8lkSE1NxcqVK6V29ff3V7nvjx8/RnBwMEqUKAFDQ0OULVsWY8eORXp6OgDg5s2bkMlk2Lt3Ly5cuCBt912XXzds2IDatWvDxMQEpqamCAgIwKlTp5SWUed4yXX37l306dMHjo6OMDQ0hIODA9q3b5/n2M3MzMTYsWPh4OAAc3NzNGrUCJcuXVIZKwA8ePBA2n7usePr64u9e/cqLbd37140bNgQ5ubmMDY2hq+vL/7777882/vrr7/g6ekJuVyOsmXLYv78+fle4pDJZBgwYADCwsLg6uoKhUIBLy8vHDlyBEIIzJkzB05OTjA1NUWDBg1w9erVPHWpE1Nu3efPn9fqMZR7fMyePRtTp06Fk5MT5HI5IiIiAKh/LomOjkbt2rVhZGSEEiVKYPz48Vi2bFmey2AbNmxAkyZNYG9vD4VCgQoVKmDUqFF5egGCgoJgamqKs2fPokmTJjAzM0PDhg0BACkpKfj2229hY2MDU1NTNG3aFJcvX84TU36X4fz9/VGpUiXExMSgbt26MDY2RtmyZTFz5kzk5OQorX/+/Hk0adIExsbGKFasGPr37y+da9QdvhAfH4
+2bdvC3NwcFhYW6NatW57z2dvOF2XKlEFQUJA0/eLFCwwfPhxOTk4wMjKCtbU1vLy8sG7dOmmZ/I7RMmXKoEWLFti1axeqVasGhUIBNzc3/P7773nqTExMRN++fVGyZEkYGhrCyckJkyZNQlZWltJyixYtQuXKlWFqagozMzO4ublhzJgxGsX6Lrdu3UKdOnVw584d7Nu3L99EKT+1atWS1n8bdc8XX7pPtmfpTTdu3AAAlC9fXiqLiIhA06ZN4e3tjcWLF8PCwgLr169Hx44d8eLFC6Uv35tu3rwJV1dXdOrUCdbW1khISMCiRYtQo0YNxMXFoWjRomjevDmmT5+OMWPG4JdffkG1atUAvL3npWfPnhg8eDCuXLkCFxcXqTw8PBz37t2TesRycnLQunVrHDhwACEhIfDx8cGtW7cwceJE+Pv74/jx41AoFPnW4efnBwMDA+zduxc1atQA8OoXkEKhQFRUFDIzM2FgYICkpCScO3cO3333nbRu3759sWLFCgwaNAizZs3C48ePMXnyZPj4+OD06dOwtbXNt86cnBy0bNkSx48fR2hoKKpVq4bDhw+jadOmb23fdu3aoWPHjujduzfOnj2L0aNHA4B00jp8+DAaNGiA+vXrY/z48QAAc3Pzt27v5cuXqF+/Pq5du4ZJkybB09MTBw4cwIwZMxAbG4t///0X9vb2OHz4MIKDg5GcnIw1a9YAANzd3d+63enTp2PcuHHo2bMnxo0bh4yMDMyZMwd169bFsWPHpHXVOV6AV4lSjRo1kJmZiTFjxsDT0xOPHj3C7t278eTJE6U2HjNmDHx9fbFs2TKkpKRg5MiRaNmyJS5cuIDChQu/Nebu3bvj5MmTmDZtGsqXL4+nT5/i5MmTePTokbTMH3/8gR49eqB169ZYuXIlDAwM8NtvvyEgIAC7d++Wfhnv2rULbdu2Rb169bBhwwZkZWXhhx9+yJPY5dq+fTtOnTqFmTNnQiaTYeTIkWjevDkCAwNx/fp1LFy4EMnJyRg2bBjatWuH2NhY6ReaujHl0vYxlOvnn39G+fLl8cMPP8Dc3BwuLi5qn0vOnDmDxo0bo3z58li5ciWMjY2xePFi/PHHH3nquXLlCr766isMGTIEJiYmuHjxImbNmoVjx45h3759SstmZGSgVatW6Nu3L0aNGoWsrCwIIdCmTRscOnQIEyZMQI0aNXDw4EE0a9bsnfuYKzExEV27dsX333+PiRMnYuvWrRg9ejQcHBykSzwJCQnw8/ODiYkJFi1ahOLFi2PdunUYMGCA2vUAwP/+9z906NAB/fr1w/nz5zF+/HjExcXh6NGjMDAw0Ghbw4YNw+rVqzF16lRUrVoVqampOHfunNIx/janT5/G999/j1GjRsHW1hbLli1D79694ezsjHr16kntUrNmTRQqVAgTJkxAuXLlcPjwYUydOhU3b95EWFgYAGD9+vUIDg7GwIED8cMPP6BQoUK4evUq4uLitBIrAFy4cAFDhw4FAOzfvx8VKlRQu51y/yBR1auozvmCAIhPTFhYmAAgjhw5IjIzM8WzZ8/Erl27hJ2dnahXr57IzMyUlnVzcxNVq1ZVKhNCiBYtWgh7e3uRnZ0thBAiIiJCABARERFvrTcrK0s8f/5cmJiYiPnz50vlmzZteuu6gYGBonTp0tL0w4cPhaGhoRgzZozSch06dBC2trZSnOvWrRMAxObNm5WWi4mJEQDEr7/+qrKN6tSpIxo0aCBNOzs7ixEjRohChQqJqKgoIYQQa9asEQDE5cuXhRBCHD58WAAQP/74o9K24uPjhUKhECEhIW/dr3///VcAEIsWLVJad8aMGQKAmDhxolQ2ceJEAUDMnj1badng4GBhZGQkcnJypDITExMRGBiocl9zLV68WAAQGzduVCqfNWuWACDCw8OlMj8/P1GxYsV3bvP27duiSJEiYuDAgUrlz549E3Z2dqJDhw5vXfdtx0uvXr2EgYGBiIuLe+u6ucfjV1
99pVS+ceNGAUAcPnxYZdympqZiyJAhb52fmpoqrK2tRcuWLZXKs7OzReXKlUXNmjWlsho1aghHR0eRnp4ulT179kz
2024-03-26 12:20:03 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", *business_var)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-26 16:00:39 +01:00
"execution_count": 13,
"id": "f358fba3-f778-4414-bf55-c830be647ddd",
2024-03-26 12:20:03 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-03-26 16:00:39 +01:00
"'projet-bdc2324-team1/Output_marketing_personae_analysis/sport/segments_business_KPIs_sport.csv'"
2024-03-26 12:20:03 +01:00
]
},
2024-03-26 16:00:39 +01:00
"execution_count": 13,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-03-26 16:00:39 +01:00
"source": [
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"segments_business_KPIs_\" + activity\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".csv\"\n",
"\n",
"FILE_PATH_OUT_S3"
]
},
{
"cell_type": "code",
"execution_count": 178,
"id": "3eee7b59-f658-402d-95b2-fa051188fd10",
"metadata": {},
"outputs": [],
"source": [
"def save_file_s3_mp(File_name, type_of_activity):\n",
" image_buffer = io.BytesIO()\n",
" plt.savefig(image_buffer, format='png')\n",
" image_buffer.seek(0)\n",
" PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{type_of_activity}/\"\n",
" FILE_PATH_OUT_S3 = PATH + File_name + type_of_activity + '.png'\n",
" with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
" plt.close()"
]
},
2024-03-26 16:00:39 +01:00
{
"cell_type": "code",
"execution_count": 59,
"id": "1790cb81-3304-41f1-a371-d8c926d32906",
"metadata": {},
"outputs": [],
2024-03-26 12:20:03 +01:00
"source": [
"# save to Minio\n",
"\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"segments_business_KPIs_\" + activity\n",
2024-03-26 16:00:39 +01:00
"FILE_PATH_OUT_S3 = PATH + file_name + \".png\"\n",
2024-03-26 12:20:03 +01:00
"\n",
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")\n",
"\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png')\n",
"image_buffer.seek(0)\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
" s3_file.write(image_buffer.read())\n",
"plt.close()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 179,
"id": "7a42523d-f80f-488b-ad8f-39dd793cddd6",
"metadata": {},
"outputs": [
{
"ename": "ClientError",
"evalue": "An error occurred (InvalidAccessKeyId) when calling the PutObject operation: The Access Key Id you provided does not exist in our records.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mClientError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[179], line 8\u001b[0m\n\u001b[1;32m 3\u001b[0m activity \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msport\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5\u001b[0m hist_segment_business_KPIs(X_test_business_fig, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msegment\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msize\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnb_tickets\u001b[39m\u001b[38;5;124m\"\u001b[39m, \n\u001b[1;32m 6\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnb_purchases\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtotal_amount\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnb_campaigns\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 8\u001b[0m \u001b[43msave_file_s3_mp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mFile_name\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msegments_business_KPIs_\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtype_of_activity\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mactivity\u001b[49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[178], line 7\u001b[0m, in \u001b[0;36msave_file_s3_mp\u001b[0;34m(File_name, type_of_activity)\u001b[0m\n\u001b[1;32m 5\u001b[0m PATH \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprojet-bdc2324-team1/Output_marketing_personae_analysis/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtype_of_activity\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 6\u001b[0m FILE_PATH_OUT_S3 \u001b[38;5;241m=\u001b[39m PATH \u001b[38;5;241m+\u001b[39m File_name \u001b[38;5;241m+\u001b[39m type_of_activity \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.png\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 7\u001b[0m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mFILE_PATH_OUT_S3\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mwb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mas\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43ms3_file\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43ms3_file\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage_buffer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m plt\u001b[38;5;241m.\u001b[39mclose()\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1963\u001b[0m, in \u001b[0;36mAbstractBufferedFile.__exit__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1962\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__exit__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs):\n\u001b[0;32m-> 1963\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclose\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1930\u001b[0m, in \u001b[0;36mAbstractBufferedFile.close\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1928\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1929\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mforced:\n\u001b[0;32m-> 1930\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mflush\u001b[49m\u001b[43m(\u001b[49m\u001b[43mforce\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 1932\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1933\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfs\u001b[38;5;241m.\u001b[39minvalidate_cache(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpath)\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/fsspec/spec.py:1801\u001b[0m, in \u001b[0;36mAbstractBufferedFile.flush\u001b[0;34m(self, force)\u001b[0m\n\u001b[1;32m 1798\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclosed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 1799\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[0;32m-> 1801\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upload_chunk\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[1;32m 1802\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moffset \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuffer\u001b[38;5;241m.\u001b[39mseek(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m 1803\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuffer \u001b[38;5;241m=\u001b[39m io\u001b[38;5;241m.\u001b[39mBytesIO()\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:1252\u001b[0m, in \u001b[0;36mS3File._upload_chunk\u001b[0;34m(self, final)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparts\u001b[38;5;241m.\u001b[39mappend({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPartNumber\u001b[39m\u001b[38;5;124m'\u001b[39m: part, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mETag\u001b[39m\u001b[38;5;124m'\u001b[39m: out[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mETag\u001b[39m\u001b[38;5;124m'\u001b[39m]})\n\u001b[1;32m 1251\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mautocommit \u001b[38;5;129;01mand\u001b[39;00m final:\n\u001b[0;32m-> 1252\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcommit\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1253\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m final\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:1267\u001b[0m, in \u001b[0;36mS3File.commit\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1265\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuffer\u001b[38;5;241m.\u001b[39mseek(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 1266\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuffer\u001b[38;5;241m.\u001b[39mread()\n\u001b[0;32m-> 1267\u001b[0m write_result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_s3\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1268\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ms3\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mput_object\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1269\u001b[0m \u001b[43m \u001b[49m\u001b[43mKey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mBucket\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbucket\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mBody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 1270\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfs\u001b[38;5;241m.\u001b[39mversion_aware:\n\u001b[1;32m 1272\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mversion_id \u001b[38;5;241m=\u001b[39m 
write_result\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVersionId\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:1130\u001b[0m, in \u001b[0;36mS3File._call_s3\u001b[0;34m(self, method, *kwarglist, **kwargs)\u001b[0m\n\u001b[1;32m 1129\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_call_s3\u001b[39m(\u001b[38;5;28mself\u001b[39m, method, \u001b[38;5;241m*\u001b[39mkwarglist, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m-> 1130\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_s3\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ms3_additional_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwarglist\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1131\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/s3fs/core.py:200\u001b[0m, in \u001b[0;36mS3FileSystem._call_s3\u001b[0;34m(self, method, *akwarglist, **kwargs)\u001b[0m\n\u001b[1;32m 197\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCALL: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m - \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m - \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (method\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, akwarglist, kw2))\n\u001b[1;32m 198\u001b[0m additional_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_s3_method_kwargs(method, \u001b[38;5;241m*\u001b[39makwarglist,\n\u001b[1;32m 199\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43madditional_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/botocore/client.py:553\u001b[0m, in \u001b[0;36mClientCreator._create_api_method.<locals>._api_call\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 550\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpy_operation_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m() only accepts keyword arguments.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 551\u001b[0m )\n\u001b[1;32m 552\u001b[0m \u001b[38;5;66;03m# The \"self\" in this scope is referring to the BaseClient.\u001b[39;00m\n\u001b[0;32m--> 553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_api_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43moperation_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/mamba/lib/python3.11/site-packages/botocore/client.py:1009\u001b[0m, in \u001b[0;36mBaseClient._make_api_call\u001b[0;34m(self, operation_name, api_params)\u001b[0m\n\u001b[1;32m 1005\u001b[0m error_code \u001b[38;5;241m=\u001b[39m error_info\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mQueryErrorCode\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m error_info\u001b[38;5;241m.\u001b[39mget(\n\u001b[1;32m 1006\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCode\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1007\u001b[0m )\n\u001b[1;32m 1008\u001b[0m error_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mfrom_code(error_code)\n\u001b[0;32m-> 1009\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(parsed_response, operation_name)\n\u001b[1;32m 1010\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1011\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m parsed_response\n",
"\u001b[0;31mClientError\u001b[0m: An error occurred (InvalidAccessKeyId) when calling the PutObject operation: The Access Key Id you provided does not exist in our records."
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAHQCAYAAACx9C4fAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABv3UlEQVR4nO3deVyN6f8/8NdBnU77ghZClBJliyiUNcb6wdgpzGCyG7KTfZsxDDOMZbKMfSwzYywxKrJmyZZ9DSVrkbRevz/8ur+OcpzDOU54PR+PHtzXvVzv+zr3uXt33dd93zIhhAARERER5auQvgMgIiIiKsiYLBERERGpwGSJiIiISAUmS0REREQqMFkiIiIiUoHJEhEREZEKTJaIiIiIVGCyRERERKQCkyUiIiIiFT65ZGnFihWQyWTST5EiRWBvb49OnTrhypUr77XNyMhIyGQyREZGarxuXFwcQkNDcfPmzTzzgoKCUKZMmfeKqSD7kP0KDQ2FTCbDw4cP37ns9OnTsW3btveqR5XHjx+jU6dOKF68OGQyGdq0aaP1Oj5E7vH4559/6juUT56ujqGCTiaTITQ0VJrOPW/md57SNU2+87qo91P1tvPAixcv0KxZMxgYGGDVqlUA8v+9WLJkSfTs2RN379595zbp3T65ZClXWFgYDh8+jL1792LAgAH4+++/UadOHTx58uSjxhEXF4dJkyblexIaP348tm7d+lHj+Rg+1n7p6hfdlClTsHXrVvz00084fPgwZs+erfU6qGD4UpOlNzVv3hyHDx+Gvb29vkP5aL755hscPnxY32FoVXJyMpo0aYKIiAj8+eef6NGjh9L83N+Le/bswbfffot169ahbt26SE1N1VPEn48i+g7gfVWqVAleXl4AAH9/f2RnZ2PixInYtm0bevbsqefoXilXrpy+Q9CJT32/zp07h3LlyqFr1676DoUKuOzsbGRlZUEul+s7FCUvXryAsbGx2ssXK1YMxYoV02FEBU/JkiVRsmRJfYehNUlJSQgICMC1a9ewc+dO1K9fP88yr/9erF+/PrKzszFlyhRs27aN57sP9Mn2LL0p9wC5f/++Uvnx48fRqlUrWFtbw8jICFWrVsXGjRvfub3jx4+jU6dOKFOmDBQKBcqUKYPOnTvj1q1b0jIrVqzA119/DeDVgZnbBbpixQoAeS9XVa1aFXXr1s1TV3Z2NkqUKIG2bdtKZRkZGZg6dSrc3Nwgl8tRrFgx9OzZEw8ePFAZ97///guZTIaYmBipbPPmzZDJZGjevLnSsp6enmjXrp00LYTAr7/+iipVqkChUMDKygrt27fH9evXldbL7zLc06dP0bt3b1hbW8PU1BTNmzfH9evX81wOyHX//n107twZFhYWsLW1Ra9evZCcnCzNl8lkSE1NxcqVK6V29ff3V7nvjx8/RnBwMEqUKAFDQ0OULVsWY8eORXp6OgDg5s2bkMlk2Lt3Ly5cuCBt912XXzds2IDatWvDxMQEpqamCAgIwKlTp5SWUed4yXX37l306dMHjo6OMDQ0hIODA9q3b5/n2M3MzMTYsWPh4OAAc3NzNGrUCJcuXVIZKwA8ePBA2n7usePr64u9e/cqLbd37140bNgQ5ubmMDY2hq+vL/7777882/vrr7/g6ekJuVyOsmXLYv78+fle4pDJZBgwYADCwsLg6uoKhUIBLy8vHDlyBEIIzJkzB05OTjA1NUWDBg1w9erVPHWpE1Nu3efPn9fqMZR7fMyePRtTp06Fk5MT5HI5IiIiAKh/LomOjkbt2rVhZGSEEiVKYPz48Vi2bFmey2AbNmxAkyZNYG9vD4VCgQoVKmDUqFF5egGCgoJgamqKs2fPokmTJjAzM0PDhg0BACkpKfj2229hY2MDU1NTNG3aFJcvX84TU36X4fz9/VGpUiXExMSgbt26MDY2RtmyZTFz5kzk5OQorX/+/Hk0adIExsbGKFasGPr37y+da9QdvhAfH4
+2bdvC3NwcFhYW6NatW57z2dvOF2XKlEFQUJA0/eLFCwwfPhxOTk4wMjKCtbU1vLy8sG7dOmmZ/I7RMmXKoEWLFti1axeqVasGhUIBNzc3/P7773nqTExMRN++fVGyZEkYGhrCyckJkyZNQlZWltJyixYtQuXKlWFqagozMzO4ublhzJgxGsX6Lrdu3UKdOnVw584d7Nu3L99EKT+1atWS1n8bdc8XX7pPtmfpTTdu3AAAlC9fXiqLiIhA06ZN4e3tjcWLF8PCwgLr169Hx44d8eLFC6Uv35tu3rwJV1dXdOrUCdbW1khISMCiRYtQo0YNxMXFoWjRomjevDmmT5+OMWPG4JdffkG1atUAvL3npWfPnhg8eDCuXLkCFxcXqTw8PBz37t2TesRycnLQunVrHDhwACEhIfDx8cGtW7cwceJE+Pv74/jx41AoFPnW4efnBwMDA+zduxc1atQA8OoXkEKhQFRUFDIzM2FgYICkpCScO3cO3333nbRu3759sWLFCgwaNAizZs3C48ePMXnyZPj4+OD06dOwtbXNt86cnBy0bNkSx48fR2hoKKpVq4bDhw+jadOmb23fdu3aoWPHjujduzfOnj2L0aNHA4B00jp8+DAaNGiA+vXrY/z48QAAc3Pzt27v5cuXqF+/Pq5du4ZJkybB09MTBw4cwIwZMxAbG4t///0X9vb2OHz4MIKDg5GcnIw1a9YAANzd3d+63enTp2PcuHHo2bMnxo0bh4yMDMyZMwd169bFsWPHpHXVOV6AV4lSjRo1kJmZiTFjxsDT0xOPHj3C7t278eTJE6U2HjNmDHx9fbFs2TKkpKRg5MiRaNmyJS5cuIDChQu/Nebu3bvj5MmTmDZtGsqXL4+nT5/i5MmTePTokbTMH3/8gR49eqB169ZYuXIlDAwM8NtvvyEgIAC7d++Wfhnv2rULbdu2Rb169bBhwwZkZWXhhx9+yJPY5dq+fTtOnTqFmTNnQiaTYeTIkWjevDkCAwNx/fp1LFy4EMnJyRg2bBjatWuH2NhY6ReaujHl0vYxlOvnn39G+fLl8cMPP8Dc3BwuLi5qn0vOnDmDxo0bo3z58li5ciWMjY2xePFi/PHHH3nquXLlCr766isMGTIEJiYmuHjxImbNmoVjx45h3759SstmZGSgVatW6Nu3L0aNGoWsrCwIIdCmTRscOnQIEyZMQI0aNXDw4EE0a9bsnfuYKzExEV27dsX333+PiRMnYuvWrRg9ejQcHBykSzwJCQnw8/ODiYkJFi1ahOLFi2PdunUYMGCA2vUAwP/+9z906NAB/fr1w/nz5zF+/HjExcXh6NGjMDAw0Ghbw4YNw+rVqzF16lRUrVoVqampOHfunNIx/janT5/G999/j1GjRsHW1hbLli1D79694ezsjHr16kntUrNmTRQqVAgTJkxAuXLlcPjwYUydOhU3b95EWFgYAGD9+vUIDg7GwIED8cMPP6BQoUK4evUq4uLitBIrAFy4cAFDhw4FAOzfvx8VKlRQu51y/yBR1auozvmCAIhPTFhYmAAgjhw5IjIzM8WzZ8/Erl27hJ2dnahXr57IzMyUlnVzcxNVq1ZVKhNCiBYtWgh7e3uRnZ0thBAiIiJCABARERFvrTcrK0s8f/5cmJiYiPnz50vlmzZteuu6gYGBonTp0tL0w4cPhaGhoRgzZozSch06dBC2trZSnOvWrRMAxObNm5WWi4mJEQDEr7/+qrKN6tSpIxo0aCBNOzs7ixEjRohChQqJqKgoIYQQa9asEQDE5cuXhRBCHD58WAAQP/74o9K24uPjhUKhECEhIW/dr3///VcAEIsWLVJad8aMGQKAmDhxolQ2ceJEAUDMnj1badng4GBhZGQkcnJypDITExMRGBiocl9zLV68WAAQGzduVCqfNWuWACDCw8OlMj8/P1GxYsV3bvP27duiSJEiYuDAgUrlz549E3Z2dqJDhw5vXfdtx0uvXr2EgYGBiIuLe+u6ucfjV1
99pVS+ceNGAUAcPnxYZdympqZiyJAhb52fmpoqrK2tRcuWLZXKs7OzReXKlUXNmjWlsho1aghHR0eRnp4ulT179kz
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# with function\n",
"\n",
"activity = \"sport\"\n",
"\n",
"hist_segment_business_KPIs(X_test_business_fig, \"segment\", \"size\", \"nb_tickets\", \n",
" \"nb_purchases\", \"total_amount\", \"nb_campaigns\")\n",
"\n",
"save_file_s3_mp(File_name = \"segments_business_KPIs_\", type_of_activity = activity)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "markdown",
"id": "53d24165-6b98-4b66-9ad8-7514564689d8",
"metadata": {},
"source": [
"## 2. Spider plot summarizing sociodemographic characteristics and purchasing behaviour"
]
},
{
"cell_type": "code",
"execution_count": 165,
"id": "beb31e4b-a01b-4312-879a-fe5757ea061f",
"metadata": {},
"outputs": [],
"source": [
"def df_segment_mp(df, segment, gender_female, gender_male, gender_other, country_fr) :\n",
" df_mp = df.groupby(segment)[[gender_female, gender_male, gender_other, country_fr]].mean().reset_index()\n",
" df_mp.insert(3, \"share_known_gender\", X_test_segment_mp[gender_female]+X_test_segment_mp[gender_male])\n",
" df_mp.insert(4, \"share_of_women\", X_test_segment_mp[gender_female]/(X_test_segment_mp[\"share_known_gender\"]))\n",
" return df_mp"
]
},
{
"cell_type": "code",
"execution_count": 159,
2024-03-26 12:20:03 +01:00
"id": "267ebaee-eaef-4720-8ca9-e40c0cf125df",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>share_known_gender</th>\n",
" <th>share_of_women</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.222670</td>\n",
" <td>0.419974</td>\n",
" <td>0.642644</td>\n",
" <td>0.346491</td>\n",
" <td>0.357356</td>\n",
" <td>0.477963</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.274449</td>\n",
" <td>0.556486</td>\n",
" <td>0.830935</td>\n",
" <td>0.330290</td>\n",
" <td>0.169065</td>\n",
" <td>0.763818</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.272998</td>\n",
" <td>0.541312</td>\n",
" <td>0.814310</td>\n",
" <td>0.335251</td>\n",
" <td>0.185690</td>\n",
" <td>0.661414</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.206547</td>\n",
" <td>0.622871</td>\n",
" <td>0.829418</td>\n",
" <td>0.249027</td>\n",
" <td>0.170582</td>\n",
" <td>0.657373</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment gender_female gender_male share_known_gender share_of_women \\\n",
"0 1 0.222670 0.419974 0.642644 0.346491 \n",
"1 2 0.274449 0.556486 0.830935 0.330290 \n",
"2 3 0.272998 0.541312 0.814310 0.335251 \n",
"3 4 0.206547 0.622871 0.829418 0.249027 \n",
2024-03-26 12:20:03 +01:00
"\n",
" gender_other country_fr \n",
"0 0.357356 0.477963 \n",
"1 0.169065 0.763818 \n",
"2 0.185690 0.661414 \n",
"3 0.170582 0.657373 "
2024-03-26 12:20:03 +01:00
]
},
"execution_count": 159,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# description of marketing personae\n",
"\n",
"X_test_segment_mp = X_test_segment.groupby(\"segment\")[['gender_female', 'gender_male', 'gender_other', 'country_fr']].mean().reset_index()\n",
"X_test_segment_mp.insert(3, \"share_known_gender\", X_test_segment_mp[\"gender_female\"]+X_test_segment_mp[\"gender_male\"])\n",
"X_test_segment_mp.insert(4, \"share_of_women\", X_test_segment_mp[\"gender_female\"]/(X_test_segment_mp[\"share_known_gender\"]))\n",
"X_test_segment_mp"
]
},
{
"cell_type": "code",
2024-03-27 16:06:31 +01:00
"execution_count": 183,
"id": "5f908232-b0fe-4707-a8c5-5cadb7d8653f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>gender_female</th>\n",
" <th>gender_male</th>\n",
" <th>share_known_gender</th>\n",
" <th>share_of_women</th>\n",
" <th>gender_other</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.222670</td>\n",
" <td>0.419974</td>\n",
" <td>0.642644</td>\n",
" <td>0.346491</td>\n",
" <td>0.357356</td>\n",
" <td>0.477963</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.274449</td>\n",
" <td>0.556486</td>\n",
" <td>0.830935</td>\n",
" <td>0.330290</td>\n",
" <td>0.169065</td>\n",
" <td>0.763818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.272998</td>\n",
" <td>0.541312</td>\n",
" <td>0.814310</td>\n",
" <td>0.335251</td>\n",
" <td>0.185690</td>\n",
" <td>0.661414</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.206547</td>\n",
" <td>0.622871</td>\n",
" <td>0.829418</td>\n",
" <td>0.249027</td>\n",
" <td>0.170582</td>\n",
" <td>0.657373</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment gender_female gender_male share_known_gender share_of_women \\\n",
"0 1 0.222670 0.419974 0.642644 0.346491 \n",
"1 2 0.274449 0.556486 0.830935 0.330290 \n",
"2 3 0.272998 0.541312 0.814310 0.335251 \n",
"3 4 0.206547 0.622871 0.829418 0.249027 \n",
"\n",
" gender_other country_fr \n",
"0 0.357356 0.477963 \n",
"1 0.169065 0.763818 \n",
"2 0.185690 0.661414 \n",
"3 0.170582 0.657373 "
]
},
2024-03-27 16:06:31 +01:00
"execution_count": 183,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_mp = df_segment_mp(X_test_segment, \"segment\", \"gender_female\", \n",
" \"gender_male\", \"gender_other\", \"country_fr\")\n",
"X_test_segment_mp"
]
},
{
"cell_type": "code",
"execution_count": 150,
2024-03-26 12:20:03 +01:00
"id": "910876fe-e6df-4f8d-9978-5d6fdd893ac0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.247851</td>\n",
" <td>0.136462</td>\n",
" <td>0.595110</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.630889</td>\n",
" <td>0.233739</td>\n",
" <td>0.411281</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.679076</td>\n",
" <td>0.297721</td>\n",
" <td>0.045523</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.588024</td>\n",
" <td>0.366443</td>\n",
" <td>0.134395</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment share_tickets_internet share_campaigns_opened opt_in\n",
"0 1 0.247851 0.136462 0.595110\n",
"1 2 0.630889 0.233739 0.411281\n",
"2 3 0.679076 0.297721 0.045523\n",
"3 4 0.588024 0.366443 0.134395"
2024-03-26 12:20:03 +01:00
]
},
"execution_count": 150,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# purchasing behaviour\n",
"\n",
"X_test_segment[\"share_tickets_internet\"] = X_test_segment[\"nb_tickets_internet\"]/X_test_segment[\"nb_tickets\"]\n",
"X_test_segment[\"share_campaigns_opened\"] = X_test_segment[\"nb_campaigns_opened\"]/X_test_segment[\"nb_campaigns\"]\n",
"X_test_segment_pb = X_test_segment.groupby(\"segment\")[[\"share_tickets_internet\", \"share_campaigns_opened\", \"opt_in\"]].mean().reset_index()\n",
"X_test_segment_pb"
]
},
{
"cell_type": "code",
"execution_count": 168,
"id": "8d3ab073-040c-4480-bd44-33fc88626707",
"metadata": {},
"outputs": [],
"source": [
"def df_segment_pb (df, segment, nb_tickets_internet, nb_tickets, nb_campaigns_opened, nb_campaigns, opt_in) :\n",
" df_used = df\n",
" df_used[\"share_tickets_internet\"] = df_used[nb_tickets_internet]/df_used[nb_tickets]\n",
" df_used[\"share_campaigns_opened\"] = df_used[nb_campaigns_opened]/df_used[nb_campaigns]\n",
" df_pb = df_used.groupby(segment)[[\"share_tickets_internet\", \"share_campaigns_opened\", opt_in]].mean().reset_index()\n",
" return df_pb"
]
},
{
"cell_type": "code",
"execution_count": 170,
"id": "0cb8f47a-bf0f-4285-b2ff-d90de394c787",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.247851</td>\n",
" <td>0.136462</td>\n",
" <td>0.595110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.630889</td>\n",
" <td>0.233739</td>\n",
" <td>0.411281</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.679076</td>\n",
" <td>0.297721</td>\n",
" <td>0.045523</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.588024</td>\n",
" <td>0.366443</td>\n",
" <td>0.134395</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment share_tickets_internet share_campaigns_opened opt_in\n",
"0 1 0.247851 0.136462 0.595110\n",
"1 2 0.630889 0.233739 0.411281\n",
"2 3 0.679076 0.297721 0.045523\n",
"3 4 0.588024 0.366443 0.134395"
]
},
"execution_count": 170,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test_segment_pb = df_segment_pb(X_test_segment, \"segment\", \"nb_tickets_internet\", \"nb_tickets\", \n",
" \"nb_campaigns_opened\", \"nb_campaigns\", \"opt_in\")\n",
"X_test_segment_pb"
]
},
{
"cell_type": "code",
"execution_count": 151,
2024-03-26 12:20:03 +01:00
"id": "ba2884e3-004a-4554-ab82-6d477dcc4869",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>segment</th>\n",
" <th>share_tickets_internet</th>\n",
" <th>share_campaigns_opened</th>\n",
" <th>opt_in</th>\n",
" <th>share_known_gender</th>\n",
" <th>share_of_women</th>\n",
" <th>country_fr</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.247851</td>\n",
" <td>0.136462</td>\n",
" <td>0.595110</td>\n",
" <td>0.642644</td>\n",
" <td>0.346491</td>\n",
" <td>0.477963</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.630889</td>\n",
" <td>0.233739</td>\n",
" <td>0.411281</td>\n",
" <td>0.830935</td>\n",
" <td>0.330290</td>\n",
" <td>0.763818</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.679076</td>\n",
" <td>0.297721</td>\n",
" <td>0.045523</td>\n",
" <td>0.814310</td>\n",
" <td>0.335251</td>\n",
" <td>0.661414</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.588024</td>\n",
" <td>0.366443</td>\n",
" <td>0.134395</td>\n",
" <td>0.829418</td>\n",
" <td>0.249027</td>\n",
" <td>0.657373</td>\n",
2024-03-26 12:20:03 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" segment share_tickets_internet share_campaigns_opened opt_in \\\n",
"0 1 0.247851 0.136462 0.595110 \n",
"1 2 0.630889 0.233739 0.411281 \n",
"2 3 0.679076 0.297721 0.045523 \n",
"3 4 0.588024 0.366443 0.134395 \n",
2024-03-26 12:20:03 +01:00
"\n",
" share_known_gender share_of_women country_fr \n",
"0 0.642644 0.346491 0.477963 \n",
"1 0.830935 0.330290 0.763818 \n",
"2 0.814310 0.335251 0.661414 \n",
"3 0.829418 0.249027 0.657373 "
2024-03-26 12:20:03 +01:00
]
},
"execution_count": 151,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# append the gender and country shares of X_test_segment_mp to the per-segment table X_test_segment_pb\n",
"# NOTE(review): pd.concat(axis=1) aligns the rows on the index, not on the 'segment' column;\n",
"# this assumes both frames carry the same index in the same order - TODO confirm\n",
"X_test_segment_caract = pd.concat([X_test_segment_pb, X_test_segment_mp[['share_known_gender', 'share_of_women', 'country_fr']]], axis=1)\n",
"X_test_segment_caract"
]
},
{
"cell_type": "code",
"execution_count": 152,
2024-03-26 12:20:03 +01:00
"id": "23a37e9b-bb29-4122-85cb-cc15cc344ee2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"share_tickets_internet 0.679076\n",
"share_campaigns_opened 0.366443\n",
"opt_in 0.595110\n",
"share_known_gender 0.830935\n",
"share_of_women 0.346491\n",
"country_fr 0.763818\n",
2024-03-26 12:20:03 +01:00
"dtype: float64"
]
},
"execution_count": 152,
2024-03-26 12:20:03 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# largest value of each characteristic over the rows (every column from share_tickets_internet onwards)\n",
"X_test_segment_caract.loc[:,\"share_tickets_internet\":].max()"
]
},
{
"cell_type": "code",
"execution_count": 153,
2024-03-26 12:20:03 +01:00
"id": "0809e2ae-3487-4b24-8f60-741c683cb9af",
"metadata": {},
"outputs": [],
"source": [
"# definition of the associated function - KEEP THIS !!!\n",
"\n",
"def radar_mp_plot(df, categories, index) :\n",
"    \"\"\"Draw the radar (spider) chart of one row of df.\n",
"\n",
"    df         : DataFrame holding the columns listed in categories\n",
"    categories : list of the column names to plot, one radar axis each\n",
"    index      : row label of the segment in df; the title shows index+1\n",
"\n",
"    A new 6x6 polar figure is created and left open (neither shown nor closed),\n",
"    so the caller can still save it with plt.savefig.\n",
"    \"\"\"\n",
"    # raw values of the segment, printed in parentheses next to each axis name\n",
"    segment_row = df.loc[index, categories]\n",
"    raw_values = list(segment_row)\n",
"\n",
"    # each value is divided by the maximum of its column over all rows of df, so the\n",
"    # row holding the maximum of a characteristic gets 1 for it, whatever the raw value\n",
"    relative = list(segment_row / df[categories].max())\n",
"\n",
"    # plotted radii: the relative values multiplied by the largest of them\n",
"    largest = max(relative)\n",
"    scaled = [largest * value for value in relative]\n",
"\n",
"    # one angle per characteristic, evenly spaced around the circle\n",
"    theta = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()\n",
"    theta_closed = theta + theta[:1]\n",
"\n",
"    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"    # fully transparent outline of `relative`: drawn first, only to adjust the\n",
"    # radius of the circle, which is based on max(relative)\n",
"    ax.plot(theta_closed, relative + relative[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"    ax.plot(theta_closed, scaled + scaled[:1], color='black', alpha = 0.5, linewidth=1.2)\n",
"    ax.fill(theta, scaled, color='orange', alpha=0.4)\n",
"\n",
"    # no radial tick labels; each angular tick shows the characteristic and its raw value in %\n",
"    ax.set_yticklabels([])\n",
"    ax.set_xticks(theta)\n",
"    tick_labels = [name.replace(\"_\",\" \") + f\"\\n({round(100 * raw,2)}%)\" for name, raw in zip(categories, raw_values)]\n",
"    ax.set_xticklabels(tick_labels, color=\"black\")\n",
"\n",
"    ax.spines['polar'].set_visible(False)\n",
"\n",
"    plt.title(f'Characteristics of the segment {index+1}\\n')"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
"execution_count": 157,
2024-03-26 12:20:03 +01:00
"id": "56cb026b-857f-42eb-baed-0ebdf5aee447",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApkAAAIICAYAAAAllfW0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1hb5/k38K/2QAMkIfbe22CMDd6xncSJs1fTNHVG21/qtE3TpE0z2qRpmtW0SZo3q5lNnL2XEw+8bQzYBrM3iCmGQBNJaDzvHy6qMWAzBELi+VyXLhvpnPPcOuhwbj2TQQghoCiKoiiKoig3Yno6AIqiKIqiKMr30CSToiiKoiiKcjuaZFIURVEURVFuR5NMiqIoiqIoyu1okklRFEVRFEW5HU0yKYqiKIqiKLejSSZFURRFURTldjTJpCiKoiiKotyOJpkURVEURVGU29Ekk6ImUFFRgVtvvRUxMTHg8/kQiUTIycnB008/jcHBQdd20dHR2LJliwcjnbmXXnoJb7/99pwdn8Fg4JFHHpnWPjt27Jh0n+joaNxyyy2zjsvdBgcH8aMf/QhKpRIMBgNXXnnlpNtOds73798PBoOBTz/9dO4CpcZ4//338dxzz81oX0II1qxZAwaDgV/96lfuDYyifAjb0wFQ1ELz2muvYdu2bUhKSsLvf/97pKamwmaz4fjx43jllVdQVFSEL774wtNhztpLL70EhUIxZ4lbUVERwsPDp7XPjh078OKLL06YaH7xxReQSCRuis59/vrXv+KLL77Am2++ibi4OMhkskm3netzTk3d+++/j6qqKvz2t7+d9r4vvvgimpqa3B8URfkYmmRS1BmKiorwy1/+Eps2bcKXX34JHo/nem3Tpk2455578MMPP8xrTA6HA3a7fUwsCxUhBBaLBQKBACtWrHDrsbOzs916PHepqqpCXFwcbrrpJk+HQs2DtrY23H///XjnnXdw9dVXezocilrQaHM5RZ3h8ccfB4PBwL///e8Jkzoul4vLL7983PM//PADcnJyIBAIkJycjDfffHPM6/39/di2bRtSU1MhEomgVCpxwQUX4NChQ2O2a2trA4PBwNNPP43HHnsMMTEx4PF42LdvHywWC+655x4sWbIEUqkUMpkM+fn5+Oqrr8bF43Q68cILL2DJkiUQCATw9/fHihUr8PXXXwM43fRcXV2NAwcOgMFggMFgIDo62rW/Xq/Hvffei5iYGHC5XISFheG3v/0tTCbTmHJGmwtfeeUVpKSkgMfj4T//+Y/rtTNrJIeHh13H5PP5kMlkyM3NxQcffAAAuOWWW/Diiy+69h19tLW1uWI+uwZQq9XinnvuQWxsLHg8HpRKJS655BLU1dW5tnn55ZeRlZUFkUgEsViM5ORkPPDAA+PO2dkGBwexbds2hIWFgcvlIjY2Fg8++CCsVuuY39WePXtQW1vrinf//v0THu985xwAbDYbHnzwQYSGhkIikWDjxo2or68fd6w9e/Zgw4YNkEgkEAqFWLlyJQoLC8/7npxOJx577DEkJSW5PheZmZl4/vnnx2zX2NiIH//4x1AqleDxeEhJSXH9bs5UXV2NCy+8EEKhEIGBgbjzzjvx3XffjTsP69atQ3p6OoqKilBQUACBQIDo6Gi89dZbAIDvvvsOOTk5EAqFyMjImPCL3FRiGu128MEHH5zzPK5btw7fffcdVCrVmM/aVPziF7/Apk2bcNVVV01pe4pazGhNJkX9l8PhwN69e7F06VJERERMeb9Tp07hnnvuwR//+EcEBQXh9ddfx+233474+HisWbMGAFz9OB9++GEEBwfDaDTiiy++wLp161BYWIh169aNOea//vUvJCYm4plnnoFEIkFCQgKsVisGBwdx7733IiwsDCMjI9izZw+uvvpqvPXWW/jpT3/q2v+WW27B9u3bcfvtt+PRRx8Fl8vFyZMnXQnbF198gWuvvRZSqRQvvfQSALiS6uHhYa
xduxadnZ144IEHkJmZierqavz5z39GZWUl9uzZM+aG/OWXX+LQoUP485//jODgYCiVygnP0+9+9zu8++67eOyxx5CdnQ2TyYSqqipoNBoAwJ/+9CeYTCZ8+umnKCoqcu0XEhIy4fEMBgNWrVqFtrY23HfffVi+fDmMRiMOHjyInp4eJCcn48MPP8S2bdvw61//Gs888wyYTCaamppQU1Nzzt+pxWLB+vXr0dzcjL/85S/IzMzEoUOH8MQTT6C8vBzfffcdQkJCUFRUhG3btkGn0+G9994DAKSmpk54zHOd81EPPPAAVq5ciddffx16vR733XcfLrvsMtTW1oLFYgEAtm/fjp/+9Ke44oor8J///AccDgevvvoqLrroIuzcuRMbNmyY9H09/fTTeOSRR/DQQw9hzZo1sNlsqKurg1ardW1TU1ODgoICREZG4h//+AeCg4Oxc+dO/OY3v8HAwAAefvhhAEBPTw/Wrl0LPz8/vPzyy1Aqlfjggw8m7aOoVqtx66234g9/+APCw8Pxwgsv4LbbbkNHRwc+/fRTPPDAA5BKpXj00Udx5ZVXoqWlBaGhodOKaarn8aWXXsIvfvELNDc3T6vry+uvv46SkpLzfn4oivovQlEUIYQQtVpNAJAf/ehHU94nKiqK8Pl8olKpXM+ZzWYik8nI//3f/026n91uJzabjWzYsIFcddVVrudbW1sJABIXF0dGRkbOWfboMW6//XaSnZ3tev7gwYMEAHnwwQfPuX9aWhpZu3btuOefeOIJwmQySWlp6ZjnP/30UwKA7Nixw/UcACKVSsng4OC44wAgDz/8sOvn9PR0cuWVV54zpjvvvJNM9mcpKiqKbN261fXzo48+SgCQ3bt3T3q8X/3qV8Tf3/+cZU7klVdeIQDIxx9/POb5p556igAgu3btcj23du1akpaWNqXjTnbO9+3bRwCQSy65ZMzzH3/8MQFAioqKCCGEmEwmIpPJyGWXXTZmO4fDQbKyskheXt45y9+yZQtZsmTJObe56KKLSHh4ONHpdGOe/9WvfkX4fL7rd/373/+eMBgMUl1dPW5/AGTfvn2u59auXUsAkOPHj7ue02g0hMViEYFAQLq6ulzPl5eXEwDkX//617Rjmup5JISQSy+9lERFRZ3zXJyps7OTSKVS8uqrr7qeA0DuvPPOKR+DohYb2lxOUbO0ZMkSREZGun7m8/lITEyESqUas90rr7yCnJwc8Pl8sNlscDgcFBYWora2dtwxL7/8cnA4nHHPf/LJJ1i5ciVEIpHrGG+88caYY3z//fcAgDvvvHNG7+fbb79Feno6lixZArvd7npcdNFFEzYHX3DBBQgICDjvcfPy8vD999/jj3/8I/bv3w+z2Tyj+EZ9//33SExMxMaNG89ZplarxY033oivvvoKAwMDUzr23r174efnh2uvvXbM86PN9VNpmp6Js7tiZGZmAoDrs3T06FEMDg5i69atY343TqcTF198MUpLS8d1aThTXl4eTp06hW3btmHnzp3Q6/VjXrdYLCgsLMRVV10FoVA4poxLLrkEFosFx44dAwAcOHAA6enp42pub7zxxgnLDgkJwdKlS10/y2QyKJVKLFmyxFVjCQApKSlj3vN0YprqeZyJO+64A1lZWfj5z38+42NQ1GJDk0yK+i+FQgGhUIjW1tZp7SeXy8c9x+PxxiRR//znP/HLX/4Sy5cvx2effYZjx46htLQUF1988YTJ1kRNxJ9//jmuv/56hIWFYfv27SgqKkJpaSluu+02WCwW13b9/f1gsVgIDg6e1vsY1dvbi4qKCnA4nDEPsVgMQsi4RG2y5uyz/etf/8J9992HL7/8EuvXr4dMJsOVV16JxsbGGcXZ399/3tHrN998M958802oVCpcc801UCqVWL58OXbv3n3O/TQaDYKDg8f101MqlWCz2a4mfnc7+7M02pw++hnp7e0FAFx77bXjfj9PPfUUCCFjptg62/33349nnnkGx44dw+
bNmyGXy7FhwwYcP34cwOn3bbfb8cILL4w7/iWXXAIArt+/RqNBUFDQuDImeg7AhKPuuVzuuOe5XC4AuD7T04lp1Pn
2024-03-26 12:20:03 +01:00
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# radar chart of a single segment: index=3 is the row of segment 4 in X_test_segment_caract\n",
"categories = list(X_test_segment_caract.drop(\"segment\", axis=1).columns)\n",
"#for i in range(4) :\n",
"# radar_mp_plot(df=X_test_segment_caract, categories=categories, index=i)\n",
"radar_mp_plot(df=X_test_segment_caract, categories=categories, index=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b3c4bac-396e-4117-a7d9-f39a3d8f95b4",
"metadata": {},
"outputs": [],
"source": [
"# export to MinIo\n",
"\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"# row label (in X_test_segment_caract) of the segment to export; assigned here so that\n",
"# the cell runs on a fresh kernel and the file name always matches the plotted segment\n",
"# (the chart title shows index+1)\n",
"index = 3\n",
"\n",
"# the uploaded bytes are a PNG image, hence the .png extension\n",
"file_name = \"spider_chart_\" + activity + \"_sgt_\" + str(index)\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".png\"\n",
"\n",
"\n",
"radar_mp_plot(df=X_test_segment_caract, categories=categories, index=index)\n",
"\n",
"# render the current figure as PNG in memory, then upload the bytes to S3\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png')\n",
"image_buffer.seek(0)\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
"    s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "code",
"execution_count": 740,
"id": "276de9a5-d506-4c11-a7c2-a23ebbc59fe5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'projet-bdc2324-team1/Output_marketing_personae_analysis/sport/spider_chart_sport_sgt_3.csv'"
]
},
"execution_count": 740,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NOTE(review): debugging cell - it only rebuilds and displays an S3 key for the single-segment chart.\n",
"# `index` is not assigned in this cell; it has to come from an earlier cell - TODO confirm.\n",
"# The key ends in '.csv' although the chart is exported as PNG bytes (plt.savefig(..., format='png')).\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"spider_chart_\" + activity + \"_sgt_\" + str(index)\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".csv\"\n",
"FILE_PATH_OUT_S3"
]
},
{
"cell_type": "code",
"execution_count": 154,
2024-03-26 12:20:03 +01:00
"id": "80e47dbc-3efd-4857-8055-876b308cbcb5",
"metadata": {},
"outputs": [],
"source": [
"# general function to have the 4 radars in one plot\n",
"\n",
"def radar_mp_plot_all(df, categories) :\n",
"    \"\"\"Draw one radar (spider) chart per row of df on a two-column grid of polar axes.\n",
"\n",
"    df         : DataFrame with one row per segment, holding the columns listed in categories\n",
"    categories : list of the column names to plot, one radar axis each\n",
"\n",
"    Only the df passed in is read (no notebook global). Rows are taken by position and\n",
"    titled 'segment 1', 'segment 2', ... in that order. The grid has as many rows as\n",
"    needed: 2 x 2 on a 25 x 20 inch figure for four segments. The figure is left open\n",
"    (neither shown nor closed), so the caller can still save it with plt.savefig.\n",
"    \"\"\"\n",
"    nb_segments = df.shape[0]\n",
"    nb_cols = 2\n",
"    # ceiling division; at least one row so that the grid is never empty\n",
"    nb_rows = max(1, -(-nb_segments // nb_cols))\n",
"\n",
"    # Initialize graphic: 10 inches of height per row of subplots;\n",
"    # squeeze=False keeps `ax` two-dimensional even when there is a single row\n",
"    fig, ax = plt.subplots(nb_rows, nb_cols, figsize=(25, 10 * nb_rows), subplot_kw=dict(polar=True), squeeze=False)\n",
"\n",
"    # computed once: they are the same for every segment\n",
"    max_values = df[categories].max()\n",
"    num_categories = len(categories)\n",
"    angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"    for index in range(nb_segments) :\n",
"        row = index // nb_cols   # integer division gives the row number\n",
"        col = index % nb_cols\n",
"\n",
"        # the loop runs over positions, so the row is read by position as well\n",
"        segment_values = df[categories].iloc[index]\n",
"\n",
"        # true values are used to print the true value in parenthesis\n",
"        tvalues = list(segment_values)\n",
"\n",
"        # values are true values / max among the segments, which puts them in relation\n",
"        # with the other segments: a segment holding the maximum of a characteristic\n",
"        # gets 1 for it, even if the true value is not equal to 1\n",
"        values = list(segment_values / max_values)\n",
"\n",
"        # plotted radii: the relative values multiplied by the largest of them\n",
"        values_normalized = [ max(values) * elt for elt in values]\n",
"\n",
"        # we have to draw first a transparent line (alpha=0) of values to adjust the radius of the circle\n",
"        # which is based on max(value)\n",
"        ax[row, col].plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"        ax[row, col].plot(angles + angles[:1], values_normalized + values_normalized[:1], color='black', alpha = 0.5, linewidth=1.2)\n",
"\n",
"        # fill the sector\n",
"        ax[row, col].fill(angles, values_normalized, color='orange', alpha=0.4, label = index)\n",
"\n",
"        # labels\n",
"        ax[row, col].set_yticklabels([])\n",
"        ax[row, col].set_xticks(angles)\n",
"        ticks = [categories[i].replace(\"_\",\" \") + f\"\\n({round(100 * tvalues[i],2)}%)\" for i in range(len(categories))]\n",
"        ax[row, col].set_xticklabels(ticks, color=\"black\", size = 20)\n",
"\n",
"        ax[row, col].spines['polar'].set_visible(False)\n",
"\n",
"        ax[row, col].set_title(f'Characteristics of the segment {index+1}\\n', size = 24)\n",
"\n",
"    # hide the cells of the grid that received no segment (odd number of segments, or none)\n",
"    for unused in range(nb_segments, nb_rows * nb_cols) :\n",
"        ax[unused // nb_cols, unused % nb_cols].set_visible(False)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
"execution_count": 156,
2024-03-26 12:20:03 +01:00
"id": "edf76688-1b7e-469e-873f-4884d551be66",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAACDoAAAZgCAYAAACVvbXGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVf828HvTNr2QSjqhJfTQQzH0piBFKSIEUAERuwKiP0AfFcWGogj4KEUBEZDeEiAJNYQACQklgRQglVTS+3n/4M08WbK72TSSwP25rr0yu3PmnDOzM2dnvjlzRiaEECAiIiIiIiIiIiIiIiIiIiJqBrQauwJEREREREREREREREREREREmmJHByIiIiIiIiIiIiIiIiIiImo22NGBiIiIiIiIiIiIiIiIiIiImg12dCAiIiIiIiIiIiIiIiIiIqJmgx0diIiIiIiIiIiIiIiIiIiIqNlgRwciIiIiIiIiIiIiIiIiIiJqNtjRgYiIiIiIiIiIiIiIiIiIiJoNdnQgIiIiIiIiIiIiIiIiIiKiZoMdHYiIiIiIiIiIiIiIiIiIiKjZYEcHIqoXgwYNgkwmg0wmQ0BAQGNXh5qBTZs2SfvMrFmzGrs6VXCfrp2ysjL8/vvvGDFiBGxtbaGnp/fYv+emvm8REREREVHd8HqNaqqpXydyn64dxiCIiIiebjqNXQEialwPHjzAkSNH4Ofnh0uXLiE1NRVpaWnQ09ODhYUF2rdvj169emHs2LHw8vJq7OoSURNWXFyM0aNH4+TJk41dFSKqB2lpaQgJCcHFixdx8eJFhISEICkpSZrv7++PQYMGNV4FiYiIqNlhDIKI6gtjEERPjuLiYpw6dQonTpxASEgIbt68ibS0NACApaUlOnfujGHDhmH27Nlo0aJFI9eWiJoSdnQgekrl5+dj9erV+Pbbb5GZmVllfnFxMXJzc3Hv3j0cP34cK1euRLt27bBixQpMnToVMpmsEWpNj5o1axY2b94MANi4cSN7jqswaNAgBAYGAuA/5hrSd999pxBg8Pb2Rps2baCvrw8A6Nu3b43zDAgIwODBg6X8eGcL0UOVf4eFEPWef79+/XD+/Pl6z5eIiIieToxBPBkYg9AMYxCPB2MQRI9PQ8YgFi1ahN9++w1ZWVlK5yckJCAhIQFHjx7F8uXL8e2332L+/Pn1Wgciar7Y0YHoKXT37l2MHTsWV69eVfjc2dkZXbp0gbW1NcrKypCcnIywsDCkpKQAAKKiovDSSy/h3r17WLRoUWNUnYiasIqAV8X0zJkzG7E2RFQXiYmJjV0FIiIiekIwBkFEDYExCKInw/79+xU6ORgbG6N3795wcHCAnp4ebt26hfPnz6OkpAR5eXl4/fXXcefOHaxcubLxKk1ETQY7OhA9ZeLi4uDl5YXk5GQAD3tjTps2DUuXLkXHjh2rpBdCICQkBGvWrMHWrVtRXl6O/Pz8x11tegLNmjWrSd/9wV77NZOfn4/IyEgAgJ6eHl5++eVGrhER1ZWuri46deqEXr16Sa9u3bo1drWIiIioGWEMgpoKxiCeLIxBED1ZdHR0MGnSJLzyyisYPHgwdHQU/3V59+5dzJkzBydOnAAAfPXVV3jmmWcwevToxqguETUh7OhA9BQpLi7Giy++KAUY9PX1sX37dowfP17lMjKZDL169cKWLVuwaNEiTJs27THVloiak8rDz9ra2kJLS6sRa0NEdbV//360a9dOGvaViIiIqKYYgyCihsIYBNGTY+rUqZgxYwZat26tMo2zszMOHTqEgQMH4uLFiwCAzz77jB0diAg8AyB6iqxatQohISHS+82bN6sNMDyqU6dOCAoKwvDhwxugdkTUnJWUlEjTDDAQNX9dunRhJwciIiKqE8YgiKihMAZB9ORYsWKF2k4OFeRyOT799FPp/YULF5Cent6QVSOiZoBnAURPiYKCAvz000/S+4kTJ2Ly5Mk1zsfIyAj9+/fXKG
1GRga+/vpr9OrVC1ZWVjAwMICbmxteeeUVREREaFzvvXv34q233sKAAQNga2sLPT09GBsbw9XVFRMnTsQff/yB4uLiavMKCAiATCaDTCbDoEGDpM8PHz6MadOmoW3btjA2NoZMJsPq1asVli0pKcGxY8ewaNEiDB48GPb29tDX14eBgQEcHR0xZswY/Pjjj8jNzdVovSpLSUnBqlWrMHz4cDg7O8PAwAAGBgZwdnbG6NGjsWrVKsTFxSks4+rqCplMpvA8wtmzZ0vrV/m1YsUKlWWXlJTgzz//xOTJk+Hm5gYTExMYGRmhVatWmDZtGvbs2QMhhNr613a7btq0SVquuuEjL168iIULF6J79+6wsLCArq4uWrRogXbt2mHAgAF47733cOzYMRQUFCgsV5F/YGCg9NngwYOVbqdNmzYpLDto0CBpnqZDSB45cgTz5s1Dp06dYGlpCV1dXZibm6N79+6YN28e9u/fj9LS0npfz7oQQmDnzp2YNm0aWrduDWNjYxgbG6N169Z46aWXsGvXLrX7QMU2atWqlfTZnTt3qmzfyvuGJlasWAGZTIbBgwdLnwUGBir97lxdXTXKc8+ePRg7diycnZ0hl8thY2ODESNG4K+//qp2P3/UjRs3sHTpUvTu3Vtql6ytrdGnTx8sW7YMiYmJNcpPEzdv3sSiRYvQt29fWFlZQU9PD2ZmZmjTpg369u2LBQsWYO/evcjOzq42r/o49itLS0vD8uXL0a1bN5iZmcHExAQeHh546623cOPGDQAPhy2u7jtTlcbf3x/Tpk2Dm5sb9PX1YWFhgREjRuDAgQNV8igrK8OePXswevRouLi4QF9fH46Ojnj55ZcRHh6u8TrV13ZS1UaePHkSU6dOldbJ0tISzzzzDH7++WeFoJ2qvCpTdlzIZLIqvx1EREREjwtjEIxBKMMYBGMQjEEwBsEYRMNupyc9BlH5nEAIgTt37jR4mUTUxAkieips2bJFAJBeZ86cqdf8vb29pbz9/f3FmTNnhIODg0KZlV/a2tpiw4YNavMMCgoSxsbGKvOo/HJ1dRWXL19Wm5+/v7+U3tvbW2RlZYkJEyYoze+HH36Qlrt7966wtLTUqB5WVlbC19dXo21WVlYmPv30U2FoaFhtvlpaWuLatWvSsi4uLhrVB4BYvny5yu3RunXrapfv27eviI+Pr/ftunHjRulzHx8fpXmXlJSIuXPnaryuH3/8scLymi4HQGzcuFFh2Uf3aXUiIiJEz549NSpnypQp9b6etRUVFSU8PT2rLa9Hjx7i9u3bSvPQtM7e3t41qtvy5cs1ztvFxUVh2Uf3raysLDFu3Di1eYwaNUrk5+dXW6/CwkIxf/58oa2trTY/AwMDsWbNmhqtc3XbQ0dHR6PtMX36dLV51dexX+HYsWPCyspKZT56enpi3bp1IjY2VuV3VuHRNKWlpWLhwoVq6/nJJ59Iy6ekpIi+ffuqTKurqyt27typ0TZvqDayqKio2uO9e/fuIjU1VW1emrxiY2M1WteaqJx/dW0jERERPb0Yg2AMQtn2YAzify/GIFS/GIP4H8YgqmIM4umKQTwqOztboczg4OAGL5OImjYdENFT4eTJk9K0s7OzxndE1EZERAQ++ugj5ObmwsbGBgMHDoSlpSUSEhJw8uRJFBQUoKysDPPnz0enTp3g5eWlNJ/MzEzp7gQbGxt07NgRjo6OMDIyQn5+Pm7fvo3g4GCUlpYiLi4O3t7euHz5Mtq0aVNtHYUQePnll3Hw4EHpGaAeHh4QQiAiIkKhp2peXp40DJaFhQU6duwIFxcXGBsbo7i4GLGxsQgKCkJhYSHS0tIwZswYBAYGol+/firLLysrw4svvog9e/ZIn+np6cHLywuurq7Q0dFBcnIyLl++jKSkJJSXlyvcMeLj44P09HScOHECN2/eBAAMHToU7u7uVcrq3bt3lc927tyJ6dOnSz129fX10bdvX7i6ukJbWxtRUVE4f/48SktLER
QUBC8vL1y8eBG2trb1tl018cEHH2DDhg3SewcHB/Tu3RtWVlYQQiA9PR3Xr19HZGSk0uXfeOMNAA970Vf0bh8/fjw
2024-03-26 12:20:03 +01:00
"text/plain": [
"<Figure size 2500x2000 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# one radar per segment, all in the same figure\n",
"categories = list(X_test_segment_caract.drop(\"segment\", axis=1).columns)\n",
"radar_mp_plot_all(df=X_test_segment_caract, categories=categories)"
2024-03-26 12:20:03 +01:00
]
},
{
"cell_type": "code",
2024-03-26 16:00:39 +01:00
"execution_count": 81,
2024-03-26 12:20:03 +01:00
"id": "c48136d1-c980-4f74-a69f-ed4304c83188",
"metadata": {},
"outputs": [],
"source": [
"# export to MinIo\n",
"\n",
"# defined in this cell as well, so that the export does not depend on\n",
"# another export cell having been run before\n",
"activity = \"sport\"\n",
"PATH = f\"projet-bdc2324-team1/Output_marketing_personae_analysis/{activity}/\"\n",
"\n",
"file_name = \"spider_chart_all_\" + activity\n",
"FILE_PATH_OUT_S3 = PATH + file_name + \".png\"\n",
"\n",
"radar_mp_plot_all(df=X_test_segment_caract, categories=categories)\n",
"\n",
"# render the current figure as PNG in memory, then upload the bytes to S3\n",
"image_buffer = io.BytesIO()\n",
"plt.savefig(image_buffer, format='png')\n",
"image_buffer.seek(0)\n",
"\n",
"with fs.open(FILE_PATH_OUT_S3, 'wb') as s3_file:\n",
"    s3_file.write(image_buffer.read())\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"id": "a2395680-69fe-4247-8deb-22f8ee15830b",
"metadata": {},
"source": [
"## --- end of the main part --- here are just some attempts --- ##"
]
},
{
"cell_type": "code",
"execution_count": 489,
"id": "7d9a2aca-d28d-43b3-9b72-5913b20c4f04",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAH3CAYAAABU/z5zAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3hb5fn+76M9LU9Z3nvHSeyEONtxwgoQCIW2QGgh9NeGQkMppS1QApQVVoEvhRJ2aMMsJRDKJsOJ7Ww7ieNtxzO2vGVrz/f3h2IRxxkess+R/H6uS1diWTrnPrKkc5/ned7nYQghBBQKhUKhUCjTBB7bAigUCoVCoVCmEmp+KBQKhUKhTCuo+aFQKBQKhTKtoOaHQqFQKBTKtIKaHwqFQqFQKNMKan4oFAqFQqFMK6j5oVAoFAqFMq2g5odCoVAoFMq0gpofCoVCoVAo0wpqfijn5dixY5DJZPjHP/7BtpQx8frrr0Mul2P//v1sS6GchsFgQHp6On7605/C5XKxLYdCoUxTqPmZBmzevBkMw3huAoEAERERuOGGG1BXV3fO5+n1elx//fVYv3491q9fP4WKJ8aRI0dwzz334MMPP0ReXt64tzP0ujU1NXlPnJ/y5JNP4rPPPrvg4379618jPDwcW7ZsAY83uq+fpqYmMAyDzZs3j0nT+++/jxdffPGsv2MYBo888siYtkehUPwHAdsCKFPHO++8g/T0dFgsFhQXF+OJJ57Azp07UV1djaCgoBGP/9WvfoV58+bhqaeeYkHt+BgcHMRPf/pTPP/881i1ahXbcqYNTz75JK6//nqsXr36nI955ZVXcOzYMRQXF0MsFo962xEREdi7dy+SkpLGpOn999/H8ePHcffdd4/43d69exEdHT2m7VEoFP+Bmp9pxIwZMzB37lwAwLJly+B0OvHwww/js88+w9q1a0c8/uOPP55qiRMmICDgvNGs6Y7dbvdE/6aaO++8E3feeeeoH+90OuFwOCAWizF//nyvavH29igUim9B017TmCEj1NnZOez+Q4cO4eqrr0ZwcDAkEglycnJGGCGTyYR7770XCQkJkEgkCA4Oxty5c/HBBx94HrNs2TIsW7ZsxH5vvfVWxMfHe34eSms8++yzePrppxEfHw+pVIply5ahtrYWdrsd9913HyIjI6FSqXDttdeiq6trxHY/+ugjLFiwAHK5HAqFApdddhnKyspG9Vrs27cPixYtgkQiQWRkJO6//37Y7fazPnY0+zlx4gRuuOEGREZGQiwWIzw8HCtWrMCRI0cuqGX//v1YtWoVQkJCIJFIkJSUNCx6UV9fj7Vr1yIlJQUymQxRUVFYtWoVysvLh21n165dYBgG//73v/HHP/4RUVFREIvFqK+vR3d3N+644w5kZmZCoVBArVZj+fLl2LNnzwg9VqsVjz76KDIyMiCRSBASEoKCggKUlJQAcKeQjEYj3n33XU9q9fS/u1arxbp16xAdHQ2RSISEhAT87W9/g8Ph8Dxm6D3wzDPP4PHHH0dCQgLEYjF27tx51rRXd3c3fvOb3yAmJgZisRhhYWFYtGgRfvjhBwDu996XX36J5ubmYSnfIc6W9jrbe+CNN94Ykfo8V8osPj4et95667D7RnPsAPDqq69i1qxZUCgUUCqVSE9PxwMPPDBiHxQKxTvQyM80prGxEQCQmprquW/nzp24/PLLkZeXh02bNkGlUuHDDz/Ez3/+c5hMJs+X+z333IN///vfePzxx5GTkwOj0Yjjx4+jt7d33HpeeeUVzJw5E6+88gp0Oh3++Mc/YtWqVcjLy4NQKMTbb7+N5uZm3Hvvvfh//+//Ydu2bZ7nPvnkk3jwwQexdu1aPPjgg7DZbHj22WexZMkSHDhwAJmZmefcb2VlJVasWIH4+Hhs3rwZMpkM//znP/H++++PeOxo93PFFVfA6XTimWeeQWxsLHp6elBSUgKdTnfe1+Dbb7/FqlWrkJGRge
effx6xsbFoamrCd99953lMe3s7QkJC8NRTTyEsLAx9fX149913kZeXh7KyMqSlpQ3b5v33348FCxZg06ZN4PF4UKvV6O7uBgA8/PDD0Gg0MBgM2Lp1K5YtW4bt27d7zIvD4cDKlSuxZ88e3H333Vi+fDkcDgf27duHlpYWLFy4EHv37sXy5ctRUFCADRs2AHBH4AD3yX/evHng8Xh46KGHkJSUhL179+Lxxx9HU1MT3nnnnWFaX3rpJaSmpuK5555DQEAAUlJSzvo6/eIXv0BpaSmeeOIJpKamQqfTobS01PP+++c//4nf/OY3aGhowNatW8/7mgNjew+MltEe+4cffog77rgD69evx3PPPQcej4f6+npUVlaOe98UCuUCEIrf88477xAAZN++fcRutxO9Xk+++eYbotFoyNKlS4ndbvc8Nj09neTk5Ay7jxBCrrrqKhIREUGcTichhJAZM2aQ1atXn3e/+fn5JD8/f8T9t9xyC4mLi/P83NjYSACQWbNmebZPCCEvvvgiAUCuvvrqYc+/++67CQAyMDBACCGkpaWFCAQCsn79+mGP0+v1RKPRkJ/97Gfn1fnzn/+cSKVSotVqPfc5HA6Snp5OAJDGxsYx7aenp4cAIC+++OJ593s2kpKSSFJSEjGbzaN+jsPhIDabjaSkpJA//OEPnvt37txJAJClS5eOaht2u52sWLGCXHvttZ77//WvfxEA5I033jjv8+VyObnllltG3L9u3TqiUChIc3PzsPufe+45AoBUVFQQQn58DyQlJRGbzTbssUO/e+eddzz3KRQKcvfdd59X05VXXjnsfXY6AMjDDz/s+Xm074GzPXeIuLi4Ya/BaI/9d7/7HQkMDDzvsVAoFO9C017TiPnz50MoFEKpVOLyyy9HUFAQPv/8c0/9R319Paqrq7FmzRoA7qv+odsVV1yBjo4O1NTUAADmzZuHr7/+Gvfddx927doFs9k8YX1XXHHFsBVAGRkZAIArr7xy2OOG7m9paQHgjpY4HA788pe/HKZZIpEgPz8fu3btOu9+d+7ciRUrViA8PNxzH5/Px89//vNhjxvtfoKDg5GUlIRnn30Wzz//PMrKyka1rLu2thYNDQ341a9+BYlEcs7HORwOPPnkk8jMzIRIJIJAIIBIJEJdXR2qqqpGPP66664763Y2bdqE3NxcSCQSCAQCCIVCbN++fdg2vv76a0gkEtx2220X1H82/ve//6GgoACRkZHDXrOVK1cCAAoLC4c9/uqrr4ZQKLzgdufNm4fNmzfj8ccfx759+86Zohwto30PjIXRHvu8efOg0+lw44034vPPP0dPT8+EjoVCoVwYan6mEf/6179w8OBB7NixA+vWrUNVVRVuvPFGz++Han/uvfdeCIXCYbc77rgDADxfzC+99BL+8pe/4LPPPkNBQQGCg4OxevXqCRUbBwcHD/tZJBKd936LxTJM90UXXTRC90cffXTBk0lvby80Gs2I+8+8b7T7YRgG27dvx2WXXYZnnnkGubm5CAsLw1133QW9Xn9OHUOpqAutQrrnnnuwYcMGrF69Gl988QX279+PgwcPYtasWWc1oRERESPue/755/Hb3/4WeXl5+O9//4t9+/bh4MGDuPzyy4dto7u7G5GRkaNeln4mnZ2d+OKLL0a8XllZWQAw4m9zNq1n46OPPsItt9yCN998EwsWLEBwcDB++ctfQqvVjkvnaN8DY2G0x/6LX/zCk9K97rrroFarkZeXh++//37c+6ZQKOeH1vxMIzIyMjxFzgUFBXA6nXjzzTfxySef4Prrr0doaCgAd43IT37yk7NuY6ieRC6X429/+xv+9re/obOz0xMFWrVqFaqrqwEAEokEAwMDI7bh7SvbId2ffPIJ4uLixvz8kJCQs540z7xvLPuJi4vDW2+9BcAd0fn444/xyCOPwGazYdOmTWd9TlhYGACgra3tvNvesmULfvnLX+LJJ58cdn9PTw8CAwNHPP70Qt/Tt7
Fs2TK8+uqrw+4/05yFhYWhqKgILpdrXAYoNDQUM2fOxBNPPHHW30dGRl5Q67m2++KLL+LFF19ES0sLtm3bhvvuuw9
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# exploratory attempt: the four segments are filled on one and the same polar axis\n",
"# NOTE(review): this first palette is overwritten by the next line and never used\n",
"colors = plt.cm.Blues(np.linspace(0.1, 0.9, 4)) \n",
"colors = [\"blue\", \"green\", \"orange\", \"red\"]\n",
"\n",
"# Initialize the radar chart\n",
"fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"for i in range(4) :\n",
"\n",
" # Characteristics and associated values (example)\n",
" categories = ['share_known_gender', 'share_of_women', 'country_fr']\n",
" values = list(X_test_segment_mp.loc[i,categories]) # Example values, adjust to your data\n",
" \n",
" values_normalized = [ max(values) * elt for elt in values]\n",
" \n",
" # Number of characteristics\n",
" num_categories = len(categories)\n",
" \n",
" # Create one angle per axis\n",
" angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
" \n",
" \n",
" # Draw only the outline of the polygon (alpha=0: the line itself is invisible)\n",
" ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
" # ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='blue', alpha = 0.3, linewidth=1.5)\n",
" \n",
" # Fill the central area with a color\n",
" ax.fill(angles, values_normalized, color=colors[i], alpha=0.2, label = str(i+1))\n",
"\n",
"# Label the axes\n",
"ax.set_yticklabels([])\n",
"ax.set_xticks(angles)\n",
"ax.set_xticklabels(categories)\n",
"ax.legend()\n",
"\n",
"# Chart title\n",
"plt.title('Résumé des caractéristiques')\n",
"\n",
"# Show the chart\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 301,
"id": "96aa9ff5-c1ed-49eb-8fb7-2319ac0c40be",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgoAAAITCAYAAABmGDQGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3hUZfbHPzPpvVcCSQgphE4g9G4BkWJvSFkL9l3LrrprW1ddFcUuWFBUYGWxwk8BlU7oISRAQhophPRep7+/P8ZkibSUydyZyf08zzxJptz3zM299/3ec857jkIIIZCRkZGRkZGRuQBKqQ2QkZGRkZGRsVxkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEgIyMjIyMjc1FkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEgIyMjIyMjc1FkoSAjIyMjIyNzUWShICMjIyMjI3NRZKEg02HS0tJwdXXlvffek9qUTvHxxx/j5ubGwYMHpTZF5hwaGxuJi4vjpptuwmAwSG2OjIzMRZCFQi9j9erVKBSKtoe9vT0hISHceuutZGdnX/RzDQ0N3HjjjTz88MM8/PDDZrS4exw7dozHHnuMr7/+mjFjxnR5O637LT8/33TG2SivvPIKP/zww2Xfd8899xAUFMSaNWtQKjt2KcrPz0ehULB69epO2bRu3TrefvvtC76mUCh44YUXOrU9GZnehL3UBshIw+eff05cXBwqlYqkpCRefvllduzYwalTp/Dx8Tnv/XfddReJiYm8+uqrEljbNerr67nppptYvnw5c+bMkdqcXsMrr7zCjTfeyPz58y/6ng8++IC0tDSSkpJwcnLq8LZDQkLYv38/UVFRnbJp3bp1nDhxgr/85S/nvbZ//37CwsI6tT0Zmd6ELBR6KYMHD2bUqFEATJ06Fb1ez/PPP88PP/zAkiVLznv/f//7X3Ob2G08PT0v6SXp7Wi12javkrl58MEHefDBBzv8fr1ej06nw8nJibFjx5rUFlNvT0bG1pBDDzIAbaKhrKys3fNHjhxh7ty5+Pr64uzszIgRI84TDc3NzTzxxBNERkbi7OyMr68vo0aN4j//+U/be6ZOncrUqVPPG3fx4sVERES0/d3qWl62bBmvvfYaERERuLi4MHXqVLKystBqtTz11FOEhobi5eXFddddR3l5+XnbXb9+PePGjcPNzQ13d3euvvpqUlJSOrQvDhw4wIQJE3B2diY0NJSnn34arVZ7wfd2ZJzTp09z6623EhoaipOTE0FBQcyYMYNjx45d1paDBw8yZ84c/Pz8cHZ2Jioqqt1dcU5ODkuWLCE6OhpXV1f69OnDnDlzOH78eLvt7Ny5E4VCwVdffcXjjz9Onz59cHJyIicnh4qKCh544AHi4+Nxd3cnMDCQ6dOns2fPnvPsUavVvPjiiwwcOBBnZ2f8/PyYNm0a+/btA4xu/KamJr744ou28Na5//fS0lKWLl1KWFgYjo6OREZG8s9//hOdTtf2ntZj4PXXX+ell14iMjISJycnduzYccHQQ0VFBffeey99+/bFycmJgIAAJkyYwG+//QYYj72ffvqJgoKCdmG3Vi4UerjQMfDJJ5+cF366WNgiIiKCxYsXt3uuI98dYMWKFQwbNgx3d3c8PDyIi4vj73//+3ljyMiYC9mjIANAXl4eADExMW3P7dixg5kzZzJmzBhWrlyJl5cXX3/9NbfccgvNzc1tF8LHHnuMr776ipdeeokRI0bQ1NTEiRMnqKqq6rI9H3zwAUOHDuWDDz6gtraWxx9/nDlz5jBmzBgcHBz47LPPKCgo4IknnuDuu+9m48aNbZ995ZVXeOaZZ1iyZAnPPPMMGo2GZcuWMWnSJA4dOkR8fPxFx01PT2fGjBlERESwevVqXF1d+fDDD1m3bt157+3oONdccw16vZ7XX3+dfv36UVlZyb59+6itrb3kPti6dStz5sxh4MCBLF++nH79+pGfn88vv/zS9p
7i4mL8/Px49dVXCQgIoLq6mi+++IIxY8aQkpJCbGxsu20+/fTTjBs3jpUrV6JUKgkMDKSiogKA559/nuDgYBobG/n++++ZOnUq27Zta5vodTods2bNYs+ePfzlL39h+vTp6HQ6Dhw4QGFhIePHj2f//v1Mnz6dadOm8eyzzwJGzw4YJ8rExESUSiXPPfccUVFR7N+/n5deeon8/Hw+//zzdra+++67xMTE8MYbb+Dp6Ul0dPQF99Odd97J0aNHefnll4mJiaG2tpajR4+2HX8ffvgh9957L7m5uXz//feX3OfQuWOgo3T0u3/99dc88MADPPzww7zxxhsolUpycnJIT0/v8tgyMt1GyPQqPv/8cwGIAwcOCK1WKxoaGsSWLVtEcHCwmDx5stBqtW3vjYuLEyNGjGj3nBBCXHvttSIkJETo9XohhBCDBw8W8+fPv+S4U6ZMEVOmTDnv+UWLFonw8PC2v/Py8gQghg0b1rZ9IYR4++23BSDmzp3b7vN/+ctfBCDq6uqEEEIUFhYKe3t78fDDD7d7X0NDgwgODhY333zzJe285ZZbhIuLiygtLW17TqfTibi4OAGIvLy8To1TWVkpAPH2229fctwLERUVJaKiokRLS0uHP6PT6YRGoxHR0dHi0UcfbXt+x44dAhCTJ0/u0Da0Wq2YMWOGuO6669qe//LLLwUgPvnkk0t+3s3NTSxatOi855cuXSrc3d1FQUFBu+ffeOMNAYiTJ08KIf53DERFRQmNRtPuva2vff75523Pubu7i7/85S+XtGn27NntjrNzAcTzzz/f9ndHj4ELfbaV8PDwdvugo9/9oYceEt7e3pf8LjIy5kYOPfRSxo4di4ODAx4eHsycORMfHx9+/PHHtnh1Tk4Op06d4o477gCMd5Otj2uuuYaSkhIyMzMBSExMZPPmzTz11FPs3LmTlpaWbtt3zTXXtMuEHzhwIACzZ89u977W5wsLCwHjXbhOp2PhwoXtbHZ2dmbKlCns3LnzkuPu2LGDGTNmEBQU1PacnZ0dt9xyS7v3dXQcX19foqKiWLZsGcuXLyclJaVDSwGzsrLIzc3lrrvuwtnZ+aLv0+l0vPLKK8THx+Po6Ii9vT2Ojo5kZ2eTkZFx3vtvuOGGC25n5cqVjBw5EmdnZ+zt7XFwcGDbtm3ttrF582acnZ3505/+dFn7L8T//d//MW3aNEJDQ9vts1mzZgGwa9eudu+fO3cuDg4Ol91uYmIiq1ev5qWXXuLAgQMXDRN1lI4eA52ho989MTGR2tpabrvtNn788UcqKyu79V1kZEyBLBR6KV9++SWHDx9m+/btLF26lIyMDG677ba211tzFZ544gkcHBzaPR544AGAtovYu+++y5NPPskPP/zAtGnT8PX1Zf78+d1KJPT19W33t6Oj4yWfV6lU7ewePXr0eXavX7/+shfeqqoqgoODz3v+j891dByFQsG2bdu4+uqref311xk5ciQBAQE88sgjNDQ0XNSO1nDA5bLxH3vsMZ599lnmz5/Ppk2bOHjwIIcPH2bYsGEXFGwhISHnPbd8+XLuv/9+xowZw7fffsuBAwc4fPgwM2fObLeNiooKQkNDO7yU8Y+UlZWxadOm8/bXoEGDAM7731zI1guxfv16Fi1axKeffsq4cePw9fVl4cKFlJaWdsnOjh4DnaGj3/3OO+9sC6vdcMMNBAYGMmbMGH799dcujy0j013kHIVeysCBA9sSGKdNm4Zer+fTTz/lm2++4cYbb8Tf3x8wxrSvv/76C26jNf7t5ubGP//5T/75z39SVlbW5l2YM2cOp06dAsDZ2Zm6urrztmHqO6ZWu7/55hvCw8M7/Xk/P78LTjB/fK4z44SHh7Nq1SrA6Cn473//ywsvvIBGo2HlypUX/ExAQAAARUVFl9z2mjVrWLhwIa+88kq75ysrK/H29j7v/ecm8Z27jalTp7JixYp2z/9RyAQEBLB3714MBkOXxIK/vz9Dhw7l5Zdfvu
DroaGhl7X1Ytt9++23efvttyksLGTjxo089dRTlJeXs2XLlk7b2dFjAMDJyQm1Wn3e83/Mz+nMd1+yZAlLliyhqam
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# KEEP THIS CODE !!\n",
"\n",
"# Characteristics and associated values (example)\n",
"categories = ['Force', 'Vitesse', 'Agilité', 'Précision', 'Endurance']\n",
"values = [8, 7, 6, 9, 7] # Example values, adjust to your data\n",
"\n",
"# Upper bound of the value range of each characteristic\n",
"max_range = [20, 20, 20, 20, 20]\n",
"\n",
"values_normalized = [2 * max(values) * x / y for x, y in zip(values, max_range)]\n",
"\n",
"# Number of characteristics\n",
"num_categories = len(categories)\n",
"\n",
"# Create one angle per axis\n",
"angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"# Initialize the radar chart\n",
"fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"# Draw only the outline of the polygon (the first line has alpha=0, i.e. it is invisible)\n",
"ax.plot(angles + angles[:1], values + values[:1], color='skyblue', alpha=0, linewidth=1.5)\n",
"ax.plot(angles + angles[:1], values_normalized + values_normalized[:1], color='blue', linewidth=1.5)\n",
"\n",
"# Fill the central area with a color\n",
"ax.fill(angles, values_normalized, color='skyblue', alpha=0.4)\n",
"\n",
"# Label the axes\n",
"ax.set_yticklabels([])\n",
"ax.set_xticks(angles)\n",
"ax.set_xticklabels(categories)\n",
"\n",
"# Chart title\n",
"plt.title('Résumé des caractéristiques')\n",
"\n",
"# Show the chart\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 149,
"id": "adb7ccb3-7dea-4347-9298-37311a2f1fb1",
"metadata": {},
"outputs": [],
"source": [
"def radar_chart(values, categories, segment) :\n",
"    \"\"\"Draw and show a filled radar chart of `values` for one segment.\n",
"\n",
"    values     : sequence of numbers, one per category; each one is printed as\n",
"                 100 * value % in the label of its axis\n",
"    categories : sequence of axis names, in the same order as `values`\n",
"    segment    : identifier inserted in the figure title\n",
"\n",
"    The arguments are left untouched: the polygon is closed on local copies.\n",
"    \"\"\"\n",
"    # local copy: works for lists, arrays and Series alike, and protects the caller's object\n",
"    values = list(values)\n",
"\n",
"    # one angle per characteristic, evenly spaced around the circle\n",
"    num_categories = len(categories)\n",
"    angles = np.linspace(0, 2 * np.pi, num_categories, endpoint=False).tolist()\n",
"\n",
"    # repeat the first point at the end to close the figure; new lists are built on purpose\n",
"    # (an in-place `values += values[:1]` grows the caller's list at every call)\n",
"    closed_values = values + values[:1]\n",
"    closed_angles = angles + angles[:1]\n",
"\n",
"    # Initialize the radar chart\n",
"    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
"\n",
"    # filled area, then its outline\n",
"    ax.fill(closed_angles, closed_values, color='skyblue', alpha=0.4)\n",
"    ax.plot(closed_angles, closed_values, color='blue', linewidth=2, linestyle='solid')\n",
"\n",
"    # no radial tick labels; each angular tick shows the category and its value in %\n",
"    ax.set_yticklabels([])\n",
"    labels = [f\"{category} = {round(100 *value,2)}%\" for category, value in zip(categories, values)]\n",
"    ax.set_xticks(angles)\n",
"    ax.set_xticklabels(labels, fontsize=10, color='black', ha='right')\n",
"\n",
"    # Chart title\n",
"    plt.title(f'Caracteristics of segment {segment}')\n",
"\n",
"    # Show the chart\n",
"    plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 150,
"id": "8793fb51-812c-4500-b252-2e2d61d6ff48",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkkAAAH2CAYAAABk9BgJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3xT1fvHPzezTZOOdO+W0gmUtoyWUqBYwAGiP5YLBRUVQURUBFyIynShqAwHOFARBQS/OJBRShkFWgqle++92+yc3x+xaSsFum/SnvfrlVfT5N5zPrkZ93Of85znMIQQAgqFQqFQKBRKOzhsC6BQKBQKhUIxRKhJolAoFAqFQukAapIoFAqFQqFQOoCaJAqFQqFQKJQOoCaJQqFQKBQKpQOoSaJQKBQKhULpAGqSKBQKhUKhUDqAmiQKhUKhUCiUDqAmiUKhUCgUCqUDqEmiUHqZq1ev4vHHH4enpydMTEwgFosREhKCLVu2oLq6mm15NyU5ORlvvfUWcnNz+6T9t956CwzDdGmf5uZmvPXWWzh16tQNz+3ZswcMw/SZ3p6wb98+DBs2DKampmAYBleuXGFbksHz+eefY8+ePZ3e/vfff8djjz2GESNGgM/nd/mzRaF0BoYuS0Kh9B5ffPEFlixZAl9fXyxZsgQBAQFQqVS4dOkSvvjiC4wcORIHDx5kW2aH/PLLL5g7dy5OnjyJyMjIXm+/sLAQhYWFCAsL6/Q+lZWVsLW1xdq1a/HWW2+1e66iogJZWVkIDg6GUCjsZbXdp6KiAs7Ozrjrrrvw0ksvQSgUIjAwECKRiG1pBs3w4cNhY2PToSHuiCeffBIxMTEIDg5GVlYWLl++DHo6o/Q2PLYFUCgDhXPnzuHZZ5/F1KlTcejQoXYn7qlTp+Kll17Cn3/+2St9NTc3G81Jt0Wri4sLXFxceq1dW1tb2Nra9lp7vUV6ejpUKhXmz5+PSZMmsS1nwPLFF1+Aw9ENhjz33HO4fPkyy4ooAxJCoVB6hRkzZhAej0fy8/M7tf1PP/1Epk6dShwcHIiJiQnx8/Mjq1atIo2Nje22W7BgATEzMyNXr14lU6dOJWKxmISFhRFCCPn777/JzJkzibOzMxEKhcTLy4s8/fTTpKKi4ob+UlJSyIMPPkjs7OyIQCAgrq6u5NFHHyVyuZzs3r2bALjhtnv3bv3+x44dI3fccQeRSCTE1NSUhIeHk3/++addH2vXriUAyOXLl8ns2bOJpaUlcXBwaPdcW44fP04mTZpEpFIpMTExIa6urmTWrFmkqamJ5OTkdKhpwYIFhBCi15yTk9OuzT/++IPccccdxNzcnJiamhI/Pz+yYcMG/fNZWVnkgQceII6OjkQgEBA7Oztyxx13kISEhNu+Z7/99hsJCwsjpqamRCwWkylTppCzZ8+2e6/+q3fSpEk3ba+pqYm89NJLxMPDgwiFQmJlZUVGjRpFfvjhh3bbXbx4kdx7773EysqKCIVCEhQURPbt23dDezExMSQsLIwIhULi5OREXn/9dfLFF1/ccJzc3d3J9OnTyZEjR0hQUJD+83fkyBH9sfXz8yMikYiMGTOGXLx48Ya+OqOp5T06ceIEWbx4MbG2tiZSqZT83//9HykqKmqn57/Hzd3d/VZvRTuWLl16w2eLQukNaCSJQukFNBoNTpw4gVGjRsHV1bVT+2RkZOCee+7BCy+8ADMzM6SmpmLz5s2Ii4vDiRMn2m2rVCoxc+ZMPPPMM1i9ejXUajUAICsrC+PGjcOiRYtgYWGB3NxcfPjhh4iIiMC1a9fA5/MBAImJiYiIiICNjQ3efvtteHt7o6SkBIcPH4ZSqcT06dOxYcMGvPrqq/jss88QEhICAPDy8gIAfP/993jsscdw33334ZtvvgGfz8fOnTtx55134q+//kJUVFQ7vbNmzcKDDz6IxYsXo6mpqcPXn5ubi+nTp2PChAn4+uuvYWlpiaKiIvz5559QKpVwdH
TEn3/+ibvuugtPPvkkFi1aBAC3jB599dVXeOqppzBp0iTs2LEDdnZ2SE9PR1JSkn6be+65BxqNBlu2bIGbmxsqKytx9uxZ1NbW3vL9+uGHH/DII49g2rRp+PHHH6FQKLBlyxZERkbi+PHjiIiIwBtvvIGxY8di6dKl2LBhAyZPngxzc/Obtvniiy/iu+++w7vvvovg4GA0NTUhKSkJVVVV+m1OnjyJu+66C6GhodixYwcsLCzw008/4YEHHkBzczMWLlwIQJcLN3XqVPj4+OCbb76BSCTCjh078P3333fYd2JiItasWYPXXnsNFhYWWLduHWbNmoU1a9bg+PHj2LBhAxiGwapVqzBjxgzk5OTA1NS0S5paWLRoEaZPn44ffvgBBQUFWLlyJebPn6//nB88eBBz5syBhYUFPv/8cwAwqCFUyiCGbZdGoQwESktLCQDy4IMPdmt/rVZLVCoViY6OJgBIYmKi/rmW6MTXX3/dqTby8vIIAPLbb7/pn7vjjjuIpaUlKS8vv+n++/fvJwDIyZMn2z3e1NREpFIpuffee9s9rtFoyMiRI8nYsWP1j7VEi958880b2v9vJOmXX34hAMiVK1duqqmiooIAIGvXrr3huf9GkhoaGoi5uTmJiIggWq22w/YqKysJALJ169ab9tkRGo2GODk5kREjRhCNRqN/vKGhgdjZ2ZHw8HD9YydPniQAyP79+2/b7vDhw8n9999/y238/PxIcHAwUalU7R6fMWMGcXR01OuZO3cuMTMzaxdF1Gg0JCAgoMNIkqmpKSksLNQ/duXKFQKAODo6kqamJv3jhw4dIgDI4cOHu6yp5T1asmRJu+22bNlCAJCSkhL9Y8OGDbtl1O1W0EgSpa+gs9soFJbIzs7Gww8/DAcHB3C5XPD5fH0OS0pKyg3bz549+4bHysvLsXjxYri6uoLH44HP58Pd3b1dG83NzYiOjsa8efO6lcNz9uxZVFdXY8GCBVCr1fqbVqvFXXfdhYsXL94QLepI638JCgqCQCDA008/jW+++QbZ2dld1vZfnfX19ViyZMlNZzpJpVJ4eXnhvffew4cffoiEhARotdrbtp2Wlobi4mI8+uij+jwYABCLxZg9ezbOnz+P5ubmLmseO3Ys/vjjD6xevRqnTp2CTCZr93xmZiZSU1PxyCOPAEC743/PPfegpKQEaWlpAIDo6GjccccdsLGx0e/P4XAwb968DvsOCgqCs7Oz/n9/f38AQGRkZLt8t5bH8/LyuqyphZkzZ7b7PzAwsF2bFIqhQk0ShdIL2NjYQCQSIScnp1PbNzY2YsKECbhw4QLeffddnDp1ChcvXsSBAwcA4IaTpUgkumHYRqvVYtq0aThw4ABeeeUVHD9+HHFxcTh//ny7NmpqaqDRaLqdNF1WVgYAmDNnDvh8frvb5s2bQQi5obSBo6Pjbdv18vLCP//8Azs7OyxduhReXl7w8vLCxx9/3C2dFRUVAHDL18kwDI4fP44777wTW7ZsQUhICGxtbfH888+joaHhpvu1DH919LqcnJyg1WpRU1PTZc2ffPIJVq1ahUOHDmHy5MmQSqW4//77kZGRAaD12L/88ss3HPslS5YA0M0AbNFob29/Qx8dPQboDGNbBALBLR+Xy+Vd1tSCtbV1u/9bhtL++zmnUAwNmpNEofQCXC4XUVFR+OOPP1BYWHhbQ3LixAkUFxfj1KlT7WZA3SwvpqPISFJSEhITE7Fnzx4sWLBA/3hmZma77aRSKbhcLgoLC7vwilppiUxs27btptP3/3si7mzNmgkTJmDChAnQaDS4dOkStm3bhhdeeAH29vZ48MEHu6SzJUp2u9fp7u6Or776CoBuJtrPP/+Mt956C0qlEjt27Ohwn5aTfElJyQ3PFRcXg8PhwMrKqkt6AcDMzAzr1q3DunXrUFZWpo8q3XvvvUhNTdUf+zVr1mDWrFkdtuHr66vX2GJg2lJaWtplXbeiK5ooFGOHRpIolF5izZo1II
TgqaeeglKpvOF5lUqFI0eOAGg1Ef9NTt25c2en++tsG6amppg0aRL2799/wxV+W252dT9+/HhYWloiOTkZo0eP7vD
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"categories= [\"share_known_gender\",\"share_of_women\",\"country_fr\"]\n",
"radar_chart(values=X_test_segment_mp.loc[0,categories].values.tolist(), categories= categories,\n",
" segment = \"1\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}