2026-04-03 10:55:04 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 43,
"id": "7aa09644-4d17-4a7a-841e-3bfcfb8a8901",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fichiers Flows : ['projet-bdc-data/carmignac/Flows ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv']\n",
"Fichiers AUM : ['projet-bdc-data/carmignac/AUM ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv']\n"
]
}
],
"source": [
"# Import des données\n",
"\n",
"import os\n",
"import s3fs\n",
"import pandas as pd\n",
"\n",
"s3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': s3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc-data\"\n",
"carmignac_path = \"projet-bdc-data/carmignac\"\n",
"\n",
"# Liste des fichiers FLOWS\n",
"all_files = fs.ls(carmignac_path)\n",
"flows_files = [f for f in all_files if \"Flows\" in f and f.endswith(\".csv\")]\n",
"print(\"Fichiers Flows :\", flows_files)\n",
"\n",
"# Lire tous les fichiers dans un dictionnaire\n",
"flows_data = {}\n",
"for file_path in flows_files:\n",
" with fs.open(file_path, 'r') as f:\n",
" df = pd.read_csv(f, sep=';',low_memory=False)\n",
" flows_data[os.path.basename(file_path)] = df\n",
"\n",
"\n",
"# Liste des fichiers AUM\n",
"all_files = fs.ls(carmignac_path)\n",
"aum_files = [f for f in all_files if \"AUM\" in f and f.endswith(\".csv\")]\n",
"print(\"Fichiers AUM :\", aum_files)\n",
"\n",
"# Lire tous les fichiers dans un dictionnaire\n",
"aum_data = {}\n",
"for file_path in aum_files:\n",
" with fs.open(file_path, 'r') as f:\n",
" df = pd.read_csv(f, sep=';',low_memory=False)\n",
" aum_data[os.path.basename(file_path)] = df\n",
"\n",
"df = aum_data['AUM ENSAE V2 -20251105.csv']\n",
"dg = flows_data['Flows ENSAE V2 -20251105.csv']"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "35bb08c3-873a-462b-879d-dde601388d8f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - AUM</th>\n",
" <th>Value - AUM CCY</th>\n",
" <th>Value - AUM €</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>35.368</td>\n",
" <td>24648.6666</td>\n",
" <td>24648.6666</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22413.0553</td>\n",
" <td>22413.0553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-12-31</td>\n",
" <td>35.368</td>\n",
" <td>22051.2406</td>\n",
" <td>22051.2406</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-03-31</td>\n",
" <td>35.368</td>\n",
" <td>21626.1173</td>\n",
" <td>21626.1173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22489.4502</td>\n",
" <td>22489.4502</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200000647 France \n",
"1 200000647 France \n",
"2 200000647 France \n",
"3 200000647 France \n",
"4 200000647 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Diversified Patrimoine \n",
"1 France Diversified Patrimoine \n",
"2 France Diversified Patrimoine \n",
"3 France Diversified Patrimoine \n",
"4 France Diversified Patrimoine \n",
"\n",
" Product - Legal Status Product - Is Dedie ? Product - Fund \\\n",
"0 FCP NO Carmignac Patrimoine \n",
"1 FCP NO Carmignac Patrimoine \n",
"2 FCP NO Carmignac Patrimoine \n",
"3 FCP NO Carmignac Patrimoine \n",
"4 FCP NO Carmignac Patrimoine \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n",
"0 A EUR FR0010135103 \n",
"1 A EUR FR0010135103 \n",
"2 A EUR FR0010135103 \n",
"3 A EUR FR0010135103 \n",
"4 A EUR FR0010135103 \n",
"\n",
" Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n",
"0 2015-03-31 35.368 24648.6666 24648.6666 \n",
"1 2015-11-30 35.368 22413.0553 22413.0553 \n",
"2 2015-12-31 35.368 22051.2406 22051.2406 \n",
"3 2016-03-31 35.368 21626.1173 21626.1173 \n",
"4 2016-11-30 35.368 22489.4502 22489.4502 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
2026-04-07 12:31:16 +02:00
"execution_count": 222,
2026-04-03 10:55:04 +02:00
"id": "d5262683-6ae5-4ee6-b949-58a468c7c7b5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>...</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - Subscription</th>\n",
" <th>Quantity - Redemption</th>\n",
" <th>Quantity - NetFlows</th>\n",
" <th>Value Ccy - Subscription</th>\n",
" <th>Value Ccy - Redemption</th>\n",
" <th>Value Ccy - NetFlows</th>\n",
" <th>Value € - Subscription</th>\n",
" <th>Value € - Redemption</th>\n",
" <th>Value € - NetFlows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200127202</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2020-11-05</td>\n",
2026-04-07 12:31:16 +02:00
" <td>1636.00</td>\n",
2026-04-03 10:55:04 +02:00
" <td>0.000</td>\n",
" <td>1636.000</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2015-03-09</td>\n",
2026-04-07 12:31:16 +02:00
" <td>144.69</td>\n",
2026-04-03 10:55:04 +02:00
" <td>0.000</td>\n",
" <td>144.690</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2016-10-26</td>\n",
2026-04-07 12:31:16 +02:00
" <td>0.00</td>\n",
2026-04-03 10:55:04 +02:00
" <td>-8.321</td>\n",
" <td>-8.321</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2018-10-18</td>\n",
2026-04-07 12:31:16 +02:00
" <td>0.00</td>\n",
2026-04-03 10:55:04 +02:00
" <td>-22.083</td>\n",
" <td>-22.083</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>...</td>\n",
" <td>2019-04-08</td>\n",
2026-04-07 12:31:16 +02:00
" <td>0.00</td>\n",
2026-04-03 10:55:04 +02:00
" <td>-465.992</td>\n",
" <td>-465.992</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
2026-04-07 12:31:16 +02:00
"<p>5 rows × 24 columns</p>\n",
2026-04-03 10:55:04 +02:00
"</div>"
],
"text/plain": [
2026-04-07 12:31:16 +02:00
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
2026-04-03 10:55:04 +02:00
"\n",
2026-04-07 12:31:16 +02:00
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200127202 France \n",
"1 406533 France \n",
"2 406533 France \n",
"3 406533 France \n",
"4 406533 France \n",
2026-04-03 10:55:04 +02:00
"\n",
2026-04-07 12:31:16 +02:00
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Equity Investissement \n",
"1 France Diversified Patrimoine \n",
"2 France Equity Investissement \n",
"3 France Equity Investissement \n",
"4 France Equity Investissement \n",
2026-04-03 10:55:04 +02:00
"\n",
2026-04-07 12:31:16 +02:00
" Product - Legal Status Product - Is Dedie ? ... Centralisation Date \\\n",
"0 SICAV NO ... 2020-11-05 \n",
"1 FCP NO ... 2015-03-09 \n",
"2 FCP NO ... 2016-10-26 \n",
"3 FCP NO ... 2018-10-18 \n",
"4 FCP NO ... 2019-04-08 \n",
2026-04-03 10:55:04 +02:00
"\n",
2026-04-07 12:31:16 +02:00
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
"0 1636.00 0.000 1636.000 \n",
"1 144.69 0.000 144.690 \n",
"2 0.00 -8.321 -8.321 \n",
"3 0.00 -22.083 -22.083 \n",
"4 0.00 -465.992 -465.992 \n",
2026-04-03 10:55:04 +02:00
"\n",
2026-04-07 12:31:16 +02:00
" Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
2026-04-03 10:55:04 +02:00
"\n",
2026-04-07 12:31:16 +02:00
" Value € - Subscription Value € - Redemption Value € - NetFlows \n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
2026-04-03 10:55:04 +02:00
"\n",
2026-04-07 12:31:16 +02:00
"[5 rows x 24 columns]"
2026-04-03 10:55:04 +02:00
]
},
2026-04-07 12:31:16 +02:00
"execution_count": 222,
2026-04-03 10:55:04 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dg.head()"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "b31d3cb3-479c-4d2b-b6f5-8f78cd69407a",
"metadata": {},
"outputs": [],
"source": [
"# Filtrer les comptes techniques\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"df['Centralisation Date'] = pd.to_datetime(df['Centralisation Date'])\n",
"dg['Centralisation Date'] = pd.to_datetime(dg['Centralisation Date'])\n",
"df = df[~df['Registrar Account - ID'].isin(['Off Distribution','Private Clients', 'Private Client'])]\n",
"dg = dg[~dg['Registrar Account - ID'].isin(['Off Distribution','Private Clients','Private Client'])]"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "dec37ff8-0f54-4e3e-ac63-a71f9b3583d9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(431, 2)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Value - AUM €</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3890</th>\n",
" <td>420350</td>\n",
" <td>1.623308e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2622</th>\n",
" <td>364765</td>\n",
" <td>1.383209e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>956</th>\n",
" <td>200127454</td>\n",
" <td>8.784361e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2598</th>\n",
" <td>312933</td>\n",
" <td>8.379604e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1099</th>\n",
" <td>200127809</td>\n",
" <td>8.342839e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3880</th>\n",
" <td>420259</td>\n",
" <td>8.296663e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2634</th>\n",
" <td>364907</td>\n",
" <td>8.151083e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2785</th>\n",
" <td>366441</td>\n",
" <td>7.707213e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2637</th>\n",
" <td>364929</td>\n",
" <td>7.479766e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2710</th>\n",
" <td>365538</td>\n",
" <td>7.200408e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2775</th>\n",
" <td>366403</td>\n",
" <td>7.092081e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3818</th>\n",
" <td>418961</td>\n",
" <td>6.529718e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>336</th>\n",
" <td>200058108</td>\n",
" <td>6.110961e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1764</th>\n",
" <td>200131722</td>\n",
" <td>5.758019e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>409</th>\n",
" <td>200073354</td>\n",
" <td>4.619978e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2741</th>\n",
" <td>365848</td>\n",
" <td>4.563625e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>423</th>\n",
" <td>200075932</td>\n",
" <td>4.375607e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>925</th>\n",
" <td>200127410</td>\n",
" <td>3.920364e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>872</th>\n",
" <td>200127316</td>\n",
" <td>3.707238e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>200001349</td>\n",
" <td>3.650226e+08</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Value - AUM €\n",
"3890 420350 1.623308e+09\n",
"2622 364765 1.383209e+09\n",
"956 200127454 8.784361e+08\n",
"2598 312933 8.379604e+08\n",
"1099 200127809 8.342839e+08\n",
"3880 420259 8.296663e+08\n",
"2634 364907 8.151083e+08\n",
"2785 366441 7.707213e+08\n",
"2637 364929 7.479766e+08\n",
"2710 365538 7.200408e+08\n",
"2775 366403 7.092081e+08\n",
"3818 418961 6.529718e+08\n",
"336 200058108 6.110961e+08\n",
"1764 200131722 5.758019e+08\n",
"409 200073354 4.619978e+08\n",
"2741 365848 4.563625e+08\n",
"423 200075932 4.375607e+08\n",
"925 200127410 3.920364e+08\n",
"872 200127316 3.707238e+08\n",
"74 200001349 3.650226e+08"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Date de référence et sélection des 400+ principaux codes\n",
"\n",
"ref_date = pd.Timestamp('2025-10-31')\n",
"\n",
"df_ref = df[df['Centralisation Date'] == ref_date]\n",
"\n",
"aum_account = (\n",
" df_ref\n",
" .groupby('Registrar Account - ID')['Value - AUM €']\n",
" .sum()\n",
" .reset_index()\n",
" .sort_values(by='Value - AUM €', ascending=False)\n",
")\n",
"aum_account = aum_account[aum_account['Value - AUM €'] > 5_000_000]\n",
"selected_accounts = aum_account['Registrar Account - ID']\n",
"\n",
"print(aum_account.shape)\n",
"aum_account.head(20)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "95238a61-c263-43b4-965f-e09226ec4c73",
"metadata": {},
"outputs": [],
"source": [
"df_aum = df[df['Registrar Account - ID'].isin(selected_accounts)].copy()\n",
"df_flows = dg[dg['Registrar Account - ID'].isin(selected_accounts)].copy()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "a6dcac05-e9f9-4c25-ade9-7b1f9e35582f",
"metadata": {},
"outputs": [],
"source": [
"# Clustering\n",
"\n",
"# Parse dates\n",
"df_flows[\"Centralisation Date\"] = pd.to_datetime(df_flows[\"Centralisation Date\"], errors=\"coerce\")\n",
"df_aum[\"Centralisation Date\"] = pd.to_datetime(df_aum[\"Centralisation Date\"], errors=\"coerce\")\n",
"\n",
"ID_COL = \"Registrar Account - ID\"\n",
"FLOW_COL = \"Quantity - NetFlows\"\n",
"AUM_COL = \"Quantity - AUM\"\n",
"\n",
"# Month key\n",
"df_flows[\"month\"] = df_flows[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"df_aum[\"month\"] = df_aum[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"# Flows sont journaliers, AUM est mensuel → il faut une granularité commune."
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "5ea26597-af38-41f1-9cde-e9cd115e8678",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(33972, 6)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>n_tx</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>2015-01-31</td>\n",
" <td>179864.637</td>\n",
" <td>-1524.010</td>\n",
" <td>15230.010</td>\n",
" <td>32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>2015-02-28</td>\n",
" <td>186761.736</td>\n",
" <td>7247.100</td>\n",
" <td>18571.880</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>2015-03-31</td>\n",
" <td>190357.718</td>\n",
" <td>3655.380</td>\n",
" <td>9754.040</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>2015-04-30</td>\n",
" <td>191429.324</td>\n",
" <td>-218.394</td>\n",
" <td>12840.950</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>2015-05-31</td>\n",
" <td>189056.475</td>\n",
" <td>-4782.849</td>\n",
" <td>6332.849</td>\n",
" <td>24</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty net_flow_qty gross_flow_qty \\\n",
"0 18872 2015-01-31 179864.637 -1524.010 15230.010 \n",
"1 18872 2015-02-28 186761.736 7247.100 18571.880 \n",
"2 18872 2015-03-31 190357.718 3655.380 9754.040 \n",
"3 18872 2015-04-30 191429.324 -218.394 12840.950 \n",
"4 18872 2015-05-31 189056.475 -4782.849 6332.849 \n",
"\n",
" n_tx \n",
"0 32 \n",
"1 38 \n",
"2 47 \n",
"3 39 \n",
"4 24 "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 1) Monthly aggregation for FLOWS : je fais mon etude mensuel parce que aum valeur mensuel \n",
"\n",
"ID_COL = \"Registrar Account - ID\"\n",
"FLOW_COL = \"Quantity - NetFlows\"\n",
"AUM_COL = \"Quantity - AUM\"\n",
"\n",
"df_flows_m = (\n",
" df_flows\n",
" .dropna(subset=[ID_COL, \"month\", FLOW_COL])\n",
" .assign(gross_flow_qty=lambda x: x[FLOW_COL].abs()) # absolute quantity moved\n",
" .groupby([ID_COL, \"month\"], as_index=False)\n",
" .agg(\n",
" net_flow_qty=(FLOW_COL, \"sum\"), # net quantity change over the month\n",
" gross_flow_qty=(\"gross_flow_qty\", \"sum\"), # total traded quantity (activity intensity)\n",
" n_tx=(FLOW_COL, \"size\"), # number of transactions\n",
" )\n",
")\n",
"\n",
"# 2) Monthly aggregation for AUM (client-month holdings) ---\n",
"df_aum_m = (\n",
" df_aum\n",
" .dropna(subset=[ID_COL, \"month\", AUM_COL])\n",
" .groupby([ID_COL, \"month\"], as_index=False)\n",
" .agg(aum_qty=(AUM_COL, \"sum\")) # total held quantity across ISINs\n",
")\n",
"\n",
"df_month = df_aum_m.merge(df_flows_m, on=[ID_COL, \"month\"], how=\"left\")\n",
"\n",
"# 4) Months without transactions => flows are 0 ---\n",
"df_month[\"net_flow_qty\"] = df_month[\"net_flow_qty\"].fillna(0.0)\n",
"df_month[\"gross_flow_qty\"] = df_month[\"gross_flow_qty\"].fillna(0.0)\n",
"df_month[\"n_tx\"] = df_month[\"n_tx\"].fillna(0).astype(int)\n",
"\n",
"print(df_month.shape)\n",
"df_month.head()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "f9c7a0e3-b15a-4404-a99d-b23894cbd3f4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>net_flow_qty_vol</th>\n",
" <th>rel_intensity</th>\n",
" <th>netflow_to_aum</th>\n",
" <th>n_tx_total</th>\n",
" <th>log_aum_qty_mean</th>\n",
" <th>log_gross_flow_qty_mean</th>\n",
" <th>gross_flow_to_aum</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>143505.697354</td>\n",
" <td>144653.1645</td>\n",
" <td>-45677.1480</td>\n",
" <td>1.244126e+06</td>\n",
" <td>9570.200015</td>\n",
" <td>9832.357264</td>\n",
" <td>0.069449</td>\n",
" <td>-0.003918</td>\n",
" <td>1926</td>\n",
" <td>11.874137</td>\n",
" <td>9.166514</td>\n",
" <td>8.669523</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200000076</td>\n",
" <td>130</td>\n",
" <td>119</td>\n",
" <td>0.915385</td>\n",
" <td>24141.541138</td>\n",
" <td>19888.8255</td>\n",
" <td>54791.9840</td>\n",
" <td>2.314415e+05</td>\n",
" <td>1780.319492</td>\n",
" <td>2838.000232</td>\n",
" <td>0.083230</td>\n",
" <td>-0.000893</td>\n",
" <td>518</td>\n",
" <td>10.091731</td>\n",
" <td>7.485110</td>\n",
" <td>9.586858</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200000082</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>422994.464523</td>\n",
" <td>462973.7880</td>\n",
" <td>178371.1590</td>\n",
" <td>2.327246e+06</td>\n",
" <td>17901.894469</td>\n",
" <td>13288.481111</td>\n",
" <td>0.047480</td>\n",
" <td>0.005194</td>\n",
" <td>7103</td>\n",
" <td>12.955117</td>\n",
" <td>9.792718</td>\n",
" <td>5.501836</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200000146</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>212108.397869</td>\n",
" <td>210616.5330</td>\n",
" <td>457533.3310</td>\n",
" <td>1.150546e+06</td>\n",
" <td>8850.350438</td>\n",
" <td>10074.748210</td>\n",
" <td>0.051622</td>\n",
" <td>0.024910</td>\n",
" <td>4774</td>\n",
" <td>12.264857</td>\n",
" <td>9.088325</td>\n",
" <td>5.424328</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200000147</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>145729.199224</td>\n",
" <td>79260.8255</td>\n",
" <td>677492.4351</td>\n",
" <td>1.213963e+06</td>\n",
" <td>9338.178685</td>\n",
" <td>13868.197522</td>\n",
" <td>0.061164</td>\n",
" <td>0.022213</td>\n",
" <td>7585</td>\n",
" <td>11.889512</td>\n",
" <td>9.141974</td>\n",
" <td>8.330268</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
"0 18872 130 130 1.000000 143505.697354 \n",
"1 200000076 130 119 0.915385 24141.541138 \n",
"2 200000082 130 130 1.000000 422994.464523 \n",
"3 200000146 130 130 1.000000 212108.397869 \n",
"4 200000147 130 130 1.000000 145729.199224 \n",
"\n",
" aum_qty_median net_flow_qty_sum gross_flow_qty_sum gross_flow_qty_mean \\\n",
"0 144653.1645 -45677.1480 1.244126e+06 9570.200015 \n",
"1 19888.8255 54791.9840 2.314415e+05 1780.319492 \n",
"2 462973.7880 178371.1590 2.327246e+06 17901.894469 \n",
"3 210616.5330 457533.3310 1.150546e+06 8850.350438 \n",
"4 79260.8255 677492.4351 1.213963e+06 9338.178685 \n",
"\n",
" net_flow_qty_vol rel_intensity netflow_to_aum n_tx_total \\\n",
"0 9832.357264 0.069449 -0.003918 1926 \n",
"1 2838.000232 0.083230 -0.000893 518 \n",
"2 13288.481111 0.047480 0.005194 7103 \n",
"3 10074.748210 0.051622 0.024910 4774 \n",
"4 13868.197522 0.061164 0.022213 7585 \n",
"\n",
" log_aum_qty_mean log_gross_flow_qty_mean gross_flow_to_aum \n",
"0 11.874137 9.166514 8.669523 \n",
"1 10.091731 7.485110 9.586858 \n",
"2 12.955117 9.792718 5.501836 \n",
"3 12.264857 9.088325 5.424328 \n",
"4 11.889512 9.141974 8.330268 "
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eps = 1e-9 \n",
"\n",
"# 1) Active month indicator: did the client trade this month?\n",
"df_month[\"active_month\"] = (df_month[\"gross_flow_qty\"] > 0).astype(int)\n",
"\n",
"#client avec beaucoup de mois à 0 → “stable / dormant”\n",
"#client actif presque tous les mois → “rebalancer / institutionnel actif”\n",
"\n",
"\n",
"# 2) Monthly relative intensity (turnover proxy in quantity terms) : Mesurer l’ intensité de trading relativement à la taille et pouvoir ocmparer client petit avec client plus gros\n",
"df_month[\"rel_intensity_m\"] = df_month[\"gross_flow_qty\"] / (df_month[\"aum_qty\"].abs() + eps)\n",
"\n",
"# 3) Monthly net flow ratio (directional change): sert a Capturer la direction de la dynamique\n",
"df_month[\"netflow_to_aum_m\"] = df_month[\"net_flow_qty\"] / (df_month[\"aum_qty\"].abs() + eps)\n",
"\n",
"# 4) Aggregate to client-level features (1 row per client)\n",
"df_client_feat = (\n",
" df_month.groupby(ID_COL, as_index=False)\n",
" .agg(\n",
" # Coverage / activity\n",
" n_months=(\"month\", \"nunique\"),\n",
" n_active_months=(\"active_month\", \"sum\"),\n",
" flow_freq=(\"active_month\", \"mean\"),\n",
"\n",
" # Size in quantity terms\n",
" aum_qty_mean=(\"aum_qty\", \"mean\"),\n",
" aum_qty_median=(\"aum_qty\", \"median\"),\n",
"\n",
" # Flows in quantity terms\n",
" net_flow_qty_sum=(\"net_flow_qty\", \"sum\"),\n",
" gross_flow_qty_sum=(\"gross_flow_qty\", \"sum\"),\n",
" gross_flow_qty_mean=(\"gross_flow_qty\", \"mean\"),\n",
"\n",
" # Dispersion / volatility proxy\n",
" net_flow_qty_vol=(\"net_flow_qty\", \"std\"),\n",
" rel_intensity=(\"rel_intensity_m\", \"mean\"),\n",
" netflow_to_aum=(\"netflow_to_aum_m\", \"mean\"),\n",
"\n",
" # Trading frequency proxy\n",
" n_tx_total=(\"n_tx\", \"sum\"),\n",
" )\n",
")\n",
"\n",
"# 5) Clean NaNs due to std on constant series\n",
"df_client_feat[\"net_flow_qty_vol\"] = df_client_feat[\"net_flow_qty_vol\"].fillna(0.0)\n",
"\n",
"# 6) Log transforms (useful because distributions are heavy-tailed)\n",
"df_client_feat[\"log_aum_qty_mean\"] = np.log1p(df_client_feat[\"aum_qty_mean\"].clip(lower=0))\n",
"df_client_feat[\"log_gross_flow_qty_mean\"] = np.log1p(df_client_feat[\"gross_flow_qty_mean\"].clip(lower=0))\n",
"\n",
"# 7) Global turnover proxy\n",
"df_client_feat[\"gross_flow_to_aum\"] = df_client_feat[\"gross_flow_qty_sum\"] / (df_client_feat[\"aum_qty_mean\"].abs() + eps)\n",
"\n",
"df_client_feat.head()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "4ddd1305-fe5a-4d0f-b4d3-b07de27b5dc6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(431, 16)"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_client_feat.shape"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "34a37448-ab63-4fc1-8c93-f9f59db385c2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Filtered clients: (421, 16)\n"
]
}
],
"source": [
"dfc = df_client_feat.copy()\n",
"\n",
"# Minimal filters (adjust if needed)\n",
"dfc = dfc[(dfc[\"n_months\"] >= 6)] # at least 6 observed months\n",
"dfc = dfc[(dfc[\"aum_qty_mean\"].abs() > 0)] # avoid zero holdings\n",
"print(\"Filtered clients:\", dfc.shape)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "2763cc28-f9a7-4ced-8331-c2b79ac7c122",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Q33: 1946.5229933948517 Q66: 8013.920450704226\n",
"seg_quantiles\n",
"High-flow 143\n",
"Low-flow 139\n",
"Intermediate-flow 139\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"# Baseline Clustering 1\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"\n",
"# Baseline 1 variable: average monthly gross traded quantity\n",
"x = dfc[\"gross_flow_qty_mean\"].copy()\n",
"\n",
"q33, q66 = x.quantile([0.33, 0.66])\n",
"\n",
"dfc[\"seg_quantiles\"] = pd.cut(\n",
" x,\n",
" bins=[-np.inf, q33, q66, np.inf],\n",
" labels=[\"Low-flow\", \"Intermediate-flow\", \"High-flow\"]\n",
")\n",
"\n",
"print(\"Q33:\", q33, \" Q66:\", q66)\n",
"print(dfc[\"seg_quantiles\"].value_counts(dropna=False))"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "5afe137b-a09c-4fbc-a03c-b54f0422862d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAqQAAAHHCAYAAABpzkrAAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAaHZJREFUeJzt3XdYU+ffBvA7BAgbBJmKoLjALSqi4qSiolXr1ipYVytu62rrrBZrrVqt1da2av25rXvvjQv3AEVxVcENIsrK8/7hxXkNCco+BO/PdXFpnjw553tOkpM7ZzxRCCEEiIiIiIhkYiB3AURERET0cWMgJSIiIiJZMZASERERkawYSImIiIhIVgykRERERCQrBlIiIiIikhUDKRERERHJioGUiIiIiGTFQEpEREREssr3QDpp0iQoFIr8ng0AoHHjxmjcuLF0++DBg1AoFFi3bl2BzD84OBju7u4FMq+cSkhIQN++feHk5ASFQoFhw4bJXRIVkCVLlkChUODMmTMf7JvxvURvxcbGomPHjrCzs4NCocCcOXOk7czBgwflLi9LEhIS4ODggOXLl8tdSp5Jf23fvn07z6a5cOFClCpVCklJSXk2TV0UCgUmTZqU5b6DBg3K13oKs4zrKj+e98y4u7ujdevW+T6frMrrem7fvg2FQoElS5Z8sG9+ZZ1sBdL0Jz/9z8TEBC4uLggICMDcuXPx8uXLPCnqwYMHmDRpEs6fP58n08tLhbm2rPjhhx+wZMkSfPXVV1i2bBl69uwpd0mUx3777bcsbVQo+4YPH45du3Zh3LhxWLZsGVq0aCF3Sdn2yy+/wNLSEl27dpW7lGz74YcfsHHjxiz1ze37IDg4GMnJyfj9999zPI2cOH78OCZNmoQXL14U6HwLi+3bt2c5oOe1q1evYtKkSQUScElbjvaQTpkyBcuWLcOCBQswePBgAMCwYcNQpUoVXLx4UaPvd999h9evX2dr+g8ePMDkyZOzHfp2796N3bt3Z+sx2fW+2hYtWoTIyMh8nX9u7d+/H3Xr1sXEiRPx+eefw9vbW+6SKI8xkOaf/fv3o23btvj666/x+eefo2LFinKXlC0pKSn45Zdf0LdvXyiVSrnLybbMAmnPnj3x+vVruLm5SW25fR+YmJggKCgIs2bNghAix9P5kNevX+O7776Tbh8/fhyTJ0/+qAPp5MmTdd6XcV3ltatXr2Ly5MkMpDLJUSBt2bIlPv/8c/Tu3Rvjxo3Drl27sHfvXjx69AiffvqpRgA1NDSEiYlJnhWsS2JiIgDA2NgYxsbG+Tqv9zEyMoJKpZJt/lnx6NEj2NjYyF1GtqU/x/TxefXqldwlSPT1/ZNu69atePz4MTp37ix3KXlKqVTCxMQkz08P69y5M+7cuYMDBw7k6XTfZWJiAkNDw3ybflGir+uqMG3DCrM8O4e0adOmGD9+PO7cuYP//e9/Uruuc0j37NmDBg0awMbGBhYWFqhQoQK++eYbAG/P+6xduzYAoHfv3tLpAenfdBs3bozKlSsjPDwcDRs2hJmZmfTYzM57S0tLwzfffAMnJyeYm5vj008/xb179zT6uLu7Izg4WOux707zQ7XpOq/i1atXGDlyJFxdXaFSqVChQgXMnDlT6xt3+rlBGzduROXKlaFSqVCpUiXs3LlT9wrP4NGjR+jTpw8cHR1hYmKCatWqYenSpdL96ee5RUdHY9u2bVLt7/smuHjxYjRt2hQODg5QqVTw8vLCggULNPq0bt0aZcqU0fl4X19f1KpVS6Ptf//7H7y9vWFqagpbW1t07dpV67l433O8adMmBAYGwsXFBSqVCh4eHvj++++RlpamNf/58+ejTJkyMDU1RZ06dXDkyBGdr5GkpCRMnDgRZcuWhUqlgqurK0aPHp2lc8fSa7148SIaNWoEMzMzlC1bVjpv+dChQ/Dx8YGpqSkqVKiAvXv3ak3j3LlzaNmyJaysrGBhYYFmzZrhxIkTGn
3ST5c5duwYRowYAXt7e5ibm6N9+/Z4/Pix1M/d3R1XrlzBoUOHpOdY1/K+bxoZJSQkwNzcHEOHDtW67/79+1AqlQgNDX3venr69Cl69uwJKysr2NjYICgoCBcuXNA6Zyk4OBgWFha4efMmWrVqBUtLS/To0QNA1t9L79u+pJs3bx4qVaoEMzMzFCtWDLVq1cKKFSsyrT99/QshMH/+fGndvs/atWul13rx4sXx+eef47///pPu37x5MxQKhcZRpX///RcKhQKfffaZxrQ8PT3RpUuXbC2jLhs3boS7uzs8PDx03le5cmWYmJigcuXK2LBhg9Y2LbPzZXWdf3bx4kUEBwejTJkyMDExgZOTE7744gs8ffpU47HpnxFRUVEIDg6GjY0NrK2t0bt3b40vogqFAq9evcLSpUul9Z++zc54LmFm74Nbt25BoVBg9uzZWst//PhxKBQKrFy5Umrz9vaGra0tNm3a9N71OnfuXCiVSo29mj///DMUCgVGjBghtaWlpcHS0hJjxozRWK70Q9STJk3CqFGjAAClS5fOdDud08+J+/fvo127djA3N4eDg4N0CkrG5zQrn4cAkJycjAkTJsDb2xvW1tYwNzeHn5+fVoBPf33MnDkTf/zxBzw8PKBSqVC7dm2cPn1a6hccHIz58+dL6yXj+yyr59vu2LEDfn5+MDc3h6WlJQIDA3HlypX3PmbJkiXo1KkTAKBJkybSvDO+1o8ePYo6derAxMQEZcqUwT///KM1HYVCgUOHDmHgwIFwcHBAyZIls1VbTEwMevfujZIlS0KlUsHZ2Rlt27bV+Xn9oXoA4NatW+jUqRNsbW1hZmaGunXrYtu2be9dH+l0bRd0WbVqFby9vWFpaQkrKytUqVIFv/zyS5bmkS5Pv2r07NkT33zzDXbv3o1+/frp7HPlyhW0bt0aVatWxZQpU6BSqRAVFYVjx44BeLvhnTJlCiZMmID+/fvDz88PAFCvXj1pGk+fPkXLli3RtWtXfP7553B0dHxvXdOmTYNCocCYMWPw6NEjzJkzB/7+/jh//jxMTU2zvHxZqe1dQgh8+umnOHDgAPr06YPq1atj165dGDVqFP777z+tjeLRo0exfv16DBw4EJaWlpg7dy46dOiAu3fvws7OLtO6Xr9+jcaNGyMqKgqDBg1C6dKlsXbtWgQHB+PFixcYOnQoPD09sWzZMgwfPhwlS5bEyJEjAQD29vaZTnfBggWoVKkSPv30UxgaGmLLli0YOHAg1Go1QkJCAABdunRBr169cPr0aSmsA8CdO3dw4sQJ/PTTT1LbtGnTMH78eHTu3Bl9+/bF48ePMW/ePDRs2BDnzp3T2POU2XO8ZMkSWFhYYMSIEbCwsMD+/fsxYcIExMfHa8xrwYIFGDRoEPz8/DB8+HDcvn0b7dq1Q7FixTQ2Dmq1Gp9++imOHj2K/v37w9PTE5cuXcLs2bNx/fr1LJ2v9vz5c7Ru3Rpdu3ZFp06dsGDBAnTt2hXLly/HsGHD8OWXX6J79+746aef0LFjR9y7dw+WlpYA3r4f/Pz8YGVlhdGjR8PIyAi///47GjduLIXZdw0ePBjFihXDxIkTcfv2bcyZMweDBg3C6tWrAQBz5szB4MGDYWFhgW+//RYAtN4fH5pGRhYWFmjfvj1Wr16NWbNmaRzqXblyJYQQUmjURa1Wo02bNjh16hS++uorVKxYEZs2bUJQUJDO/qmpqQgICECDBg0wc+ZMmJmZZfm99KHtC/D21JohQ4agY8eOGDp0KN68eYOLFy/i5MmT6N69u86aGjZsKJ1z/cknn6BXr16ZLi/w9nXau3dv1K5dG6GhoYiNjcUvv/yCY8eOSa/1Bg0aQKFQ4PDhw6hatSoA4MiRIzAwMMDRo0elaT1+/BgRERHSxSxZWcbMHD9+HDVr1tRq3717Nzp06AAvLy+Ehobi6dOn0gdiTu3Zswe3bt1C79694eTkhCtXruCPP/7AlStXcOLECa1A37lzZ5
QuXRqhoaE4e/Ys/vzzTzg4OODHH38EACxbtgx9+/ZFnTp10L9/fwDQGayBzN8HZcqUQf369bF8+XIMHz5c4zHLly+
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure()\n",
"plt.hist(dfc[\"gross_flow_qty_mean\"], bins=100)\n",
"plt.axvline(q33, linestyle=\"--\")\n",
"plt.axvline(q66, linestyle=\"--\")\n",
"plt.xlabel(\"Average monthly gross flow (quantity)\")\n",
"plt.ylabel(\"Count\")\n",
"plt.title(\"Distribution of average monthly gross flows (quantity) with quantile thresholds\")\n",
"plt.show()\n",
"\n",
"#X= activite moyenen mensuelle , Y = combien de client ont cette valeurde X"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "619559a8-c205-4e25-a810-4d80894644c2",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAHHCAYAAACle7JuAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAUxBJREFUeJzt3XdYFFfbBvB76UgHqdKs2HvsWImIaNTYJQrGFntJYjTGHks0MUZjLHnf2KKxJLEk2BvW2BB7D9gRK0gRKef7g5f5HFhgwYVh9P5d117XztnZM8/Ozsw+e86ZGY0QQoCIiIhIhQyUDoCIiIiooJjIEBERkWoxkSEiIiLVYiJDREREqsVEhoiIiFSLiQwRERGpFhMZIiIiUi0mMkRERKRaTGSIiIhItZjIqIRGo8GUKVOk6RUrVkCj0SAqKqrQlx0SEgJvb29pOioqChqNBt9++22hLxsApkyZAo1GUyTLKqjU1FSMHTsWHh4eMDAwQMeOHZUOiYrIgQMHoNFo8Pvvv+c5b9Z9iTLEx8ejf//+cHFxgUajwahRo6TjzIoVK5QOTyfp6emoWrUqZsyYoXQoepO5bR84cEBvde7YsQOWlpZ49OiR3up85xOZzITg1KlTSodS6BITEzFlyhS9bpT6Upxj08Uvv/yCuXPnokuXLli5ciVGjx6tdEikZ2vXrsX8+fOVDuOtNHPmTKxYsQKDBw/G6tWr0bt3b6VDyrfffvsNd+7cwbBhw5QOJd9++uknnRPGN90P2rRpg3LlymHWrFkFriMb8Y5bvny5ACBOnjypdCi5AiAmT54sTaempoqkpCSRnp6ucx2PHj3KVo8uXr16JV6+fClNR0ZGCgBi7ty5+aqnoLGlpKSIpKQkvS2rMHTv3l2UKlVK6TCoEAUGBgovL69s5fv37xcAxMaNG/OsIzg4WGsd77r69euLxo0by8oyjzPLly9XJqh8qlGjhhg4cKDSYRRIlSpVRLNmzbKVp6WliaSkJJGWliaV5bQf5MdPP/0kSpQoIeLi4t6onkzvfIuMWhkaGsLMzKxQu1wSEhIAAMbGxjA1NS205eTFyMgIZmZmii1fFzExMbC1tVU6jHzL/I7p3VOcvnu17j+Zzpw5g7Nnz6Jbt25Kh6JXBgYGMDMzg4GBflOFzp07Izk5GRs3btRLfUxkdHTmzBkEBATA2toalpaWaNWqFf75559s8507dw7NmjWDubk53N3d8fXXX2P58uU6j2dJTk7G6NGj4ejoCCsrK3zwwQe4e/dutvm0jZE5deoU/P39UbJkSZibm6N06dL4+OOPAWSMa3F0dAQATJ06FRqNRjbuJiQkBJaWlrh58ybatm0LKysrBAUFSa/l1K///fffw8vLC+bm5mjWrBkuXLgge7158+Zo3rx5tve9XmdesWkbI5Oamorp06ejbNmyMDU1hbe3N7788kskJyfL5vP29ka7du1w+PBh1KtXD2ZmZihTpgxWrVql9fNklZCQgE8//RQeHh4wNTWFj48Pvv32W4j/3TQ+sx9///79uHjxohR7bl1kW7ZsQWBgINzc3GBqaoqyZcti+vTpSEtLk+YZNmwYLC0tkZiYmO39PXv2hIuLi2z+7du3w9fXFxYWFrCyskJgYCAuXrwoe19u3/GhQ4fQtWtXeHp6wtTUFB4eHhg9ejSSkpKyLX/jxo2oXLkyzMzMULVqVWzatEnrNpKeno758+ejSpUqMDMzg7OzMwYNGoRnz57lud4zY719+zbatWsHS0tLlCpVCosWLQIAnD9/Hi1btoSFhQW8vLywdu3abHX8+++/6Nq1K+zt7VGiRAk0aNAAoaGhsnkyxwBs2LABM2bMgLu7O8zMzNCqVSvcuHFDmq958+YIDQ3FrVu3pO9Y2+fNrY6shBDw9vZGhw4dsr328uVL2NjYYNCgQbmup6SkJIwYMQIlS5aUjhf37t3LNqYucx+6dOkSevXqBTs7OzRp0gSA7vtSbs
eXTOvWrUOdOnVgZWUFa2trVKtWDT/88EOO8Weu/8jISISGhkrrNrdj5b59+6Rt3dbWFh06dMDly5el18+dOweNRoOtW7dKZadPn4ZGo0Ht2rVldQUEBKB+/fr5+ozabN68GSYmJmjatGm21w4fPoz33nsPZmZmKFu2LJYuXZrtmJbbeKCs3+WtW7cwZMgQ+Pj4wNzcHA4ODujatWu2dZb5G3HkyBGMGTMGjo6OsLCwQKdOnWTjU7y9vXHx4kWEhYVJ6z/zmJ11jExO+0F8fDwsLCwwcuTIbPHfvXsXhoaGsq4kJycnVK9eHVu2bMlz3erCSC+1vOUuXrwIX19fWFtbY+zYsTA2NsbSpUvRvHlzhIWFSTvCvXv30KJFC2g0GowfPx4WFhb4z3/+k6/WjP79++PXX39Fr1690KhRI+zbtw+BgYF5vi8mJgatW7eGo6Mjxo0bB1tbW0RFReHPP/8EADg6OmLx4sUYPHgwOnXqhA8//BAAUL16damO1NRU+Pv7o0mTJvj2229RokSJXJe5atUqvHjxAkOHDsXLly/xww8/oGXLljh//jycnZ11/sy6xJZV//79sXLlSnTp0gWffvopjh8/jlmzZuHy5cvYtGmTbN4bN26gS5cu6NevH4KDg/HLL78gJCQEderUQZUqVXJchhACH3zwAfbv349+/fqhZs2a2LlzJz7//HPcu3cP33//PRwdHbF69WrMmDED8fHx0s5aqVKlHOtdsWIFLC0tMWbMGFhaWmLfvn2YNGkS4uLiMHfuXABA9+7dsWjRIoSGhqJr167SexMTE/HXX38hJCQEhoaGAIDVq1cjODgY/v7++Oabb5CYmIjFixejSZMmOHPmjOwHN6fveOPGjUhMTMTgwYPh4OCAEydOYOHChbh7967sX1NoaCi6d++OatWqYdasWXj27Bn69euHUqVKZfucgwYNwooVK9C3b1+MGDECkZGR+PHHH3HmzBkcOXIExsbGOa4jAEhLS0NAQACaNm2KOXPmYM2aNRg2bBgsLCwwYcIEBAUF4cMPP8SSJUvQp08fNGzYEKVLlwYAPHz4EI0aNUJiYiJGjBgBBwcHrFy5Eh988AF+//13dOrUSbas2bNnw8DAAJ999hliY2MxZ84cBAUF4fjx4wCACRMmIDY2Fnfv3sX3338PALC0tMxXHVlpNBp89NFHmDNnDp4+fQp7e3vptb/++gtxcXH46KOPcl1HISEh2LBhA3r37o0GDRogLCws1+NF165dUb58ecycOVNKxnXZl/I6vgDA7t270bNnT7Rq1QrffPMNAODy5cs4cuSI1h84IGM/Wb16NUaPHg13d3d8+umnADKOCdoGg+7ZswcBAQEoU6YMpkyZgqSkJCxcuBCNGzdGeHg4vL29UbVqVdja2uLgwYP44IMPAGQk6gYGBjh79izi4uJgbW2N9PR0HD16FAMHDtT5M+bk6NGjqFq1arZt+vz581KdU6ZMQWpqKiZPnpyv42NWJ0+exNGjR9GjRw+4u7sjKioKixcvRvPmzXHp0qVsx+3hw4fDzs4OkydPRlRUFObPn49hw4Zh/fr1AID58+dj+PDhsLS0xIQJEwAgx/hy2g8sLS3RqVMnrF+/HvPmzZOOTUDG2CEhhPSnKVOdOnWwefPmAq8HGb10UKmYLmNkOnbsKExMTMTNmzelsvv37wsrKyvRtGlTqWz48OFCo9GIM2fOSGVPnjwR9vb2AoCIjIzMNZaIiAgBQAwZMkRW3qtXr2zjRzLjzqxz06ZNeX6O3MahBAcHCwBi3LhxWl97vU80s+/a3Nxc3L17Vyo/fvy4ACBGjx4tlTVr1kxr32vWOnOLbfLkyeL1TTVzPfXv318232effSYAiH379kllXl5eAoA4ePCgVBYTEyNMTU3Fp59+mm1Zr9u8ebMAIL7++mtZeZcuXYRGoxE3btyQfc4qVarkWl+mxMTEbGWDBg0SJUqUkMYipaeni1KlSonOnTvL5tuwYYPs87
x48ULY2tqKAQMGyOaLjo4WNjY2svLcvmNtMc2aNUtoNBpx69YtqaxatWrC3d1dvHjxQio7cOCAACD7Pg8dOiQAiDV
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure()\n",
"plt.hist(np.log1p(dfc[\"gross_flow_qty_mean\"]), bins=100)\n",
"plt.axvline(np.log1p(q33), linestyle=\"--\")\n",
"plt.axvline(np.log1p(q66), linestyle=\"--\")\n",
"plt.xlabel(\"log(1 + avg monthly gross flow) (quantity)\")\n",
"plt.ylabel(\"Count\")\n",
"plt.title(\"Log-distribution of average monthly gross flows (quantity)\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "af32c07d-3908-428a-b388-a6e1a8d528bd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1011/630006569.py:3: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.\n",
" plt.boxplot(\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmsAAAGzCAYAAABwyVA7AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAakZJREFUeJzt3XdcU1f/B/BPQEFAhoIMJ4gDUOpAHCAq1Rapi+Leo46nolZxVG2rtrXSuhdqnfQRV6VIrRatdVRU2iqOSgU31SqCi6UokpzfH/7IYwQkgYQk5PN+vXhpzj2593uTm5tvzr3nHIkQQoCIiIiIdJKRtgMgIiIiouIxWSMiIiLSYUzWiIiIiHQYkzUiIiIiHcZkjYiIiEiHMVkjIiIi0mFM1oiIiIh0GJM1IiIiIh3GZI2IiIhIh2klWXN2dsaIESPkj48dOwaJRIJjx45pI5xSi4iIgEQiQUpKirZDwYgRI+Ds7KxQJpFIMG/ePIWy06dPw8fHBxYWFpBIJDh//jwA4MCBA2jevDmqVKkCiUSCjIyMcolb3V4/topTlvdu3rx5kEgkqgenRikpKZBIJIiIiFC67uLFi0usq4l9S0tLQ58+fWBrawuJRILly5fr1Ge+qM+JvlL2+Cfd995772HMmDHaDkNtVDlnVUQvXrxAnTp1sGbNmlI9X63J2vXr1zFu3DjUr18fVapUgZWVFXx9fbFixQrk5uaqc1OlsmDBAsTExGg7DK158eIF+vbti0ePHmHZsmXYunUr6tWrh4cPH6Jfv34wMzNDeHg4tm7dCgsLC22HSyr6+eefdTLpmDJlCg4ePIhZs2Zh69at6Nq1q7ZDUhtdP6dcunQJ8+bN04kflBXR3bt3MW/ePPmPXnU5efIkfvnlF3z88cdqXW952L59O5YvX65UXU2fs6RSKWrWrAmJRILY2Ngi64wYMQJVq1Ytdh1Vq1YtsnFJIpEgMjKyyOf4+vpCIpGgadOm8rLKlSsjNDQUX331FZ49e6b6zgg12bdvnzAzMxM2NjZi0qRJYv369WL16tViwIABonLlymLMmDHyuvXq1RPDhw+XP5ZKpSI3N1dIpVJ1hVMkCwsLhe2WVX5+vsjNzRUymUxt6yyt4cOHi3r16imU5ebmihcvXsgfJyUlCQBiw4YNCvViY2MFAHHo0KHyCFWjnj17JvLy8kqst2XLFgFA3Lx5U+VtzJ07V6jxo1MqMplM5Obmivz8fHlZSEhIkXHdvHlTABCLFi0qcb0vXrwQubm5ao3VwcFBDB48WKHs6NGjAoA4evSoWrdVGq9/TlSh7nNKWb1+/O/evVtnXueK6PTp0wKA2LJli1rX26tXL/Huu++qdZ3lpVu3boW+i4RQ7ZylLr/88osAIJydnQudgwoMHz5cWFhYFLuO1z/jBeeuKlWqiMDAwEL1C863VapUEU2aNFFY9vjxY2FiYiI2bdqk8r5UUj29K+zmzZsYMGAA6tWrhyNHjsDJyUm+LCQkBNeuXcP+/fuLfb6RkRGqVKmijlDKlbGxMYyNjbUdRrFef03T09MBADY2NkqVl8WTJ0+00jpnampa7tvUBolEopHPTKVKlVCpklpOC3Lp6elqPbbUTR/PPcUxlOO/IktPT8f+/fuxbt06bYeiVpo6Z71JZGQkWrZsieHDh2P27Nlq/V567733sHfvXjx48AB2dnby8u3bt8PBwQENGzbE48ePFZ5jY2ODd999FxERERg1apRqG1Q5vSvCf/7zHwFAnDx5Uqn6r7esFfcr+/fffxcBAQHCyspKmJmZiQ4dOogTJ04o1Clo5bh69aoYPny4sLa2FlZWVmLEiBHiyZMn8noACv2V9It45cqVwsPDQ95i6OXlJbZt2yZf/nrrTEEsJW1LKpWKZcuWCQ8PD2Fqairs7e3F2LFjxaNHj5R6/fbs2SOaNGkiTE1NRZMmTUR0dHSRLWsAxNy5c4UQL389vB5Tx44dRceOHd8Yqy
rvwd9//y0GDhwobGxsRPPmzYUQQqSmpooRI0aIWrVqCRMTE+Ho6Ch69uz5xhatH3/8UQAQFy5ckJdFRUUJAOL9999XqOvm5ib69esnf/z6sSWEEImJicLf319UqVJF1KpVS3z55Zdi06ZNRbas/fzzz6J9+/bC3NxcVK1aVbz33nsiMTGxyP0t0KFDB/HWW28VuS+NGjV64y/kKVOmiOrVqyu0zk6YMEEAECtWrJCX3bt3TwAQa9asEUL879dbwS/6ot7fghhfbVn79ttvRf369YWJiYlo1aqV+PPPP9+4b0K8PI5CQkLkx52JiYnw8PAQsbGxxe6XEP/7fBQVU3Gf+e+//160bNlSVKlSRdja2orBgweLf//9V768LMdGcV79nLz6GpT1nPLvv/+KkSNHCnt7e/lr9vov6oLXYdeuXWL+/PmiVq1awtTUVLz99tvi6tWrCnWvXLkigoODhYODgzA1NRW1atUS/fv3FxkZGfI6rx7/xb3+R48eFcOGDRO2trZFtkK/8847olGjRm98zZSJRQghtm7dKn8/q1WrJvr37y9u3bpVaH2rV68WLi4uokqVKsLb21scP35cfn4q6rWaN2+eqFmzpqhataro3bu3yMjIEM+ePRMfffSRqFGjhrCwsBAjRowQz549K7QtZWLq2LGjaNKkifj7779Fp06dhJmZmahZs6b45ptvCsXz+l/BZ1LZ1+h1mzdvFgBESkpKoWXKnsteP6YLvH5+fPjwoZg6dapo2rSpsLCwEJaWlqJr167i/PnzCs9T9jgt6vuk4HtJ2XOWTCYT9erVEz179iwUf25urrCyshJjx45942sohBBPnz4VlpaWYuHChSI1NVUYGRkpfH8XKG3L2nfffScsLCzk5+QCTZo0ERMnTpQfQ69bsWKFkEgk4uHDhyXuw6vU8hP6p59+Qv369eHj46OO1QEAjhw5gsDAQHh5eWHu3LkwMjLCli1b8PbbbyMuLg6tW7dWqN+vXz+4uLggLCwMZ8+excaNG2Fvb49vvvkGALB161aMHj0arVu3xtixYwEArq6uxW5/w4YNmDRpEvr06YOPPvoIz549w19//YU//vgDgwYNKvI5wcHBaNCggUJZQkICli9fDnt7e3nZuHHjEBERgZEjR2LSpEm4efMmVq9ejXPnzuHkyZOoXLlysXH98ssv6N27Nzw8PBAWFoaHDx9i5MiRqF279htfz3HjxqFWrVpYsGABJk2aBG9vbzg4OAAAGjdujPXr1+OLL76Ai4uL/HVR9T3o27cvGjZsiAULFkAIAQDo3bs3/v77b0ycOBHOzs5IT0/HoUOHcOvWrUIdIgq0b98eEokEx48fx1tvvQUAiIuLg5GREU6cOCGvd//+fSQnJ2PChAnF7ve9e/fg7++P/Px8zJw5ExYWFli/fj3MzMwK1d26dSuGDx+OgIAAfPPNN3j69CnWrl2L9u3b49y5c8XGO3ToUIwZMwaJiYkK9yicPn0aV65cwaefflpsfH5+fli2bBn+/vtv+XML9jUuLg6TJk2SlwFAhw4dilzPuHHjcPfuXRw6dAhbt24tss727duRnZ2NcePGQSKRYOHChQgODsaNGzfeeMwBwIkTJxAdHY3x48fD0tISK1euRO/evXHr1i3Y2toW+ZwOHTpg69atGDp0KN555x0MGzbsjdso+Ex4e3sjLCwMaWlpWLFiBU6ePIlz587BxsZGrcdGScpyTklLS0Pbtm0hkUgwYcIE1KhRA7Gxsfjggw+QlZWFyZMnK2zr66+/hpGREaZNm4bMzEwsXLgQgwcPxh9//AEAyMvLQ0BAAJ4/f46JEyfC0dERd+7cwb59+5CRkQFra+tC8Xfo0AGTJk3CypUrMXv2bLi7uwMA3N3dMXToUPz3v//FwYMH0b17d/lz7t27hyNHjmDu3LnFvi7KxvLVV1/hs88+Q79+/TB69Gjcv38fq1atQocOHeTvJwCsXbsWEyZMgJ+fH6ZMmYKUlBQEBQ
WhWrVqRZ7XwsLCYGZmhpkzZ+LatWtYtWoVKleuDCMjIzx+/Bjz5s3D77//joiICLi4uGDOnDny5yobEwA8fvwYXbt
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#limite client size \n",
"plt.figure()\n",
"plt.boxplot(\n",
" [dfc.loc[dfc[\"seg_quantiles\"]==s, \"aum_qty_mean\"].dropna()\n",
" for s in [\"Low-flow\",\"Intermediate-flow\",\"High-flow\"]],\n",
" labels=[\"Low\",\"Mid\",\"High\"]\n",
")\n",
"plt.yscale(\"log\")\n",
"plt.ylabel(\"Mean AUM (quantity) [log scale]\")\n",
"plt.title(\"Client size differs widely within flow-intensity segments (quantity AUM)\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "0596d9fe-524a-493a-948a-69f37075d1ca",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 421.000000\n",
"mean 80.643705\n",
"std 37.155098\n",
"min 7.000000\n",
"25% 52.000000\n",
"50% 71.000000\n",
"75% 130.000000\n",
"max 130.000000\n",
"Name: n_months, dtype: float64"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
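],
"source": [
"# Baseline 2: 2D segmentation on relative flow intensity and activity frequency.\n",
"# Before building it, look at the distribution of n_months across the rows of dfc.\n",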
"dfc[\"n_months\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": 116,
"id": "56ac17f8-a25f-4726-a7ac-103a35ac6d6a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"seg_2D\n",
"Highly active (high int, high freq) 109\n",
"Small rebalancers (low int, high freq) 106\n",
"Dormant (low int, low freq) 104\n",
"Occasional large movers (high int, low freq) 102\n",
"Name: count, dtype: int64\n",
"thr_int: 4.174084305917157 thr_freq: 0.9859154929577465\n"
]
}
],
"source": [
"dfc[\"rel_intensity_total\"] = dfc[\"gross_flow_to_aum\"] # turnover proxy\n",
"dfc[\"frequency\"] = dfc[\"flow_freq\"] # share of active months\n",
"\n",
"# Thresholds: medians (simple + explainable)\n",
"thr_int = dfc[\"rel_intensity_total\"].median()\n",
"thr_freq = dfc[\"frequency\"].median()\n",
"thr_tx = dfc[\"n_tx_total\"].median()\n",
"\n",
"def quadrant(row):\n",
" low_int = row[\"rel_intensity_total\"] < thr_int\n",
" low_frq = row[\"frequency\"] < thr_freq\n",
"\n",
" if low_int and low_frq:\n",
" return \"Dormant (low int, low freq)\"\n",
" if low_int and (not low_frq):\n",
" return \"Small rebalancers (low int, high freq)\"\n",
" if (not low_int) and low_frq:\n",
" return \"Occasional large movers (high int, low freq)\"\n",
" return \"Highly active (high int, high freq)\"\n",
"\n",
"dfc[\"seg_2D\"] = dfc.apply(quadrant, axis=1)\n",
"\n",
"print(dfc[\"seg_2D\"].value_counts())\n",
"print(\"thr_int:\", thr_int, \"thr_freq:\", thr_freq)\n"
]
},
{
"cell_type": "code",
"execution_count": 196,
"id": "8635328a-fb66-45cb-b0fa-d1dc6c3cc1e1",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApgAAAHHCAYAAAAbASh2AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA3ENJREFUeJzs3XdYFMf/B/D3Hb33KiCCFZUmaDQqqCg27DVGscavYuyJGhNrjC22KBGNNcYoNtQYO7FrrIi9o6CCiggoKOVufn/wuwvH3cEd3F6Bz+t5ePT25nZm9/Z2PzszO8NjjDEQQgghhBCiInxNF4AQQgghhFQuFGASQgghhBCVogCTEEIIIYSoFAWYhBBCCCFEpSjAJIQQQgghKkUBJiGEEEIIUSkKMAkhhBBCiEpRgEkIIYQQQlSKAkxCCCGEEKJSWh9ghoaGokGDBipdJ4/Hw5gxY1S6TkVs2rQJPB4PT58+VXveADB48GB4enpqJG8i6enTp+DxeNi0aZOmi6IyXG2Tp6cnBg8erNJ1anO+XAgNDUVoaKimi6ESW7ZsQd26dWFgYABra2tNF6dSSklJgbGxMc6dO6fpohANO3z4MMzNzfHmzRulP6tUgHn58mWMGTMG9evXh5mZGTw8PNCnTx88ePBAKm1oaCh4PB54PB74fD4sLS1Rp04dDBw4EMeOHVO6oISo2507dzBr1qwK3RD8+eefWL58ucrKVFmdP38es2bNQmZmpqaLohK//vqrVt88vHz5ErNmzcL169c1XRSl3Lt3D4MHD4a3tzd+++03rF27VtNFqpTmzJmDJk2a4PPPP5ebpm3btqVW1qxfvx716tWDsbExatWqhZUrV8pM9+LFC/Tp0wfW1tawtLRE165d8eTJE5VsR1kyMzNhbGwMHo+Hu3fvqiVPTTh48CBmzZpVrs+2b98eNWvWxPz585X/MFNCz549mbOzM/v666/Zb7/9xubOncucnJyYmZkZu3nzpkTakJAQ5ubmxrZs2cK2bNnCYmJi2OTJk5mXlxcDwPr06cPy8/PLzDMkJITVr19fmWKWCQCLiopS6ToVUVhYyD5+/MiEQqHa82aMscjISFa9enWN5K2Ldu7cyQCwEydOlHsdnTp1krnPhUIh+/jxIyssLCx/AbVMUlISA8A2btyo9GcXL17MALCkpCSp9z59+qTQuULVKpJv/fr1WUhIiGoLVAF5eXksLy9P/Pry5cvl/q40afXq1QwAe/jwoaaLUmm9fv2aGRgYsD///FNumt27dzMzMzO519KYmBgGgPXs2ZOtXbuWDRw4kAFgCxYskEj3/v17VqtWLebo6MgWLlzIli5dytzd3ZmbmxtLT09X+baVtHbtWmZsbMycnZ3Z9OnTOc9PU6KiopiS4Z6EX3/9lZmamrLs7GylPqdUDebEiRPx7Nkz/PLLLxg+fDi+//57nDlzBoWFhViwYIFUeisrK3z55Zf48ssvMXLkSCxevBgPHjzA6NGjsWPHDnz//ffKR8Q6TE9PT3y3pAq5ubkqWQ9RPx6PB2NjY+jp6Wm6KHIJhUJ8+vRJ08WAkZERDAwMqky+XDA0NIShoaGmi1Fhr1+/BoAym8YZY/j48aMaSlT5/PHHH9DX10dERITM9z99+oRJkyZhypQpMt//+PEjpk+fjk6dOmHXrl0YMWIEfv/9dwwYMABz587Fu3fvxGl//fVXPHz4EAcOHMC3336LCRMm4OjRo0hNTcWSJUtKLeesWbMq3OXrjz/+QMeOHdG/f3/8+eefFVpXZdazZ0/k5eVh586dyn2w3CFtMYGBgSwwMFBiWWk1j4WFhczHx4eZmpqyzMzMUtctWs+VK1dY06ZNmbGxMfP09GSrV6+WSvvp0yc2Y8YM5u3tzQwNDZmbmxv75ptv2KdPnyTS4f/vuuLi4lj9+vWZoaEh8/HxYYcOHZJI9/TpUzZq1ChWu3ZtZmxszGxtbVmvXr0kallENQGbNm2SKs
/hw4cZAPbXX38xxhjbuHGjzFqa6Oho5uPjwwwNDZmLiwsbPXo0e/fundz90KJFC2ZiYsLGjRvHGGNs7969rGPHjszFxYUZGhoyLy8vNmfOHKnaMUVrMC9fvszatWvH7OzsxPt7yJAhEmkEAgFbtmwZ8/HxYUZGRszR0ZF99dVXLCMjQyrdzJkzmYuLCzMxMWGhoaHs9u3brHr16iwyMlKcTrRvzpw5w77++mtmb2/PrKys2FdffcXy8vLYu3fv2MCBA5m1tTWztrZm33zzjVRNsKJlql69OuvUqRM7c+YMCw4OZkZGRqxGjRps8+bNUuUp+SeqzVRkn4eEhEh9XrT/5dX2xcfHs+bNmzNTU1NmZWXFunTpwu7cuSORZubMmeJanMjISGZlZcUsLS3Z4MGDWU5OjkTaN2/esLt370otl0X0u/jjjz+Yj48P09fXZ3FxcYwxxp4/f86GDBnCHB0dxb+X9evXS3xe1jYlJiayyMhIVqNGDWZkZMScnJzYkCFDJGonRNtT8k/0Oyl+rCjze1O03PLIO0bPnj3LJkyYwOzt7ZmpqSnr1q0be/36tcTnSm5L8drMd+/esXHjxjE3NzdmaGjIvL292YIFC5hAIJDal4sXL2Zr1qxhXl5ezNDQkAUFBbFLly5JlDM1NZUNHjyYVatWjRkaGjJnZ2fWpUsXifNMSEiIuAwnTpyQub83btzIZsyYwfT19SW2R2TEiBHMysqKffz4Ueb+EtVCP336VOq9qVOnMgMDA/Fv8cGDB6xHjx7MycmJGRkZsWrVqrG+ffuWej2QtV9nzpwpfq9Tp07s8OHDrFGjRszIyIgtW7ZM4f0tShcZGcksLS2ZlZUVGzRoEEtISJA6povvy+JknV9VeU4qXs7x48ez6tWrM0NDQ1atWjU2cOBA9ubNG/b+/XtmamrKxo4dK/W5lJQUxufz2U8//SR3HzPGWMuWLVloaKjc92fPns08PDxYbm6uzBrMv//+mwFgf//9t8Ty8+fPMwBsy5Yt4mXBwcEsODhYKo927doxb2/vUss5c+bMCrXIPXv2jPF4PLZjxw528eJFBoCdO3dOZtotW7aw4OBgZmJiwqytrVmLFi3YkSNHJNIcPHiQtWzZkpmbmzMLCwsWFBTEtm7dKpFmx44dLDAwkBkbGzM7Ozs2YMAA9vz5c4k0ih5fip4jIiMjZf7eRbZt28YCAwPF5W7QoAFbvny5VP4BAQGsS5cucvenLBUOMIVCIatWrRpr166dxPKymrbnzp3LALADBw6Uuv6QkBDm6urKHB0d2ZgxY9gvv/zCmjdvzgBIXCgEAgFr164dMzU1ZePHj2dr1qxhY8aMYfr6+qxr164S6wTA/Pz8mIuLC5s7dy5bvnw58/LyYqamphIXvp07dzI/Pz82Y8YMtnbtWvbdd98xGxsbVr16dYkLtpeXF+vYsaNU2YcMGcJsbGzEzWyyAkzRxTUsLIytXLmSjRkzhunp6bHg4GCJ5rmQkBDm7OzMHBwc2Ndff83WrFnD9u7dyxhjrFu3bqxPnz5s8eLFbPXq1ax3794MAJs8ebJEeRQJMF+9esVsbGxY7dq12eLFi9lvv/3Gpk+fzurVqyeRbvjw4UxfX5+NGDGCxcTEsClTpjAzMzOpcn/77bcMAIuIiGCrVq1iI0aMYG5ubsze3l7mxdvf35+1b9+eRUdHi5tVvv32W9a8eXP2xRdfsF9//ZV17tyZAZA6+SpapurVq7M6deowJycn9t1337FVq1axwMBAxuPx2K1btxhjjD1+/JiNHTuWAWDfffeduKtHWlqawvv86NGjzN/fn9nb24s/LwrYZAVjx44dY/r6+qx27dps0aJFbPbs2cze3p7Z2NjIPGYCAgJYjx492K+//sqGDx8u3lfFidIq0swPgNWrV485ODiw2bNns+joaJaQkMDS0tKYm5sbc3d3Z3PmzGGrV69mXbp0YQDEF3F52/Tzzz+zFi1asDlz5rC1a9eycePGMR
MTE9a4cWPxDUJiYiLr37+/eH2iffXhwwfx91X8WFH096ZoueWRF2AGBASw1q1bs5UrV7JJkyYxPT091qdPH3G6uLg
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure()\n",
"for name, g in dfc.groupby(\"seg_2D\"):\n",
" plt.scatter(g[\"frequency\"], g[\"rel_intensity_total\"], s=10, label=name)\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_freq, linestyle=\"--\")\n",
"plt.axhline(thr_int, linestyle=\"--\")\n",
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency (400+ Accounts)\")\n",
"plt.legend(markerscale=2)\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "9bf72f4b-95ac-4233-929b-47f6f101db49",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_med</th>\n",
" <th>gross_flow_qty_med</th>\n",
" <th>freq_med</th>\n",
" <th>rel_int_med</th>\n",
" <th>n_tx_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>seg_2D</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Highly active (high int, high freq)</th>\n",
" <td>109</td>\n",
" <td>106244.381208</td>\n",
" <td>7877.054714</td>\n",
" <td>1.000000</td>\n",
" <td>7.201297</td>\n",
" <td>3861.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Small rebalancers (low int, high freq)</th>\n",
" <td>106</td>\n",
" <td>108438.852153</td>\n",
" <td>4100.832454</td>\n",
" <td>1.000000</td>\n",
" <td>2.468000</td>\n",
" <td>2067.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dormant (low int, low freq)</th>\n",
" <td>104</td>\n",
" <td>55310.790504</td>\n",
" <td>1687.835370</td>\n",
" <td>0.632500</td>\n",
" <td>1.641374</td>\n",
" <td>110.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Occasional large movers (high int, low freq)</th>\n",
" <td>102</td>\n",
" <td>37406.845662</td>\n",
" <td>2949.680688</td>\n",
" <td>0.830986</td>\n",
" <td>8.951903</td>\n",
" <td>536.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_med \\\n",
"seg_2D \n",
"Highly active (high int, high freq) 109 106244.381208 \n",
"Small rebalancers (low int, high freq) 106 108438.852153 \n",
"Dormant (low int, low freq) 104 55310.790504 \n",
"Occasional large movers (high int, low freq) 102 37406.845662 \n",
"\n",
" gross_flow_qty_med freq_med \\\n",
"seg_2D \n",
"Highly active (high int, high freq) 7877.054714 1.000000 \n",
"Small rebalancers (low int, high freq) 4100.832454 1.000000 \n",
"Dormant (low int, low freq) 1687.835370 0.632500 \n",
"Occasional large movers (high int, low freq) 2949.680688 0.830986 \n",
"\n",
" rel_int_med n_tx_med \n",
"seg_2D \n",
"Highly active (high int, high freq) 7.201297 3861.0 \n",
"Small rebalancers (low int, high freq) 2.468000 2067.5 \n",
"Dormant (low int, low freq) 1.641374 110.5 \n",
"Occasional large movers (high int, low freq) 8.951903 536.0 "
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"profile_2d = (\n",
" dfc.groupby(\"seg_2D\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" aum_qty_med=(\"aum_qty_mean\",\"median\"),\n",
" gross_flow_qty_med=(\"gross_flow_qty_mean\",\"median\"),\n",
" freq_med=(\"frequency\",\"median\"),\n",
" rel_int_med=(\"rel_intensity_total\",\"median\"),\n",
" n_tx_med=(\"n_tx_total\",\"median\"),\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
")\n",
"profile_2d\n"
]
},
{
"cell_type": "code",
2026-04-07 12:31:16 +02:00
"execution_count": 225,
2026-04-03 10:55:04 +02:00
"id": "0434097b-ff04-4fc7-8430-8e3e4c8ab120",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Clustering matrix shape: (421, 6)\n"
]
}
],
"source": [
"# Kmeans\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.metrics import silhouette_score\n",
"from sklearn.mixture import GaussianMixture\n",
"\n",
"# Safety: ensure baseline-2 columns exist\n",
"dfc = dfc.copy()\n",
"dfc[\"frequency\"] = dfc[\"flow_freq\"]\n",
"dfc[\"rel_intensity_total\"] = dfc[\"gross_flow_to_aum\"]\n",
"\n",
"# Choose a compact, interpretable feature set (quantity-based)\n",
"features = [\n",
" \"log_aum_qty_mean\", # size (log)\n",
" \"log_gross_flow_qty_mean\", # activity intensity (log)\n",
" \"frequency\", # activity frequency\n",
" \"rel_intensity_total\", # turnover proxy\n",
" \"net_flow_qty_vol\", # volatility of net flows\n",
" \"n_tx_total\", # total number of transactions\n",
"]\n",
"\n",
"# Build X (drop NaNs/Infs)\n",
"X = (dfc[features]\n",
" .replace([np.inf, -np.inf], np.nan)\n",
" .dropna()\n",
" .copy())\n",
"\n",
"# Keep IDs aligned\n",
"ids = dfc.loc[X.index, ID_COL].copy()\n",
"\n",
"# Standardize (critical for distance-based clustering)\n",
"scaler = StandardScaler()\n",
"X_scaled = scaler.fit_transform(X)\n",
"\n",
"print(\"Clustering matrix shape:\", X_scaled.shape)\n"
]
},
{
"cell_type": "code",
2026-04-07 12:31:16 +02:00
"execution_count": 239,
"id": "9980bfb5-4655-44f1-ad46-6532e161a0bf",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>log_aum_qty_mean</th>\n",
" <th>log_gross_flow_qty_mean</th>\n",
" <th>frequency</th>\n",
" <th>rel_intensity_total</th>\n",
" <th>net_flow_qty_vol</th>\n",
" <th>n_tx_total</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>11.874137</td>\n",
" <td>9.166514</td>\n",
" <td>1.000000</td>\n",
" <td>8.669523</td>\n",
" <td>9832.357264</td>\n",
" <td>1926</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10.091731</td>\n",
" <td>7.485110</td>\n",
" <td>0.915385</td>\n",
" <td>9.586858</td>\n",
" <td>2838.000232</td>\n",
" <td>518</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>12.955117</td>\n",
" <td>9.792718</td>\n",
" <td>1.000000</td>\n",
" <td>5.501836</td>\n",
" <td>13288.481111</td>\n",
" <td>7103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>12.264857</td>\n",
" <td>9.088325</td>\n",
" <td>1.000000</td>\n",
" <td>5.424328</td>\n",
" <td>10074.748210</td>\n",
" <td>4774</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11.889512</td>\n",
" <td>9.141974</td>\n",
" <td>1.000000</td>\n",
" <td>8.330268</td>\n",
" <td>13868.197522</td>\n",
" <td>7585</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>426</th>\n",
" <td>11.614810</td>\n",
" <td>9.116890</td>\n",
" <td>1.000000</td>\n",
" <td>10.692193</td>\n",
" <td>13260.634566</td>\n",
" <td>2921</td>\n",
" </tr>\n",
" <tr>\n",
" <th>427</th>\n",
" <td>11.294315</td>\n",
" <td>9.205628</td>\n",
" <td>0.430769</td>\n",
" <td>16.099039</td>\n",
" <td>28616.097840</td>\n",
" <td>185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>428</th>\n",
" <td>11.638514</td>\n",
" <td>8.072315</td>\n",
" <td>1.000000</td>\n",
" <td>1.412726</td>\n",
" <td>2193.842219</td>\n",
" <td>2142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>429</th>\n",
" <td>11.200555</td>\n",
" <td>8.946562</td>\n",
" <td>1.000000</td>\n",
" <td>13.645709</td>\n",
" <td>11205.382980</td>\n",
" <td>2264</td>\n",
" </tr>\n",
" <tr>\n",
" <th>430</th>\n",
" <td>10.454506</td>\n",
" <td>8.009115</td>\n",
" <td>0.976923</td>\n",
" <td>11.266566</td>\n",
" <td>4359.629462</td>\n",
" <td>1507</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>421 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" log_aum_qty_mean log_gross_flow_qty_mean frequency \\\n",
"0 11.874137 9.166514 1.000000 \n",
"1 10.091731 7.485110 0.915385 \n",
"2 12.955117 9.792718 1.000000 \n",
"3 12.264857 9.088325 1.000000 \n",
"4 11.889512 9.141974 1.000000 \n",
".. ... ... ... \n",
"426 11.614810 9.116890 1.000000 \n",
"427 11.294315 9.205628 0.430769 \n",
"428 11.638514 8.072315 1.000000 \n",
"429 11.200555 8.946562 1.000000 \n",
"430 10.454506 8.009115 0.976923 \n",
"\n",
" rel_intensity_total net_flow_qty_vol n_tx_total \n",
"0 8.669523 9832.357264 1926 \n",
"1 9.586858 2838.000232 518 \n",
"2 5.501836 13288.481111 7103 \n",
"3 5.424328 10074.748210 4774 \n",
"4 8.330268 13868.197522 7585 \n",
".. ... ... ... \n",
"426 10.692193 13260.634566 2921 \n",
"427 16.099039 28616.097840 185 \n",
"428 1.412726 2193.842219 2142 \n",
"429 13.645709 11205.382980 2264 \n",
"430 11.266566 4359.629462 1507 \n",
"\n",
"[421 rows x 6 columns]"
]
},
"execution_count": 239,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c697888b-cb72-4a98-86af-56647f5a5161",
"metadata": {},
"outputs": [],
"source": [
"X_sorted = X.sort_values(by=\"n_tx_total\", ascending=False)\n",
"X_sorted"
]
},
{
"cell_type": "code",
"execution_count": 226,
2026-04-03 10:55:04 +02:00
"id": "e18be5a6-c8af-47f9-888f-3edf21bd28dd",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAcwdJREFUeJzt3XdYU2f7B/BvAiTssAkIIm5R3FVxDyruOlpbtY7W0Traqh3WDlH7qzi6+6pt37eO1rbaWrWO1hYVnDgREVCqiKBAQEGm7JzfH5TUCEiCCQnk+7muXHLOec7J/XCA3D7nGSJBEAQQERERmTCxoQMgIiIiMjQmRERERGTymBARERGRyWNCRERERCaPCRERERGZPCZEREREZPKYEBEREZHJY0JEREREJo8JEREREZk8JkRERkwkEmH58uWq7eXLl0MkEuHu3buGC8qE5OfnY9asWZDL5RCJRFi4cKGhQyIiPWFCRFTPtmzZApFIVOPr9OnThg6R/rFq1Sps2bIFc+fOxffff4+pU6fq9f2aNWuGUaNGVdn//fffw8zMDMOGDUNRUZFeYyAyVeaGDoDIVK1cuRK+vr5V9rds2dIA0VB1jhw5gl69eiE4ONhgMfzwww+YMWMGAgMDsWfPHlhaWhosFqLGjAkRkYEMHz4c3bt3N3QYDUpBQQFsbGzq7f0yMjLg5+ens+uVlZVBqVRCIpFoVH779u2YPn06Bg8ejN9++43JEJEe8ZEZUQN09+5dTJw4Efb29nB2dsZrr71W5VFKWVkZPvjgA7Ro0QJSqRTNmjXDO++8g+LiYlWZxYsXw9nZGYIgqPa98sorEIlE+OKLL1T70tPTIRKJsHHjxlpj27ZtG3r06AFra2s4Ojqif//++Ouvv1THH+4XValZs2aYMWOGarvy0eLRo0cxb948uLm5wcvLCzt37lTtf9jXX38NkUiEmJgY1b6rV6/i6aefhpOTEywtLdG9e3fs3bv3kXUIDw+HSCRCYmIiDhw4oHqcefPmTQAVidLMmTPh7u4OS0tLdOrUCVu3blW7xs2bNyESifDRRx/hs88+U92HuLi4Wr+HAPDzzz/j+eefx8CBA7F3795akyGRSIQFCxbgl19+gZ+fH6ysrBAQEIDLly+rvjctW7aEpaUlBg4cqKrLg86cOYNhw4ZBJpPB2toaAwYMwMmTJ9XKJCUlYd68eWjTpg2srKzg7OyMZ555psr1Ku/fyZMnsXjxYri6usLGxgbjxo3DnTt31MqeP38eQUFBcHFxgZWVFXx9ffHiiy9q9H0i0hW2EBEZSE5OTpXO0SKRCM7OzrWeO3HiRDRr1gwhISE4ffo0vvjiC9y7dw/fffedqsysWbOwdetWPP3003j99ddx5swZhISE4MqVK9i9ezcAoF+/fvj0008RGxuLDh06AACOHz8OsViM48eP49VXX1XtA4D+/fs/Mq4VK1Zg+fLl6N27N1auXAmJRIIzZ87gyJEjGDp0qObfnAfMmzcPrq6uWLZsGQoKCjBy5EjY2tri559/xoABA9TK7tixA+3bt1fVJTY2Fn369EGTJk3w9ttvw8bGBj///DPGjh2LX3/9FePGjav2Pdu1a4fvv/8eixYtgpeXF15//XUAgKurKwoLCzFw4EBcv34dCxYsgK+vL3755RfMmDED2dnZeO2119SutXnzZhQVFWHOnDmQSqVwcnKqtc6//vorpkyZgv79+2Pfvn2wsrLS6Ht1/Phx7N27F/PnzwcAhISEYNSoUXjrrbewYcMGzJs3D/fu3cPatWvx4osv4siRI6pzjxw5guHDh6Nbt24IDg6GWCzG5s2bMXjwYBw/fhw9evQAAJw7dw6nTp3Cc889By8vL9y8eRMbN27EwIEDERcXB2tra7WYXnnlFTg6OiI4OBg3b97EZ599hgULFmDHjh0AKpLLoUOHwtXVFW+//TYcHBxw8+ZN7Nq1S6M6E+mMQET1avPmzQKAal9SqV
StLAAhODhYtR0cHCwAEMaMGaNWbt68eQIA4dKlS4IgCEJUVJQAQJg1a5ZauTfeeEMAIBw5ckQQBEHIyMgQAAgbNmwQBEEQsrOzBbFYLDzzzDOCu7u76rxXX31VcHJyEpRKZY31unbtmiAWi4Vx48YJ5eXlascePO/hOlXy8fERpk+fXuX71LdvX6GsrEyt7KRJkwQ3Nze1/WlpaYJYLBZWrlyp2jdkyBDB399fKCoqUould+/eQqtWrWqsy4MxjRw5Um3fZ599JgAQtm3bptpXUlIiBAQECLa2tkJubq4gCIKQmJgoABDs7e2FjIyMWt+r8v08PT0Fc3NzYeDAgUJBQYFG5wmCoPr5SUxMVO37+uuvBQCCXC5XxSUIgrB06VIBgKqsUqkUWrVqJQQFBandq/v37wu+vr7Ck08+qbbvYREREQIA4bvvvlPtq7x/gYGBatdctGiRYGZmJmRnZwuCIAi7d+8WAAjnzp3TuK5E+sBHZkQGsn79eoSGhqq9/vjjD43OrWwBqPTKK68AAH7//Xe1fxcvXqxWrrKl48CBAwAqWjzatm2LY8eOAQBOnjwJMzMzvPnmm0hPT8e1a9cAVLQ89O3bFyKRqMaY9uzZA6VSiWXLlkEsVv/T8qjzajN79myYmZmp7Xv22WeRkZGB8PBw1b6dO3dCqVTi2WefBQBkZWXhyJEjmDhxIvLy8nD37l3cvXsXmZmZCAoKwrVr15CSkqJ1PL///jvkcjkmTZqk2mdhYYFXX30V+fn5VR7lTZgwAa6urhpfPysrC2VlZfDy8tK4ZajSkCFD0KxZM9V2z549VTHY2dlV2X/jxg0AQFRUFK5du4bJkycjMzNT9b0qKCjAkCFDcOzYMSiVSgBQi6m0tBSZmZlo2bIlHBwcEBkZWSWmOXPmqN3/fv36oby8HElJSQAABwcHAMD+/ftRWlqqVX2JdImPzIgMpEePHnXuVN2qVSu17RYtWkAsFqv6cSQlJUEsFlcZsSaXy+Hg4KD6MAIqPqAqE6jjx4+je/fu6N69O5ycnHD8+HG4u7vj0qVLmDx58iNjSkhIgFgs1mknZADVjsSr7OeyY8cODBkyBEDF47LOnTujdevWAIDr169DEAS8//77eP/996u9dkZGBpo0aaJVPElJSWjVqlWVpK9du3aq47XF/yhDhgxB06ZNsXHjRjg5OeHzzz9XHcvJyUFhYaFqWyKRqD2Ca9q0qdq1ZDIZAMDb27va/ffu3QMAVeI7ffr0GuPKycmBo6MjCgsLERISgs2bNyMlJUWt/1lOTk6V8x6OydHRUe29BwwYgAkTJmDFihX49NNPMXDgQIwdOxaTJ0+GVCqtMR4iXWNCRNQI1NQCo0nLTN++ffHf//4XN27cwPHjx9GvXz+IRCL07dsXx48fh6enJ5RKJfr166frsNWUl5dXu7+6VhKpVIqxY8di9+7d2LBhA9LT03Hy5EmsWrVKVaayReONN95AUFBQtdeujykOtG3lAYD//Oc/uHfvHr744gs4OjqqOqG/9tprap23BwwYoNZK9nBLWm37K5OZyu/VunXr0Llz52rL2traAqhojdy8eTMWLlyIgIAAyGQyiEQiPPfcc6rraPPeIpEIO3fuxOnTp7Fv3z78+eefePHFF/Hxxx/j9OnTqvcl0jcmREQN0LVr19RaHq5fvw6lUql6XOLj4wOlUolr166pWi6AitFi2dnZ8PHxUe2rTHRCQ0Nx7tw5vP322wAqOlBv3LgRnp6esLGxQbdu3R4ZU4sWLaBUKhEXF1fjhypQ0UKQnZ2ttq+kpARpaWmaVF3l2WefxdatW3H48GFcuXIFgiCoHpcBQPPmzQFUPM4KDAzU6tqP4uPjg+joaCiVSrVWoqtXr6qOPy6xWIzvvvsOOTk5WLFiBZycnPDqq6/irbfewvPPP68qV9na8rhatGgBALC3t6/1e7Vz505Mnz4dH3/8sWpfUVFRlXuqrV69eqFXr1748MMP8eOPP2LKlC
nYvn07Zs2a9VjXJdIU+xARNUDr169X2/7yyy8BVMxtBAAjRowAAHz22Wdq5T755BMAwMiRI1X7fH190aRJE3z66ac
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAHHCAYAAABXx+fLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAiUlJREFUeJzt3Xd4k+X6B/BvkjZJZ7onpQtKKavKqCBLKRZElgPkoAwVFUVFcMDxh5WjHobj4OCAogzleOSooOAoyigCFouUVUYppVBG994jeX9/pAmErqRktfl+rquX9s2bN/fbFHLzPPfz3CJBEAQQERER2RCxpQMgIiIiMjcmQERERGRzmAARERGRzWECRERERDaHCRARERHZHCZAREREZHOYABEREZHNYQJERERENocJEBEREdkcJkBERhISEoJZs2Zpv09MTIRIJEJiYqL22MiRI9G7d2/zB0dm88477yAsLAwSiQTR0dGWDoeIWsAEiKgNJ0+exIMPPojg4GDI5XIEBgZi9OjR+OijjywdmkmcPn0ab7zxBi5evNjksX//+9/YuHGj2WPqKH799Ve88soruPPOO7Fhwwb885//NOnrzZo1C87Ozk2OnzhxAl5eXggJCWn2fSQiwM7SARBZsz/++AN33XUXunbtijlz5sDPzw+XL1/GoUOH8MEHH+C5557TnpuWlgaxuOP/m+L06dNYunQpRo4ciZCQEJ3H/v3vf8PLy0tnpIuu27NnD8RiMT7//HNIpVKLxJCamopRo0bByckJe/fubfIeEpEaEyCiVrz99ttQKBQ4fPgw3NzcdB7Ly8vT+V4mk5kxMjImQRBQU1MDBweHW7pOXl4eHBwcjJb8GBrXqVOncPfdd8PBwQF79+5FaGioUeIg6ow6/j9XiUwoIyMDvXr1apL8AICPj4/O9zfXALXm9OnTuOuuu+Do6IjAwECsXLmyyTl5eXl4/PHH4evrC7lcjn79+mHTpk065zRXZwQAFy9ehEgkajJddfbsWTz44IPw8PCAXC7HgAEDsH37du3jGzduxEMPPQQAuOuuuyASibTXDwkJwalTp7Bv3z7t8ZEjR2qfW1JSgvnz5yMoKAgymQzdunXDihUroFKp2vx5/PXXX4iLi4OXlxccHBwQGhqKxx57TOcclUqFDz74AH369IFcLoe3tzfGjBmDv/76S3tOQ0MD3nzzTYSHh0MmkyEkJAR///vfUVtbq3OtkJAQ3Hfffdi5cycGDBgABwcHfPLJJ7d0HyKRCBs2bEBlZaX256P5+RsjrracOXMGo0aNgkwmw969exEWFtbq+Zp6tBMnTmDEiBFwdHREt27d8O233wIA9u3bh5iYGDg4OKBHjx7YtWtXk2tcvXoVjz32GHx9fSGTydCrVy+sX79e55y6ujq8/vrr6N+/PxQKBZycnDBs2DDs3btX5zzN7+y7776LTz/9VPuzGjhwIA4fPqxzbk5ODmbPno0uXbpAJpPB398fEydO5HQfGYQjQEStCA4ORlJSElJTU41WvFxcXIwxY8bg/vvvx5QpU/Dtt9/i1VdfRZ8+fTB27FgAQHV1NUaOHInz589j3rx5CA0NxTfffINZs2ahpKQEL7zwgsGve+rUKdx5550IDAzEokWL4OTkhP/973+YNGkSvvvuO0yePBnDhw/H888/jw8//BB///vf0bNnTwBAz549sWrVKjz33HNwdnbGa6+9BgDw9fUFAFRVVWHEiBG4evUqnnrqKXTt2hV//PEHFi9ejOzsbKxatarFuPLy8nDPPffA29sbixYtgpubGy5evIitW7fqnPf4449j48aNGDt2LJ544gk0NDRg//79OHToEAYMGAAAeOKJJ7Bp0yY8+OCDWLhwIf78808sW7YMZ86cwbZt23Sul5aWhmnTpuGpp57CnDlz0KNHj1u6jy+//BKffvopkpOT8dlnnwEAhgwZYpS42pKWloa7774bdnZ22Lt3L8LDw9t8Dq
D+Xbzvvvvw8MMP46GHHsKaNWvw8MMP4z//+Q/mz5+Pp59+Gn/729/wzjvv4MEHH8Tly5fh4uICAMjNzcUdd9wBkUiEefPmwdvbG7/88gsef/xxlJWVYf78+QCAsrIyfPbZZ5g2bRrmzJmD8vJyfP7554iLi0NycnKTQvGvvvoK5eXleOqppyASibBy5Urcf//9uHDhAuzt7QEADzzwAE6dOoXnnnsOISEhyMvLw2+//YasrCxO+ZH+BCJq0a+//ipIJBJBIpEIgwcPFl555RVh586dQl1dXZNzg4ODhZkzZ2q/37t3rwBA2Lt3r/bYiBEjBADCF198oT1WW1sr+Pn5CQ888ID22KpVqwQAwubNm7XH6urqhMGDBwvOzs5CWVlZi68hCIKQmZkpABA2bNigPTZq1CihT58+Qk1NjfaYSqUShgwZInTv3l177Jtvvmn2moIgCL169RJGjBjR5Pibb74pODk5CefOndM5vmjRIkEikQhZWVlNnqOxbds2AYBw+PDhFs/Zs2ePAEB4/vnnmzymUqkEQRCEY8eOCQCEJ554Qufxl156SQAg7NmzR3ssODhYACAkJCQY7T4EQRBmzpwpODk56RwzRlytvZ69vb3g7+8vBAQENIm7NZrfxa+++kp77OzZswIAQSwWC4cOHdIe37lzZ5Pfp8cff1zw9/cXCgoKdK778MMPCwqFQqiqqhIEQRAaGhqE2tpanXOKi4sFX19f4bHHHtMe0/zOenp6CkVFRdrjP/zwgwBA2LFjh/a5AIR33nlH73slag6nwIhaMXr0aCQlJWHChAk4fvw4Vq5cibi4OAQGBupMHRnC2dkZjzzyiPZ7qVSKQYMG4cKFC9pjP//8M/z8/DBt2jTtMXt7ezz//POoqKjAvn37DHrNoqIi7NmzB1OmTEF5eTkKCgpQUFCAwsJCxMXFIT09HVevXm3X/QDAN998g2HDhsHd3V177YKCAsTGxkKpVOL3339v8bma6cUff/wR9fX1zZ7z3XffQSQSIT4+vsljIpEIgPpnBgALFizQeXzhwoUAgJ9++knneGhoKOLi4ox2Hy0xRlytUSqVKCgogIeHB7y8vAyKzdnZGQ8//LD2+x49esDNzQ09e/ZETEyM9rjm/zW/o4Ig4LvvvsP48eMhCILOzyouLg6lpaVISUkBAEgkEm1NlEqlQlFRERoaGjBgwADtOTeaOnUq3N3dtd8PGzZM57U1NVaJiYkoLi426H6JbsQpMKI2DBw4EFu3bkVdXR2OHz+Obdu24V//+hcefPBBHDt2DFFRUQZdr0uXLtoPbQ13d3ecOHFC+/2lS5fQvXv3JqvKNFNSly5dMug1z58/D0EQsGTJEixZsqTZc/Ly8hAYGGjQdTXS09Nx4sQJeHt7t3jtlowYMQIPPPAAli5din/9618YOXIkJk2ahL/97W/awvKMjAwEBATAw8OjxetcunQJYrEY3bp10znu5+cHNze3Jj+z5gqEb+U+TBlXaxwcHPDZZ59h+vTpGDduHH777Tc4OTkBUE+llpaWNnldjeZ+FxUKBYKCgpocA6BNOPLz81FSUoJPP/0Un376abNx3fiz2rRpE9577z2cPXtWJ8lt7l67du2q870mGdK8tkwmw4oVK7Bw4UL4+vrijjvuwH333YcZM2bo3BtRW5gAEelJKpVi4MCBGDhwICIiIjB79mx88803zY5KtEYikTR7XBAEg2O6+cNLQ6lU6nyvKeB96aWXWhxduPkD2hAqlQqjR4/GK6+80uzjERERLT5XJBLh22+/xaFDh7Bjxw7s3LkTjz32GN577z0cOnSo2X1uWtPSz+Rmza2supX7MGVcbXn44YdRXFyMZ555Bvfffz927NgBqVSKLVu2YPbs2Trn3vh71tLvYlu/o5rfp0ceeQQzZ85s9ty+ffsCADZv3oxZs2Zh0qRJePnll+Hj4wOJRIJly5YhIyPD4NcGgPnz52P8+PH4/vvvsXPnTixZsgTLli3Dnj
17cNtttzX7fKKbMQEiagdN0W12drZJrh8cHIwTJ05ApVLpjAKdPXtW+zhw/V/HJSUlOs+/eVRBsyLI3t4esbGxrb5
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best K by silhouette: 5\n"
]
},
{
"data": {
"text/plain": [
"' Ce que c’ est :\\nInertia = somme des distances intra-cluster (SSE).\\nPlus elle baisse, plus les clusters sont “serrés”.\\n\\nComment lire :\\nQuand K augmente, inertia baisse toujours (normal).\\nOn cherche un “coude” : à partir d’ un certain K, ajouter des clusters apporte peu\\n'"
]
},
2026-04-07 12:31:16 +02:00
"execution_count": 226,
2026-04-03 10:55:04 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Model selection for K-means: fit one model per candidate K and record the\n",
"# inertia (for the elbow plot) and the silhouette score (used to pick best_k).\n",
"k_range = range(2, 21)\n",
"inertias = []\n",
"silhouettes = []\n",
"\n",
"for k in k_range:\n",
"    # Fixed random_state keeps the sweep reproducible across runs.\n",
"    km = KMeans(n_clusters=k, n_init=30, random_state=42)\n",
"    labels = km.fit_predict(X_scaled)\n",
"    inertias.append(km.inertia_)\n",
"    silhouettes.append(silhouette_score(X_scaled, labels))\n",
"\n",
"# Elbow plot\n",
"# What it shows: inertia is the within-cluster sum of squared distances (SSE);\n",
"# the lower it is, the tighter the clusters.\n",
"# How to read it: inertia always decreases as K grows (expected), so look for an\n",
"# \"elbow\", i.e. the K beyond which adding clusters brings little improvement.\n",
"plt.figure()\n",
"plt.plot(list(k_range), inertias, marker=\"o\")\n",
"plt.xlabel(\"Number of clusters K\")\n",
"plt.ylabel(\"Inertia (within-cluster SSE)\")\n",
"plt.title(\"Elbow curve for K-means\")\n",
"plt.show()\n",
"\n",
"# Silhouette plot\n",
"plt.figure()\n",
"plt.plot(list(k_range), silhouettes, marker=\"o\")\n",
"plt.xlabel(\"Number of clusters K\")\n",
"plt.ylabel(\"Silhouette score\")\n",
"plt.title(\"Silhouette score for K-means\")\n",
"plt.show()\n",
"\n",
"# Keep the K with the highest silhouette score; np.argmax returns the first\n",
"# maximum, so ties resolve to the smallest K.\n",
"best_k = list(k_range)[int(np.argmax(silhouettes))]\n",
"print(\"Best K by silhouette:\", best_k)"
]
},
{
"cell_type": "code",
2026-04-07 12:31:16 +02:00
"execution_count": 227,
2026-04-03 10:55:04 +02:00
"id": "2759f049-d8fe-4fee-9bc9-856a28b392a9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_med</th>\n",
" <th>freq_med</th>\n",
" <th>rel_int_med</th>\n",
" <th>gross_flow_med</th>\n",
" <th>n_tx_med</th>\n",
" <th>vol_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_kmeans</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2.0</th>\n",
" <td>235</td>\n",
" <td>3.936071e+04</td>\n",
" <td>0.986111</td>\n",
" <td>4.136974</td>\n",
" <td>2031.883965</td>\n",
" <td>1069.0</td>\n",
" <td>2.735326e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.0</th>\n",
" <td>105</td>\n",
" <td>4.528840e+05</td>\n",
" <td>1.000000</td>\n",
" <td>4.651358</td>\n",
" <td>28651.252789</td>\n",
" <td>7585.0</td>\n",
" <td>3.004524e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.0</th>\n",
" <td>66</td>\n",
" <td>6.912599e+04</td>\n",
" <td>0.109903</td>\n",
" <td>1.632692</td>\n",
" <td>2773.037334</td>\n",
" <td>7.5</td>\n",
" <td>1.080610e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4.0</th>\n",
" <td>13</td>\n",
" <td>4.783496e+04</td>\n",
" <td>0.884615</td>\n",
" <td>27.093690</td>\n",
" <td>10629.415385</td>\n",
" <td>1712.0</td>\n",
" <td>1.876254e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3.0</th>\n",
" <td>2</td>\n",
" <td>1.470709e+07</td>\n",
" <td>0.586207</td>\n",
" <td>5.705179</td>\n",
" <td>851698.564766</td>\n",
" <td>2210.5</td>\n",
" <td>3.218539e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_med freq_med rel_int_med \\\n",
"cluster_kmeans \n",
"2.0 235 3.936071e+04 0.986111 4.136974 \n",
"1.0 105 4.528840e+05 1.000000 4.651358 \n",
"0.0 66 6.912599e+04 0.109903 1.632692 \n",
"4.0 13 4.783496e+04 0.884615 27.093690 \n",
"3.0 2 1.470709e+07 0.586207 5.705179 \n",
"\n",
" gross_flow_med n_tx_med vol_med \n",
"cluster_kmeans \n",
"2.0 2031.883965 1069.0 2.735326e+03 \n",
"1.0 28651.252789 7585.0 3.004524e+04 \n",
"0.0 2773.037334 7.5 1.080610e+04 \n",
"4.0 10629.415385 1712.0 1.876254e+04 \n",
"3.0 851698.564766 2210.5 3.218539e+06 "
]
},
2026-04-07 12:31:16 +02:00
"execution_count": 227,
2026-04-03 10:55:04 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Final K-means fit, using the K selected by the silhouette criterion.\n",
"km = KMeans(n_clusters=best_k, n_init=50, random_state=42)\n",
"labels_km = km.fit_predict(X_scaled)\n",
"\n",
"# Write the labels back onto the client table, aligned on the rows of X.\n",
"dfc.loc[X.index, \"cluster_kmeans\"] = labels_km\n",
"\n",
"# Per-cluster profile: cluster size plus the median of each feature\n",
"# (medians are robust to outliers).\n",
"k_profile = dfc.loc[X.index].groupby(\"cluster_kmeans\").agg(\n",
"    n_clients=(ID_COL, \"count\"),\n",
"    **{\n",
"        out_col: (src_col, \"median\")\n",
"        for out_col, src_col in [\n",
"            (\"aum_qty_med\", \"aum_qty_mean\"),\n",
"            (\"freq_med\", \"frequency\"),\n",
"            (\"rel_int_med\", \"rel_intensity_total\"),\n",
"            (\"gross_flow_med\", \"gross_flow_qty_mean\"),\n",
"            (\"n_tx_med\", \"n_tx_total\"),\n",
"            (\"vol_med\", \"net_flow_qty_vol\"),\n",
"        ]\n",
"    },\n",
")\n",
"# Largest clusters first.\n",
"k_profile = k_profile.sort_values(by=\"n_clients\", ascending=False)\n",
"\n",
"k_profile\n"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "f7883188-9981-431b-9d33-d330b8b9dfc2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>net_flow_qty_vol</th>\n",
" <th>...</th>\n",
" <th>netflow_to_aum</th>\n",
" <th>n_tx_total</th>\n",
" <th>log_aum_qty_mean</th>\n",
" <th>log_gross_flow_qty_mean</th>\n",
" <th>gross_flow_to_aum</th>\n",
" <th>seg_quantiles</th>\n",
" <th>rel_intensity_total</th>\n",
" <th>frequency</th>\n",
" <th>seg_2D</th>\n",
" <th>cluster_kmeans</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>240</th>\n",
" <td>200130818</td>\n",
" <td>58</td>\n",
" <td>10</td>\n",
" <td>0.172414</td>\n",
" <td>3.819992e+06</td>\n",
" <td>0.000000e+00</td>\n",
" <td>9.586849e+06</td>\n",
" <td>3.429192e+07</td>\n",
" <td>5.912401e+05</td>\n",
" <td>2.088032e+06</td>\n",
" <td>...</td>\n",
" <td>-4.619540e+07</td>\n",
" <td>11</td>\n",
" <td>15.155759</td>\n",
" <td>13.289979</td>\n",
" <td>8.976963</td>\n",
" <td>High-flow</td>\n",
" <td>8.976963</td>\n",
" <td>0.172414</td>\n",
" <td>Occasional large movers (high int, low freq)</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246</th>\n",
" <td>200130906</td>\n",
" <td>56</td>\n",
" <td>56</td>\n",
" <td>1.000000</td>\n",
" <td>2.559419e+07</td>\n",
" <td>2.814182e+07</td>\n",
" <td>1.482869e+07</td>\n",
" <td>6.228080e+07</td>\n",
" <td>1.112157e+06</td>\n",
" <td>4.349047e+06</td>\n",
" <td>...</td>\n",
" <td>4.092506e-03</td>\n",
" <td>4410</td>\n",
" <td>17.057876</td>\n",
" <td>13.921813</td>\n",
" <td>2.433395</td>\n",
" <td>High-flow</td>\n",
" <td>2.433395</td>\n",
" <td>1.000000</td>\n",
" <td>Small rebalancers (low int, high freq)</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq \\\n",
"240 200130818 58 10 0.172414 \n",
"246 200130906 56 56 1.000000 \n",
"\n",
" aum_qty_mean aum_qty_median net_flow_qty_sum gross_flow_qty_sum \\\n",
"240 3.819992e+06 0.000000e+00 9.586849e+06 3.429192e+07 \n",
"246 2.559419e+07 2.814182e+07 1.482869e+07 6.228080e+07 \n",
"\n",
" gross_flow_qty_mean net_flow_qty_vol ... netflow_to_aum n_tx_total \\\n",
"240 5.912401e+05 2.088032e+06 ... -4.619540e+07 11 \n",
"246 1.112157e+06 4.349047e+06 ... 4.092506e-03 4410 \n",
"\n",
" log_aum_qty_mean log_gross_flow_qty_mean gross_flow_to_aum \\\n",
"240 15.155759 13.289979 8.976963 \n",
"246 17.057876 13.921813 2.433395 \n",
"\n",
" seg_quantiles rel_intensity_total frequency \\\n",
"240 High-flow 8.976963 0.172414 \n",
"246 High-flow 2.433395 1.000000 \n",
"\n",
" seg_2D cluster_kmeans \n",
"240 Occasional large movers (high int, low freq) 3.0 \n",
"246 Small rebalancers (low int, high freq) 3.0 \n",
"\n",
"[2 rows x 21 columns]"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the clients that K-means assigned to cluster 3.\n",
"dfc.loc[dfc[\"cluster_kmeans\"] == 3.0]"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "9e26e6c4-ea3e-4aad-9136-a3bbbad8ad47",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>net_flow_qty_vol</th>\n",
" <th>...</th>\n",
" <th>netflow_to_aum</th>\n",
" <th>n_tx_total</th>\n",
" <th>log_aum_qty_mean</th>\n",
" <th>log_gross_flow_qty_mean</th>\n",
" <th>gross_flow_to_aum</th>\n",
" <th>seg_quantiles</th>\n",
" <th>rel_intensity_total</th>\n",
" <th>frequency</th>\n",
" <th>seg_2D</th>\n",
" <th>cluster_kmeans</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>246</th>\n",
" <td>200130906</td>\n",
" <td>56</td>\n",
" <td>56</td>\n",
" <td>1.000000</td>\n",
" <td>2.559419e+07</td>\n",
" <td>2.814182e+07</td>\n",
" <td>1.482869e+07</td>\n",
" <td>6.228080e+07</td>\n",
" <td>1.112157e+06</td>\n",
" <td>4.349047e+06</td>\n",
" <td>...</td>\n",
" <td>4.092506e-03</td>\n",
" <td>4410</td>\n",
" <td>17.057876</td>\n",
" <td>13.921813</td>\n",
" <td>2.433395</td>\n",
" <td>High-flow</td>\n",
" <td>2.433395</td>\n",
" <td>1.000000</td>\n",
" <td>Small rebalancers (low int, high freq)</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>355</th>\n",
" <td>364765</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>2.729485e+06</td>\n",
" <td>2.268174e+06</td>\n",
" <td>5.924221e+06</td>\n",
" <td>3.910049e+07</td>\n",
" <td>3.007730e+05</td>\n",
" <td>4.111484e+05</td>\n",
" <td>...</td>\n",
" <td>9.821006e-03</td>\n",
" <td>17976</td>\n",
" <td>14.819624</td>\n",
" <td>12.614115</td>\n",
" <td>14.325226</td>\n",
" <td>High-flow</td>\n",
" <td>14.325226</td>\n",
" <td>1.000000</td>\n",
" <td>Highly active (high int, high freq)</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>183</th>\n",
" <td>200127603</td>\n",
" <td>71</td>\n",
" <td>71</td>\n",
" <td>1.000000</td>\n",
" <td>1.365998e+07</td>\n",
" <td>1.293037e+07</td>\n",
" <td>1.588720e+07</td>\n",
" <td>3.896140e+07</td>\n",
" <td>5.487521e+05</td>\n",
" <td>5.535868e+05</td>\n",
" <td>...</td>\n",
" <td>2.203899e-02</td>\n",
" <td>2044</td>\n",
" <td>16.429981</td>\n",
" <td>13.215404</td>\n",
" <td>2.852229</td>\n",
" <td>High-flow</td>\n",
" <td>2.852229</td>\n",
" <td>1.000000</td>\n",
" <td>Small rebalancers (low int, high freq)</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>213</th>\n",
" <td>200128363</td>\n",
" <td>67</td>\n",
" <td>55</td>\n",
" <td>0.820896</td>\n",
" <td>1.092064e+07</td>\n",
" <td>9.611394e+06</td>\n",
" <td>7.331428e+06</td>\n",
" <td>3.823942e+07</td>\n",
" <td>5.707376e+05</td>\n",
" <td>7.883754e+05</td>\n",
" <td>...</td>\n",
" <td>4.514853e-02</td>\n",
" <td>957</td>\n",
" <td>16.206165</td>\n",
" <td>13.254687</td>\n",
" <td>3.501573</td>\n",
" <td>High-flow</td>\n",
" <td>3.501573</td>\n",
" <td>0.820896</td>\n",
" <td>Dormant (low int, low freq)</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240</th>\n",
" <td>200130818</td>\n",
" <td>58</td>\n",
" <td>10</td>\n",
" <td>0.172414</td>\n",
" <td>3.819992e+06</td>\n",
" <td>0.000000e+00</td>\n",
" <td>9.586849e+06</td>\n",
" <td>3.429192e+07</td>\n",
" <td>5.912401e+05</td>\n",
" <td>2.088032e+06</td>\n",
" <td>...</td>\n",
" <td>-4.619540e+07</td>\n",
" <td>11</td>\n",
" <td>15.155759</td>\n",
" <td>13.289979</td>\n",
" <td>8.976963</td>\n",
" <td>High-flow</td>\n",
" <td>8.976963</td>\n",
" <td>0.172414</td>\n",
" <td>Occasional large movers (high int, low freq)</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>210</th>\n",
" <td>200127901</td>\n",
" <td>71</td>\n",
" <td>2</td>\n",
" <td>0.028169</td>\n",
" <td>2.728996e+04</td>\n",
" <td>2.731466e+04</td>\n",
" <td>-3.729000e+01</td>\n",
" <td>3.729000e+01</td>\n",
" <td>5.252113e-01</td>\n",
" <td>3.776849e+00</td>\n",
" <td>...</td>\n",
" <td>-1.905423e-05</td>\n",
" <td>2</td>\n",
" <td>10.214311</td>\n",
" <td>0.422133</td>\n",
" <td>0.001366</td>\n",
" <td>Low-flow</td>\n",
" <td>0.001366</td>\n",
" <td>0.028169</td>\n",
" <td>Dormant (low int, low freq)</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>266</th>\n",
" <td>200131477</td>\n",
" <td>34</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>7.623185e+03</td>\n",
" <td>7.623185e+03</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>8.939081</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>Low-flow</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>Dormant (low int, low freq)</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>238</th>\n",
" <td>200130743</td>\n",
" <td>69</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>8.899686e+03</td>\n",
" <td>8.410000e+03</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>9.093884</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>Low-flow</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>Dormant (low int, low freq)</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>200127798</td>\n",
" <td>71</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>2.667762e+04</td>\n",
" <td>2.790356e+04</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>10.191618</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>Low-flow</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>Dormant (low int, low freq)</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>328</th>\n",
" <td>200139346</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>2.908100e+05</td>\n",
" <td>2.908100e+05</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>12.580429</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>Low-flow</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>Dormant (low int, low freq)</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>421 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq \\\n",
"246 200130906 56 56 1.000000 \n",
"355 364765 130 130 1.000000 \n",
"183 200127603 71 71 1.000000 \n",
"213 200128363 67 55 0.820896 \n",
"240 200130818 58 10 0.172414 \n",
".. ... ... ... ... \n",
"210 200127901 71 2 0.028169 \n",
"266 200131477 34 0 0.000000 \n",
"238 200130743 69 0 0.000000 \n",
"198 200127798 71 0 0.000000 \n",
"328 200139346 13 0 0.000000 \n",
"\n",
" aum_qty_mean aum_qty_median net_flow_qty_sum gross_flow_qty_sum \\\n",
"246 2.559419e+07 2.814182e+07 1.482869e+07 6.228080e+07 \n",
"355 2.729485e+06 2.268174e+06 5.924221e+06 3.910049e+07 \n",
"183 1.365998e+07 1.293037e+07 1.588720e+07 3.896140e+07 \n",
"213 1.092064e+07 9.611394e+06 7.331428e+06 3.823942e+07 \n",
"240 3.819992e+06 0.000000e+00 9.586849e+06 3.429192e+07 \n",
".. ... ... ... ... \n",
"210 2.728996e+04 2.731466e+04 -3.729000e+01 3.729000e+01 \n",
"266 7.623185e+03 7.623185e+03 0.000000e+00 0.000000e+00 \n",
"238 8.899686e+03 8.410000e+03 0.000000e+00 0.000000e+00 \n",
"198 2.667762e+04 2.790356e+04 0.000000e+00 0.000000e+00 \n",
"328 2.908100e+05 2.908100e+05 0.000000e+00 0.000000e+00 \n",
"\n",
" gross_flow_qty_mean net_flow_qty_vol ... netflow_to_aum n_tx_total \\\n",
"246 1.112157e+06 4.349047e+06 ... 4.092506e-03 4410 \n",
"355 3.007730e+05 4.111484e+05 ... 9.821006e-03 17976 \n",
"183 5.487521e+05 5.535868e+05 ... 2.203899e-02 2044 \n",
"213 5.707376e+05 7.883754e+05 ... 4.514853e-02 957 \n",
"240 5.912401e+05 2.088032e+06 ... -4.619540e+07 11 \n",
".. ... ... ... ... ... \n",
"210 5.252113e-01 3.776849e+00 ... -1.905423e-05 2 \n",
"266 0.000000e+00 0.000000e+00 ... 0.000000e+00 0 \n",
"238 0.000000e+00 0.000000e+00 ... 0.000000e+00 0 \n",
"198 0.000000e+00 0.000000e+00 ... 0.000000e+00 0 \n",
"328 0.000000e+00 0.000000e+00 ... 0.000000e+00 0 \n",
"\n",
" log_aum_qty_mean log_gross_flow_qty_mean gross_flow_to_aum \\\n",
"246 17.057876 13.921813 2.433395 \n",
"355 14.819624 12.614115 14.325226 \n",
"183 16.429981 13.215404 2.852229 \n",
"213 16.206165 13.254687 3.501573 \n",
"240 15.155759 13.289979 8.976963 \n",
".. ... ... ... \n",
"210 10.214311 0.422133 0.001366 \n",
"266 8.939081 0.000000 0.000000 \n",
"238 9.093884 0.000000 0.000000 \n",
"198 10.191618 0.000000 0.000000 \n",
"328 12.580429 0.000000 0.000000 \n",
"\n",
" seg_quantiles rel_intensity_total frequency \\\n",
"246 High-flow 2.433395 1.000000 \n",
"355 High-flow 14.325226 1.000000 \n",
"183 High-flow 2.852229 1.000000 \n",
"213 High-flow 3.501573 0.820896 \n",
"240 High-flow 8.976963 0.172414 \n",
".. ... ... ... \n",
"210 Low-flow 0.001366 0.028169 \n",
"266 Low-flow 0.000000 0.000000 \n",
"238 Low-flow 0.000000 0.000000 \n",
"198 Low-flow 0.000000 0.000000 \n",
"328 Low-flow 0.000000 0.000000 \n",
"\n",
" seg_2D cluster_kmeans \n",
"246 Small rebalancers (low int, high freq) 3.0 \n",
"355 Highly active (high int, high freq) 1.0 \n",
"183 Small rebalancers (low int, high freq) 1.0 \n",
"213 Dormant (low int, low freq) 1.0 \n",
"240 Occasional large movers (high int, low freq) 3.0 \n",
".. ... ... \n",
"210 Dormant (low int, low freq) 0.0 \n",
"266 Dormant (low int, low freq) 0.0 \n",
"238 Dormant (low int, low freq) 0.0 \n",
"198 Dormant (low int, low freq) 0.0 \n",
"328 Dormant (low int, low freq) 0.0 \n",
"\n",
"[421 rows x 21 columns]"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rank clients from the largest to the smallest total gross flow.\n",
"dfc.sort_values(\"gross_flow_qty_sum\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 197,
"id": "2437e11b-04d2-4c49-b265-32a5681ef96a",
"metadata": {},
"outputs": [],
"source": [
"# Définition de la correspondance entre les codes numériques et les nouveaux labels\n",
"mapping = {\n",
" 0.0: \"Cluster 1 (66) : Dormant\",\n",
" 1.0: \"Cluster 2 (105) : Highly Active\",\n",
" 2.0: \"Cluster 3 (235)\",\n",
" 3.0: \"Cluster 4 (2)\",\n",
" 4.0: \"Cluster 5 (13) : Large Movers\"\n",
"}\n",
"\n",
"# Création de la nouvelle colonne 'cluster'\n",
"dfc['cluster'] = dfc['cluster_kmeans'].map(mapping)"
]
},
{
"cell_type": "code",
"execution_count": 198,
"id": "2a677706-ae26-4474-8a69-a0e486421feb",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAqqNJREFUeJzs3XlczPkfB/DXdB906NR9iUqRwmKJlU1sznUvybGOyLXIsu5rWdYVYd3LuhZr3bS5+bkqR26RK6EUlY6Zz++Ptu82zUzN1FzV+/l49GA+3+s936aZ93xOHmOMgRBCCCGkGtNQdQCEEEIIIapGCREhhBBCqj1KiAghhBBS7VFCRAghhJBqjxIiQgghhFR7lBARQgghpNqjhIgQQggh1R4lRIQQQgip9ighIoQQQki1RwmRmmndujXq168v13PyeDyMGjVKrueUxubNm8Hj8fD06VOlXxsABg4cCCcnJ5Vcmwh7+vQpeDweNm/erOpQ5EZRz8nJyQkDBw6U6znV+bqK0Lp1a7Ru3VrVYcjFtm3bUK9ePWhra8PExETV4VRplBCV4urVqxg1ahS8vLxgaGgIBwcH9OzZEw8ePBDZt3Xr1uDxeODxeNDQ0ICRkRHq1q2L/v374+TJkyqInhDZJCYmYubMmRVKYHfs2IFly5bJLaaq6uLFi5g5cyY+fPig6lDkYvXq1Wqd7L569QozZ85EfHy8qkORyb179zBw4EC4urpi/fr1WLdunapDqtK0VB2AOvv5559x4cIF9OjRAz4+PkhJScGqVavQqFEjXL58WaQmx87ODgsWLAAAZGVl4dGjR9i3bx9+//139OzZE7///ju0tbVV8VRUon///ujduzd0dXVVHQqRQmJiImbNmoXWrVuXu2Ztx44duH37NsaOHStU7ujoiJycnGr1+i/NxYsXMWvWLAwcOFDkW//9+/ehoaH876oVue7q1athbm6uNjVMJ06cEHr86tUrzJo1C05OTmjYsKFqgiqH06dPQyAQYPny5XBzc1N1OFUeJUSlGD9+PHbs2AEdHR2urFevXvD29sbChQvx+++/C+1vbGyM7777Tqhs4cKFiIiIwOrVq+Hk5ISff/5ZKbGrA01NTWhqasrtfNnZ2TAwMJDb+Yjy8Hg86OnpqTqMUgkEAuTl5ak8TlV9gahKX1yKv2dXZqmpqQBQZlMZYwyfP3+Gvr6+EqKqwhiRWaNGjVijRo2EygICApiXl5fY/QsKCpinpyczMDBgHz58KPXcRee5du0aa9asGdPT02NOTk5szZo1Ivt+/vyZTZ8+nbm6ujIdHR1mZ2fHJk6cyD5//iy0HwAWHh7O9u/fz7y8vJiOjg7z9PRkR48eFdrv6dOnbMSIEczd3Z3p6emxWrVqsW+//ZYlJSVx+1y9epUBYJs3bxaJ59ixYwwA+/vvvxljjG3atIkBEDqeMcaioqKYp6cn09HRYbVr12YjR45k6enpEu9Dy5Ytmb6+PhszZgxjjLEDBw6wDh06sNq1azMdHR3m4uLCZs+ezQoKCoTOERoayhwdHUu52/89p6+//pqZmZlx9zssLExoHz6fz3799Vfm6enJdHV1maWlJfv+++9ZWlqayH4zZsxgtWvXZvr6+qx169bszp07zNHRkYWGhnL7Fd2bc+fOsdGjRzNzc3NmbGzMvv/+e5abm8vS09NZ//79mYmJCTMxMWETJ05kAoGgXDE5Ojqyjh07snPnzrHGjRszXV1d5uzszLZs2SIST8mf2NhYqe95QECAyPFF9z8pKYkBYJs2bRKKLSYmhn355ZfMwMCAGRsbs06dOrHExEShfWbMmMEAsIcPH7LQ0FBmbGzMjIyM2MCBA1lWVpbQvm/fvmV3794VKRen6O/i999/Z56enkxLS4vt37+fMcbYixcvWFhYGLO0tOT+XjZs2CB0vLjnlJCQwEJDQ5mzszPT1dVlVlZWLCwsjL17907k+ZT8Kfo7Kf5akeXvTd
q4JZH0Gj1//jwbN24cMzc3ZwYGBqxLly4sNTVV6LiSzyUgIIDbnp6ezsaMGcPs7OyYjo4Oc3V1ZQsXLmR8Pl/kXi5evJitXbuWubi4MB0dHebv78+uXLkiFOfr16/ZwIEDma2tLdPR0WHW1tasU6dOQu8zAQEBXAyxsbFi7/emTZvY9OnTmZaWltDzKTJ06FBmbGzMcnJyxN6vxYsXMwDs6dOnItsiIyOZtrY297f44MED1q1bN2ZlZcV0dXWZra0t69WrV6mfB+Lu64wZM7htHTt2ZMeOHWN+fn5MV1eX/frrr1Lf76L9QkNDmZGRETM2NmYDBgxgcXFxIq/p4veyOHHvr/J8Tyoe59ixY5mjoyPT0dFhtra2rH///uzt27fs48ePzMDAgEVERIgc9/z5c6ahocHmz58v8R6XRAmRjAQCAbO1tWVff/21UHlpCRFjjM2ZM4cBYIcOHSr1/AEBAczGxoZZWlqyUaNGsRUrVrAvv/ySARB6Y+Pz+ezrr79mBgYGbOzYsWzt2rVs1KhRTEtLi3Xu3FnonABYgwYNWO3atdmcOXPYsmXLmIuLCzMwMBB6o96zZw9r0KABmz59Olu3bh378ccfmampKXN0dBT6gHFxcWEdOnQQiT0sLIyZmpqyvLw8xpj4hKjowyAwMJCtXLmSjRo1imlqarLGjRtzxxXdB2tra2ZhYcFGjx7N1q5dyw4cOMAYY6xLly6sZ8+ebPHixWzNmjWsR48eDAD74YcfhOKRJiF68+YNMzU1Ze7u7mzx4sVs/fr1bOrUqczDw0NovyFDhjAtLS02dOhQFh0dzSZPnswMDQ1F4p40aRIDwEJCQtiqVavY0KFDmZ2dHTM3Nxf7YdOwYUPWvn17FhUVxfr3788AsEmTJrEvv/yS9e3bl61evZp98803DIDIm4W0MTk6OrK6desyKysr9uOPP7JVq1axRo0aMR6Px27fvs0YY+zx48csIiKCAWA//vgj27ZtG9u2bRtLSUmR+p6fOHGCNWzYkJmbm3PHFyUY4pKHkydPMi0tLebu7s4WLVrEZs2axczNzZmpqanY14yvry/r1q0bW716NRsyZAh3r4or2rcokSsNAObh4cEsLCzYrFmzWFRUFIuLi2MpKSnMzs6O2dvbs9mzZ7M1a9awTp06MQDch46k5/TLL7+wli1bstmzZ7N169axMWPGMH19fdakSRMuoU1ISGB9+vThzld0rz59+sT9voq/VqT9e5M2bkkkJUS+vr7sq6++YitXrmQTJkxgmpqarGfPntx++/fvZ3Z2dqxevXrcczlx4gRjjLGsrCzm4+PDzMzM2I8//siio6PZgAEDGI/H477gFL+Xvr6+zM3Njf38889s0aJFzNzcnNnZ2Qm9nps3b86MjY3ZtGnT2G+//cbmz5/P2rRpw86cOcPtU/xDPCUlhc2ePZsBYN9//z0X4+PHj9nDhw8ZALZy5Uqhe5Gbm8tMTU3ZoEGDJN6vZ8+eMR6PxxYtWiSyzcXFhXXs2JE7l7OzM7OxsWFz585lv/32G5s1axZr3Lix2GSq+H3t2rUrA8DWrFnDtm3bxhISErjflZubGzM1NWWRkZEsOjqaxcbGSn2/BQIBa9WqFdPQ0GAjR45kK1euZF999RXz8fGpUEIkz/ckxhj7+PEjq1+/PtPU1GRDhw5la9asYXPmzGGNGzdmcXFxjDHG+vXrx6ysrES+EC9atIjxeDz27Nkzife4JEqIZLRt2zaR5ISxshOi/fv3MwBs+fLlpZ6/6Fv2kiVLuLLc3FzWsGFDZmlpyb2otm3bxjQ0NNi5c+eEjo+OjmYA2IULF7gyAExHR4c9evSIK0tISBB5I8jOzhaJ59KlSwwA27p1K1c2ZcoUoW8/RTGamJgIvYGUTIhSU1OZjo4O+/rrr4W+raxatYoBYBs3bhS5D9HR0SIxiYtz2LBhzMDAQKh2TJqEqOj3cvXqVYn7nDt3jgFg27dvFyov+oZeVJ6SksK0tL
RYly5dhPabOXMmAyD2wyYoKEio5qdZs2aMx+Ox4cOHc2UFBQXMzs5O6E1J2pgY+++b5tmzZ7my1NRUpquryyZMmMC
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Frequency vs. relative intensity for every cluster except 2 and 3,\n",
"# one scatter series per labelled cluster.\n",
"fig, ax = plt.subplots()\n",
"for name, g in dfc.loc[~dfc[\"cluster_kmeans\"].isin([2.0, 3.0])].groupby(\"cluster\"):\n",
"    ax.scatter(g[\"frequency\"], g[\"rel_intensity_total\"], s=10, label=name)\n",
"\n",
"ax.set_yscale(\"log\")\n",
"# Dashed guides at thr_freq / thr_int (presumably the cut-offs of the 2D\n",
"# segmentation defined earlier in the notebook -- TODO confirm).\n",
"ax.axvline(thr_freq, linestyle=\"--\")\n",
"ax.axhline(thr_int, linestyle=\"--\")\n",
"ax.set(\n",
"    xlabel=\"Activity frequency (share of active months)\",\n",
"    ylabel=\"Gross flow / mean AUM (quantity) [log scale]\",\n",
"    title=\"2D behavioral segmentation: relative intensity vs frequency\",\n",
")\n",
"ax.legend(markerscale=2)\n",
"# Restrict the visible intensity range to [0.1, 100].\n",
"ax.set_ylim(0.1, 100)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 201,
"id": "41f5ffd2-1c90-48a6-be8f-adcce2d42185",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAHHCAYAAAC7soLdAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjhBJREFUeJzt3XdYU9f/B/B3GGHJUpAhS3CBA3AWHLgp7lHFUbe2VdxWxdq662q1tYqzrq97obV1i3u0DkZVBBVRcC+GijKS8/vDHykhCeSGjJvweT0Pz0NOTu795Obm3k/OPedcAWOMgRBCCCHEgBnpOgBCCCGEEE2jhIcQQgghBo8SHkIIIYQYPEp4CCGEEGLwKOEhhBBCiMGjhIcQQgghBo8SHkIIIYQYPEp4CCGEEGLwKOEhhBBCiMGjhEcHWrZsiTp16qh1mQKBAKNHj1brMpWxadMmCAQCPHjwQOvrBoDBgwfDy8tLJ+sm0h48eACBQIBNmzbpOhS10dR78vLywuDBg9W6TD6vVxNatmyJli1b6joMtdiyZQtq1aoFU1NT2NnZ6Tocg1XuE56rV69i9OjRqF27NqysrODh4YHevXvjzp07MnVbtmwJgUAAgUAAIyMj2NjYoGbNmhgwYABOnDihg+gJ4SYxMRGzZs0qU4K6fft2/Prrr2qLyVBdunQJs2bNQmZmpq5DUYuVK1fyOpl98uQJZs2ahfj4eF2HwklSUhIGDx4MHx8frFu3DmvXrtV1SAbLRNcB6NqiRYtw8eJF9OrVC/Xq1cOzZ8+wYsUK1K9fH3///bdMS4ybmxsWLFgAAHj//j3u3buH6OhobN26Fb1798bWrVthamqqi7eiEwMGDECfPn1gZmam61CIEhITEzF79my0bNlS5Zax7du34+bNmxg/frxUuaenJz58+FCu9v+SXLp0CbNnz8bgwYNlfrUnJyfDyEj7vzfLst6VK1fCwcGBNy1Ex48fl3r85MkTzJ49G15eXggICNBNUCo4c+YMxGIxli1bhmrVquk6HINW7hOeiRMnYvv27RAKhZKy8PBw1K1bFwsXLsTWrVul6tva2uLLL7+UKlu4cCHGjh2LlStXwsvLC4sWLdJK7HxgbGwMY2NjtS0vJycHlpaWalse0R6BQABzc3Ndh1EisViMvLw8ncepqx8IhvTDpOgxW5+9ePECAEq9lMUYw8ePH2FhYaGFqAwUI3LVr1+f1a9fX6osJCSE1a5dW279goIC5ufnxywtLVlmZmaJyy5czrVr11hQUBAzNzdnXl5ebNWqVTJ1P378yGbMmMF8fHyYUChkbm5ubPLkyezjx49S9QCwiIgItn//fla7dm0mFAqZn58fO3LkiFS9Bw8esJEjR7IaNWowc3NzVrFiRfbFF1+w1NRUSZ2rV68yAGzTpk0y8Rw9epQBYH/++SdjjLGNGzcyAFKvZ4yxqKgo5ufnx4RCIXNxcWGjRo1iGRkZCrdD8+bNmYWFBRs3bhxjjLEDBw6wDh06MBcXFyYUCpm3tzebM2cOKygokFrGoEGDmKenZwlb+7/31L59e1apUiXJ9h4yZIhUHZFIxH755Rfm5+fHzMzMWOXKldlXX33F3rx5I1Nv5syZzMXFhVlYWLCWLVuyW7duMU9PTzZo0CBJvcJtc/78eTZmzBjm4ODAbG1t2VdffcVyc3NZRkYGGzBgALOzs2N2dnZs8uTJTCwWqxSTp6cn69ixIzt//jxr1KgRMzMzY1WrVmWbN2+Wiaf43+nTp5Xe5iEhITKvL9z+qampDADbuHGjVGwxMTGsWbNmzNLSktna2rIuXbqwxMREqTozZ85kANjdu3fZoEGDmK2tLbOxsWGDBw9m79+/l6r78uVLdvv2bZlyeQq/F1u3bmV+fn7MxMSE7d+/nzHG2KNHj9iQIUNY5cqVJd+X9evXS71e3ntKSEhggwYNYlWrVmVmZmbMycmJDRkyhL169Urm/RT/K/yeFN1XuH
zflI1bEUX76IULF9iECROYg4MDs7S0ZN26dWMvXryQel3x9xISEiJ5PiMjg40bN465ubkxoVDIfHx82MKFC5lIJJLZlj/99BNbs2YN8/b2ZkKhkDVs2JBduXJFKs6nT5+ywYMHsypVqjChUMicnZ1Zly5dpI4zISEhkhhOnz4td3tv3LiRzZgxg5mYmEi9n0IjRoxgtra27MOHD3K3108//cQAsAcPHsg8FxkZyUxNTSXfxTt37rAePXowJycnZmZmxqpUqcLCw8NLPB/I264zZ86UPNexY0d29OhR1qBBA2ZmZsZ++eUXpbd3Yb1BgwYxGxsbZmtrywYOHMji4uJk9umi27IoecdXdR6TisY5fvx45unpyYRCIatSpQobMGAAe/nyJXv79i2ztLRkY8eOlXldeno6MzIyYvPnz1e4jYuihEcOsVjMqlSpwtq3by9VXlLCwxhjc+fOZQDYX3/9VeLyQ0JCmKurK6tcuTIbPXo0++2331izZs0YAKkDl0gkYu3bt2eWlpZs/PjxbM2aNWz06NHMxMSEde3aVWqZAJi/vz9zcXFhc+fOZb/++ivz9vZmlpaWUgfiPXv2MH9/fzZjxgy2du1a9t133zF7e3vm6ekpdQLx9vZmHTp0kIl9yJAhzN7enuXl5THG5Cc8hQf7tm3bsuXLl7PRo0czY2Nj1qhRI8nrCreDs7Mzc3R0ZGPGjGFr1qxhBw4cYIwx1q1bN9a7d2/2008/sVWrVrFevXoxAOzbb7+VikeZhOf58+fM3t6e1ahRg/30009s3bp1bPr06czX11eq3vDhw5mJiQkbMWIEW716NZs6dSqzsrKSiXvKlCkMAOvcuTNbsWIFGzFiBHNzc2MODg5yTyYBAQHs888/Z1FRUWzAgAEMAJsyZQpr1qwZ69evH1u5ciXr1KkTAyBzMFA2Jk9PT1azZk3m5OTEvvvuO7ZixQpWv359JhAI2M2bNxljjKWkpLCxY8cyAOy7775jW7ZsYVu2bGHPnj1TepsfP36cBQQEMAcHB8nrCxMIecnBiRMnmImJCatRowZbvHgxmz17NnNwcGD29vZy95nAwEDWo0cPtnLlSjZ8+HDJtiqqsG5holYSAMzX15c5Ojqy2bNns6ioKBYXF8eePXvG3NzcmLu7O5szZw5btWoV69KlCwMgOakoek8///wza968OZszZw5bu3YtGzduHLOwsGCNGzeWJKwJCQmsb9++kuUVbqt3795JPq+i+4qy3zdl41ZEUcITGBjIWrduzZYvX84mTZrEjI2NWe/evSX19u/fz9zc3FitWrUk7+X48eOMMcbev3/P6tWrxypVqsS+++47tnr1ajZw4EAmEAgkP2CKbsvAwEBWrVo1tmjRIrZ48WLm4ODA3NzcpPbn4OBgZmtry77//nv2+++/s/nz57NWrVqxs2fPSuoUPUk/e/aMzZkzhwFgX331lSTGlJQUdvfuXQaALV++XGpb5ObmMnt7ezZ06FCF2+vhw4dMIBCwxYsXyzzn7e3NOnbsKFlW1apVmaurK5s3bx77/fff2ezZs1mjRo3kJktFt2v37t0ZALZq1Sq2ZcsWlpCQIPmsqlWrxuzt7VlkZCRbvXo1O336tNLbWywWsxYtWjAjIyM2atQotnz5cta6dWtWr169MiU86jwmMcbY27dvWZ06dZixsTEbMWIEW7VqFZs7dy5r1KgRi4uLY4wx1r9/f+bk5CTzg3fx4sVMIBCwhw8fKtzGRVHCI8eWLVtkkg/GSk949u/fzwCwZcuWlbj8wl/JS5YskZTl5uaygIAAVrlyZclOs2XLFmZkZMTOnz8v9frVq1czAOzixYuSMgBMKBSye/fuScoSEhJkvug5OTky8Vy+fJkBYP/73/8kZdOmTZP69VIYo52dndQBonjC8+LFCyYUCln79u2lfm2sWLGCAWAbNmyQ2Q6rV6+WiUlenF9//TWztLSUat1SJuEp/FyuXr2qsM758+cZALZt2zap8s
Jf2IXlz549YyYmJqxbt25S9WbNmsUAyD2ZhIaGSrXcBAUFMYFAwL755htJWUFBAXNzc5M66CgbE2P//VI8d+6cpOz
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Scatter for k-means cluster 2.0 only: activity frequency vs. relative intensity,\n",
"# one series per `cluster` label, with both thresholds drawn as dashed lines.\n",
"points_km2 = dfc[dfc['cluster_kmeans'] == 2.0]\n",
"ax_single = plt.figure().gca()\n",
"for seg_label, seg_rows in points_km2.groupby(\"cluster\"):\n",
"    ax_single.scatter(seg_rows[\"frequency\"], seg_rows[\"rel_intensity_total\"], s=10, label=seg_label)\n",
"\n",
"ax_single.set_yscale(\"log\")\n",
"ax_single.axvline(thr_freq, linestyle=\"--\")\n",
"ax_single.axhline(thr_int, linestyle=\"--\")\n",
"ax_single.set_xlabel(\"Activity frequency (share of active months)\")\n",
"ax_single.set_ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"ax_single.set_title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"ax_single.legend(markerscale=2)\n",
"plt.show()\n",
"\n",
"# Column names kept for reference (presumably the clustering inputs - TODO confirm):\n",
"#   \"log_aum_qty_mean\"          size (log)\n",
"#   \"log_gross_flow_qty_mean\"   activity intensity (log)\n",
"#   \"frequency\"                 activity frequency\n",
"#   \"rel_intensity_total\"       turnover proxy\n",
"#   \"net_flow_qty_vol\"          volatility of net flows\n",
"#   \"n_tx_total\""
]
},
{
"cell_type": "code",
"execution_count": 206,
"id": "ba298e96-5919-44d1-91e3-67d38041349f",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAHqCAYAAADVi/1VAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA1CdJREFUeJzs3XdYU9cbB/Bv2ENBFFwsBScqigtHLVq1uK3aatW6tWpxW8VVZ911VXHW/XNVq9Zqratq3XvjXtS9caCsnN8faSIhgyRkwvfzPDyQk3PvfXO5uRzenCERQggQERERERERERGZkZ2lAyAiIiIiIiIiouyHSSkiIiIiIiIiIjI7JqWIiIiIiIiIiMjsmJQiIiIiIiIiIiKzY1KKiIiIiIiIiIjMjkkpIiIiIiIiIiIyOyaliIiIiIiIiIjI7JiUIiIiIiIiIiIis2NSioiIiIiIiIiIzI5JqWykZs2aKF26tFH3KZFI0KtXL6PuUxfLli2DRCLBnTt3zH5sAOjYsSMKFSpkkWOTsjt37kAikWDZsmWWDsVoTPWaChUqhI4dOxp1n9Z8XFOoWbMmatasaekwjGLlypUoUaIEHB0dkStXLkuHQ6Q3tmuMh+0a68F2je7Yrsk8tmvIGjApZSEnTpxAr169UKpUKbi7uyMgIAAtW7bEtWvXVOrWrFkTEokEEokEdnZ28PDwQPHixdGuXTvs2rXLAtET6Sc2NhajR4/OVGN79erVmDlzptFiyqoOHz6M0aNH49WrV5YOxSjmzp1r1Q3zBw8eYPTo0Th79qylQ9HLlStX0LFjRwQHB2PRokVYuHChpUMiG8d2DWUnbNeYD9s15sV2DVmCg6UDyK4mT56MQ4cO4auvvkJoaCgePXqEOXPmoHz58jh69KjKJ39+fn6YOHEiAODdu3e4ceMGNm7ciP/9739o2bIl/ve//8HR0dESL8Ui2rVrh6+//hrOzs6WDoV0EBsbizFjxqBmzZoGfxK7evVqXLx4Ef369VMqDwwMxPv377PV9a/N4cOHMWbMGHTs2FHlU6KrV6/Czs78n0Vk5rhz586Ft7e31XwiuXPnTqXHDx48wJgxY1CoUCGUK1fOMkEZYN++fZBKpZg1axaKFCli6XAoC2C7JnPYrrEtbNeYD9s1psV2DVkDJqUsZMCAAVi9ejWcnJwUZa1atUKZMmUwadIk/O9//1Oq7+npiW+++UapbNKkSejTpw/mzp2LQoUKYfLkyWaJ3RrY29vD3t7eaPtLSEiAm5ub0fZH5iORSODi4mLpMLSSSqVISkqyeJyW+mcnK/2TlfaebcuePHkCABl2bxdC4MOHD3B1dTVDVGTL2K7JHLZrSI7tGt2xXZN5bNeQVRBkVcqXLy/Kly+vVBYRESFKlSqltn5KSooICQkRbm5u4tWrV1r3Ld/PyZMnRdWqVYWLi4soVKiQmDdvnkrdDx8+iJEjR4rg4GDh5OQk/Pz8xKBBg8SHDx+U6gEQUVFRYtOmTaJUqVLCyclJhISEiO3btyvVu3PnjujZs6coVqyYcHFxEblz5xZffvmluH37tqLOiRMnBACxbNkylXj++usvAUD88ccfQgghli5dKgAobS+EEDExMSIkJEQ4OTmJAgUKiO+++068fPlS43moUaOGcHV1FX379hVCCLF582bRoEEDUaBAAeHk5CSCgoLE2LFjRUpKitI+OnToIAIDA7Wc7Y+v6fPPPxd58uRRnO9OnTop1UlNTRUzZswQISEhwtnZWeTNm1d8++234sWLFyr1Ro0aJQoUKCBcXV1FzZo1xaVLl0RgYKDo0KGDop783Bw4cED07t1beHt7C09PT/Htt9+KxMRE8fLlS9GuXTuRK1cukStXLjFo0CAhlUoNiikwMFA0bNhQHDhwQFSqVEk4OzuLwoULi+XLl6vEk/5r7969Op/ziIgIle3l5//27dsCgFi6dKlSbHv27BGffPKJcHNzE56enqJJkyYiNj
ZWqc6oUaMEAHH9+nXRoUMH4enpKTw8PETHjh3Fu3fvlOo+ffpUXL58WaVcHfn74n//+58ICQkRDg4OYtOmTUIIIe7duyc6deok8ubNq3i/LF68WGl7da/p3LlzokOHDqJw4cLC2dlZ5MuXT3Tq1Ek8e/ZM5fWk/5K/T9JeK/q833SNWxNN1+jBgwdF//79hbe3t3BzcxNffPGFePLkidJ26V9LRESE4vmXL1+Kvn37Cj8/P+Hk5CSCg4PFpEmTRGpqqsq5nDp1qliwYIEICgoSTk5OomLFiuL48eNKcT58+FB07NhR+Pr6CicnJ5E/f37RpEkTpftMRESEIoa9e/eqPd9Lly4VI0eOFA4ODkqvR65bt27C09NTvH//Xu35mjp1qgAg7ty5o/LckCFDhKOjo+K9eO3aNdG8eXORL18+4ezsLHx9fUWrVq20/j1Qd15HjRqleK5hw4bir7/+EhUqVBDOzs5ixowZOp9veb0OHToIDw8P4enpKdq3by/OnDmjck2nPZdpqbu/GvOelDbOfv36icDAQOHk5CR8fX1Fu3btxNOnT8WbN2+Em5ub6NOnj8p2//77r7CzsxMTJkzQeI7pI7Zr2K5hu4btGrZrPm7Hdo0ytmvYrhFCCCalrIhUKhW+vr7i888/VyrX1ngTQohx48YJAGLr1q1a9x8RESEKFiwo8ubNK3r16iV+/vln8cknnwgASjfh1NRU8fnnnws3NzfRr18/sWDBAtGrVy/h4OAgmjZtqrRPAKJs2bKiQIECYty4cWLmzJkiKChIuLm5Kf1RWb9+vShbtqwYOXKkWLhwoRg2bJjw8vISgYGBSn8Mg4KCRIMGDVRi79Spk/Dy8hJJSUlCCPWNN/kfrjp16ojZs2eLXr16CXt7e1GpUiXFdvLzkD9/fuHj4yN69+4tFixYIDZv3iyEEOKLL74QLVu2FFOnThXz5s0TX331lQAgvv/+e6V4dGm8PX78WHh5eYlixYqJqVOnikWLFonhw4eLkiVLKtXr2rWrcHBwEN26dRPz588X0dHRwt3dXSXuwYMHCwCicePGYs6cOaJbt27Cz89PeHt7q/3DWK5cOVGvXj0RExMj2rVrJwCIwYMHi08++US0adNGzJ07VzRq1EgAULmx6RpTYGCgKF68uMiXL58YNmyYmDNnjihfvryQSCTi4sWLQgghbt68Kfr06SMAiGHDhomVK1eKlStXikePHul8znfu3CnKlSsnvL29FdvLG0PqGjq7du0SDg4OolixYmLKlClizJgxwtvbW3h5eam9ZsLCwkTz5s3F3LlzRdeuXRXnKi15XXmjUxsAomTJksLHx0eMGTNGxMTEiDNnzohHjx4JPz8/4e/vL8aOHSvmzZsnmjRpIgAo/kBqek0//fSTqFGjhhg7dqxYuHCh6Nu3r3B1dRWVK1dWNL7PnTsnWrdurdif/Fy9fftW8ftKe63o+n7TNW5NNDXewsLCxGeffSZmz54tBg4cKOzt7UXLli0V9TZt2iT8/PxEiRIlFK9l586dQggh3r17J0JDQ0WePHnEsGHDxPz580X79u2FRCJR/DOW9lyGhYWJIkWKiMmTJ4spU6YIb29v4efnp3Q9V6tWTXh6eooRI0aIX375RUyYMEHUqlVL7N+/X1EnbYPj0aNHYuzYsQKA+PbbbxUx3rx5U1y/fl0AELNnz1Y6F4mJicLLy0t07txZ4/m6e/eukEgkYsqUKSrPBQUFiYYNGyr2VbhwYVGwYEHx448/il9++UWMGTNGVKpUSW3DL+15bdasmQAg5s2bJ1auXCnOnTun+F0VKVJEeHl5iSFDhoj58+eLvXv36ny+pVKp+PTTT4WdnZ347rvvxOzZs8Vnn30mQkNDM9V4M+Y9SQgh3rx5I0qXLi3s7e1Ft27dxLx588S4ceNEpUqVxJkzZ4QQQrRt21bky5dP5Z/3KVOmCIlEIu7evavxHJMM2zVs17Bdw3aNptfEdg3bNUKwXcN2jQyTUl
Zk5cqVKg0pITJuvG3atEkAELNmzdK6f/mnMtOmTVOUJSYminLlyom8efMq3gArV64UdnZ24sCBA0rbz58/XwAQhw4
"text/plain": [
"<Figure size 1200x500 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def _style_panel(ax):\n",
"    \"\"\"Apply the styling shared by both panels: log y-axis, dashed thresholds, x label, title.\"\"\"\n",
"    ax.set_yscale(\"log\")\n",
"    ax.axvline(thr_freq, linestyle=\"--\")\n",
"    ax.axhline(thr_int, linestyle=\"--\")\n",
"    ax.set_xlabel(\"Activity frequency\")\n",
"    ax.set_title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"\n",
"\n",
"fig, axes = plt.subplots(1, 2, figsize=(12,5), sharey=True)\n",
"ax_others, ax_km2_only = axes\n",
"\n",
"# Left panel: every row whose k-means label is neither 2.0 nor 3.0, one series per `cluster`.\n",
"rows_outside_2_3 = dfc[~dfc['cluster_kmeans'].isin([2.0, 3.0])]\n",
"for seg_label, seg_rows in rows_outside_2_3.groupby(\"cluster\"):\n",
"    ax_others.scatter(seg_rows[\"frequency\"], seg_rows[\"rel_intensity_total\"], s=10, label=seg_label)\n",
"\n",
"_style_panel(ax_others)\n",
"ax_others.set_ylabel(\"Gross flow / mean AUM\")\n",
"# The y-axis is shared, so this range also applies to the right panel.\n",
"ax_others.set_ylim(0.1,100)\n",
"ax_others.legend(markerscale=2)\n",
"\n",
"# Right panel: k-means cluster 2.0 only, drawn in a fixed red colour.\n",
"rows_km2 = dfc[dfc['cluster_kmeans'] == 2.0]\n",
"for seg_label, seg_rows in rows_km2.groupby(\"cluster\"):\n",
"    ax_km2_only.scatter(seg_rows[\"frequency\"], seg_rows[\"rel_intensity_total\"], s=10, label=seg_label, color=\"red\")\n",
"\n",
"_style_panel(ax_km2_only)\n",
"ax_km2_only.legend(markerscale=2)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 129,
"id": "64176145-ee5b-4ea8-98ed-c155146dd2f6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>n_tx</th>\n",
" <th>active_month</th>\n",
" <th>rel_intensity_m</th>\n",
" <th>netflow_to_aum_m</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>2015-01-31</td>\n",
" <td>179864.637</td>\n",
" <td>-1524.010</td>\n",
" <td>15230.010</td>\n",
" <td>32</td>\n",
" <td>1</td>\n",
" <td>0.084675</td>\n",
" <td>-0.008473</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>2015-02-28</td>\n",
" <td>186761.736</td>\n",
" <td>7247.100</td>\n",
" <td>18571.880</td>\n",
" <td>38</td>\n",
" <td>1</td>\n",
" <td>0.099442</td>\n",
" <td>0.038804</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>2015-03-31</td>\n",
" <td>190357.718</td>\n",
" <td>3655.380</td>\n",
" <td>9754.040</td>\n",
" <td>47</td>\n",
" <td>1</td>\n",
" <td>0.051241</td>\n",
" <td>0.019203</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>2015-04-30</td>\n",
" <td>191429.324</td>\n",
" <td>-218.394</td>\n",
" <td>12840.950</td>\n",
" <td>39</td>\n",
" <td>1</td>\n",
" <td>0.067079</td>\n",
" <td>-0.001141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>2015-05-31</td>\n",
" <td>189056.475</td>\n",
" <td>-4782.849</td>\n",
" <td>6332.849</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>0.033497</td>\n",
" <td>-0.025299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33967</th>\n",
" <td>422874</td>\n",
" <td>2025-06-30</td>\n",
" <td>55540.077</td>\n",
" <td>1303.393</td>\n",
" <td>1303.393</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0.023468</td>\n",
" <td>0.023468</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33968</th>\n",
" <td>422874</td>\n",
" <td>2025-07-31</td>\n",
" <td>55179.460</td>\n",
" <td>-1013.363</td>\n",
" <td>2066.489</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>0.037450</td>\n",
" <td>-0.018365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33969</th>\n",
" <td>422874</td>\n",
" <td>2025-08-31</td>\n",
" <td>56928.472</td>\n",
" <td>1749.012</td>\n",
" <td>2010.564</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0.035317</td>\n",
" <td>0.030723</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33970</th>\n",
" <td>422874</td>\n",
" <td>2025-09-30</td>\n",
" <td>57042.358</td>\n",
" <td>113.886</td>\n",
" <td>3895.248</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>0.068287</td>\n",
" <td>0.001997</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33971</th>\n",
" <td>422874</td>\n",
" <td>2025-10-31</td>\n",
" <td>56522.708</td>\n",
" <td>-555.680</td>\n",
" <td>1619.142</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0.028646</td>\n",
" <td>-0.009831</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>33972 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty net_flow_qty \\\n",
"0 18872 2015-01-31 179864.637 -1524.010 \n",
"1 18872 2015-02-28 186761.736 7247.100 \n",
"2 18872 2015-03-31 190357.718 3655.380 \n",
"3 18872 2015-04-30 191429.324 -218.394 \n",
"4 18872 2015-05-31 189056.475 -4782.849 \n",
"... ... ... ... ... \n",
"33967 422874 2025-06-30 55540.077 1303.393 \n",
"33968 422874 2025-07-31 55179.460 -1013.363 \n",
"33969 422874 2025-08-31 56928.472 1749.012 \n",
"33970 422874 2025-09-30 57042.358 113.886 \n",
"33971 422874 2025-10-31 56522.708 -555.680 \n",
"\n",
" gross_flow_qty n_tx active_month rel_intensity_m netflow_to_aum_m \n",
"0 15230.010 32 1 0.084675 -0.008473 \n",
"1 18571.880 38 1 0.099442 0.038804 \n",
"2 9754.040 47 1 0.051241 0.019203 \n",
"3 12840.950 39 1 0.067079 -0.001141 \n",
"4 6332.849 24 1 0.033497 -0.025299 \n",
"... ... ... ... ... ... \n",
"33967 1303.393 5 1 0.023468 0.023468 \n",
"33968 2066.489 9 1 0.037450 -0.018365 \n",
"33969 2010.564 3 1 0.035317 0.030723 \n",
"33970 3895.248 7 1 0.068287 0.001997 \n",
"33971 1619.142 6 1 0.028646 -0.009831 \n",
"\n",
"[33972 rows x 9 columns]"
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Temporal analysis: display the table currently bound to `df`.\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 130,
"id": "17787765-a4bb-4e23-80f0-92c91d16f1cc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>n_tx</th>\n",
" <th>active_month</th>\n",
" <th>rel_intensity_m</th>\n",
" <th>netflow_to_aum_m</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>2015-01-31</td>\n",
" <td>179864.637</td>\n",
" <td>-1524.010</td>\n",
" <td>15230.010</td>\n",
" <td>32</td>\n",
" <td>1</td>\n",
" <td>8.467484e-02</td>\n",
" <td>-8.473094e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27192</th>\n",
" <td>365377</td>\n",
" <td>2015-01-31</td>\n",
" <td>0.000</td>\n",
" <td>3640.020</td>\n",
" <td>7687.660</td>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" <td>7.687660e+12</td>\n",
" <td>3.640020e+12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>650</th>\n",
" <td>200000201</td>\n",
" <td>2015-01-31</td>\n",
" <td>17072.819</td>\n",
" <td>-494.780</td>\n",
" <td>800.440</td>\n",
" <td>20</td>\n",
" <td>1</td>\n",
" <td>4.688388e-02</td>\n",
" <td>-2.898057e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25893</th>\n",
" <td>365172</td>\n",
" <td>2015-01-31</td>\n",
" <td>67707.000</td>\n",
" <td>11917.000</td>\n",
" <td>11957.000</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1.765992e-01</td>\n",
" <td>1.760084e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33712</th>\n",
" <td>422691</td>\n",
" <td>2015-01-31</td>\n",
" <td>60705.316</td>\n",
" <td>3724.160</td>\n",
" <td>6372.040</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>1.049668e-01</td>\n",
" <td>6.134817e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26152</th>\n",
" <td>365236</td>\n",
" <td>2025-10-31</td>\n",
" <td>74195.145</td>\n",
" <td>-29100.206</td>\n",
" <td>32046.852</td>\n",
" <td>98</td>\n",
" <td>1</td>\n",
" <td>4.319265e-01</td>\n",
" <td>-3.922117e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13522</th>\n",
" <td>200127403</td>\n",
" <td>2025-10-31</td>\n",
" <td>17711.000</td>\n",
" <td>197.000</td>\n",
" <td>197.000</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1.112303e-02</td>\n",
" <td>1.112303e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13593</th>\n",
" <td>200127404</td>\n",
" <td>2025-10-31</td>\n",
" <td>44881.000</td>\n",
" <td>3099.500</td>\n",
" <td>3539.500</td>\n",
" <td>15</td>\n",
" <td>1</td>\n",
" <td>7.886411e-02</td>\n",
" <td>6.906040e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18584</th>\n",
" <td>200128363</td>\n",
" <td>2025-10-31</td>\n",
" <td>7491447.864</td>\n",
" <td>0.000</td>\n",
" <td>0.000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33971</th>\n",
" <td>422874</td>\n",
" <td>2025-10-31</td>\n",
" <td>56522.708</td>\n",
" <td>-555.680</td>\n",
" <td>1619.142</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>2.864587e-02</td>\n",
" <td>-9.831093e-03</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>33972 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty net_flow_qty \\\n",
"0 18872 2015-01-31 179864.637 -1524.010 \n",
"27192 365377 2015-01-31 0.000 3640.020 \n",
"650 200000201 2015-01-31 17072.819 -494.780 \n",
"25893 365172 2015-01-31 67707.000 11917.000 \n",
"33712 422691 2015-01-31 60705.316 3724.160 \n",
"... ... ... ... ... \n",
"26152 365236 2025-10-31 74195.145 -29100.206 \n",
"13522 200127403 2025-10-31 17711.000 197.000 \n",
"13593 200127404 2025-10-31 44881.000 3099.500 \n",
"18584 200128363 2025-10-31 7491447.864 0.000 \n",
"33971 422874 2025-10-31 56522.708 -555.680 \n",
"\n",
" gross_flow_qty n_tx active_month rel_intensity_m netflow_to_aum_m \n",
"0 15230.010 32 1 8.467484e-02 -8.473094e-03 \n",
"27192 7687.660 63 1 7.687660e+12 3.640020e+12 \n",
"650 800.440 20 1 4.688388e-02 -2.898057e-02 \n",
"25893 11957.000 4 1 1.765992e-01 1.760084e-01 \n",
"33712 6372.040 24 1 1.049668e-01 6.134817e-02 \n",
"... ... ... ... ... ... \n",
"26152 32046.852 98 1 4.319265e-01 -3.922117e-01 \n",
"13522 197.000 4 1 1.112303e-02 1.112303e-02 \n",
"13593 3539.500 15 1 7.886411e-02 6.906040e-02 \n",
"18584 0.000 0 0 0.000000e+00 0.000000e+00 \n",
"33971 1619.142 6 1 2.864587e-02 -9.831093e-03 \n",
"\n",
"[33972 rows x 9 columns]"
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Monthly panel in chronological order (ascending is the sort_values default).\n",
"df_month.sort_values(\"month\")"
]
},
{
"cell_type": "code",
"execution_count": 150,
"id": "0ff98dd4-cd21-443a-a603-6dbdee066b87",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Filtered clients: (154, 18)\n",
"Filtered clients: (355, 18)\n",
"Filtered clients: (421, 18)\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Three consecutive 3-year windows; a month belongs to a window when start < month <= end.\n",
"windows = [\n",
"    (\"2016-10-31\", \"2019-10-31\"),\n",
"    (\"2019-10-31\", \"2022-10-31\"),\n",
"    (\"2022-10-31\", \"2025-10-31\")\n",
"]\n",
"\n",
"stability_results = []\n",
"\n",
"# Client-level aggregations: output column -> (monthly column, reducer).\n",
"client_aggs = {\n",
"    # Coverage / activity\n",
"    \"n_months\": (\"month\", \"nunique\"),\n",
"    \"n_active_months\": (\"active_month\", \"sum\"),\n",
"    \"flow_freq\": (\"active_month\", \"mean\"),\n",
"    # Size in quantity terms\n",
"    \"aum_qty_mean\": (\"aum_qty\", \"mean\"),\n",
"    \"aum_qty_median\": (\"aum_qty\", \"median\"),\n",
"    # Flows in quantity terms\n",
"    \"net_flow_qty_sum\": (\"net_flow_qty\", \"sum\"),\n",
"    \"gross_flow_qty_sum\": (\"gross_flow_qty\", \"sum\"),\n",
"    \"gross_flow_qty_mean\": (\"gross_flow_qty\", \"mean\"),\n",
"    # Dispersion / volatility proxy\n",
"    \"net_flow_qty_vol\": (\"net_flow_qty\", \"std\"),\n",
"    \"rel_intensity\": (\"rel_intensity_m\", \"mean\"),\n",
"    \"netflow_to_aum\": (\"netflow_to_aum_m\", \"mean\"),\n",
"    # Trading frequency proxy\n",
"    \"n_tx_total\": (\"n_tx\", \"sum\"),\n",
"}\n",
"\n",
"# One filtered client table per window, keyed by the window start date.\n",
"window_frames = {}\n",
"\n",
"for start, end in windows:\n",
"    # Recompute the variables on the window only.\n",
"    # Note: this filtering step has to be adapted to the raw, date-level data.\n",
"    in_window = (df_month['month'] > start) & (df_month['month'] <= end)\n",
"    df_period = df_month[in_window].copy()\n",
"\n",
"    # eps avoids a division by zero; months with zero AUM then yield very large ratios.\n",
"    eps = 1e-9\n",
"    aum_abs = df_period[\"aum_qty\"].abs() + eps\n",
"\n",
"    # 1) Active month indicator: did the client trade this month?\n",
"    #    Many zero months -> \"stable / dormant\" client;\n",
"    #    active almost every month -> \"rebalancer / active institutional\" client.\n",
"    df_period[\"active_month\"] = (df_period[\"gross_flow_qty\"] > 0).astype(int)\n",
"\n",
"    # 2) Monthly relative intensity (turnover proxy in quantity terms): trading intensity\n",
"    #    relative to size, so that small and large clients can be compared.\n",
"    df_period[\"rel_intensity_m\"] = df_period[\"gross_flow_qty\"] / aum_abs\n",
"\n",
"    # 3) Monthly net flow ratio: captures the direction of the dynamics.\n",
"    df_period[\"netflow_to_aum_m\"] = df_period[\"net_flow_qty\"] / aum_abs\n",
"\n",
"    # 4) Aggregate to client-level features (1 row per client)\n",
"    dft_client_feat = df_period.groupby(ID_COL, as_index=False).agg(**client_aggs)\n",
"\n",
"    # 5) std is NaN for a single observation; treat that as zero volatility.\n",
"    dft_client_feat[\"net_flow_qty_vol\"] = dft_client_feat[\"net_flow_qty_vol\"].fillna(0.0)\n",
"\n",
"    # 6) Log transforms (distributions are heavy-tailed); negatives are clipped to 0 first.\n",
"    for mean_col in (\"aum_qty_mean\", \"gross_flow_qty_mean\"):\n",
"        dft_client_feat[\"log_\" + mean_col] = np.log1p(dft_client_feat[mean_col].clip(lower=0))\n",
"\n",
"    # 7) Global turnover proxy\n",
"    dft_client_feat[\"gross_flow_to_aum\"] = (\n",
"        dft_client_feat[\"gross_flow_qty_sum\"] / (dft_client_feat[\"aum_qty_mean\"].abs() + eps)\n",
"    )\n",
"\n",
"    # Minimal filters (adjust if needed): at least 6 observed months and non-zero mean holdings.\n",
"    enough_history = dft_client_feat[\"n_months\"] >= 6\n",
"    nonzero_holdings = dft_client_feat[\"aum_qty_mean\"].abs() > 0\n",
"    dfct = dft_client_feat[enough_history & nonzero_holdings].copy()\n",
"\n",
"    # Aliases under the names used for the segmentation axes.\n",
"    dfct[\"rel_intensity_total\"] = dfct[\"gross_flow_to_aum\"]  # turnover proxy\n",
"    dfct[\"frequency\"] = dfct[\"flow_freq\"]\n",
"\n",
"    window_frames[start] = dfct.copy()\n",
"    print(\"Filtered clients:\", dfct.shape)\n",
"\n",
"# Bind each window's table to its own module-level name.\n",
"df_2016 = window_frames[\"2016-10-31\"]\n",
"df_2019 = window_frames[\"2019-10-31\"]\n",
"df_2022 = window_frames[\"2022-10-31\"]"
]
},
{
"cell_type": "code",
"execution_count": 180,
"id": "39bcd74a-5828-47d5-9709-a1c1eb4540f7",
"metadata": {},
"outputs": [],
"source": [
"_ACCOUNT_COL = 'Registrar Account - ID'\n",
"\n",
"\n",
"def _restrict_to(frame, account_ids):\n",
"    \"\"\"Return a copy of `frame` limited to the rows whose account id is in `account_ids`.\"\"\"\n",
"    return frame[frame[_ACCOUNT_COL].isin(account_ids)].copy()\n",
"\n",
"\n",
"# Account ids present in each window's filtered client table.\n",
"ids_2016, ids_2019, ids_2022 = (set(frame[_ACCOUNT_COL]) for frame in (df_2016, df_2019, df_2022))\n",
"\n",
"# Accounts present in all three windows, and in the last two windows only.\n",
"common_ids = set.intersection(ids_2016, ids_2019, ids_2022)\n",
"common_id = ids_2019.intersection(ids_2022)\n",
"\n",
"df_2016_common = _restrict_to(df_2016, common_ids)\n",
"df_2019_common = _restrict_to(df_2019, common_ids)\n",
"df_2022_common = _restrict_to(df_2022, common_ids)\n",
"\n",
"df_2019_common2 = _restrict_to(df_2019, common_id)\n",
"df_2022_common2 = _restrict_to(df_2022, common_id)"
]
},
{
"cell_type": "code",
"execution_count": 181,
"id": "c9526723-3aaf-4a7a-961a-a426e8aa7a9b",
"metadata": {},
"outputs": [],
"source": [
"# Evolution of the clusters over time\n",
"\n",
"def _finite_features(frame):\n",
"    \"\"\"Select the `features` columns, turn +/-inf into NaN and drop incomplete rows.\"\"\"\n",
"    return frame[features].replace([np.inf, -np.inf], np.nan).dropna().copy()\n",
"\n",
"\n",
"# Feature matrices for the accounts present in all three windows ...\n",
"X_2016 = _finite_features(df_2016_common)\n",
"X_2019 = _finite_features(df_2019_common)\n",
"X_2022 = _finite_features(df_2022_common)\n",
"\n",
"# ... and for the accounts present in the last two windows.\n",
"X_2019_2 = _finite_features(df_2019_common2)\n",
"X_2022_2 = _finite_features(df_2022_common2)\n",
"\n",
"# `scaler` and `km` come from elsewhere in the notebook (presumably fitted on the\n",
"# full-period features - TODO confirm); they are only applied here, never refitted.\n",
"X_2016_scaled = scaler.transform(X_2016)\n",
"X_2019_scaled = scaler.transform(X_2019)\n",
"X_2022_scaled = scaler.transform(X_2022)\n",
"\n",
"X_2019_scaled2 = scaler.transform(X_2019_2)\n",
"X_2022_scaled2 = scaler.transform(X_2022_2)\n",
"\n",
"labels_2016 = km.predict(X_2016_scaled)\n",
"labels_2019 = km.predict(X_2019_scaled)\n",
"labels_2022 = km.predict(X_2022_scaled)\n",
"\n",
"labels_2019_2 = km.predict(X_2019_scaled2)\n",
"labels_2022_2 = km.predict(X_2022_scaled2)"
]
},
{
"cell_type": "code",
"execution_count": 182,
"id": "85fde921-cad6-4528-b27f-261522304e33",
"metadata": {},
"outputs": [],
"source": [
"# Attach the predicted labels by index rather than by position. Each label array has one\n",
"# entry per row of its X_* matrix, and those matrices had rows containing NaN/inf dropped.\n",
"# A positional assignment would fail with a length mismatch as soon as one row is dropped;\n",
"# aligning on the index leaves NaN for such rows instead.\n",
"df_2016_common[\"cluster_kmeans\"] = pd.Series(labels_2016, index=X_2016.index)\n",
"df_2019_common[\"cluster_kmeans\"] = pd.Series(labels_2019, index=X_2019.index)\n",
"df_2022_common[\"cluster_kmeans\"] = pd.Series(labels_2022, index=X_2022.index)\n",
"\n",
"df_2019_common2[\"cluster_kmeans\"] = pd.Series(labels_2019_2, index=X_2019_2.index)\n",
"df_2022_common2[\"cluster_kmeans\"] = pd.Series(labels_2022_2, index=X_2022_2.index)"
]
},
{
"cell_type": "code",
"execution_count": 208,
"id": "6a1351c8-5819-493e-80b6-7274bc6d8b03",
"metadata": {},
"outputs": [],
"source": [
"# Correspondence between the numeric k-means codes (0.0 .. 4.0) and the new labels (1 .. 5)\n",
"mapping = {float(code): code + 1 for code in range(5)}\n",
"\n",
"# Create the new 'cluster' column on every per-window table\n",
"for _labelled in (df_2016_common, df_2019_common, df_2022_common, df_2019_common2, df_2022_common2):\n",
"    _labelled['cluster'] = _labelled['cluster_kmeans'].map(mapping)\n",
"\n",
"clusters_keep = [1, 2, 3]\n",
"\n",
"\n",
"def _kept_clusters(frame):\n",
"    \"\"\"Return the rows of `frame` whose 'cluster' label is listed in `clusters_keep`.\"\"\"\n",
"    return frame[frame[\"cluster\"].isin(clusters_keep)]\n",
"\n",
"\n",
"df_2016_f = _kept_clusters(df_2016_common)\n",
"df_2019_f = _kept_clusters(df_2019_common)\n",
"df_2022_f = _kept_clusters(df_2022_common)\n",
"\n",
"df_2019_f2 = _kept_clusters(df_2019_common2)\n",
"df_2022_f2 = _kept_clusters(df_2022_common2)"
]
},
{
"cell_type": "code",
"execution_count": 211,
"id": "c50a2b85-6124-4df2-8b92-590bbc4865ba",
"metadata": {},
"outputs": [],
"source": [
"# Merge: one row per account, one cluster column per window\n",
"\n",
"def _labels_as(frame, column):\n",
"    \"\"\"Keep only the account id and the cluster label, with the label renamed to `column`.\"\"\"\n",
"    return frame[[ID_COL, \"cluster\"]].rename(columns={\"cluster\": column})\n",
"\n",
"\n",
"# Inner joins: only accounts present in every joined table are kept.\n",
"labels_16_19 = _labels_as(df_2016_f, \"cluster_2016\").merge(_labels_as(df_2019_f, \"cluster_2019\"), on=ID_COL)\n",
"df_evo = labels_16_19.merge(_labels_as(df_2022_f, \"cluster_2022\"), on=ID_COL)\n",
"\n",
"df_evo2 = _labels_as(df_2019_f2, \"cluster_2019\").merge(_labels_as(df_2022_f2, \"cluster_2022\"), on=ID_COL)"
]
},
{
"cell_type": "code",
"execution_count": 212,
"id": "9d78a9ee-8b17-457d-8b4c-8f93ad1584b3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>cluster_2019</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_2016</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>38.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>82.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"cluster_2019 1 2 3\n",
"cluster_2016 \n",
"1 8.0 2.0 10.0\n",
"2 1.0 38.0 5.0\n",
"3 NaN 5.0 82.0"
]
},
"execution_count": 212,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of accounts for each (cluster_2016, cluster_2019) pair. fill_value=0 shows 0 instead\n",
"# of NaN for transitions no account made, and keeps the counts as integers.\n",
"df_evo.groupby([\"cluster_2016\", \"cluster_2019\"]).size().unstack(fill_value=0)"
]
},
{
"cell_type": "code",
"execution_count": 213,
"id": "9aaf2863-dbd2-43d1-861c-98ddd9354881",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>cluster_2022</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_2019</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>34.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" <td>59.0</td>\n",
" <td>15.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>6.0</td>\n",
" <td>16.0</td>\n",
" <td>206.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"cluster_2022 1 2 3\n",
"cluster_2019 \n",
"1 34.0 2.0 7.0\n",
"2 NaN 59.0 15.0\n",
"3 6.0 16.0 206.0"
]
},
"execution_count": 213,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of accounts for each (cluster_2019, cluster_2022) pair. fill_value=0 shows 0 instead\n",
"# of NaN for transitions no account made, and keeps the counts as integers.\n",
"df_evo2.groupby([\"cluster_2019\", \"cluster_2022\"]).size().unstack(fill_value=0)"
]
},
{
"cell_type": "code",
"execution_count": 218,
"id": "82e96b64-0d48-4bbf-b334-0dcb311440d0",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAIjCAYAAACTRapjAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAYqRJREFUeJzt3XdclfX///HnQeGALDeIA3Ei7lyR2xQ0c6Rm2dDUbJFmlpWZOVLRylWOlpGaNqy0Mkdu85Oai3LnKksFNwgoGFy/P/pxvp4APSjHg1yPe7frduu8r/e5rtd1WC9f7/f1viyGYRgCAACAabi5OgAAAADcWiSAAAAAJkMCCAAAYDIkgAAAACZDAggAAGAyJIAAAAAmQwIIAABgMiSAAAAAJkMCCAAAYDIkgHDIqFGjZLFYXB2GJOmTTz6RxWLRH3/84epQbrncfB3y4+dksVg0atQoV4eRRX6N61Z45pln1K5dO1eHgZuwfPly+fj46PTp064OBbcREsB8JvOPtsVi0caNG7PsNwxD5cuXl8Vi0b333ntD5xg/frwWL158k5EWHHv37tWoUaPyVaKUG3w987cTJ05o1KhRio2NdXUoWRw9elQfffSRXn31VVvbX3/9pdGjR6tx48YqVqyYSpYsqVatWmnVqlXZHuPChQt64oknVKpUKXl7e6t169basWNHln5ffPGFHnnkEVWtWlUWi0WtWrW6Zmw7duxQ586dVbx4cRUpUkS1atXSO++8c1PXm1urV69Wv379VK1aNRUpUkSVKlXS448/rpMnT2bb/+eff1azZs1UpEgRBQYGatCgQUpKSrLrk5SUpJEjR6p9+/YqXry4LBaLPvnkkxxjmD59umrUqCGr1aqyZctqyJAhSk5OtuvTvn17ValSRdHR0Td9zTARA/lKTEyMIcnw9PQ0nn766Sz7165da0gyrFar0bFjxxs6h7e3t9GnT59cvefKlSvGpUuXbuh8eS3zMzp69GieHG/hwoWGJGPt2rV5cjxnyu7rkNPX859//jEuXbpkZGRk3KLork+SMXLkSFeHkYUz49q6dashyYiJiXHK8W/Gc889Z1SrVs2u7d133zW8vLyMXr16GdOnTzemTp1q3HHHHYYk4+OPP7brm56ebtx1112Gt7e3MWrUKGP69OlGWFiY4evra/z+++92fVu2bGn4+PgYrVu3NooVK2a0bNkyx7hWrFhheHh4GE2aNDEmT55sfPDBB8bLL79sDB06NM+u3RENGjQwQkJCjJdeesn48MMPjWHDhhm+vr5GQECAcfLkSbu+O3fuNDw9PY369esbs2bNMoYPH25YrVajffv2dv2OHj1qSDIqVKhgtGrV6prfGy+99JIhyejRo4cxa9YsY+DAgUbhwoWNiIiILH1nzpxpFClSxEhMTMyz60fBRgKYz2QmN926dTNKlixpXLlyxW7/gAEDjAYNGhjBwcG3JAFMSkq6oXM4kysTwIyMDCMlJSVPzptXbiShdxUSwLxzsz+baWlpRsmSJY3XXnvNrn337t3G6dOn7douX75shIaGGuXKlbNr/+KLLwxJxsKFC21tp06dMooWLWr06tXLru+xY8eM9PR0wzAMo2bNmjkmgAkJCUZAQIBx33332fq7yvr167PEsH79ekOSMXz4cLv2Dh06GGXKlDESEhJsbR9++KEhyVixYoWt7fLly7bk8VrfGydOnDAKFy5sPProo3bt7777riHJ+O677+za4+PjjUKFChmzZ8++oWuF+TAEnE/16tVLZ8+e1cqVK21taWlp+uqrr/TQQw9l+563335bd911l0qUKCEvLy81aNBAX331lV0fi8Wi5ORkzZkzxzbU/Nhjj0n6v/lle/fu1UMPPaRixYqpWbNmdvv+69NPP1Xjxo1VpEgRFStWTC1atNCPP/5o12fZsmVq3ry5vL295evrq44dO2rPnj0OfQ579uxRmzZt5OXlpXLlymns2LHKyMjItu
+NnOeTTz7R/fffL0lq3bq17TNZt26dJKlixYq69957tWLFCjVs2FBeXl56//33JUkxMTFq06aNSpcuLavVqrCwMM2aNSvLOTKPsXHjRjVu3Fienp6qVKmS5s6da9fvypUrGj16tKpWrSpPT0+VKFFCzZo1s/se+O/X4Vpfz5zmAM6cOVM1a9aU1WpVUFCQoqKidOHCBbs+rVq1Uq1atbR37161bt1aRYoUUdmyZfXmm29e8/PMlJqaqueff16lSpWSr6+vOnfurL///jvbvsePH1e/fv0UEBAgq9WqmjVr6uOPP87S791331XNmjVt32sNGzbUggULrhvL5cuXNWrUKFWrVk2enp4qU6aMunXrpsOHD+f4nscee0wVK1bM0p7dz8HKlSvVrFkzFS1aVD4+PqpevbptSHXdunVq1KiRJKlv3762r9HVQ35btmxR+/bt5e/vryJFiqhly5b63//+l+15s/vZjIuLU9++fVWuXDlZrVaVKVNGXbp0ue6Uho0bN+rMmTNq27atXXvNmjVVsmRJuzar1ap77rlHf//9ty5evGhr/+qrrxQQEKBu3brZ2kqVKqWePXvq22+/VWpqqq29fPnycnO7/p+cBQsWKD4+XuPGjZObm5uSk5Nz/Jl3thYtWmSJuUWLFipevLj27dtna0tMTNTKlSv1yCOPyM/Pz9beu3dv+fj46Msvv7S1Wa1WBQYGXvfcmzZt0j///KMHH3zQrj3z9eeff27XXrp0adWpU0fffvut4xcIUyMBzKcqVqyo8PBwffbZZ7a2ZcuWKSEhIcsvhEzTpk1T/fr1NWbMGI0fP16FCxfW/fffrx9++MHWZ968ebJarWrevLnmzZunefPm6cknn7Q7zv3336+UlBSNHz9eAwYMyDHG0aNH69FHH5W7u7vGjBmj0aNHq3z58lqzZo3d+Tp27CgfHx9NnDhRI0aM0N69e9WsWbPr/oGKi4tT69atFRsbq1deeUWDBw/W3LlzNW3atCx9b/Q8LVq00KBBgyRJr776qu0zqVGjhq3PgQMH1KtXL7Vr107Tpk1TvXr1JEmzZs1ScHCwXn31VU2aNEnly5fXM888oxkzZmQ5z6FDh9SjRw+1a9dOkyZNUrFixfTYY4/ZJaijRo3S6NGj1bp1a02fPl3Dhw9XhQoVsp1PdfV1X+/rebVRo0YpKipKQUFBmjRpkrp37673339fERERunLlil3f8+fPq3379qpbt64mTZqk0NBQvfzyy1q2bFmOx8/0+OOPa+rUqYqIiNCECRPk7u6ujh07ZukXHx+vO++8U6tWrdKzzz6radOmqUqVKurfv7+mTp1q6/fhhx9q0KBBCgsL09SpUzV69GjVq1dPW7ZsuWYc6enpuvfeezV69Gg1aNBAkyZN0nPPPaeEhATt3r37utdxPXv27NG9996r1NRUjRkzRpMmTVLnzp1tCVyNGjU0ZswYSdITTzxh+xq1aNFCkrRmzRq1aNFCiYmJGjlypMaPH68LFy6oTZs2+uWXX7KcL7ufze7du2vRokXq27evZs6cqUGDBunixYs6duzYNWP/+eefZbFYVL9+fYeuNS4uTkWKFFGRIkVsbTt37tQdd9yRJUlq3LixUlJS9Pvvvzt07KutWrVKfn5+On78uKpXry4fHx/5+fnp6aef1uXLl3N9vKtdvnxZPXr00LZt2274GElJSUpKSrJLknft2qV//vlHDRs2tOvr4eGhevXqaefOnbk+T2by7OXlZdee+flv3749y3saNGign3/+Odfngkm5ugQJe5nDm1u3bjWmT59u+Pr62oYc77//fqN169aGYRjZDgH/d2gyLS3NqFWrltGmTRu79pyGDEeOHGlIyjJ0c/W+TAcPHjTc3NyyHabJnHN28eJFo2jRosaAAQPs9sfFxRn+/v5Z2v9r8ODBhiRjy5YttrZTp04Z/v7+dkPAN3ueaw0BBwcHG5KM5cuXZ9mX3VBwZGSkUalSpWyPsWHDBrvrsF
qtxgsvvGBrq1u37nWH9f/7dTCMnL+e/x0qP3XqlOHh4WFERETYfc2mT5+eZX5Xy5YtDUnG3LlzbW2pqalGYGCg0b1
"text/plain": [
"<Figure size 800x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Transition counts: rows are the cluster_2016 labels, columns the cluster_2019 labels.\n",
"# Each cell is the number of df_evo rows with that pair (0 when a pair never occurs).\n",
"transition1 = (\n",
"    df_evo\n",
"    .groupby([\"cluster_2016\", \"cluster_2019\"])\n",
"    .size()\n",
"    .unstack(fill_value=0)\n",
")\n",
"\n",
"# Annotated heatmap of the raw counts; sns.heatmap returns the Axes it drew on,\n",
"# which is then used to set the labels and the title.\n",
"plt.figure(figsize=(8, 6))\n",
"ax = sns.heatmap(transition1, annot=True, fmt=\"d\", cmap=\"Blues\")\n",
"ax.set_ylabel(\"Cluster 2016-2019\")\n",
"ax.set_xlabel(\"Cluster 2019-2022\")\n",
"ax.set_title(\"Matrice de transition des clusters (2016 → 2019)\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 219,
"id": "aac4fc2b-8f5c-49d5-aaa6-8c563dfc0ef3",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoUAAAIjCAYAAAB1bGEnAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAYjtJREFUeJzt3Xt8zvX/x/HntdnRZmObjZk5H5ZjTg0ZkVMRJUVFTvXVUBZFyik5RJFzEYVECOXMckgUOZPzsbCxYRg2ts/vD79dddk1ttm1y3jcv7frdvvu/Xl/3p/X53LFa6/3+/O+TIZhGAIAAMAjzcHeAQAAAMD+SAoBAABAUggAAACSQgAAAIikEAAAACIpBAAAgEgKAQAAIJJCAAAAiKQQsItx48Zp5syZ9g4DAAAzkkI8kEwmk7p165Zl433zzTcymUz6888/79m3bt26qlu3rvnnEydOyGQy6ZtvvjG3DRw4UCaTKVOxjBs3ToMHD9YTTzyRqfNTmEwmDRw48L7GyIms/Xncjb3fp7feektPP/203a6P+7dixQp5eHjo/Pnz9g4FsCmSQqRbSmKV8nJ1dVWpUqXUrVs3RUdH2zs8uxs6dKgWLVp01z5bt25V//799fPPP6tkyZLZE1g6pSf+B9WyZcseyAT5+PHjmjp1qj744ANz299//61BgwapevXqyps3r3x9fVW3bl2tWbPG6hiXLl3SG2+8IT8/P+XOnVv16tXT9u3bU/WbO3euXn31VZUsWVImk8niFxtrtm/frubNmytfvnxyd3dXuXLlNHbs2Pu634yKjIxUx44dVapUKbm7u6tYsWLq3Lmzzp49a7X/pk2bVLt2bbm7uysgIEA9evTQ1atXLfpcvXpVAwYMUOPGjZUvX757/gIxfvx4lS1bVi4uLgoMDFRERITi4+Mt+jRu3FglSpTQsGHD7vuegQeaAaTT9OnTDUnG4MGDjZkzZxpTpkwx2rdvbzg4OBhFixY14uPjs+xakozw8PAsGy8l9q1bt96zb0JCgpGQkGD++fjx44YkY/r06ea2mzdvGtevX7c4L3fu3Eb79u3vGUdkZGSGYk+LJGPAgAFZMpZhpC/+B0FycrJx/fp149atW+a28PBwI62/zq5fv27cvHkzu8Kz8PbbbxulSpWyaBs3bpzh5uZmtGnTxhg/frwxZswY4/HHHzckGdOmTbPom5SUZNSsWdPInTu3MXDgQGP8+PFGSEiI4enpaRw6dMiib1hYmOHh4WHUq1fPyJs3rxEWFpZmXCtXrjScnZ2NGjVqGJ9//rnx1VdfGe+//77Ru3fvLLv39KhSpYpRtGhR47333jOmTJli9O3b1/D09DT8/f2Ns2fPWvTdsWOH4erqalSuXNmYNGmS0a9fP8PFxcVo3LixRb+U/14LFy5s1K1bN9V/u//13nvvGZKMVq1aGZMmTTK6d+9u5MqVy2jYsGGqvhMnTjTc3d2Ny5cvZ9n9Aw8akkKkW1qJVUREhCHJmD17dprnXr16NUPXsmdSeCdrSaE12Z1U2TMpzOifp63dLSm0l8TERMPX19f48MMPLdr37t1rnD9/3qLtxo0bRpkyZYxChQpZtM+dO9eQZMybN8/cdu7cOcPb29to06aNRd9Tp04ZSUlJhmEYxmOPPZZmUhgXF2f4+/sbLVu2NPe3l/Xr16eKYf369YYko1+/fhbtTZo0MQoUKGDExcWZ26ZMmWJIMlauXGluu3Hjhjmh3Lp1a5r/7Z45c8bIlSuX8dprr1m0jxs3zpBk/PTTTxbt0dHRhqOjo/H1119n6l6BnIDpY9y3p556StLtqTJJev311+Xh4aGjR4+qadOm8vT01CuvvCJJio+P17vvvqugoCC5uLiodOnSGjVqlAzDsDr2d999p9KlS8vV1VVVqlTRhg0bLI6fPHlSb731lkqXLi03Nzf5+PjoxRdf1IkTJ6yOd+3aNb355pvy8fFRnjx51K5dO128eNGiz5
1rCq25c02hyWRSfHy8vv32W/P0+uuvv24+fvr0aXXs2FH+/v5ycXHRY489pmnTpt31GikSEhLUs2dP+fn5ydPTU82bN9c///xjtW9mr3O3+FPu9a+//lLbtm2VN29e1a5dW5K0e/duvf766ypWrJhcXV0VEBCgjh07KjY21ur7deTIEb3++uvy9vaWl5eXOnTooGvXrln0Xb16tWrXri1vb295eHiodOnSFtOvd64pfP311zVhwgTzfaS8/ntvd04t79ixQ02aNFGePHnk4eGh+vXr6/fff7fok7Jc4rffflNERIR5+rZly5bpWlu2ceNGxcTEqEGDBhbtjz32mHx9fS3aXFxc1LRpU/3zzz+6cuWKuX3+/Pny9/fX888/b27z8/NT69attXjxYiUkJJjbg4KC5OBw77/SZ8+erejoaH3yySdycHBQfHy8kpOT73meLdSpUydVzHXq1FG+fPm0f/9+c9vly5e1evVqvfrqq8qTJ4+5vV27dvLw8NAPP/xgbnNxcVFAQMA9r71582bdunVLL7/8skV7ys9z5syxaM+fP78qVKigxYsXp/8GgRwml70DQM539OhRSZKPj4+57datW2rUqJFq166tUaNGyd3dXYZhqHnz5lq7dq06deqkSpUqaeXKlerdu7dOnz6t0aNHW4y7fv16zZ07Vz169JCLi4smTpyoxo0ba8uWLSpXrpyk22v0Nm3apJdfflmFChXSiRMnNGnSJNWtW1d//fWX3N3dLcbs1q2bvL29NXDgQB08eFCTJk3SyZMntW7dukw/OCJJM2fOVOfOnVW9enW98cYbkqTixYtLkqKjo/XEE0+YH57x8/PT8uXL1alTJ12+fFnvvPPOXcfu3LmzZs2apbZt26pmzZr65Zdf9Mwzz6Tqdz/XuVv8KV588UWVLFlSQ4cONSfxq1ev1rFjx9ShQwcFBARo3759+uqrr7Rv3z79/vvvqd7T1q1bq2jRoho2bJi2b9+uqVOnKn/+/BoxYoQkad++fXr22WdVoUIFDR48WC4uLjpy5Ih+++23NGN/8803debMGa1evTpdT3Tv27dPTz75pPLkyaP33ntPTk5O+vLLL1W3bl2tX79eNWrUsOjfvXt35c2bVwMGDNCJEyc0ZswYdevWTXPnzr3rdTZt2iSTyaTKlSvfMyZJioqKkru7u8VndseOHXr88cdTJU7Vq1fXV199pUOHDql8+fLpGj/FmjVrlCdPHp0+fVotWrTQoUOHlDt3br322msaPXq0XF1dMzTef924cUOvvvqq+vTpo6pVq2ZqjKtXr+rq1asWifOePXt069atVGM6OzurUqVK2rFjR4avk5JQu7m5WbSnvP/btm1LdU6VKlVy7LpbIF3sXKlEDpIyBbtmzRrj/Pnzxt9//23MmTPH8PHxMdzc3Ix//vnHMAzDaN++vSHJ6NOnj8X5ixYtMiQZQ4YMsWhv1aqVYTKZjCNHjpjbJBmSjD///NPcdvLkScPV1dVo2bKlue3atWup4ty8ebMhyZgxY0aq2KtUqWIkJiaa2z/99FNDkrF48WJzW1hYmMXUm7Xp4wEDBqSarkxr+rVTp05GgQIFjJiYGIv2l19+2fDy8rJ6Dyl27txpSDLeeusti/a2bdummj6+n+vcLf6Ue71zutIwrL//33//vSHJ2LBhQ6oxOnbsaNG3ZcuWho+Pj/nn0aNHG5JSTa/+l7U/j7tNH9/5PrVo0cJwdnY2jh49am47c+aM4enpadSpU8fclvKZadCggZGcnGxu79mzp+Ho6GhcunQpzRgNwzBeffVVi3u7m8OHDxuurq6ppjJz586d6j0zDMNYunSpIclYsWKF1fHuNn1coUIFw93d3XB3dze6d+9uLFiwwOjevbshyXj55ZfTFW9arly5YoSGhhp58+Y1duzYkakxPv74Y0OSxdrbefPmpfpMpXjxxReNgIAAq2Pdbfp427ZthiTj448/tmhfsWKFIcnw8PBIdc7QoUMNSU
Z0dHQG7wrIGZg+RoY1aNBAfn5+CgoK0ssvvywPDw8tXLhQgYGBFv26du1q8fOyZcvk6OioHj16WLS/++67MgxDy5c
"text/plain": [
"<Figure size 800x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Transition counts between consecutive clusterings:\n",
"#   transition1: rows cluster_2016, columns cluster_2019 (from df_evo)\n",
"#   transition2: rows cluster_2019, columns cluster_2022 (from df_evo2)\n",
"transition1 = df_evo.groupby([\"cluster_2016\", \"cluster_2019\"]).size().unstack(fill_value=0)\n",
"transition2 = df_evo2.groupby([\"cluster_2019\", \"cluster_2022\"]).size().unstack(fill_value=0)\n",
"\n",
"# Row-normalise the counts so that every row sums to 1: each cell becomes the share\n",
"# of the origin cluster that ends up in the destination cluster.\n",
"transition_pct1 = transition1.div(transition1.sum(axis=1), axis=0)\n",
"transition_pct2 = transition2.div(transition2.sum(axis=1), axis=0)\n",
"\n",
"plt.figure(figsize=(8,6))\n",
"sns.heatmap(transition_pct2, annot=True, fmt=\".2f\", cmap=\"Blues\")\n",
"\n",
"# The matrix drawn here is transition_pct2 (cluster_2019 rows, cluster_2022 columns),\n",
"# so the labels and the title describe that transition rather than the 2016 one.\n",
"plt.xlabel(\"Cluster 2022-2025\")\n",
"plt.ylabel(\"Cluster 2019-2022\")\n",
"plt.title(\"Probabilité de transition (2019 → 2022)\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 169,
"id": "0f5cc8dd-4f9b-4c66-9e21-951b05b506e0",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"link": {
"source": [
0,
0,
1,
1,
2,
2,
3
],
"target": [
4,
5,
5,
6,
5,
6,
6
],
"value": {
"bdata": "CQEgDQZcAQ==",
"dtype": "i1"
}
},
"node": {
"label": [
"2019_0",
"2019_1",
"2019_2",
"2019_4",
"2022_0",
"2022_1",
"2022_2",
"2022_4"
]
},
"type": "sankey"
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermap": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermap"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
}
}
},
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAt4AAAFoCAYAAABg7/yqAAAgAElEQVR4Xu2dCZgU1dWwz8AMDAzLgKiIOyBxS/iMye+fxGwmGmPyG7ckxhgVxQ0RNwTBhUVExBXcUMEYI6LGiKJ8xiW4JZovavyI2VwTt7iwDAyzMPtf55IzXorqmZ6p7uqe6befp5/urqp7b9V7a6bePn3q3qKW4CE8IAABCEAAAhCAAAQgAIGsEihCvLPKl8ohAAEIQAACEIAABCDgCCDenAgQgAAEIAABCEAAAhBIgADinQBkmoAABCAAAQhAAAIQgADizTkAAQhAAAIQgAAEIACBBAgg3glApgkIQAACEIAABCAAAQgg3pwDEIAABCAAAQhAAAIQSIAA4p0AZJqAAAQgAAEIQAACEIAA4s05AAEIQAACEIAABCAAgQQIIN4JQKYJCEAAAhCAAAQgAAEIIN6cAxCAAAQgAAEIQAACEEiAAOKdAGSagAAEIAABCEAAAhCAAOLNOQABCEAAAhCAAAQgAIEECCDeCUCmCQhAAAIQgAAEIAABCCDenAMQgAAEIAABCEAAAhBIgADinQBkmoAABCAAAQhAAAIQgADizTkAAQhAAAIQgAAEIACBBAgg3glApgkIQAACEIAABCAAAQgg3pwDEIAABCAAAQhAAAIQSIAA4p0AZJqAAAQgAAEIQAACEIAA4s05AAEIQAACEIAABCAAgQQIIN4JQKYJCEAAAhCAAAQgAAEIIN6cAxCAAAQgAAEIQAACEEiAAOKdAGSagAAEIAABCEAAAhCAAOLNOQABCEAAAhCAAAQgAIEECCDeCUCmCQhAAAIQgAAEIAABCCDenAMQgAAEIAABCEAAAhBIgADinQBkmoAABCAAAQhAAAIQgADizTkAAQhAAAIQgAAEIACBBAgg3glApgkIQAACEIAABCAAAQgg3pwDEIAABCAAAQhAAAIQSIAA4p0AZJqAAAQgAAEIQAACEIAA4s05AAEIQAACEIAABCAAgQQIIN4JQKYJCEAAAhCAAAQgAAEIIN6cAxCAAAQgAAEIQAACEEiAAOKdAGSagAAEIAABCEAAAhCAAOLNOQABCEAAAhCAAAQgAIEECCDeCUCmCQhAAAIQgAAEIAABCCDenAMQgAAEIAABCEAAAhBIgADinQBkmoAABCAAAQhAAAIQgADizTkAAQhAAAIQgAAEIACBBAgg3glApgkIQAACEIAABCAAAQgg3pwDEIAABCAAAQhAAAIQSIAA4p0AZJqAAAQgAAEIQAACEIAA4s05AAEIQAACEIAABCAAgQQIIN4JQKYJCEAAAhCAAAQgAAEIIN6cAxCAAAQgAAEIQAACEEiAAOKdAGSagAAEIAABCEAAAhCAAOLNOQABCEAAAhCAAAQgAIEECCDeCUCmCQhAAAIQgAAEIAABCCDenAMQgAAEIAABCEAAAhBIgADinQBkmoAABCAAAQhAAAIQgADizTkAAQhAAAIQgAAEIACBBAgg3glApgkIQAACEIAABCAAAQgg3pwDEIAABCAAAQhAAAIQSIAA4p0AZJqAAAQgAAEIQAACEIAA4s05AAEIQAACEIAABCAAgQQIIN4JQKYJCEAAAhCAAAQgAAEIIN6cAxCAAAQgAAEIQAACEEiAAOKdAGSagAAEIAABCEAAAhCAAOLNOQABCEAAAhCAAAQgAIEECCDeCUCmCQhAAAIQgAAEIAABCCDenAMQgAAEIAABCEAAAhBIgADinQBkmoAABCAAAQhAAAIQgADizTkAAQhAAAIQgAAEIACBBAgg3glApgkIQAACEIAABCAAAQgg3pwDEIAABCAAAQhAAAIQSIAA4p0AZJqAAAQgAAEIQAACEIAA4s05AAEIQAACEIAABCAAgQQIIN4JQKYJCEAAAhCAAAQgAAEIIN6cAxCAAAQgAAEIQAACEEiAAOKdAGSagAAEIAABCEAAAhCAAOLNOQABCEAAAhCAAAQgAI
EECCDeCUCmCQhAAAIQgAAEIAABCCDenAMQgAAEIAABCEAAAhBIgADinQBkmoAABCAAAQhAAAIQgADizTkAAQhAAAIQgAAEIACBBAgg3glApgkIQAACEIAABCAAAQgg3pwDEIAABCAAAQhAAAIQSIAA4p0AZJqAAAQgAAEIQAACEIAA4s05AAEIQAACEIAABCAAgQQIIN4JQKYJCEAAAhCAAAQgAAEIIN6cAxCAAAQgAAEIQAACEEiAAOKdAGSagAAEIAABCEAAAhCAAOLNOQABCEAAAhCAAAQgAIEECCDeCUCmCQhAAAIQgAAEIAABCCDenAMQgAAEIAABCEAAAhBIgADinQBkmoAABCAAAQhAAAIQgADizTkAAQhAAAIQgAAEIACBBAgg3glApgkIQAACEIAABCAAAQgg3pwDEIAABCAAAQhAAAIQSIAA4p0AZJqAAAQgAAEIQAACEIAA4s05AAEIQAACEIAABCAAgQQIIN4JQKYJCEAAAhCAAAQgAAEIIN6cAxCAAAQgAAEIQAACEEiAAOKdAGSagAAEIAABCEAAAhCAAOLNOQABCEAAAhCAAAQgAIEECCDeCUCmCQhAAAIQgAAEIAABCCDeXeQcqK7ZKI1NTTKwf9kWe9zS0iIV66ukZ48eMnDAluvbO8T1G6pd2X5lfdrbdIv1NbUbpXZjvQwI9qukuGeHy1MAAhCAAAQgkO8EmpqapaqmNuW1sr1rYVvX8Pr6BncdLS3tJb17lXQIRdzrf4caY+OMEEC8Y2CsqqqSt956K0YNWxYdMWKE9OvXr3XFMy+slHFTrm39vPvIneSWuefJkMED3bKK9RvkhydPkw8/Wes+H/2DA2TqhGOlZ88erWV0m4OOniiPLblKBpf3b12+LpD1c6bfIH985R9u2ec/O0qunzVBygd+2n5bB/fgb34nF85Z6DbpFfyzWHLTxaL7xwMCEIAABCDQHQio2M5f9IDcetfDrYej19kpE34qxT03BZvauha2dQ1vaGySY8ZdKn97/V+tdU+feIL88PvfSAtdOtf/tCpio0QJIN4xcK9cuVJuXviG9Os/PEYtnxbdUPm2fPvrLTJ8+PBAnHtKWVmZ/OGVN6S+sVkO2H8f2VjXIOOnzpMRuwyTOVPGSlGRyOTZC6W6pk7mXnSKrFqzTsace6VMPPUoOfCr+0jlhiqZOGuRvPHPD5wYL7t9pjQ31UtdXZ2Ulw+Ss6bdLD2CSPflQV11wTfu0yZf4+q+dOLxsn79OtmwYcMWx1VSUiKDBg2Sj1ZXygnnXOna3W+fPYJ/So/IQ48/L0sXTpeNtdVB+fXS3NycES5UAoFMEtC/LT3vw89MtkFdEIBA5gkUBRc9/bvVV3tGfc5Uy1p3r1695LqFS2X//7O37L37cHn73Q/llPOvlqlnHiNHfu9r8vrb78tPxs2Sqy45Tb60716y4M5lsjQISj1615zgV+Ae8punXpSaIJr9za/8l4tqnzHlOhm56/Zy1cWnSVV1jVx+wxIZ8+ODZeg2W8kTz74kc4LPd10/RYbvNFRqa2tFxT/80H0qLS2V82YuCOrYKFdPO10+WV0hx501R6aM/4l895tflI0bg1/JGxvl73//u7z0xAMydNCnQbc4fD6u0EDeyTJ69Og41RR0WcQ7RvereN95b5OUD/5cjFo+Lbp+7auBxL7pxFv/4Pv06SO9e/d2T32vf2jLArm9477H5N6bL5YNwR/t94+/SBbMOVv2/swurqK7H1whjz/zktw8e4LU19fJx2sqpb6hScZfOF8eWDhDpLlBGhoapLRPmRw+drpcO32c7LP3SFf2b2+8I2MnXi0PLpohzY11UlFRscUfve7HkCFDZOljL8iTz/5JFl090UXXa2rr5KBjJstNQbs7Dh0kq1atQrwzclZQCQQgAAEI5IJAcXGx9O3b18m3PjUYpsEnFd7hO20n444/VO564El54pmXI6+Fw3fcxgmwlglfw++af0GwrtZdJ/Xaru
00N7fIIT+bIpdOGiMjd9om8jqqXzi22mor6VHcSw4dc0nk9f/GWeOD6/da0V/l3377bRm15jkZvcOAjCD88/uV0vK
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import plotly.graph_objects as go\n",
"\n",
"# Number of rows moving from each 2019 cluster to each 2022 cluster.\n",
"# NOTE(review): another cell reads cluster_2019 / cluster_2022 from df_evo2, not df_evo;\n",
"# confirm that df_evo still carries both columns when the notebook is run top to bottom.\n",
"flows = (\n",
"    df_evo\n",
"    .groupby([\"cluster_2019\", \"cluster_2022\"])\n",
"    .size()\n",
"    .reset_index(name=\"count\")\n",
")\n",
"\n",
"# Every cluster id seen on either side, sorted, and its position in that list.\n",
"labels = sorted(set(flows[\"cluster_2019\"]) | set(flows[\"cluster_2022\"]))\n",
"label_map = dict(zip(labels, range(len(labels))))\n",
"\n",
"# Node list: all 2019 nodes first, then all 2022 nodes, so a 2022 node index is\n",
"# its position in labels shifted by len(labels).\n",
"n_labels = len(labels)\n",
"sankey_nodes = dict(label=[f\"2019_{l}\" for l in labels] + [f\"2022_{l}\" for l in labels])\n",
"sankey_links = dict(\n",
"    source=[label_map[s] for s in flows[\"cluster_2019\"]],\n",
"    target=[label_map[t] + n_labels for t in flows[\"cluster_2022\"]],\n",
"    value=flows[\"count\"]\n",
")\n",
"\n",
"fig = go.Figure(data=[go.Sankey(node=sankey_nodes, link=sankey_links)])\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 221,
"id": "4e139972-f4de-4f18-b9b3-601819f06d3f",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABUQAAAJOCAYAAAB/QA2/AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAo+ZJREFUeJzs3Xd4FNUax/HfpocaICRA6J0QilRDVxFEBEGRKiAiKhBFIwiRJqDSFJGuKEUEQbGh0gTBhoh06R0USSCBUEOA5Nw/9rKyZBMSSHaB/X7uM89zc+bM7Du7S3xzzjtnLMYYIwAAAAAAAABwAx6uDgAAAAAAAAAAnIUBUQAAAAAAAABugwFRAAAAAAAAAG6DAVEAAAAAAAAAboMBUQAAAAAAAABugwFRAAAAAAAAAG6DAVEAAAAAAAAAboMBUQAAAAAAAABugwFRAAAAAAAAAG6DAVE4zaxZs2SxWHTo0KEb9l29erUsFotWr16d5XGlV/HixfXUU0+5OowUbte4nGHMmDEqX768kpOTXR0KcMe699579eqrr7o6DADIMuSgWeN2jcsZyEGBW9e+fXu1bdvW1WHAjTEgeoeyWCzp2m6nZM6RKVOmaNasWa4OA6m4cOGCXn/99dvye3TmzBmNHj1a/fv3l4eH9VdZXFycxo4dqwYNGih//vwKCAjQvffeqwULFjg8R2Jiovr3769ChQrJ399ftWvX1g8//JCi3/Lly9W9e3eFhYXJ09NTxYsXTzO2/fv3q2PHjgoKCpK/v7/KlCmjgQMH3vCaLly4oMmTJ6tJkyYqWLCgcubMqXvuuUdTp05VUlJSiv7JyckaM2aMSpQoIT8/P1WuXFmffvppin7r1q1Tr169VL16dXl7e8tisaQaQ0xMjLp162aLvVq1avr8889vGHtm27Vrl1599VVVrVpVOXPmVMGCBdW8eXOtX7/eYf+jR4+qbdu2CggIUK5cufToo4/qwIEDKfpNnTpVTzzxhIoWLSqLxZLmH3I//PCD6tWrp2zZsilPnjxq06ZNuv6YlqS///5bw4YNU61atZQnTx4FBgaqUaNGWrFihcP+8fHxevbZZ5U/f35lz55d9913nzZu3GjXJyPf7z///FMRERGqWLGismfPrqJFi6pt27bas2dPir79+/fX5MmTFR0dna5rA+DeyEHhDOSgVhnJQfft26c2bdooT548ypYtm+rVq6dVq1al65qyIgdNTk7WrFmz1LJlSxUpUkTZs2dXWFiY3njjDV28eNGub0bzpqx0J+Vwjrj6s5RS/+/EqFGj7Pr1799fX3zxhbZs2ZKuawMyncEdac6cOXbbgw8+aCSlaI+OjnZ1qDZXrlwxCQkJJjk52dZWsWJF07BhwxR9k5KSTEJCgklKSnJihGkrVqyY6dq1q6vDSCEr4zpx4oSRZIYOHZol578V7777rsmVK5dJSEiwtX377bfG29vbPProo2b8+PFm0qRJ5r777jOSzJAhQ1Kco3379sbLy8v07dvXvP/++yY8PNx4eXmZX375xa5f165djZ+fn6lTp44pXLiwKVasWKpxbdq0yeTOnduEhoaaUaNGmenTp5vBgwebp5566obX9NdffxmLxWIaN25sxowZY6ZNm2Zat25tJJkuXbqk6D9gwAAjyfTo0cN88MEHpnnz5kaS+fTTT+36DR061Hh7e5vq1aubsmXLmtR+9Z8+fdqULl3a5MyZ0wwaNMhMmjTJNGjQwEgyc+fOvWH8memVV14xAQEBpnv37ub99983Y8aMMaVKlTKenp7mhx9+sOt79uxZU6ZMGRMUFGRGjx5txo0bZ4oUKWIKFy5sYmNj7foWK1bM5M2b1zz00EPGy8sr1X873377rfHw8DA1atQw7733nhkxYoQJDAw0ISEh5vjx4zeMf+LEicbf39906NDBTJo0yYwfP95Uq1bNSDIzZsyw65uUlGTq1KljsmfPbl5//XUzadIkExoaanLmzG
n27NljF1N6v9+PP/64KVCggHnhhRfM9OnTzYgRI0xwcLDJnj27+euvv1K8foECBczgwYNveF0AQA7qfOSgt5fbMQc9cuSICQwMNMHBwebNN98048ePN1WqVDFeXl7mp59+uuE1ZUUOevbsWSPJ3HvvveaNN94wH3zwgenWrZvx8PAwjRo1svv3mJG8KavdSTmcI67+LI0xRpJ58MEHU/x3Ydu2bSlev1atWqZz5843vC4gKzAgepfo3bt3qoMc1zp//rwTokm/1JLR2xHJaOY5d+7cLZ+jcuXK5sknn7RrO3DggDl06JBdW3Jysrn//vuNr6+v3ev+8ccfRpIZO3asrS0hIcGUKlXKhIeH253j6NGj5tKlS8YYY5o3b55qMpqUlGTCwsJM7dq1zYULFzJ8TSdOnHCYKHTr1s1IMnv37rW1/fPPP8bb29v07t3b7lrr169vChcubK5cuWJrj46OtsWT1u+KMWPGGElm5cqVdtdUs2ZNU6BAAZOYmJjha7pZ69evN2fPnrVri42NNfnz5zd169a1ax89erSRZNatW2dr27lzp/H09DRRUVF2fQ8dOmRL2rJnz57qv53Q0FBTunRpu2vevHmz8fDwMJGRkTeMf9u2bebEiRN2bRcvXjTly5c3hQsXtmtfsGCBkWQ+//xzW9vx48dNQECA6dChg60tI9/v3377LcXntWfPHuPr62s6deqUIt6IiAhTrFixFAktANwIOWjWIwfNPHdrDtqrVy/j5eVldu3aZWs7f/68KVKkiKlWrdoNrykrctDExETz22+/pTjnsGHDjCS7Ce6M5E1Z7U7L4a7n6s/SGOuA6LXnTMvbb79tsmfPniLvB5yBAdG7hKNktGHDhqZixYpm/fr1pn79+sbf39/06dPHGGPM119/bR5++GFTsGBB4+PjY0qWLGmGDx9uN4hy7Tm2b99uGjVqZPz9/U2hQoXM6NGjU8QwYcIEExoaavz9/U1AQICpXr26XVXZzJkzjSRz8OBBY4w1iZJkt11NTFetWmUkmVWrVtm9xmeffWaqVatm/Pz8TL58+UynTp3MP//8Y9ena9euJnv27Oaff/4xjz76qMmePbsJDAw0r7zySorrcyQ5OdmMGDHChISEGH9/f9OoUSOzbds2h0nfqVOnTJ8+fUzhwoWNj4+PKVWqlBk1alSKqoJPP/3UVKtWzeTIkcPkzJnThIWFmfHjx98wlqSkJDN+/HgTFhZmfH19TWBgoGnatKn5888/bX2uj2vo0KEO/zC5/v03xpg///zTNGnSxOTLl8/4+fmZ4sWLm27duhljjDl48GCKz+f6xHTnzp3m8ccfN3ny5DG+vr6mevXq5ptvvnH4uqtXrzY9e/Y0+fPnNwEBAcYYY86cOWP69OljihUrZnx8fEz+/PlN48aNzYYNG9J8Xw4cOGAkmVmzZt3wPTTG+t2UZLZu3Wpr69evn/H09DSnT5+26/vWW28ZSebIkSMOz5VWMrpkyRIjySxevNgYY01E0/Odu5FFixYZSWbRokW2tsmTJxtJZvv27XZ9582bZySlqDC4Kq0/XFu0aGHy58+fon3s2LFGklm+fPktXEXmeOyxx0zevHnt2mrWrGlq1qyZom+TJk1MqVKlUj1XagOicXFxRpLp169fin0VK1Y0hQoVynjg/xcZGWkkmTNnztjannjiCRMcHJzi98azzz5rsmXLZi5evJjmOR19v1NTrVo1h38YffPNN0aS2bhxYzqvBACsyEH/Qw5KDno9Z+WglSpVcpgLXf33eW21YkZkZg561datW40kM2HChBu+vqO8yVVu1xwuvZz5WV4dEL1w4YJdJbUjW7ZsMZLMl19+mcErAm4da4je5eLi4tSsWTNVrVpV48eP13333SfJurh8jhw5FBkZqffee0/Vq1fXkCFDNGDAgBTnOHXqlB566CFVqVJF77zzjsqXL6/+/ftryZIltj7Tp0/Xiy++qNDQUI0fP17Dhg1T1apV9ccff6
Qa2/jx41W4cGGVL19ec+bM0Zw5c9JcZ3HWrFlq27atPD09NXLkSPXo0UNffvml6tWrp/j4eLu+SUlJatq0qfLly6e
"text/plain": [
"<Figure size 1400x600 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Two row-normalised transition matrices side by side.\n",
"# The y axes are deliberately not shared: the rows of the two matrices come from\n",
"# different clusterings (cluster_2016 vs cluster_2019). Shared axes also share their\n",
"# tick locators and formatters, so with sharey=True the second heatmap's row ticks\n",
"# and limits would be applied to both panels.\n",
"fig, axes = plt.subplots(1, 2, figsize=(14,6), sharey=False)\n",
"\n",
"# --- Heatmap 1: transition_pct1 (cluster_2016 rows, cluster_2019 columns) ---\n",
"sns.heatmap(\n",
"    transition_pct1,\n",
"    annot=True,\n",
"    fmt=\".2f\",\n",
"    cmap=\"Blues\",\n",
"    ax=axes[0]\n",
")\n",
"\n",
"axes[0].set_xlabel(\"Cluster 2019-2022\")\n",
"axes[0].set_ylabel(\"Cluster 2016-2019\")\n",
"axes[0].set_title(\"Transition des clusters (2016-2019 → 2019-2022)\")\n",
"\n",
"# --- Heatmap 2: transition_pct2 (cluster_2019 rows, cluster_2022 columns) ---\n",
"sns.heatmap(\n",
"    transition_pct2,\n",
"    annot=True,\n",
"    fmt=\".2f\",\n",
"    cmap=\"Blues\",\n",
"    ax=axes[1]\n",
")\n",
"\n",
"axes[1].set_xlabel(\"Cluster 2022-2025\")\n",
"axes[1].set_ylabel(\"Cluster 2019-2022\")\n",
"axes[1].set_title(\"Transition des clusters (2019-2022 → 2022-2025)\")\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
2026-04-07 12:31:16 +02:00
"execution_count": 228,
2026-04-03 10:55:04 +02:00
"id": "d2701546-abaf-42e2-9070-7ef43824096c",
"metadata": {},
2026-04-07 12:31:16 +02:00
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHHCAYAAABZbpmkAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAcaJJREFUeJzt3XlYVOXbB/DvzADDPsg6gIK4i6KipuKeorhrkqWZa2opWuaSWT/FJXPJyhZT6y2XzCVNLS019xU3cAPUUBFUNgHZZZs57x/E5AjIDM4wMHw/1zVXnXOec879cMS5fc6ziARBEEBERERkpMSGDoCIiIhIn5jsEBERkVFjskNERERGjckOERERGTUmO0RERGTUmOwQERGRUWOyQ0REREaNyQ4REREZNSY7REREZNSY7BAZiEgkwoIFC1TbCxYsgEgkQnJysuGCqkGysrIwYcIEyOVyiEQiTJ8+3dAhEZGeMNkh0qENGzZAJBKV+Tl37pyhQ6R/ffrpp9iwYQMmT56Mn3/+GaNGjdLr/erWrYsBAwaU2P/zzz9DIpGgT58+yM3N1WsMRDWViaEDIDJGixYtgpeXV4n9DRo0MEA0VJqjR4+iQ4cOCA4ONlgMv/zyC8aOHQt/f3/s2bMH5ubmBouFyJgx2SHSg759+6Jt27aGDqNayc7OhpWVVaXdLykpCd7e3jq7XmFhIZRKJczMzDQqv23bNowZMwY9evTA77//zkSHSI/4GouoiklOTsZrr70GW1tbODg44L333ivxeqOwsBCLFy9G/fr1IZVKUbduXXz00UfIy8tTlZkxYwYcHBwgCIJq37Rp0yASifD111+r9iUmJkIkEmHNmjXlxrZ582a0a9cOlpaWqFWrFrp27Yq///5bdfzZfkjF6tati7Fjx6q2i1/3nThxAlOmTIGzszNq166NnTt3qvY/a926dRCJRAgPD1ftu3nzJl599VXY29vD3Nwcbdu2xR9//PHcOhw/fhwikQjR0dH4888/Va8Y7927B6AoCXrrrbfg4uICc3NztGzZEhs3blS7xr179yASibBy5UqsWrVK9RwiIyPL/RkCwK+//oo333wT3bt3xx9//FFuoiMSiTB16lTs2LED3t7esLCwgJ+fH65fv6762TRo0ADm5ubo3r27qi5PO3/+PPr06QOZTAZLS0t069YNZ86cUSsTExODKVOmoHHjxrCwsICDgwOGDRtW4nrFz+/MmTOYMWMGnJycYGVlhVdeeQWPHj1SK3vp0iUEBATA0dERFhYW8PLywvjx4zX6ORHpClt2iPQgPT29REdjkUgEBweHcs997bXXULduXSxduhTnzp3D119/jcePH2PTpk2qMhMmTMDGjRvx6quvYubMmTh//jyWLl2KGzduYPfu3QCALl264Msvv0RERASaN28OADh16hTEYjFOnTqFd999V7UPALp27frcuBYuXIgFCxagY8eOWLRoEczMzHD+/HkcPXoUvXv31vyH85QpU6bAyckJ8+fPR3Z2Nvr37w9ra2v8+uuv6Natm1rZ7du3o1mzZqq6REREoFOnTnB3d8eHH34IKysr/PrrrxgyZAh+++03vPLKK6Xes2nTpvj555/x/vvvo3bt2pg5cyYAwMnJCU+ePEH37t1x+/ZtTJ06FV5eXtixYwfGjh2LtLQ0vPfee2rXWr9+PXJzczFp0iRIpVLY29uXW+fffvsNI0eORNeuXbF3715YWFho9LM6deoU/vjjDwQFBQEAli5digEDBuCDDz7Ad999hylTpuDx48dYsWIFxo8fj6NHj6rOPXr0KPr27Ys2bdogODgYYrEY69evR48ePXDq1Cm0a9cOAHDx4kWcPXsWw4cPR+3atXHv3j2sWbMG3bt3R2RkJCwtLdVimjZtGmrVqoXg4GDcu3cPq1atwtSpU7F9+3YARYlj79694eTkhA8//BB2dna4d+8edu3apVGdiXRGICKdWb9+vQCg1I9UKl
UrC0AIDg5WbQcHBwsAhEGDBqmVmzJligBAuHr1qiAIgnDlyhUBgDBhwgS1crNmzRIACEePHhUEQRCSkpIEAMJ3330nCIIgpKWlCWKxWBg2bJjg4uKiOu/dd98V7O3tBaVSWWa9oqKiBLFYLLzyyiuCQqFQO/b0ec/WqZinp6cwZsyYEj+nzp07C4WFhWplR4wYITg7O6vtj4+PF8RisbBo0SLVvp49ewo+Pj5Cbm6uWiwdO3YUGjZsWGZdno6pf//+avtWrVolABA2b96s2pefny/4+fkJ1tbWQkZGhiAIghAdHS0AEGxtbYWkpKRy71V8Pzc3N8HExETo3r27kJ2drdF5giCo/vxER0er9q1bt04AIMjlclVcgiAIc+fOFQCoyiqVSqFhw4ZCQECA2rPKyckRvLy8hF69eqnte1ZISIgAQNi0aZNqX/Hz8/f3V7vm+++/L0gkEiEtLU0QBEHYvXu3AEC4ePGixnUl0ge+xiLSg9WrV+PQoUNqn/3792t0bvG/3ItNmzYNAPDXX3+p/XfGjBlq5YpbKP78808ARS0VTZo0wcmTJwEAZ86cgUQiwezZs5GYmIioqCgARS0GnTt3hkgkKjOmPXv2QKlUYv78+RCL1f/aeN555Zk4cSIkEonavtdffx1JSUk4fvy4at/OnTuhVCrx+uuvAwBSU1Nx9OhRvPbaa8jMzERycjKSk5ORkpKCgIAAREVF4eHDh1rH89dff0Eul2PEiBGqfaampnj33XeRlZVV4vVaYGAgnJycNL5+amoqCgsLUbt2bY1bdIr17NkTdevWVW23b99eFYONjU2J/Xfv3gUAXLlyBVFRUXjjjTeQkpKi+lllZ2ejZ8+eOHnyJJRKJQCoxVRQUICUlBQ0aNAAdnZ2CAsLKxHTpEmT1J5/ly5doFAoEBMTAwCws7MDAOzbtw8FBQVa1ZdIl/gai0gP2rVrV+EOyg0bNlTbrl+/PsRisarfRExMDMRicYmRXXK5HHZ2dqovGqDoy6c4OTp16hTatm2Ltm3bwt7eHqdOnYKLiwuuXr2KN95447kx3blzB2KxWKcdegGUOmKtuF/J9u3b0bNnTwBFr7BatWqFRo0aAQBu374NQRAwb948zJs3r9RrJyUlwd3dXat4YmJi0LBhwxIJXdOmTVXHy4v/eXr27AkPDw+sWbMG9vb2+Oqrr1TH0tPT8eTJE9W2mZmZ2msxDw8PtWvJZDIAQJ06dUrd//jxYwBQJbVjxowpM6709HTUqlULT548wdKlS7F+/Xo8fPhQrb9Xenp6ifOejalWrVpq9+7WrRsCAwOxcOFCfPnll+jevTuGDBmCN954A1KptMx4iHSNyQ5RFVdWy4kmLSqdO3fGDz/8gLt37+LUqVPo0qULRCIROnfujFOnTsHNzQ1KpRJdunTRddhqFApFqftLa92QSqUYMmQIdu/eje+++w6JiYk4c+YMPv30U1WZ4paIWbNmISAgoNRrV8Ywf21bZwDg22+/xePHj/H111+jVq1aqg7d7733nlpH6G7duqm1bj3bAlbe/uJEpfhn9dlnn6FVq1allrW2tgZQ1Iq4fv16TJ8+HX5+fpDJZBCJRBg+fLjqOtrcWyQSYefOnTh37hz27t2LgwcPYvz48fj8889x7tw51X2J9I3JDlEVExUVpdZicPv2bSiVStUrDE9PTyiVSkRFRalaHICiUVVpaWnw9PRU7StOYg4dOoSLFy/iww8/BFDUGXnNmjVwc3ODlZUV2rRp89yY6tevD6VSicjIyDK/MIGif9mnpaWp7cvPz0d8fLwmVVd5/fXXsXHjRhw5cgQ3btyAIAiqV1gAUK9ePQBFr5j8/f21uvbzeHp64tq1a1AqlWqtOzdv3lQdf1FisRibNm1Ceno6Fi5cCHt7e7z77rv44IMP8Oabb6rKFbeSvKj69esDAGxtbcv9We3cuRNjxozB559/rtqXm5tb4plqq0OHDujQoQOWLFmCLVu2YOTIkdi2bR
smTJjwQtcl0hT77BBVMatXr1bb/uabbwAUzd0DAP369QMArFq1Sq3cF198AQDo37+/ap+Xlxfc3d3x5ZdfoqCgAJ0
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAHHCAYAAABXx+fLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAhV9JREFUeJzt3XlYlOX6B/DvzMDMsA4iOyIgLogbKopaKiqGZm7V0cxyKyvTyoMtejpqnhbUOh2rn2mbZdpilppaYYpLaSgm7gsqoqjssoNsM+/vj2FGRxYZnI2Z7+e6uHTeeZfnZYC553nu57lFgiAIICIiIrIhYnM3gIiIiMjUGAARERGRzWEARERERDaHARARERHZHAZAREREZHMYABEREZHNYQBERERENocBEBEREdkcBkBERERkcxgAERlIUFAQpk2bpn28d+9eiEQi7N27V7stKioKXbt2NX3jyGTeffddtGvXDhKJBOHh4eZuDhE1gAEQ0V2cPHkSjz76KAIDAyGXy+Hv74/hw4fjo48+MnfTjOLMmTN44403cPny5TrPffzxx/jqq69M3qaW4vfff8err76K++67D19++SXeeecdo15v2rRpcHZ2rrP9xIkT8PDwQFBQUL2vIxEBduZuAJEl++uvvzBkyBC0bdsWM2fOhI+PD65evYqDBw/igw8+wAsvvKDdNyUlBWJxy/9McebMGSxZsgRRUVEICgrSee7jjz+Gh4eHTk8X3bJ7926IxWJ88cUXkEqlZmnDqVOnMGzYMDg5OWHPnj11XkMiUmMARNSIt99+GwqFAocPH4abm5vOczk5OTqPZTKZCVtGhiQIAioqKuDg4HBP58nJyYGDg4PBgh9923X69GkMHToUDg4O2LNnD4KDgw3SDiJr1PI/rhIZUWpqKrp06VIn+AEALy8vncd35gA15syZMxgyZAgcHR3h7++P5cuX19knJycHTz31FLy9vSGXy9GjRw+sXbtWZ5/68owA4PLlyxCJRHWGq86dO4dHH30U7u7ukMvliIiIwNatW7XPf/XVV/jHP/4BABgyZAhEIpH2/EFBQTh9+jT27dun3R4VFaU9trCwEHPnzkVAQABkMhnat2+PZcuWQaVS3fX78ffffyMmJgYeHh5wcHBAcHAwZsyYobOPSqXCBx98gG7dukEul8PT0xMjRozA33//rd2npqYGb775JkJCQiCTyRAUFIR//etfqKys1DlXUFAQHnroIezYsQMRERFwcHDAJ598ck/3IRKJ8OWXX6KsrEz7/dF8/w3Rrrs5e/Yshg0bBplMhj179qBdu3aN7q/JRztx4gQGDx4MR0dHtG/fHj/++CMAYN++fYiMjISDgwM6deqEXbt21TnH9evXMWPGDHh7e0Mmk6FLly5Ys2aNzj5VVVVYtGgRevfuDYVCAScnJwwcOBB79uzR2U/zM/vee+/h008/1X6v+vTpg8OHD+vsm5WVhenTp6NNmzaQyWTw9fXF2LFjOdxHemEPEFEjAgMDkZiYiFOnThksebmgoAAjRozAww8/jAkTJuDHH3/Ea6+9hm7dumHkyJEAgJs3byIqKgoXL17EnDlzEBwcjI0bN2LatGkoLCzESy+9pPd1T58+jfvuuw/+/v6YP38+nJyc8MMPP2DcuHH46aefMH78eAwaNAgvvvgiPvzwQ/zrX/9C586dAQCdO3fGihUr8MILL8DZ2Rmvv/46AMDb2xsAUF5ejsGDB+P69et49tln0bZtW/z1119YsGABMjMzsWLFigbblZOTgwceeACenp6YP38+3NzccPnyZWzatElnv6eeegpfffUVRo4ciaeffho1NTX4888/cfDgQURERAAAnn76aaxduxaPPvoo5s2bh0OHDiEuLg5nz57F5s2bdc6XkpKCSZMm4dlnn8XMmTPRqVOne7qPdevW4dNPP0VSUhI+//xzAMCAAQMM0q67SUlJwdChQ2FnZ4c9e/YgJCTkrscA6p
/Fhx56CI899hj+8Y9/YNWqVXjsscfwzTffYO7cuXjuuefw+OOP491338Wjjz6Kq1evwsXFBQCQnZ2Nfv36QSQSYc6cOfD09MRvv/2Gp556CsXFxZg7dy4AoLi4GJ9//jkmTZqEmTNnoqSkBF988QViYmKQlJRUJ1H822+/RUlJCZ599lmIRCIsX74cDz/8MC5dugR7e3sAwCOPPILTp0/jhRdeQFBQEHJycrBz506kp6dzyI+aTiCiBv3++++CRCIRJBKJ0L9/f+HVV18VduzYIVRVVdXZNzAwUJg6dar28Z49ewQAwp49e7TbBg8eLAAQvv76a+22yspKwcfHR3jkkUe021asWCEAENavX6/dVlVVJfTv319wdnYWiouLG7yGIAhCWlqaAED48ssvtduGDRsmdOvWTaioqNBuU6lUwoABA4QOHTpot23cuLHecwqCIHTp0kUYPHhwne1vvvmm4OTkJJw/f15n+/z58wWJRCKkp6fXOUZj8+bNAgDh8OHDDe6ze/duAYDw4osv1nlOpVIJgiAIx44dEwAITz/9tM7zL7/8sgBA2L17t3ZbYGCgAECIj4832H0IgiBMnTpVcHJy0tlmiHY1dj17e3vB19dX8PPzq9Puxmh+Fr/99lvttnPnzgkABLFYLBw8eFC7fceOHXV+np566inB19dXyMvL0znvY489JigUCqG8vFwQBEGoqakRKisrdfYpKCgQvL29hRkzZmi3aX5mW7duLeTn52u3//zzzwIAYdu2bdpjAQjvvvtuk++VqD4cAiNqxPDhw5GYmIgxY8bg+PHjWL58OWJiYuDv768zdKQPZ2dnPPHEE9rHUqkUffv2xaVLl7Tbfv31V/j4+GDSpEnabfb29njxxRdRWlqKffv26XXN/Px87N69GxMmTEBJSQny8vKQl5eHGzduICYmBhcuXMD169ebdT8AsHHjRgwcOBCtWrXSnjsvLw/R0dFQKpX4448/GjxWM7y4fft2VFdX17vPTz/9BJFIhMWLF9d5TiQSAVB/zwAgNjZW5/l58+YBAH755Red7cHBwYiJiTHYfTTEEO1qjFKpRF5eHtzd3eHh4aFX25ydnfHYY49pH3fq1Alubm7o3LkzIiMjtds1/9f8jAqCgJ9++gmjR4+GIAg636uYmBgUFRUhOTkZACCRSLQ5USqVCvn5+aipqUFERIR2n9tNnDgRrVq10j4eOHCgzrU1OVZ79+5FQUGBXvdLdDsOgRHdRZ8+fbBp0yZUVVXh+PHj2Lx5M/73v//h0UcfxbFjxxAWFqbX+dq0aaN909Zo1aoVTpw4oX185coVdOjQoc6sMs2Q1JUrV/S65sWLFyEIAhYuXIiFCxfWu09OTg78/f31Oq/GhQsXcOLECXh6ejZ47oYMHjwYjzzyCJYsWYL//e9/iIqKwrhx4/D4449rE8tTU1Ph5+cHd3f3Bs9z5coViMVitG/fXme7j48P3Nzc6nzP6ksQvpf7MGa7GuPg4IDPP/8ckydPxqhRo7Bz5044OTkBUA+lFhUV1bmuRn0/iwqFAgEBAXW2AdAGHLm5uSgsLMSnn36KTz/9tN523f69Wrt2Lf773//i3LlzOkFufffatm1bnceaYEhzbZlMhmXLlmHevHnw9vZGv3798NBDD2HKlCk690Z0NwyAiJpIKpWiT58+6NOnDzp27Ijp06dj48aN9fZKNEYikdS7XRAEvdt055uXhlKp1HmsSeB9+eWXG+xduPMNWh8qlQrDhw/Hq6++Wu/zHTt2bPBYkUiEH3/8EQcPHsS2bduwY8cOzJgxA//9739x8ODBete5aUxD35M71Tez6l7uw5jtupvHHnsMBQUFeP755/Hwww9j27ZtkEql2LBhA6ZPn66z7+0/Zw39LN7tZ1Tz8/TEE09g6tSp9e7bvXt3AMD69esxbdo0jBs3Dq+88gq8vLwgkUgQFxeH1NRUva8NAHPnzsXo0aOxZcsW7NixAwsXLkRcXBx2796Nnj
171ns80Z0YABE1gybpNjMz0yjnDwwMxIkTJ6BSqXR6gc6dO6d9Hrj16biwsFDn+Dt7FTQzguzt7REdHd3otRt7o27
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best K by silhouette: 3\n"
]
},
{
"data": {
"text/plain": [
"' Ce que c’ est :\\nInertia = somme des distances intra-cluster (SSE).\\nPlus elle baisse, plus les clusters sont “serrés”.\\n\\nComment lire :\\nQuand K augmente, inertia baisse toujours (normal).\\nOn cherche un “coude” : à partir d’ un certain K, ajouter des clusters apporte peu\\n'"
]
},
"execution_count": 228,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cluster-count selection: sweep K and compare inertia (elbow) and silhouette.\n",
"\n",
"k_range = range(2, 21)\n",
"k_values = list(k_range)  # materialised once; reused for both x-axes and best_k\n",
"inertias = []\n",
"silhouettes = []\n",
"\n",
"# Fit one K-means model per candidate K and record both quality metrics.\n",
"for k in k_range:\n",
"    km = KMeans(n_clusters=k, n_init=30, random_state=42)\n",
"    labels = km.fit_predict(X_2022_scaled2)\n",
"    inertias.append(km.inertia_)\n",
"    silhouettes.append(silhouette_score(X_2022_scaled2, labels))\n",
"\n",
"# Elbow plot.\n",
"# What it is: inertia is the within-cluster sum of squared distances (SSE);\n",
"# the lower it gets, the tighter the clusters.\n",
"# How to read it: inertia always decreases as K grows (expected), so look for\n",
"# an \"elbow\": the K beyond which adding clusters brings little improvement.\n",
"plt.figure()\n",
"plt.plot(k_values, inertias, marker=\"o\")\n",
"plt.xlabel(\"Number of clusters K\")\n",
"plt.ylabel(\"Inertia (within-cluster SSE)\")\n",
"plt.title(\"Elbow curve for K-means\")\n",
"plt.show()\n",
"\n",
"# Silhouette plot\n",
"plt.figure()\n",
"plt.plot(k_values, silhouettes, marker=\"o\")\n",
"plt.xlabel(\"Number of clusters K\")\n",
"plt.ylabel(\"Silhouette score\")\n",
"plt.title(\"Silhouette score for K-means\")\n",
"plt.show()\n",
"\n",
"# K with the highest silhouette score (np.argmax keeps the first, i.e. smallest, K on ties).\n",
"# The explanatory note used to be a trailing string literal, which the notebook\n",
"# echoed as the cell's output; it now lives in the comments above the elbow plot.\n",
"best_k = k_values[int(np.argmax(silhouettes))]\n",
"print(\"Best K by silhouette:\", best_k)"
]
},
{
"cell_type": "code",
"execution_count": 236,
"id": "aa314cec-891d-4f97-af49-75c8869df105",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_med</th>\n",
" <th>freq_med</th>\n",
" <th>rel_int_med</th>\n",
" <th>gross_flow_med</th>\n",
" <th>n_tx_med</th>\n",
" <th>vol_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster2_kmeans</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3.0</th>\n",
" <td>133</td>\n",
" <td>112893.522820</td>\n",
" <td>1.000000</td>\n",
" <td>5.640443</td>\n",
" <td>7428.045000</td>\n",
" <td>2433.0</td>\n",
" <td>8849.651088</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.0</th>\n",
" <td>130</td>\n",
" <td>22171.725988</td>\n",
" <td>0.984615</td>\n",
" <td>5.229762</td>\n",
" <td>1395.668979</td>\n",
" <td>1043.5</td>\n",
" <td>1577.856556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2.0</th>\n",
" <td>52</td>\n",
" <td>816081.008680</td>\n",
" <td>1.000000</td>\n",
" <td>4.501734</td>\n",
" <td>51749.188856</td>\n",
" <td>11492.5</td>\n",
" <td>49834.562115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1.0</th>\n",
" <td>31</td>\n",
" <td>65939.622642</td>\n",
" <td>0.200000</td>\n",
" <td>3.489589</td>\n",
" <td>2369.314778</td>\n",
" <td>22.0</td>\n",
" <td>9971.798124</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4.0</th>\n",
" <td>9</td>\n",
" <td>27289.956085</td>\n",
" <td>0.028169</td>\n",
" <td>0.030004</td>\n",
" <td>6.280154</td>\n",
" <td>2.0</td>\n",
" <td>28.914546</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_med freq_med rel_int_med \\\n",
"cluster2_kmeans \n",
"3.0 133 112893.522820 1.000000 5.640443 \n",
"0.0 130 22171.725988 0.984615 5.229762 \n",
"2.0 52 816081.008680 1.000000 4.501734 \n",
"1.0 31 65939.622642 0.200000 3.489589 \n",
"4.0 9 27289.956085 0.028169 0.030004 \n",
"\n",
" gross_flow_med n_tx_med vol_med \n",
"cluster2_kmeans \n",
"3.0 7428.045000 2433.0 8849.651088 \n",
"0.0 1395.668979 1043.5 1577.856556 \n",
"2.0 51749.188856 11492.5 49834.562115 \n",
"1.0 2369.314778 22.0 9971.798124 \n",
"4.0 6.280154 2.0 28.914546 "
]
},
"execution_count": 236,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Final K-means fit with 5 clusters on the scaled feature matrix.\n",
"km = KMeans(n_clusters=5, n_init=50, random_state=42)\n",
"labels_km = km.fit_predict(X_2022_scaled2)\n",
"\n",
"# Write the labels back onto the rows of dfc indexed by X_2022_2.index.\n",
"dfc.loc[X_2022_2.index, \"cluster2_kmeans\"] = labels_km\n",
"\n",
"# Per-cluster profile: output column -> (source column, aggregation).\n",
"# Medians are used because they are robust to outliers.\n",
"cluster2_aggs = {\n",
"    \"n_clients\": (ID_COL, \"count\"),\n",
"    \"aum_qty_med\": (\"aum_qty_mean\", \"median\"),\n",
"    \"freq_med\": (\"frequency\", \"median\"),\n",
"    \"rel_int_med\": (\"rel_intensity_total\", \"median\"),\n",
"    \"gross_flow_med\": (\"gross_flow_qty_mean\", \"median\"),\n",
"    \"n_tx_med\": (\"n_tx_total\", \"median\"),\n",
"    \"vol_med\": (\"net_flow_qty_vol\", \"median\"),\n",
"}\n",
"\n",
"cluster2_rows = dfc.loc[X_2022_2.index]\n",
"k_profile = cluster2_rows.groupby(\"cluster2_kmeans\").agg(**cluster2_aggs)\n",
"# Largest clusters first.\n",
"k_profile = k_profile.sort_values(\"n_clients\", ascending=False)\n",
"\n",
"k_profile\n"
]
},
{
"cell_type": "code",
"execution_count": 238,
"id": "fd2eb9a4-9289-437f-b360-87d2accd9737",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAosxJREFUeJzs3XdYU9cbB/BvEghDZe8N7oGgotaJA6W4tVW01o21Fid2aG2dtVq3VdSqVVurFfWn1roVV111Ae6BgrhAkaVMSc7vD0okZJAbsoD38zw+kpOTe99cQvLmTB5jjIEQQgghpArj6zsAQgghhBB9o4SIEEIIIVUeJUSEEEIIqfIoISKEEEJIlUcJESGEEEKqPEqICCGEEFLlUUJECCGEkCqPEiJCCCGEVHmUEBFCCCGkyqOEyMB06NABjRo10ugxeTwexo0bp9FjqmLz5s3g8XhITEzU+bkBYPjw4fDy8tLLuYm0xMRE8Hg8bN68Wd+haIy2npOXlxeGDx+u0WMa8nm1oUOHDujQoYO+w9CILVu2oF69ejA2NoaVlZW+w6nUKCFS4vLlyxg3bhwaNmyIatWqwcPDAwMGDMD9+/dl6nbo0AE8Hg88Hg98Ph8WFhaoW7cuhgwZgmPHjukhekK4uX37NmbNmlWuBHbbtm1Yvny5xmKqrM6fP49Zs2YhIyND36FoxOrVqw062X3+/DlmzZqF2NhYfYfCyd27dzF8+HDUrFkT69evx7p16/QdUqVmpO8ADNlPP/2Ec+fOoX///mjcuDGSk5OxatUqNG3aFBcvXpRpyXFzc8P8+fMBANnZ2YiPj8fu3bvxxx9/YMCAAfjjjz9gbGysj6eiF0OGDMHAgQNhYmKi71CICm7fvo3Zs2ejQ4cOaresbdu2DTdv3sSkSZOkyj09PZGbm1ulXv/KnD9/HrNnz8bw4cNlvvXfu3cPfL7uv6uW57yrV6+GnZ2dwbQwHT16VOr28+fPMXv2bHh5ecHf318/Qanh1KlTEIvFWLFiBWrVqqXvcCo9SoiUiIiIwLZt2yAUCiVloaGh8PX1xYIFC/DHH39I1be0tMSnn34qVbZgwQJMmDABq1evhpeXF3766SedxG4IBAIBBAKBxo6Xk5MDc3NzjR2P6A6Px4Opqam+w1BKLBajoKBA73Hq6wtEZfriUvI9uyJ7+fIlAJTZVcYYQ15eHszMzHQQVSXGCGdNmzZlTZs2lSoLDAxkDRs2lFu/sLCQNWjQgJmbm7OMjAylxy4+zpUrV1irVq2Yqakp8/LyYmvWrJGpm5eXx2bMmMFq1qzJhEIhc3NzY1999RXLy8uTqgeAhYeHsz179rCGDRsyoVDIGjRowA4dOiRVLzExkY0dO5bVqVOHmZqaMhsbG/bxxx+zhIQESZ3Lly8zAGzz5s0y8Rw+fJgBYH///TdjjLFNmzYxAFKPZ4yxyMhI1qBBAyYUCpmzszP74osvWHp6usLr0K5dO2ZmZsYmTpzIGGNs7969rFu3bszZ2ZkJhULm4+PD5syZwwoLC6WOMWzYMObp6ankar9/Tl27dmW2traS6z1ixAipOiKRiC1btow1aNCAmZiYMAcHB/bZZ5+xtLQ0mXozZ85kzs7OzMzMjHXo0IHdunWLeXp6smHDhknqFV+bf/75h40fP57Z2dkxS0tL9tlnn7H8/HyWnp7OhgwZwqysrJiVlRX76quvmFgsVismT09P1r17d/bPP/+w5s2bMxMTE+bt7c1+++03mXhK/zt58qTK1zwwMFDm8cXXPyEhgQFgmzZtkootOjqatW3blpmbmzNLS0vWq1cvdvv2bak6M2fOZADYgwcP2LBhw5ilpSWzsLBgw4cPZ9nZ2VJ1X716xe7cuSNTLk/x38Uff/zBGjRowIyMjNiePXsYY4w9ffqUjRgxgjk4OEj+Xn799Vepx8t7TnFxcWzYsGHM29ubmZiYMEdHRzZixAiWmpoq83xK/yv+Oyn5WuHy96
Zq3Iooeo2ePXuWTZ48mdnZ2TFzc3PWp08f9vLlS6nHlX4ugYGBkvvT09PZxIkTmZubGxMKhaxmzZpswYIFTCQSyVzLRYsWsV9++YX5+PgwoVDIAgIC2KVLl6TifPHiBRs+fDhzdXVlQqGQOTk5sV69ekm9zwQGBkpiOHnypNzrvWnTJjZjxgxmZGQk9XyKjR49mllaWrLc3Fy512vRokUMAEtMTJS5b+rUqczY2Fjyt3j//n3Wr18/5ujoyExMTJirqysLDQ1V+nkg77rOnDlTcl/37t3Z4cOHWbNmzZiJiQlbtmyZyte7uN6wYcOYhYUFs7S0ZEOHDmUxMTEyr+mS17Ikee+vmnxPKhnnpEmTmKenJxMKhczV1ZUNGTKEvXr1ir1584aZm5uzCRMmyDzuyZMnjM/nsx9//FHhNS6NEiKOxGIxc3V1ZV27dpUqV5YQMcbY3LlzGQC2f/9+pccPDAxkLi4uzMHBgY0bN479/PPPrG3btgyA1BubSCRiXbt2Zebm5mzSpEnsl19+YePGjWNGRkasd+/eUscEwPz8/JizszObO3cuW758OfPx8WHm5uZSb9Q7d+5kfn5+bMaMGWzdunXs22+/ZdbW1szT01PqA8bHx4d169ZNJvYRI0Ywa2trVlBQwBiTnxAVfxgEBQWxlStXsnHjxjGBQMCaN28ueVzxdXBycmL29vZs/Pjx7JdffmF79+5ljDHWp08fNmDAALZo0SK2Zs0a1r9/fwaAffnll1LxqJIQpaSkMGtra1anTh22aNEitn79ejZ9+nRWv359qXphYWHMyMiIjR49mq1du5Z98803rFq1ajJxf/311wwA69mzJ1u1ahUbPXo0c3NzY3Z2dnI/bPz9/dmHH37IIiMj2ZAhQxgA9vXXX7O2bduyTz75hK1evZr16NGDAZB5s1A1Jk9PT1a3bl3m6OjIvv32W7Zq1SrWtGlTxuPx2M2bNxljjD18+JBNmDCBAWDffvst27JlC9uyZQtLTk5W+ZofPXqU+fv7Mzs7O8njixMMecnDsWPHmJGREatTpw5buHAhmz17NrOzs2PW1tZyXzNNmjRh/fr1Y6tXr2ZhYWGSa1VScd3iRE4ZAKx+/frM3t6ezZ49m0VGRrKYmBiWnJzM3NzcmLu7O5szZw5bs2YN69WrFwMg+dBR9JwWL17M2rVrx+bMmcPWrVvHJk6cyMzMzFiLFi0kCW1cXBwbNGiQ5HjF1+rt27eS31fJ14qqf2+qxq2IooSoSZMmrFOnTmzlypVsypQpTCAQsAEDBkjq7dmzh7m5ubF69epJnsvRo0cZY4xlZ2ezxo0bM1tbW/btt9+ytWvXsqFDhzIejyf5glPyWjZp0oTVqlWL/fTTT2zhwoXMzs6Oubm5Sb2eW7duzSwtLdl3333HNmzYwH788UfWsWNHdvr0aUmdkh/iycnJbM6cOQwA++yzzyQxPnz4kD148IABYCtXrpS6Fvn5+cza2pqNHDlS4fV6/Pgx4/F4bOHChTL3+fj4sO7du0uO5e3tzVxcXNgPP/zANmzYwGbPns2aN28uN5kqeV379u3LALA1a9awLVu2sLi4OMnvqlatWsza2ppNnTqVrV27lp08eVLl6y0Wi1n79u0Zn89nX3zxBVu5ciXr1KkTa9y4cbkSIk2+JzHG2Js3b1ijRo2YQCBgo0ePZmvWrGFz585lzZs3ZzExMYwxxgYPHswcHR1lvhAvXLiQ8Xg89vjxY4XXuDRKiDjasmWLTHLCWNkJ0Z49exgAtmLFCqXHL/6WvWTJEklZfn4+8/f3Zw4ODpIX1ZYtWxifz2f//POP1OPXrl3LALBz585JygAwoVDI4uPjJWVxcXEybwQ5OTky8Vy4cIEBYL///rukbNq0aVLffopjtLKyknoDKZ0QvXz5kgmFQta1a1epbyurVq1iANjGjRtlrsPatWtlYpIX55gxY5i5ublU65gqCVHx7+Xy5csK6/zzzz8MANu6datUefE39O
Ly5ORkZmRkxPr06SNVb9asWQyA3A+b4OBgqZafVq1aMR6Pxz7//HNJWWFhIXNzc5N6U1I1Jsbef9M8c+aMpOzly5f
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Scatter of relative intensity vs activity frequency, one colour per cluster.\n",
"fig, ax = plt.subplots()\n",
"\n",
"# Cluster 4.0 is excluded from the plot.\n",
"keep_mask = ~dfc[\"cluster2_kmeans\"].isin([4.0])\n",
"for cluster_id, members in dfc[keep_mask].groupby(\"cluster2_kmeans\"):\n",
"    ax.scatter(members[\"frequency\"], members[\"rel_intensity_total\"], s=10, label=cluster_id)\n",
"\n",
"ax.set_yscale(\"log\")\n",
"# Dashed guides at the frequency / intensity thresholds defined earlier in the notebook.\n",
"ax.axvline(thr_freq, linestyle=\"--\")\n",
"ax.axhline(thr_int, linestyle=\"--\")\n",
"ax.set_xlabel(\"Activity frequency (share of active months)\")\n",
"ax.set_ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"ax.set_title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"ax.legend(markerscale=2)\n",
"# Fixed y-window: points outside [0.1, 100] are not shown.\n",
"ax.set_ylim(0.1, 100)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cda42f1d-d725-412b-8897-3728a4a5fc4a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}