Project_Carmignac/clustering_avril.ipynb

7446 lines
1.7 MiB
Plaintext
Raw Normal View History

2026-04-07 12:31:16 +02:00
{
"cells": [
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 322,
2026-04-07 12:31:16 +02:00
"id": "2fee3a54-847b-432f-bda5-3d6a9aa9020c",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import s3fs\n",
"import seaborn as sns\n",
"\n",
"from sklearn.preprocessing import StandardScaler, RobustScaler\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.mixture import GaussianMixture\n",
"from sklearn.metrics import silhouette_score, davies_bouldin_score, pairwise_distances\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.neighbors import kneighbors_graph\n",
"from sklearn.manifold import MDS\n",
"\n",
"# EPS is added to denominators to avoid division by zero.\n",
"# RANDOM_STATE is presumably the shared seed for stochastic estimators (usage not visible in this cell).\n",
"EPS = 1e-9\n",
"RANDOM_STATE = 42\n",
"\n",
"# Plot theme, plus generous limits for displaying wide and long DataFrames.\n",
"sns.set_style(\"whitegrid\")\n",
"for display_option in (\"display.max_columns\", \"display.max_rows\"):\n",
"    pd.set_option(display_option, 200)"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 323,
2026-04-07 12:31:16 +02:00
"id": "1f95b6b6-03b8-4f23-b236-5c71beedea04",
"metadata": {},
"outputs": [],
"source": [
"# Repaired AUM extract, stored as a comma-separated CSV (the pandas default separator).\n",
"PATH_aum = \"s3://projet-bdc-carmignac-g3/paco/AUM_repaired.csv\"\n",
"df_aum_repaired = pd.read_csv(PATH_aum)"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 324,
2026-04-07 12:31:16 +02:00
"id": "cab4432f-d7e5-4c18-ab86-19fe6759eed6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fichiers Flows : ['projet-bdc-data/carmignac/Flows ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv']\n",
"Fichiers AUM : ['projet-bdc-data/carmignac/AUM ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv']\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - AUM</th>\n",
" <th>Value - AUM CCY</th>\n",
" <th>Value - AUM €</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>35.368</td>\n",
" <td>24648.6666</td>\n",
" <td>24648.6666</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22413.0553</td>\n",
" <td>22413.0553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-12-31</td>\n",
" <td>35.368</td>\n",
" <td>22051.2406</td>\n",
" <td>22051.2406</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-03-31</td>\n",
" <td>35.368</td>\n",
" <td>21626.1173</td>\n",
" <td>21626.1173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22489.4502</td>\n",
" <td>22489.4502</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200000647 France \n",
"1 200000647 France \n",
"2 200000647 France \n",
"3 200000647 France \n",
"4 200000647 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Diversified Patrimoine \n",
"1 France Diversified Patrimoine \n",
"2 France Diversified Patrimoine \n",
"3 France Diversified Patrimoine \n",
"4 France Diversified Patrimoine \n",
"\n",
" Product - Legal Status Product - Is Dedie ? Product - Fund \\\n",
"0 FCP NO Carmignac Patrimoine \n",
"1 FCP NO Carmignac Patrimoine \n",
"2 FCP NO Carmignac Patrimoine \n",
"3 FCP NO Carmignac Patrimoine \n",
"4 FCP NO Carmignac Patrimoine \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n",
"0 A EUR FR0010135103 \n",
"1 A EUR FR0010135103 \n",
"2 A EUR FR0010135103 \n",
"3 A EUR FR0010135103 \n",
"4 A EUR FR0010135103 \n",
"\n",
" Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n",
"0 2015-03-31 35.368 24648.6666 24648.6666 \n",
"1 2015-11-30 35.368 22413.0553 22413.0553 \n",
"2 2015-12-31 35.368 22051.2406 22051.2406 \n",
"3 2016-03-31 35.368 21626.1173 21626.1173 \n",
"4 2016-11-30 35.368 22489.4502 22489.4502 "
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 324,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Data import: raw Flows and AUM extracts stored under the project bucket.\n",
"# `os` and `s3fs` are imported in the notebook's first cell.\n",
"\n",
"def read_csv_files(filesystem, paths, sep=\";\"):\n",
"    \"\"\"Read each CSV in `paths` through `filesystem`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    filesystem : object exposing `open(path, mode)` (here an s3fs filesystem)\n",
"    paths : iterable of str, file paths understood by `filesystem`\n",
"    sep : str, column separator passed to `pd.read_csv`\n",
"\n",
"    Returns\n",
"    -------\n",
"    dict mapping every file's base name to its DataFrame.\n",
"    \"\"\"\n",
"    frames = {}\n",
"    for path in paths:\n",
"        with filesystem.open(path, \"r\") as handle:\n",
"            frames[os.path.basename(path)] = pd.read_csv(handle, sep=sep, low_memory=False)\n",
"    return frames\n",
"\n",
"\n",
"s3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"fs = s3fs.S3FileSystem(client_kwargs={\"endpoint_url\": s3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc-data\"\n",
"carmignac_path = \"projet-bdc-data/carmignac\"\n",
"\n",
"# List the folder once, then split the CSV extracts by family.\n",
"all_files = fs.ls(carmignac_path)\n",
"flows_files = [f for f in all_files if \"Flows\" in f and f.endswith(\".csv\")]\n",
"aum_files = [f for f in all_files if \"AUM\" in f and f.endswith(\".csv\")]\n",
"print(\"Fichiers Flows :\", flows_files)\n",
"print(\"Fichiers AUM :\", aum_files)\n",
"\n",
"# One dict per family, keyed by file base name.\n",
"flows_data = read_csv_files(fs, flows_files)\n",
"aum_data = read_csv_files(fs, aum_files)\n",
"\n",
"# The analysis uses the V2 extracts: df = AUM, dg = Flows.\n",
"df = aum_data['AUM ENSAE V2 -20251105.csv']\n",
"dg = flows_data['Flows ENSAE V2 -20251105.csv']\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 325,
2026-04-07 12:31:16 +02:00
"id": "232e399b-64dc-4943-9c15-793a268ee896",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - Subscription</th>\n",
" <th>Quantity - Redemption</th>\n",
" <th>Quantity - NetFlows</th>\n",
" <th>Value Ccy - Subscription</th>\n",
" <th>Value Ccy - Redemption</th>\n",
" <th>Value Ccy - NetFlows</th>\n",
" <th>Value € - Subscription</th>\n",
" <th>Value € - Redemption</th>\n",
" <th>Value € - NetFlows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200127202</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Investissement</td>\n",
" <td>F</td>\n",
" <td>EUR</td>\n",
" <td>LU0992625839</td>\n",
" <td>2020-11-05</td>\n",
" <td>1636.00</td>\n",
" <td>0.000</td>\n",
" <td>1636.000</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-09</td>\n",
" <td>144.69</td>\n",
" <td>0.000</td>\n",
" <td>144.690</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2016-10-26</td>\n",
" <td>0.00</td>\n",
" <td>-8.321</td>\n",
" <td>-8.321</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2018-10-18</td>\n",
" <td>0.00</td>\n",
" <td>-22.083</td>\n",
" <td>-22.083</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2019-04-08</td>\n",
" <td>0.00</td>\n",
" <td>-465.992</td>\n",
" <td>-465.992</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200127202 France \n",
"1 406533 France \n",
"2 406533 France \n",
"3 406533 France \n",
"4 406533 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Equity Investissement \n",
"1 France Diversified Patrimoine \n",
"2 France Equity Investissement \n",
"3 France Equity Investissement \n",
"4 France Equity Investissement \n",
"\n",
" Product - Legal Status Product - Is Dedie ? \\\n",
"0 SICAV NO \n",
"1 FCP NO \n",
"2 FCP NO \n",
"3 FCP NO \n",
"4 FCP NO \n",
"\n",
" Product - Fund Product - Shareclass Type \\\n",
"0 Carmignac Portfolio Investissement F \n",
"1 Carmignac Patrimoine A \n",
"2 Carmignac Investissement A \n",
"3 Carmignac Investissement A \n",
"4 Carmignac Investissement A \n",
"\n",
" Product - Shareclass Currency Product - Isin Centralisation Date \\\n",
"0 EUR LU0992625839 2020-11-05 \n",
"1 EUR FR0010135103 2015-03-09 \n",
"2 EUR FR0010148981 2016-10-26 \n",
"3 EUR FR0010148981 2018-10-18 \n",
"4 EUR FR0010148981 2019-04-08 \n",
"\n",
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
"0 1636.00 0.000 1636.000 \n",
"1 144.69 0.000 144.690 \n",
"2 0.00 -8.321 -8.321 \n",
"3 0.00 -22.083 -22.083 \n",
"4 0.00 -465.992 -465.992 \n",
"\n",
" Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
"\n",
" Value € - Subscription Value € - Redemption Value € - NetFlows \n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 "
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 325,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview of the raw V2 flows extract.\n",
"dg.head(5)"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 326,
2026-04-07 12:31:16 +02:00
"id": "e19e970c-d1dc-4608-9f6f-73dd3e282ba6",
"metadata": {},
"outputs": [],
"source": [
"# Merge: rows from the repaired AUM extract take precedence; accounts absent from it keep their raw rows.\n",
"\n",
"# Accounts covered by the repaired extract.\n",
"ids_repaired = df_aum_repaired[\"Registrar Account - ID\"].unique()\n",
"\n",
"# Raw rows whose account is not covered by the repaired extract.\n",
"# NOTE(review): isin() only matches when both ID columns share a dtype (str vs int) - TODO confirm.\n",
"in_repaired = df[\"Registrar Account - ID\"].isin(ids_repaired)\n",
"df_only = df.loc[~in_repaired]\n",
"\n",
"# Repaired rows first, then the remaining raw rows, under a fresh index.\n",
"df_merged = pd.concat((df_aum_repaired, df_only), ignore_index=True)"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 327,
2026-04-07 12:31:16 +02:00
"id": "79c732d4-8d4d-4f7d-9a46-2e89cf2b213d",
"metadata": {},
"outputs": [],
"source": [
"# Parse the centralisation dates and drop the technical account labels.\n",
"# pandas is already imported in the notebook's first cell.\n",
"\n",
"TECHNICAL_ACCOUNTS = [\"Off Distribution\", \"Private Clients\", \"Private Client\"]\n",
"\n",
"for frame in (df, df_aum_repaired, dg):\n",
"    frame[\"Centralisation Date\"] = pd.to_datetime(frame[\"Centralisation Date\"])\n",
"\n",
"df = df[~df[\"Registrar Account - ID\"].isin(TECHNICAL_ACCOUNTS)]\n",
"dg = dg[~dg[\"Registrar Account - ID\"].isin(TECHNICAL_ACCOUNTS)]"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 328,
2026-04-07 12:31:16 +02:00
"id": "f7f7242c-051e-4d7d-9a76-b46523089e49",
"metadata": {},
"outputs": [],
"source": [
"# Reference date, then keep the accounts holding more than 5M EUR of AUM on that date.\n",
"\n",
"ref_date = pd.Timestamp(\"2025-10-31\")\n",
"df_ref = df.loc[df[\"Centralisation Date\"] == ref_date]\n",
"\n",
"# Total AUM (EUR) per account on the reference date, largest first.\n",
"aum_by_account = df_ref.groupby(\"Registrar Account - ID\")[\"Value - AUM €\"].sum()\n",
"aum_account = aum_by_account.reset_index().sort_values(by=\"Value - AUM €\", ascending=False)\n",
"\n",
"aum_account = aum_account.loc[aum_account[\"Value - AUM €\"] > 5_000_000]\n",
"selected_accounts = aum_account[\"Registrar Account - ID\"]\n",
"\n",
"# Restrict the merged AUM history and the flows to the selected accounts (independent copies).\n",
"df_aum, df_flows = (\n",
"    frame[frame[\"Registrar Account - ID\"].isin(selected_accounts)].copy()\n",
"    for frame in (df_merged, dg)\n",
")"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 329,
2026-04-07 12:31:16 +02:00
"id": "91ea0342-607a-420e-af0d-178d063da761",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(31709, 6)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>n_tx</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>2015-01-31</td>\n",
" <td>11819.680</td>\n",
" <td>-1524.010</td>\n",
" <td>15230.010</td>\n",
" <td>32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>2015-02-28</td>\n",
" <td>5705.000</td>\n",
" <td>7247.100</td>\n",
" <td>18571.880</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>2015-03-31</td>\n",
" <td>70038.905</td>\n",
" <td>3655.380</td>\n",
" <td>9754.040</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>2015-04-30</td>\n",
" <td>70324.489</td>\n",
" <td>-218.394</td>\n",
" <td>12840.950</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>2015-05-31</td>\n",
" <td>75567.276</td>\n",
" <td>-4782.849</td>\n",
" <td>6332.849</td>\n",
" <td>24</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty net_flow_qty gross_flow_qty \\\n",
"0 18872 2015-01-31 11819.680 -1524.010 15230.010 \n",
"1 18872 2015-02-28 5705.000 7247.100 18571.880 \n",
"2 18872 2015-03-31 70038.905 3655.380 9754.040 \n",
"3 18872 2015-04-30 70324.489 -218.394 12840.950 \n",
"4 18872 2015-05-31 75567.276 -4782.849 6332.849 \n",
"\n",
" n_tx \n",
"0 32 \n",
"1 38 \n",
"2 47 \n",
"3 39 \n",
"4 24 "
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 329,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Monthly variables at account level.\n",
"\n",
"ID_COL = \"Registrar Account - ID\"\n",
"FLOW_COL = \"Quantity - NetFlows\"\n",
"AUM_COL = \"Quantity - AUM\"\n",
"\n",
"# Flows are daily while AUM is monthly, so both tables get a common month-end key.\n",
"for frame in (df_flows, df_aum):\n",
"    frame[\"Centralisation Date\"] = pd.to_datetime(frame[\"Centralisation Date\"], errors=\"coerce\")\n",
"    frame[\"month\"] = frame[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"\n",
"# 1) Monthly flow aggregates per account.\n",
"flows_valid = df_flows.dropna(subset=[ID_COL, \"month\", FLOW_COL])\n",
"flows_valid = flows_valid.assign(gross_flow_qty=flows_valid[FLOW_COL].abs())  # absolute quantity moved\n",
"df_flows_m = flows_valid.groupby([ID_COL, \"month\"], as_index=False).agg(\n",
"    net_flow_qty=(FLOW_COL, \"sum\"),            # net quantity change over the month\n",
"    gross_flow_qty=(\"gross_flow_qty\", \"sum\"),  # total traded quantity\n",
"    n_tx=(FLOW_COL, \"size\"),                   # number of flow rows\n",
")\n",
"\n",
"# 2) Monthly holdings per account, summed over all rows of the month.\n",
"aum_valid = df_aum.dropna(subset=[ID_COL, \"month\", AUM_COL])\n",
"df_aum_m = aum_valid.groupby([ID_COL, \"month\"], as_index=False).agg(aum_qty=(AUM_COL, \"sum\"))\n",
"\n",
"# 3) Holdings drive the panel: flows are left-joined onto the AUM months.\n",
"df_month0 = df_aum_m.merge(df_flows_m, on=[ID_COL, \"month\"], how=\"left\")\n",
"\n",
"# 4) Months without any flow row get zero flows and zero transactions.\n",
"df_month0 = df_month0.fillna({\"net_flow_qty\": 0.0, \"gross_flow_qty\": 0.0, \"n_tx\": 0})\n",
"df_month0[\"n_tx\"] = df_month0[\"n_tx\"].astype(int)\n",
"\n",
"print(df_month0.shape)\n",
"df_month0.head()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 330,
2026-04-07 12:31:16 +02:00
"id": "8caa4710-c7d5-4397-9d90-82f756499016",
"metadata": {},
"outputs": [],
"source": [
"# External data used to add variables.\n",
"\n",
"# Product valuation / performance table (NAV) and market rates.\n",
"PATH_NAV = \"s3://projet-bdc-data/carmignac/Data Modélisation/Nav/NAV_Bench_data.csv\"\n",
"PATH_RATES = \"s3://projet-bdc-data/carmignac/Data Modélisation/market data/esterRates.csv\"\n",
"\n",
"# Optional competitor data.\n",
"PATH_COMP_FLOWS = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/daily_estimated_flows.csv\"\n",
"PATH_COMP_PERF = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/weekly_perf_full.csv\"\n",
"PATH_PEERS = \"s3://projet-bdc-carmignac-g3/peers/CAD_peers.csv\"\n",
"\n",
"# Author's notes on the tables (column meanings are not checked in this notebook):\n",
"# - df_nav: fund performance tracking over time.\n",
"#     Price (TF PartPrice): price of one fund share.\n",
"#     AUM Eur (Assets Under Management): fund size in euros.\n",
"# - df_rates: evolution over time of a bond yield (YTM).\n",
"# - df_comp_flows: Estimated Fund-level Net Flow (Daily), i.e. estimated net flows of the fund.\n",
"# - df_comp_perf:\n",
"#     perfPeriod: performance horizon.\n",
"#     return: fund performance over the given period.\n",
"#     percentile: rank of the fund against its peers (0 = top performer, 100 = poor performer).\n",
"# - df_peers:\n",
"#     Global Broad Category Group: broad asset class.\n",
"#     Global Category: finer category, alongside the Morningstar Category.\n",
"#     Index Fund: index (passive) fund.\n",
"#     Enhanced Index: quasi-passive (aims at a slight outperformance).\n",
"#     Inception Date: creation date of the share class.\n",
"#     Inception Date of Fund's Oldest Share Class: actual age of the fund.\n",
"#     Domicile: country where the fund is domiciled.\n",
"\n",
"# The four project-bucket files are semicolon-separated; the peers file is pipe-separated.\n",
"SEMICOLON_SOURCES = (PATH_NAV, PATH_RATES, PATH_COMP_FLOWS, PATH_COMP_PERF)\n",
"df_nav, df_rates, df_comp_flows, df_comp_perf = (\n",
"    pd.read_csv(path, sep=\";\") for path in SEMICOLON_SOURCES\n",
")\n",
"df_peers = pd.read_csv(PATH_PEERS, sep=\"|\")"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 331,
2026-04-07 12:31:16 +02:00
"id": "fe081e43-092b-4429-813a-67417e39fd07",
"metadata": {},
"outputs": [],
"source": [
"# Column names used by the feature-engineering cells.\n",
"\n",
"# Account and product keys.\n",
"ID_COL, ISIN_COL = \"Registrar Account - ID\", \"Product - Isin\"\n",
"\n",
"# Flows and AUM share the same date column.\n",
"FLOW_DATE_COL = AUM_DATE_COL = \"Centralisation Date\"\n",
"\n",
"# Flow quantities: net, subscriptions, redemptions.\n",
"FLOW_QTY_COL, FLOW_SUB_COL, FLOW_RED_COL = (\n",
"    \"Quantity - NetFlows\",\n",
"    \"Quantity - Subscription\",\n",
"    \"Quantity - Redemption\",\n",
")\n",
"\n",
"# Holdings: quantity and value in euros.\n",
"AUM_QTY_COL, AUM_VAL_COL = \"Quantity - AUM\", \"Value - AUM €\"\n",
"\n",
"# Account geography.\n",
"REGION_COL, COUNTRY_COL = \"Registrar Account - Region\", \"RegistrarAccount - Country\"\n",
"\n",
"# NAV table columns.\n",
"NAV_DATE_COL, NAV_ISIN_COL = \"Dat\", \"Isin\"\n",
"NAV_PRICE_COL, NAV_BENCH_COL = \"Price (TF PartPrice)\", \"PriceBench\"\n",
"\n",
"# Rates table columns.\n",
"RATE_DATE_COL, RATE_VAL_COL = \"Date\", \"Yld to Maturity\""
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 332,
2026-04-07 12:31:16 +02:00
"id": "b2a1cdce-1b1c-45d9-9c74-93f826bd65fd",
"metadata": {},
"outputs": [],
"source": [
"# Type normalisation of the flows, AUM, NAV and rates tables (all updated in place).\n",
"#\n",
"# The loop variable is named `frame`, not `df`: `df` is the raw AUM extract used by the\n",
"# earlier cells, and rebinding it here would make those cells use `df_aum` on a re-run.\n",
"\n",
"# Dates: unparseable values become NaT; every table gets a month-end `month` key.\n",
"for frame, date_col in [\n",
"    (df_flows, FLOW_DATE_COL),\n",
"    (df_aum, AUM_DATE_COL),\n",
"    (df_nav, NAV_DATE_COL),\n",
"    (df_rates, RATE_DATE_COL),\n",
"]:\n",
"    frame[date_col] = pd.to_datetime(frame[date_col], errors=\"coerce\")\n",
"    frame[\"month\"] = frame[date_col].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"\n",
"# Numeric columns: unparseable values become NaN.\n",
"for frame, numeric_cols in [\n",
"    (df_flows, [FLOW_QTY_COL, FLOW_SUB_COL, FLOW_RED_COL]),\n",
"    (df_aum, [AUM_QTY_COL, AUM_VAL_COL]),\n",
"    (df_nav, [NAV_PRICE_COL, NAV_BENCH_COL]),\n",
"    (df_rates, [RATE_VAL_COL]),\n",
"]:\n",
"    for col in numeric_cols:\n",
"        frame[col] = pd.to_numeric(frame[col], errors=\"coerce\")\n",
"\n",
"# ISIN codes: cast to stripped strings, but keep missing values missing.\n",
"# A bare astype(str) would turn NaN into the literal \"nan\", which survives dropna()\n",
"# and can match other \"nan\" keys in ISIN joins.\n",
"for frame, isin_col in [(df_flows, ISIN_COL), (df_aum, ISIN_COL), (df_nav, NAV_ISIN_COL)]:\n",
"    raw_isin = frame[isin_col]\n",
"    frame[isin_col] = raw_isin.astype(str).str.strip().where(raw_isin.notna())"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 333,
2026-04-07 12:31:16 +02:00
"id": "e10eb2ef-04cd-4186-b188-72d760b4d778",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(492920, 18)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>region_flow</th>\n",
" <th>country_flow</th>\n",
" <th>active_rel_month</th>\n",
" <th>holding_rel_month</th>\n",
" <th>flow_to_aum_rel</th>\n",
" <th>turnover_rel</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>673.990</td>\n",
" <td>956.01</td>\n",
" <td>859.990</td>\n",
" <td>-186.000</td>\n",
" <td>9.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6.739900e+11</td>\n",
" <td>9.560100e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-02-28</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>988.000</td>\n",
" <td>1712.00</td>\n",
" <td>1350.000</td>\n",
" <td>-362.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.880000e+11</td>\n",
" <td>1.712000e+12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>9.710</td>\n",
" <td>1447.71</td>\n",
" <td>785.710</td>\n",
" <td>-776.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.710000e+09</td>\n",
" <td>1.447710e+12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-04-30</td>\n",
" <td>50219.393</td>\n",
" <td>3.452433e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>-123.234</td>\n",
" <td>1708.19</td>\n",
" <td>853.478</td>\n",
" <td>-976.712</td>\n",
" <td>11.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-2.453913e-03</td>\n",
" <td>3.401455e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-05-31</td>\n",
" <td>53685.393</td>\n",
" <td>3.699729e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>121.000</td>\n",
" <td>529.00</td>\n",
" <td>325.000</td>\n",
" <td>-204.000</td>\n",
" <td>6.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2.253872e-03</td>\n",
" <td>9.853705e-03</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
"0 18872 FR0010135103 2015-01-31 0.000 0.000000e+00 \n",
"1 18872 FR0010135103 2015-02-28 0.000 0.000000e+00 \n",
"2 18872 FR0010135103 2015-03-31 0.000 0.000000e+00 \n",
"3 18872 FR0010135103 2015-04-30 50219.393 3.452433e+07 \n",
"4 18872 FR0010135103 2015-05-31 53685.393 3.699729e+07 \n",
"\n",
" region country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
"0 Switzerland Switzerland 673.990 956.01 859.990 -186.000 \n",
"1 Switzerland Switzerland 988.000 1712.00 1350.000 -362.000 \n",
"2 Switzerland Switzerland 9.710 1447.71 785.710 -776.000 \n",
"3 Switzerland Switzerland -123.234 1708.19 853.478 -976.712 \n",
"4 Switzerland Switzerland 121.000 529.00 325.000 -204.000 \n",
"\n",
" n_tx region_flow country_flow active_rel_month holding_rel_month \\\n",
"0 9.0 Switzerland Switzerland 1 0 \n",
"1 12.0 Switzerland Switzerland 1 0 \n",
"2 12.0 Switzerland Switzerland 1 0 \n",
"3 11.0 Switzerland Switzerland 1 1 \n",
"4 6.0 Switzerland Switzerland 1 1 \n",
"\n",
" flow_to_aum_rel turnover_rel \n",
"0 6.739900e+11 9.560100e+11 \n",
"1 9.880000e+11 1.712000e+12 \n",
"2 9.710000e+09 1.447710e+12 \n",
"3 -2.453913e-03 3.401455e-02 \n",
"4 2.253872e-03 9.853705e-03 "
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 333,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Monthly panel at (account, ISIN) level: holdings, flows, activity flags and ratios.\n",
"\n",
"# Monthly flow aggregates per (account, ISIN); missing subscription/redemption quantities count as 0.\n",
"df_flows_rel_m = (\n",
"    df_flows\n",
"    .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
"    .assign(\n",
"        gross_flow_qty=lambda x: x[FLOW_QTY_COL].abs(),\n",
"        sub_qty=lambda x: x[FLOW_SUB_COL].fillna(0),\n",
"        red_qty=lambda x: x[FLOW_RED_COL].fillna(0)\n",
"    )\n",
"    .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
"    .agg(\n",
"        net_flow_qty=(FLOW_QTY_COL, \"sum\"),\n",
"        gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
"        sub_qty=(\"sub_qty\", \"sum\"),\n",
"        red_qty=(\"red_qty\", \"sum\"),\n",
"        n_tx=(FLOW_QTY_COL, \"size\"),\n",
"        region=(REGION_COL, \"last\"),\n",
"        country=(COUNTRY_COL, \"last\")\n",
"    )\n",
")\n",
"\n",
"# Monthly holdings per (account, ISIN).\n",
"df_aum_rel_m = (\n",
"    df_aum\n",
"    .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
"    .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
"    .agg(\n",
"        aum_qty=(AUM_QTY_COL, \"sum\"),\n",
"        aum_val=(AUM_VAL_COL, \"sum\"),\n",
"        region=(REGION_COL, \"last\"),\n",
"        country=(COUNTRY_COL, \"last\")\n",
"    )\n",
")\n",
"\n",
"# Union of the keys seen in either table, so flow-only and holding-only months are both kept.\n",
"keys = pd.concat([\n",
"    df_flows_rel_m[[ID_COL, ISIN_COL, \"month\"]],\n",
"    df_aum_rel_m[[ID_COL, ISIN_COL, \"month\"]]\n",
"]).drop_duplicates()\n",
"\n",
"# The second merge suffixes the flow-side geography columns with `_flow`.\n",
"df_rel_m = (\n",
"    keys\n",
"    .merge(df_aum_rel_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\", suffixes=(\"\", \"_aum\"))\n",
"    .merge(df_flows_rel_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\", suffixes=(\"\", \"_flow\"))\n",
")\n",
"\n",
"# Keys missing on one side mean no holding / no flow for that month.\n",
"for c in [\"aum_qty\", \"aum_val\", \"net_flow_qty\", \"gross_flow_qty\", \"sub_qty\", \"red_qty\", \"n_tx\"]:\n",
"    df_rel_m[c] = df_rel_m[c].fillna(0)\n",
"\n",
"# n_tx is a count: cast it back to int, as the account-level monthly table does.\n",
"df_rel_m[\"n_tx\"] = df_rel_m[\"n_tx\"].astype(int)\n",
"\n",
"# Geography comes from the AUM side first, then falls back to the flow side.\n",
"df_rel_m[\"region\"] = df_rel_m[\"region\"].fillna(df_rel_m.get(\"region_flow\"))\n",
"df_rel_m[\"country\"] = df_rel_m[\"country\"].fillna(df_rel_m.get(\"country_flow\"))\n",
"\n",
"df_rel_m[\"active_rel_month\"] = (df_rel_m[\"gross_flow_qty\"] > 0).astype(int)\n",
"df_rel_m[\"holding_rel_month\"] = (df_rel_m[\"aum_qty\"] > 0).astype(int)\n",
"\n",
"# Ratios are only defined when a holding exists. Dividing by EPS alone (aum_qty == 0)\n",
"# produced values around 1e11-1e12, so those months are set to NaN instead.\n",
"# Values for months with a non-zero holding are unchanged.\n",
"aum_abs = df_rel_m[\"aum_qty\"].abs()\n",
"has_aum = aum_abs > 0\n",
"df_rel_m[\"flow_to_aum_rel\"] = (df_rel_m[\"net_flow_qty\"] / (aum_abs + EPS)).where(has_aum)\n",
"df_rel_m[\"turnover_rel\"] = (df_rel_m[\"gross_flow_qty\"] / (aum_abs + EPS)).where(has_aum)\n",
"\n",
"print(df_rel_m.shape)\n",
"df_rel_m.head()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 334,
2026-04-07 12:31:16 +02:00
"id": "321b09ab-90f0-4add-a670-0d8c74046e03",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>region_flow</th>\n",
" <th>country_flow</th>\n",
" <th>active_rel_month</th>\n",
" <th>holding_rel_month</th>\n",
" <th>flow_to_aum_rel</th>\n",
" <th>turnover_rel</th>\n",
" <th>ret_fund_m</th>\n",
" <th>ret_bench_m</th>\n",
" <th>active_return_m</th>\n",
" <th>delta_rate_m</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>673.990</td>\n",
" <td>956.01</td>\n",
" <td>859.990</td>\n",
" <td>-186.000</td>\n",
" <td>9.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6.739900e+11</td>\n",
" <td>9.560100e+11</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.058</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-02-28</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>988.000</td>\n",
" <td>1712.00</td>\n",
" <td>1350.000</td>\n",
" <td>-362.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.880000e+11</td>\n",
" <td>1.712000e+12</td>\n",
" <td>0.121368</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>9.710</td>\n",
" <td>1447.71</td>\n",
" <td>785.710</td>\n",
" <td>-776.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.710000e+09</td>\n",
" <td>1.447710e+12</td>\n",
" <td>0.068598</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-04-30</td>\n",
" <td>50219.393</td>\n",
" <td>3.452433e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>-123.234</td>\n",
" <td>1708.19</td>\n",
" <td>853.478</td>\n",
" <td>-976.712</td>\n",
" <td>11.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-2.453913e-03</td>\n",
" <td>3.401455e-02</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.077</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-05-31</td>\n",
" <td>53685.393</td>\n",
" <td>3.699729e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>121.000</td>\n",
" <td>529.00</td>\n",
" <td>325.000</td>\n",
" <td>-204.000</td>\n",
" <td>6.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2.253872e-03</td>\n",
" <td>9.853705e-03</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.053</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
"0 18872 FR0010135103 2015-01-31 0.000 0.000000e+00 \n",
"1 18872 FR0010135103 2015-02-28 0.000 0.000000e+00 \n",
"2 18872 FR0010135103 2015-03-31 0.000 0.000000e+00 \n",
"3 18872 FR0010135103 2015-04-30 50219.393 3.452433e+07 \n",
"4 18872 FR0010135103 2015-05-31 53685.393 3.699729e+07 \n",
"\n",
" region country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
"0 Switzerland Switzerland 673.990 956.01 859.990 -186.000 \n",
"1 Switzerland Switzerland 988.000 1712.00 1350.000 -362.000 \n",
"2 Switzerland Switzerland 9.710 1447.71 785.710 -776.000 \n",
"3 Switzerland Switzerland -123.234 1708.19 853.478 -976.712 \n",
"4 Switzerland Switzerland 121.000 529.00 325.000 -204.000 \n",
"\n",
" n_tx region_flow country_flow active_rel_month holding_rel_month \\\n",
"0 9.0 Switzerland Switzerland 1 0 \n",
"1 12.0 Switzerland Switzerland 1 0 \n",
"2 12.0 Switzerland Switzerland 1 0 \n",
"3 11.0 Switzerland Switzerland 1 1 \n",
"4 6.0 Switzerland Switzerland 1 1 \n",
"\n",
" flow_to_aum_rel turnover_rel ret_fund_m ret_bench_m active_return_m \\\n",
"0 6.739900e+11 9.560100e+11 0.000000 0.0 0.0 \n",
"1 9.880000e+11 1.712000e+12 0.121368 0.0 0.0 \n",
"2 9.710000e+09 1.447710e+12 0.068598 0.0 0.0 \n",
"3 -2.453913e-03 3.401455e-02 0.000000 0.0 0.0 \n",
"4 2.253872e-03 9.853705e-03 0.000000 0.0 0.0 \n",
"\n",
" delta_rate_m \n",
"0 -0.058 \n",
"1 -0.022 \n",
"2 -0.014 \n",
"3 -0.077 \n",
"4 -0.053 "
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 334,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Ajout\n",
"\n",
"df_nav_m = (\n",
" df_nav\n",
" .dropna(subset=[NAV_ISIN_COL, \"month\", NAV_PRICE_COL])\n",
" .sort_values([NAV_ISIN_COL, \"month\"])\n",
" .groupby([NAV_ISIN_COL, \"month\"], as_index=False)\n",
" .tail(1)\n",
" .copy()\n",
")\n",
"\n",
"df_nav_m[\"ret_fund_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].pct_change()\n",
"df_nav_m[\"ret_bench_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_BENCH_COL].pct_change()\n",
"df_nav_m[\"active_return_m\"] = df_nav_m[\"ret_fund_m\"] - df_nav_m[\"ret_bench_m\"]\n",
"\n",
"df_nav_m = df_nav_m.rename(columns={NAV_ISIN_COL: ISIN_COL})\n",
"df_nav_m = df_nav_m[[ISIN_COL, \"month\", \"ret_fund_m\", \"ret_bench_m\", \"active_return_m\"]]\n",
"\n",
"df_rates_m = (\n",
" df_rates\n",
" .dropna(subset=[\"month\", RATE_VAL_COL])\n",
" .sort_values(RATE_DATE_COL)\n",
" .groupby(\"month\", as_index=False)\n",
" .tail(1)\n",
" .copy()\n",
")\n",
"\n",
"df_rates_m[\"delta_rate_m\"] = df_rates_m[RATE_VAL_COL].diff()\n",
"df_rates_m = df_rates_m[[\"month\", RATE_VAL_COL, \"delta_rate_m\"]]\n",
"\n",
"\n",
" \n",
"df_rel_m = df_rel_m.merge(\n",
" df_nav_m,\n",
" on=[ISIN_COL, \"month\"],\n",
" how=\"left\"\n",
")\n",
"\n",
"df_rel_m = df_rel_m.merge(\n",
" df_rates_m[[\"month\", \"delta_rate_m\"]],\n",
" on=\"month\",\n",
" how=\"left\"\n",
")\n",
"\n",
"for c in [\"ret_fund_m\", \"ret_bench_m\", \"active_return_m\", \"delta_rate_m\"]:\n",
" df_rel_m[c] = df_rel_m[c].fillna(0)\n",
"\n",
"df_rel_m.head()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 335,
2026-04-07 12:31:16 +02:00
"id": "614bf72b-7afa-4633-ba09-22540a441459",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(31709, 23)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>n_isin_held</th>\n",
" <th>n_isin_active</th>\n",
" <th>delta_rate_m</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>ret_fund_m</th>\n",
" <th>ret_bench_m</th>\n",
" <th>active_month</th>\n",
" <th>flow_to_aum_m</th>\n",
" <th>turnover_m</th>\n",
" <th>sub_share_m</th>\n",
" <th>red_share_m</th>\n",
" <th>aum_peak_to_date</th>\n",
" <th>aum_drawdown</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>2015-01-31</td>\n",
" <td>11819.680</td>\n",
" <td>1.694553e+06</td>\n",
" <td>-1524.010</td>\n",
" <td>15230.010</td>\n",
" <td>6897.990</td>\n",
" <td>-8422.000</td>\n",
" <td>32.0</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" <td>-0.058</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.013100</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>-0.128938</td>\n",
" <td>1.288530</td>\n",
" <td>0.452921</td>\n",
" <td>-0.552987</td>\n",
" <td>11819.680</td>\n",
" <td>8.459899e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>2015-02-28</td>\n",
" <td>5705.000</td>\n",
" <td>7.008600e+05</td>\n",
" <td>7247.100</td>\n",
" <td>18571.880</td>\n",
" <td>13219.490</td>\n",
" <td>-5972.390</td>\n",
" <td>38.0</td>\n",
" <td>3</td>\n",
" <td>13</td>\n",
" <td>-0.022</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.079848</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1.270307</td>\n",
" <td>3.255369</td>\n",
" <td>0.711801</td>\n",
" <td>-0.321582</td>\n",
" <td>11819.680</td>\n",
" <td>5.173304e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>2015-03-31</td>\n",
" <td>70038.905</td>\n",
" <td>1.503549e+07</td>\n",
" <td>3655.380</td>\n",
" <td>9754.040</td>\n",
" <td>6767.710</td>\n",
" <td>-3112.330</td>\n",
" <td>47.0</td>\n",
" <td>4</td>\n",
" <td>14</td>\n",
" <td>-0.014</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.005051</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0.052191</td>\n",
" <td>0.139266</td>\n",
" <td>0.693837</td>\n",
" <td>-0.319081</td>\n",
" <td>70038.905</td>\n",
" <td>1.432188e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>2015-04-30</td>\n",
" <td>70324.489</td>\n",
" <td>3.928292e+07</td>\n",
" <td>-218.394</td>\n",
" <td>12840.950</td>\n",
" <td>6384.278</td>\n",
" <td>-6602.672</td>\n",
" <td>39.0</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" <td>-0.077</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>-0.003106</td>\n",
" <td>0.182596</td>\n",
" <td>0.497181</td>\n",
" <td>-0.514189</td>\n",
" <td>70324.489</td>\n",
" <td>1.432188e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>2015-05-31</td>\n",
" <td>75567.276</td>\n",
" <td>3.987712e+07</td>\n",
" <td>-4782.849</td>\n",
" <td>6332.849</td>\n",
" <td>775.000</td>\n",
" <td>-5557.849</td>\n",
" <td>24.0</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>-0.053</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>-0.063293</td>\n",
" <td>0.083804</td>\n",
" <td>0.122378</td>\n",
" <td>-0.877622</td>\n",
" <td>75567.276</td>\n",
" <td>1.332268e-14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty aum_val net_flow_qty \\\n",
"0 18872 2015-01-31 11819.680 1.694553e+06 -1524.010 \n",
"1 18872 2015-02-28 5705.000 7.008600e+05 7247.100 \n",
"2 18872 2015-03-31 70038.905 1.503549e+07 3655.380 \n",
"3 18872 2015-04-30 70324.489 3.928292e+07 -218.394 \n",
"4 18872 2015-05-31 75567.276 3.987712e+07 -4782.849 \n",
"\n",
" gross_flow_qty sub_qty red_qty n_tx n_isin_held n_isin_active \\\n",
"0 15230.010 6897.990 -8422.000 32.0 4 13 \n",
"1 18571.880 13219.490 -5972.390 38.0 3 13 \n",
"2 9754.040 6767.710 -3112.330 47.0 4 14 \n",
"3 12840.950 6384.278 -6602.672 39.0 4 13 \n",
"4 6332.849 775.000 -5557.849 24.0 7 9 \n",
"\n",
" delta_rate_m region country ret_fund_m ret_bench_m \\\n",
"0 -0.058 Switzerland Switzerland 0.013100 0.0 \n",
"1 -0.022 Switzerland Switzerland 0.079848 0.0 \n",
"2 -0.014 Switzerland Switzerland 0.005051 0.0 \n",
"3 -0.077 Switzerland Switzerland 0.000000 0.0 \n",
"4 -0.053 Switzerland Switzerland 0.000000 0.0 \n",
"\n",
" active_month flow_to_aum_m turnover_m sub_share_m red_share_m \\\n",
"0 1 -0.128938 1.288530 0.452921 -0.552987 \n",
"1 1 1.270307 3.255369 0.711801 -0.321582 \n",
"2 1 0.052191 0.139266 0.693837 -0.319081 \n",
"3 1 -0.003106 0.182596 0.497181 -0.514189 \n",
"4 1 -0.063293 0.083804 0.122378 -0.877622 \n",
"\n",
" aum_peak_to_date aum_drawdown \n",
"0 11819.680 8.459899e-14 \n",
"1 11819.680 5.173304e-01 \n",
"2 70038.905 1.432188e-14 \n",
"3 70324.489 1.432188e-14 \n",
"4 75567.276 1.332268e-14 "
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 335,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Données agrégées sur les ISIN\n",
"\n",
"# =========================\n",
"# ULTRA LIGHT VERSION\n",
"# =========================\n",
"\n",
"tmp = df_rel_m.copy()\n",
"tmp[\"isin_held_flag\"] = (tmp[\"aum_qty\"] > 0).astype(int)\n",
"tmp[\"isin_active_flag\"] = (tmp[\"gross_flow_qty\"] > 0).astype(int)\n",
"\n",
"tmp[\"aum_total\"] = tmp.groupby([ID_COL, \"month\"])[\"aum_qty\"].transform(\"sum\")\n",
"tmp[\"w\"] = tmp[\"aum_qty\"] / (tmp[\"aum_total\"] + 1e-12)\n",
"tmp[\"ret_fund_w\"] = tmp[\"w\"] * tmp[\"ret_fund_m\"]\n",
"tmp[\"ret_bench_w\"] = tmp[\"w\"] * tmp[\"ret_bench_m\"]\n",
"\n",
"df_month = (\n",
" tmp.groupby([ID_COL, \"month\"], as_index=False)\n",
" .agg(\n",
" aum_qty=(\"aum_qty\", \"sum\"),\n",
" aum_val=(\"aum_val\", \"sum\"),\n",
" net_flow_qty=(\"net_flow_qty\", \"sum\"),\n",
" gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
" sub_qty=(\"sub_qty\", \"sum\"),\n",
" red_qty=(\"red_qty\", \"sum\"),\n",
" n_tx=(\"n_tx\", \"sum\"),\n",
" n_isin_held=(\"isin_held_flag\", \"sum\"),\n",
" n_isin_active=(\"isin_active_flag\", \"sum\"),\n",
" delta_rate_m=(\"delta_rate_m\", \"first\"),\n",
" region=(\"region\", \"first\"),\n",
" country=(\"country\", \"first\"),\n",
" ret_fund_m=(\"ret_fund_w\", \"sum\"),\n",
" ret_bench_m=(\"ret_bench_w\", \"sum\")\n",
" )\n",
" .sort_values([ID_COL, \"month\"])\n",
" .reset_index(drop=True)\n",
")\n",
"\n",
"\n",
"df_month[\"active_month\"] = (df_month[\"gross_flow_qty\"] > 0).astype(int)\n",
"df_month[\"flow_to_aum_m\"] = df_month[\"net_flow_qty\"] / (df_month[\"aum_qty\"].abs() + EPS)\n",
"df_month[\"turnover_m\"] = df_month[\"gross_flow_qty\"] / (df_month[\"aum_qty\"].abs() + EPS)\n",
"df_month[\"sub_share_m\"] = df_month[\"sub_qty\"] / (df_month[\"gross_flow_qty\"] + EPS)\n",
"df_month[\"red_share_m\"] = df_month[\"red_qty\"] / (df_month[\"gross_flow_qty\"] + EPS)\n",
"\n",
"df_month[\"aum_peak_to_date\"] = df_month.groupby(ID_COL)[\"aum_qty\"].cummax()\n",
"df_month[\"aum_drawdown\"] = 1 - (df_month[\"aum_qty\"] / (df_month[\"aum_peak_to_date\"] + EPS))\n",
"df_month = df_month[df_month[\"month\"] <= '2025-10-31']\n",
"\n",
"key_cols = [\"Registrar Account - ID\", \"month\"]\n",
"\n",
"df_month = df_month.merge(\n",
" df_month0[key_cols].drop_duplicates(),\n",
" on=key_cols,\n",
" how=\"inner\"\n",
")\n",
"print(df_month.shape)\n",
"df_month.head()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 336,
2026-04-07 12:31:16 +02:00
"id": "2e01fa4f-ba89-4c8a-8cbb-528d89bc811c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>rel_n_months</th>\n",
" <th>rel_active_months</th>\n",
" <th>rel_holding_months</th>\n",
" <th>rel_aum_mean</th>\n",
" <th>rel_turnover_mean</th>\n",
" <th>rel_turnover_vol</th>\n",
" <th>rel_flow_to_aum_vol</th>\n",
" <th>rel_n_tx</th>\n",
" <th>rel_full_exit_count</th>\n",
" <th>rel_entry_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>98</td>\n",
" <td>91</td>\n",
" <td>26</td>\n",
" <td>2519.829520</td>\n",
" <td>5.898325e+11</td>\n",
" <td>9.652436e+11</td>\n",
" <td>9.242856e+11</td>\n",
" <td>382.0</td>\n",
" <td>12</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>FR0010147603</td>\n",
" <td>17</td>\n",
" <td>8</td>\n",
" <td>10</td>\n",
" <td>695.058824</td>\n",
" <td>1.685294e+11</td>\n",
" <td>3.805578e+11</td>\n",
" <td>3.805578e+11</td>\n",
" <td>9.0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>FR0010148981</td>\n",
" <td>81</td>\n",
" <td>66</td>\n",
" <td>28</td>\n",
" <td>831.906963</td>\n",
" <td>6.628200e+10</td>\n",
" <td>1.140022e+11</td>\n",
" <td>1.212644e+11</td>\n",
" <td>149.0</td>\n",
" <td>16</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>FR0010149112</td>\n",
" <td>19</td>\n",
" <td>12</td>\n",
" <td>5</td>\n",
" <td>885.208737</td>\n",
" <td>5.886253e+11</td>\n",
" <td>1.251992e+12</td>\n",
" <td>1.273644e+12</td>\n",
" <td>13.0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>FR0010149120</td>\n",
" <td>99</td>\n",
" <td>79</td>\n",
" <td>36</td>\n",
" <td>425.655010</td>\n",
" <td>1.673836e+11</td>\n",
" <td>6.287132e+11</td>\n",
" <td>6.316077e+11</td>\n",
" <td>152.0</td>\n",
" <td>14</td>\n",
" <td>15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin rel_n_months rel_active_months \\\n",
"0 18872 FR0010135103 98 91 \n",
"1 18872 FR0010147603 17 8 \n",
"2 18872 FR0010148981 81 66 \n",
"3 18872 FR0010149112 19 12 \n",
"4 18872 FR0010149120 99 79 \n",
"\n",
" rel_holding_months rel_aum_mean rel_turnover_mean rel_turnover_vol \\\n",
"0 26 2519.829520 5.898325e+11 9.652436e+11 \n",
"1 10 695.058824 1.685294e+11 3.805578e+11 \n",
"2 28 831.906963 6.628200e+10 1.140022e+11 \n",
"3 5 885.208737 5.886253e+11 1.251992e+12 \n",
"4 36 425.655010 1.673836e+11 6.287132e+11 \n",
"\n",
" rel_flow_to_aum_vol rel_n_tx rel_full_exit_count rel_entry_count \n",
"0 9.242856e+11 382.0 12 13 \n",
"1 3.805578e+11 9.0 4 4 \n",
"2 1.212644e+11 149.0 16 17 \n",
"3 1.273644e+12 13.0 3 3 \n",
"4 6.316077e+11 152.0 14 15 "
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 336,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Données agrégées sur les mois\n",
"tmp = df_rel_m.sort_values([ID_COL, ISIN_COL, \"month\"]).copy()\n",
"tmp[\"prev_aum\"] = tmp.groupby([ID_COL, ISIN_COL])[\"aum_qty\"].shift(1)\n",
"tmp[\"full_exit_event\"] = ((tmp[\"prev_aum\"] > 0) & (tmp[\"aum_qty\"] <= 0)).astype(int)\n",
"tmp[\"entry_event\"] = ((tmp[\"prev_aum\"].fillna(0) <= 0) & (tmp[\"aum_qty\"] > 0)).astype(int)\n",
"\n",
"df_rel_feat = (\n",
" tmp.groupby([ID_COL, ISIN_COL], as_index=False)\n",
" .agg(\n",
" rel_n_months=(\"month\", \"nunique\"),\n",
" rel_active_months=(\"active_rel_month\", \"sum\"),\n",
" rel_holding_months=(\"holding_rel_month\", \"sum\"),\n",
" rel_aum_mean=(\"aum_qty\", \"mean\"),\n",
" rel_turnover_mean=(\"turnover_rel\", \"mean\"),\n",
" rel_turnover_vol=(\"turnover_rel\", \"std\"),\n",
" rel_flow_to_aum_vol=(\"flow_to_aum_rel\", \"std\"),\n",
" rel_n_tx=(\"n_tx\", \"sum\"),\n",
" rel_full_exit_count=(\"full_exit_event\", \"sum\"),\n",
" rel_entry_count=(\"entry_event\", \"sum\")\n",
" )\n",
")\n",
"\n",
"df_rel_feat.head()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 337,
2026-04-07 12:31:16 +02:00
"id": "2d81b4fd-f82d-42f1-ba03-8460706fea0d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(431, 40)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>aum_qty_max</th>\n",
" <th>aum_qty_last</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>n_tx_total</th>\n",
" <th>net_flow_vol</th>\n",
" <th>turnover_mean</th>\n",
" <th>turnover_vol</th>\n",
" <th>flow_to_aum_mean</th>\n",
" <th>flow_to_aum_vol</th>\n",
" <th>avg_n_isin_held</th>\n",
" <th>max_n_isin_held</th>\n",
" <th>sub_share_mean</th>\n",
" <th>red_share_mean</th>\n",
" <th>delta_rate_mean</th>\n",
" <th>aum_drawdown_last</th>\n",
" <th>aum_drawdown_max</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>n_isin_total</th>\n",
" <th>rel_turnover_mean_avg</th>\n",
" <th>rel_turnover_vol_avg</th>\n",
" <th>rel_flow_to_aum_vol_avg</th>\n",
" <th>full_exit_count</th>\n",
" <th>entry_count</th>\n",
" <th>avg_holding_months_per_isin</th>\n",
" <th>max_holding_months_per_isin</th>\n",
" <th>corr_flow_fund_lag3</th>\n",
" <th>corr_flow_fund_lag6</th>\n",
" <th>corr_flow_bench_lag3</th>\n",
" <th>corr_flow_bench_lag6</th>\n",
" <th>corr_flow_rate_lag3</th>\n",
" <th>corr_flow_rate_lag6</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>23477.224308</td>\n",
" <td>14880.4715</td>\n",
" <td>88818.372</td>\n",
" <td>67570.855</td>\n",
" <td>-45677.1480</td>\n",
" <td>1.244126e+06</td>\n",
" <td>9570.200015</td>\n",
" <td>1926.0</td>\n",
" <td>9832.357264</td>\n",
" <td>6.382330e+10</td>\n",
" <td>5.151309e+11</td>\n",
" <td>-2.560792e+10</td>\n",
" <td>2.841988e+11</td>\n",
" <td>7.507692</td>\n",
" <td>26</td>\n",
" <td>0.429844</td>\n",
" <td>-0.576520</td>\n",
" <td>0.013723</td>\n",
" <td>2.392243e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>107</td>\n",
" <td>4.063407e+11</td>\n",
" <td>8.956214e+11</td>\n",
" <td>8.915940e+11</td>\n",
" <td>310</td>\n",
" <td>344</td>\n",
" <td>9.121495</td>\n",
" <td>36</td>\n",
" <td>0.007825</td>\n",
" <td>0.008326</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.007546</td>\n",
" <td>0.014510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200000076</td>\n",
" <td>130</td>\n",
" <td>119</td>\n",
" <td>0.915385</td>\n",
" <td>15840.000331</td>\n",
" <td>9272.4710</td>\n",
" <td>50732.461</td>\n",
" <td>44837.203</td>\n",
" <td>54791.9840</td>\n",
" <td>2.314415e+05</td>\n",
" <td>1780.319492</td>\n",
" <td>518.0</td>\n",
" <td>2838.000232</td>\n",
" <td>1.457820e-01</td>\n",
" <td>2.457632e-01</td>\n",
" <td>-1.707090e-02</td>\n",
" <td>2.717209e-01</td>\n",
" <td>4.700000</td>\n",
" <td>9</td>\n",
" <td>0.508681</td>\n",
" <td>-0.415876</td>\n",
" <td>0.013723</td>\n",
" <td>1.162029e-01</td>\n",
" <td>0.949206</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>22</td>\n",
" <td>6.276897e+10</td>\n",
" <td>2.469731e+11</td>\n",
" <td>2.481822e+11</td>\n",
" <td>71</td>\n",
" <td>81</td>\n",
" <td>27.772727</td>\n",
" <td>85</td>\n",
" <td>0.015278</td>\n",
" <td>0.096449</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.025181</td>\n",
" <td>0.012844</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200000082</td>\n",
" <td>71</td>\n",
" <td>71</td>\n",
" <td>1.000000</td>\n",
" <td>85194.200239</td>\n",
" <td>25820.0550</td>\n",
" <td>316149.358</td>\n",
" <td>131158.471</td>\n",
" <td>14575.5560</td>\n",
" <td>1.229616e+06</td>\n",
" <td>17318.539183</td>\n",
" <td>4807.0</td>\n",
" <td>13472.042652</td>\n",
" <td>4.056892e+11</td>\n",
" <td>2.421685e+12</td>\n",
" <td>-9.687862e+10</td>\n",
" <td>8.402113e+11</td>\n",
" <td>1.760563</td>\n",
" <td>4</td>\n",
" <td>0.438873</td>\n",
" <td>-0.588724</td>\n",
" <td>0.034282</td>\n",
" <td>5.851376e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>18</td>\n",
" <td>1.147803e+12</td>\n",
" <td>1.251086e+12</td>\n",
" <td>1.333111e+12</td>\n",
" <td>100</td>\n",
" <td>101</td>\n",
" <td>6.944444</td>\n",
" <td>19</td>\n",
" <td>-0.019860</td>\n",
" <td>-0.020797</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.022861</td>\n",
" <td>-0.135696</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200000146</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>71298.603700</td>\n",
" <td>15953.6355</td>\n",
" <td>519508.539</td>\n",
" <td>519508.539</td>\n",
" <td>457533.3310</td>\n",
" <td>1.150546e+06</td>\n",
" <td>8850.350438</td>\n",
" <td>4774.0</td>\n",
" <td>10074.748210</td>\n",
" <td>4.770901e+00</td>\n",
" <td>2.930221e+01</td>\n",
" <td>3.780801e+00</td>\n",
" <td>2.870987e+01</td>\n",
" <td>6.684615</td>\n",
" <td>14</td>\n",
" <td>0.517815</td>\n",
" <td>-0.556667</td>\n",
" <td>0.013723</td>\n",
" <td>1.887379e-15</td>\n",
" <td>0.999302</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>33</td>\n",
" <td>2.123548e+11</td>\n",
" <td>3.670050e+11</td>\n",
" <td>3.882699e+11</td>\n",
" <td>237</td>\n",
" <td>256</td>\n",
" <td>26.333333</td>\n",
" <td>54</td>\n",
" <td>0.281071</td>\n",
" <td>-0.020188</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.018482</td>\n",
" <td>-0.018833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200000147</td>\n",
" <td>129</td>\n",
" <td>129</td>\n",
" <td>1.000000</td>\n",
" <td>35957.851907</td>\n",
" <td>18047.3390</td>\n",
" <td>174703.188</td>\n",
" <td>8478.402</td>\n",
" <td>677424.2191</td>\n",
" <td>1.210845e+06</td>\n",
" <td>9386.398474</td>\n",
" <td>7523.0</td>\n",
" <td>13914.783110</td>\n",
" <td>1.775257e+00</td>\n",
" <td>8.769726e+00</td>\n",
" <td>1.150007e+00</td>\n",
" <td>7.862819e+00</td>\n",
" <td>13.162791</td>\n",
" <td>27</td>\n",
" <td>0.599433</td>\n",
" <td>-0.448172</td>\n",
" <td>0.013837</td>\n",
" <td>9.514697e-01</td>\n",
" <td>0.996847</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>78</td>\n",
" <td>5.279255e+11</td>\n",
" <td>6.892142e+11</td>\n",
" <td>6.858178e+11</td>\n",
" <td>596</td>\n",
" <td>619</td>\n",
" <td>21.769231</td>\n",
" <td>49</td>\n",
" <td>-0.026933</td>\n",
" <td>-0.010493</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.204637</td>\n",
" <td>-0.109646</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
"0 18872 130 130 1.000000 23477.224308 \n",
"1 200000076 130 119 0.915385 15840.000331 \n",
"2 200000082 71 71 1.000000 85194.200239 \n",
"3 200000146 130 130 1.000000 71298.603700 \n",
"4 200000147 129 129 1.000000 35957.851907 \n",
"\n",
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
"0 14880.4715 88818.372 67570.855 -45677.1480 \n",
"1 9272.4710 50732.461 44837.203 54791.9840 \n",
"2 25820.0550 316149.358 131158.471 14575.5560 \n",
"3 15953.6355 519508.539 519508.539 457533.3310 \n",
"4 18047.3390 174703.188 8478.402 677424.2191 \n",
"\n",
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
"0 1.244126e+06 9570.200015 1926.0 9832.357264 \n",
"1 2.314415e+05 1780.319492 518.0 2838.000232 \n",
"2 1.229616e+06 17318.539183 4807.0 13472.042652 \n",
"3 1.150546e+06 8850.350438 4774.0 10074.748210 \n",
"4 1.210845e+06 9386.398474 7523.0 13914.783110 \n",
"\n",
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
"0 6.382330e+10 5.151309e+11 -2.560792e+10 2.841988e+11 \n",
"1 1.457820e-01 2.457632e-01 -1.707090e-02 2.717209e-01 \n",
"2 4.056892e+11 2.421685e+12 -9.687862e+10 8.402113e+11 \n",
"3 4.770901e+00 2.930221e+01 3.780801e+00 2.870987e+01 \n",
"4 1.775257e+00 8.769726e+00 1.150007e+00 7.862819e+00 \n",
"\n",
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
"0 7.507692 26 0.429844 -0.576520 \n",
"1 4.700000 9 0.508681 -0.415876 \n",
"2 1.760563 4 0.438873 -0.588724 \n",
"3 6.684615 14 0.517815 -0.556667 \n",
"4 13.162791 27 0.599433 -0.448172 \n",
"\n",
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
"0 0.013723 2.392243e-01 1.000000 Switzerland \n",
"1 0.013723 1.162029e-01 0.949206 Spain \n",
"2 0.034282 5.851376e-01 1.000000 Italy \n",
"3 0.013723 1.887379e-15 0.999302 Italy \n",
"4 0.013837 9.514697e-01 0.996847 Italy \n",
"\n",
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
"0 Switzerland 107 4.063407e+11 8.956214e+11 \n",
"1 Spain 22 6.276897e+10 2.469731e+11 \n",
"2 Italy 18 1.147803e+12 1.251086e+12 \n",
"3 Italy 33 2.123548e+11 3.670050e+11 \n",
"4 Italy 78 5.279255e+11 6.892142e+11 \n",
"\n",
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
"0 8.915940e+11 310 344 \n",
"1 2.481822e+11 71 81 \n",
"2 1.333111e+12 100 101 \n",
"3 3.882699e+11 237 256 \n",
"4 6.858178e+11 596 619 \n",
"\n",
" avg_holding_months_per_isin max_holding_months_per_isin \\\n",
"0 9.121495 36 \n",
"1 27.772727 85 \n",
"2 6.944444 19 \n",
"3 26.333333 54 \n",
"4 21.769231 49 \n",
"\n",
" corr_flow_fund_lag3 corr_flow_fund_lag6 corr_flow_bench_lag3 \\\n",
"0 0.007825 0.008326 NaN \n",
"1 0.015278 0.096449 NaN \n",
"2 -0.019860 -0.020797 NaN \n",
"3 0.281071 -0.020188 NaN \n",
"4 -0.026933 -0.010493 NaN \n",
"\n",
" corr_flow_bench_lag6 corr_flow_rate_lag3 corr_flow_rate_lag6 \n",
"0 NaN 0.007546 0.014510 \n",
"1 NaN -0.025181 0.012844 \n",
"2 NaN 0.022861 -0.135696 \n",
"3 NaN -0.018482 -0.018833 \n",
"4 NaN -0.204637 -0.109646 "
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 337,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Client-level roll-up of df_rel_feat: one output row per ID_COL value.\n",
"df_rel_client = df_rel_feat.groupby(ID_COL, as_index=False).agg(\n",
"    # Number of distinct ISIN_COL values found in the client's rows.\n",
"    n_isin_total=pd.NamedAgg(column=ISIN_COL, aggfunc=\"nunique\"),\n",
"    # Averages of the rel_* turnover / flow-to-AUM statistics over the client's rows.\n",
"    rel_turnover_mean_avg=pd.NamedAgg(column=\"rel_turnover_mean\", aggfunc=\"mean\"),\n",
"    rel_turnover_vol_avg=pd.NamedAgg(column=\"rel_turnover_vol\", aggfunc=\"mean\"),\n",
"    rel_flow_to_aum_vol_avg=pd.NamedAgg(column=\"rel_flow_to_aum_vol\", aggfunc=\"mean\"),\n",
"    # Entry / full-exit counters are summed over the client's rows.\n",
"    full_exit_count=pd.NamedAgg(column=\"rel_full_exit_count\", aggfunc=\"sum\"),\n",
"    entry_count=pd.NamedAgg(column=\"rel_entry_count\", aggfunc=\"sum\"),\n",
"    # Mean and maximum of rel_holding_months over the client's rows.\n",
"    avg_holding_months_per_isin=pd.NamedAgg(column=\"rel_holding_months\", aggfunc=\"mean\"),\n",
"    max_holding_months_per_isin=pd.NamedAgg(column=\"rel_holding_months\", aggfunc=\"max\"),\n",
")\n",
"\n",
"# One row per client, summarising its monthly history in df_month.\n",
"# The rows are sorted by client and month first: the \"last\" aggregations below return the\n",
"# last non-null value of each group in row order, so they only mean \"most recent month\"\n",
"# when every client's rows are in chronological order.\n",
"df_client = (\n",
"    df_month\n",
"    .sort_values([ID_COL, \"month\"])\n",
"    .groupby(ID_COL, as_index=False)\n",
"    .agg(\n",
"        # History length and activity\n",
"        n_months=(\"month\", \"nunique\"),\n",
"        n_active_months=(\"active_month\", \"sum\"),\n",
"        flow_freq=(\"active_month\", \"mean\"),\n",
"\n",
"        # AUM level (in quantity)\n",
"        aum_qty_mean=(\"aum_qty\", \"mean\"),\n",
"        aum_qty_median=(\"aum_qty\", \"median\"),\n",
"        aum_qty_max=(\"aum_qty\", \"max\"),\n",
"        aum_qty_last=(\"aum_qty\", \"last\"),\n",
"\n",
"        # Flow totals and transaction count\n",
"        net_flow_qty_sum=(\"net_flow_qty\", \"sum\"),\n",
"        gross_flow_qty_sum=(\"gross_flow_qty\", \"sum\"),\n",
"        gross_flow_qty_mean=(\"gross_flow_qty\", \"mean\"),\n",
"        n_tx_total=(\"n_tx\", \"sum\"),\n",
"\n",
"        # Level and month-to-month dispersion (std) of flows and turnover\n",
"        net_flow_vol=(\"net_flow_qty\", \"std\"),\n",
"        turnover_mean=(\"turnover_m\", \"mean\"),\n",
"        turnover_vol=(\"turnover_m\", \"std\"),\n",
"        flow_to_aum_mean=(\"flow_to_aum_m\", \"mean\"),\n",
"        flow_to_aum_vol=(\"flow_to_aum_m\", \"std\"),\n",
"\n",
"        # Number of ISINs held per month\n",
"        avg_n_isin_held=(\"n_isin_held\", \"mean\"),\n",
"        max_n_isin_held=(\"n_isin_held\", \"max\"),\n",
"\n",
"        # Average monthly subscription / redemption shares\n",
"        sub_share_mean=(\"sub_share_m\", \"mean\"),\n",
"        red_share_mean=(\"red_share_m\", \"mean\"),\n",
"\n",
"        # Rate changes and AUM drawdown\n",
"        delta_rate_mean=(\"delta_rate_m\", \"mean\"),\n",
"        aum_drawdown_last=(\"aum_drawdown\", \"last\"),\n",
"        aum_drawdown_max=(\"aum_drawdown\", \"max\"),\n",
"\n",
"        # Geography taken from the client's most recent non-null row\n",
"        region=(\"region\", \"last\"),\n",
"        country=(\"country\", \"last\")\n",
"    )\n",
")\n",
"\n",
"# Left join: every client of df_month is kept, with NaN where df_rel_client has no row for it.\n",
"df_client = df_client.merge(df_rel_client, on=ID_COL, how=\"left\")\n",
"\n",
"# Lagged correlation between flows and a driver series (performance, rates)\n",
"def corr_lag(x, y, lag):\n",
"    \"\"\"Pearson correlation between x[t] and y[t - lag].\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x, y : array-like\n",
"        Equal-length series in time order, one value per period.\n",
"    lag : int\n",
"        Non-negative number of periods by which y is taken earlier than x.\n",
"\n",
"    Returns\n",
"    -------\n",
"    float\n",
"        Correlation over the (x[t], y[t - lag]) pairs in which both values are finite.\n",
"        NaN when lag is negative, when lag is not smaller than the series length, or\n",
"        when 3 or fewer valid pairs remain.\n",
"    \"\"\"\n",
"    x = np.asarray(x, dtype=float)\n",
"    y = np.asarray(y, dtype=float)\n",
"\n",
"    if lag < 0 or lag >= len(x):\n",
"        return np.nan\n",
"\n",
"    # Pair x[t] with y[t - lag] BEFORE dropping missing values: filtering first would move\n",
"    # the remaining observations closer together and silently change the effective lag.\n",
"    x_now = x[lag:]\n",
"    y_past = y[:len(y) - lag]  # unlike y[:-lag], this is also correct for lag == 0\n",
"\n",
"    mask = np.isfinite(x_now) & np.isfinite(y_past)\n",
"    if mask.sum() <= 3:\n",
"        return np.nan\n",
"\n",
"    return pd.Series(x_now[mask]).corr(pd.Series(y_past[mask]))\n",
"\n",
"# One record of lagged correlations per client, merged into df_client afterwards.\n",
"rows = []\n",
"\n",
"for acc, g in df_month.groupby(ID_COL):\n",
"    # corr_lag shifts the series by position, so the rows must be in month order.\n",
"    g = g.sort_values(\"month\")\n",
"\n",
"    flow, ret_fund, ret_bench, rate = (\n",
"        g[name].values\n",
"        for name in (\"flow_to_aum_m\", \"ret_fund_m\", \"ret_bench_m\", \"delta_rate_m\")\n",
"    )\n",
"\n",
"    record = {ID_COL: acc}\n",
"\n",
"    # Flows vs. earlier fund / benchmark returns (3 and 6 rows back)\n",
"    record[\"corr_flow_fund_lag3\"] = corr_lag(flow, ret_fund, 3)\n",
"    record[\"corr_flow_fund_lag6\"] = corr_lag(flow, ret_fund, 6)\n",
"    record[\"corr_flow_bench_lag3\"] = corr_lag(flow, ret_bench, 3)\n",
"    record[\"corr_flow_bench_lag6\"] = corr_lag(flow, ret_bench, 6)\n",
"\n",
"    # Flows vs. earlier rate changes\n",
"    record[\"corr_flow_rate_lag3\"] = corr_lag(flow, rate, 3)\n",
"    record[\"corr_flow_rate_lag6\"] = corr_lag(flow, rate, 6)\n",
"\n",
"    rows.append(record)\n",
"\n",
"df_corr = pd.DataFrame(rows)\n",
"\n",
"# Left join keeps every client already present in df_client.\n",
"df_client = df_client.merge(df_corr, on=ID_COL, how=\"left\")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 338,
2026-04-07 12:31:16 +02:00
"id": "8c1a0491-a0bb-4165-b073-41f81637466b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(431, 44)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>aum_qty_max</th>\n",
" <th>aum_qty_last</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>n_tx_total</th>\n",
" <th>net_flow_vol</th>\n",
" <th>turnover_mean</th>\n",
" <th>turnover_vol</th>\n",
" <th>flow_to_aum_mean</th>\n",
" <th>flow_to_aum_vol</th>\n",
" <th>avg_n_isin_held</th>\n",
" <th>max_n_isin_held</th>\n",
" <th>sub_share_mean</th>\n",
" <th>red_share_mean</th>\n",
" <th>delta_rate_mean</th>\n",
" <th>aum_drawdown_last</th>\n",
" <th>aum_drawdown_max</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>n_isin_total</th>\n",
" <th>rel_turnover_mean_avg</th>\n",
" <th>rel_turnover_vol_avg</th>\n",
" <th>rel_flow_to_aum_vol_avg</th>\n",
" <th>full_exit_count</th>\n",
" <th>entry_count</th>\n",
" <th>avg_holding_months_per_isin</th>\n",
" <th>max_holding_months_per_isin</th>\n",
" <th>corr_flow_fund_lag3</th>\n",
" <th>corr_flow_fund_lag6</th>\n",
" <th>corr_flow_bench_lag3</th>\n",
" <th>corr_flow_bench_lag6</th>\n",
" <th>corr_flow_rate_lag3</th>\n",
" <th>corr_flow_rate_lag6</th>\n",
" <th>flow_trend_12m</th>\n",
" <th>aum_trend_12m</th>\n",
" <th>drawdown_trend_12m</th>\n",
" <th>beta_rate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>23477.224308</td>\n",
" <td>14880.4715</td>\n",
" <td>88818.372</td>\n",
" <td>67570.855</td>\n",
" <td>-45677.1480</td>\n",
" <td>1.244126e+06</td>\n",
" <td>9570.200015</td>\n",
" <td>1926.0</td>\n",
" <td>9832.357264</td>\n",
" <td>6.382330e+10</td>\n",
" <td>5.151309e+11</td>\n",
" <td>-2.560792e+10</td>\n",
" <td>2.841988e+11</td>\n",
" <td>7.507692</td>\n",
" <td>26</td>\n",
" <td>0.429844</td>\n",
" <td>-0.576520</td>\n",
" <td>0.013723</td>\n",
" <td>2.392243e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>107</td>\n",
" <td>4.063407e+11</td>\n",
" <td>8.956214e+11</td>\n",
" <td>8.915940e+11</td>\n",
" <td>310</td>\n",
" <td>344</td>\n",
" <td>9.121495</td>\n",
" <td>36</td>\n",
" <td>0.007825</td>\n",
" <td>0.008326</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.007546</td>\n",
" <td>0.014510</td>\n",
" <td>-1.886348e-02</td>\n",
" <td>2920.070661</td>\n",
" <td>-0.024467</td>\n",
" <td>1.405196e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200000076</td>\n",
" <td>130</td>\n",
" <td>119</td>\n",
" <td>0.915385</td>\n",
" <td>15840.000331</td>\n",
" <td>9272.4710</td>\n",
" <td>50732.461</td>\n",
" <td>44837.203</td>\n",
" <td>54791.9840</td>\n",
" <td>2.314415e+05</td>\n",
" <td>1780.319492</td>\n",
" <td>518.0</td>\n",
" <td>2838.000232</td>\n",
" <td>1.457820e-01</td>\n",
" <td>2.457632e-01</td>\n",
" <td>-1.707090e-02</td>\n",
" <td>2.717209e-01</td>\n",
" <td>4.700000</td>\n",
" <td>9</td>\n",
" <td>0.508681</td>\n",
" <td>-0.415876</td>\n",
" <td>0.013723</td>\n",
" <td>1.162029e-01</td>\n",
" <td>0.949206</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>22</td>\n",
" <td>6.276897e+10</td>\n",
" <td>2.469731e+11</td>\n",
" <td>2.481822e+11</td>\n",
" <td>71</td>\n",
" <td>81</td>\n",
" <td>27.772727</td>\n",
" <td>85</td>\n",
" <td>0.015278</td>\n",
" <td>0.096449</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.025181</td>\n",
" <td>0.012844</td>\n",
" <td>1.789020e-03</td>\n",
" <td>548.538087</td>\n",
" <td>-0.003843</td>\n",
" <td>-1.283031e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200000082</td>\n",
" <td>71</td>\n",
" <td>71</td>\n",
" <td>1.000000</td>\n",
" <td>85194.200239</td>\n",
" <td>25820.0550</td>\n",
" <td>316149.358</td>\n",
" <td>131158.471</td>\n",
" <td>14575.5560</td>\n",
" <td>1.229616e+06</td>\n",
" <td>17318.539183</td>\n",
" <td>4807.0</td>\n",
" <td>13472.042652</td>\n",
" <td>4.056892e+11</td>\n",
" <td>2.421685e+12</td>\n",
" <td>-9.687862e+10</td>\n",
" <td>8.402113e+11</td>\n",
" <td>1.760563</td>\n",
" <td>4</td>\n",
" <td>0.438873</td>\n",
" <td>-0.588724</td>\n",
" <td>0.034282</td>\n",
" <td>5.851376e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>18</td>\n",
" <td>1.147803e+12</td>\n",
" <td>1.251086e+12</td>\n",
" <td>1.333111e+12</td>\n",
" <td>100</td>\n",
" <td>101</td>\n",
" <td>6.944444</td>\n",
" <td>19</td>\n",
" <td>-0.019860</td>\n",
" <td>-0.020797</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.022861</td>\n",
" <td>-0.135696</td>\n",
" <td>4.793703e+09</td>\n",
" <td>-10443.281371</td>\n",
" <td>0.033033</td>\n",
" <td>7.995257e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200000146</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>71298.603700</td>\n",
" <td>15953.6355</td>\n",
" <td>519508.539</td>\n",
" <td>519508.539</td>\n",
" <td>457533.3310</td>\n",
" <td>1.150546e+06</td>\n",
" <td>8850.350438</td>\n",
" <td>4774.0</td>\n",
" <td>10074.748210</td>\n",
" <td>4.770901e+00</td>\n",
" <td>2.930221e+01</td>\n",
" <td>3.780801e+00</td>\n",
" <td>2.870987e+01</td>\n",
" <td>6.684615</td>\n",
" <td>14</td>\n",
" <td>0.517815</td>\n",
" <td>-0.556667</td>\n",
" <td>0.013723</td>\n",
" <td>1.887379e-15</td>\n",
" <td>0.999302</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>33</td>\n",
" <td>2.123548e+11</td>\n",
" <td>3.670050e+11</td>\n",
" <td>3.882699e+11</td>\n",
" <td>237</td>\n",
" <td>256</td>\n",
" <td>26.333333</td>\n",
" <td>54</td>\n",
" <td>0.281071</td>\n",
" <td>-0.020188</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.018482</td>\n",
" <td>-0.018833</td>\n",
" <td>-9.860558e-02</td>\n",
" <td>24136.047846</td>\n",
" <td>-0.049820</td>\n",
" <td>-4.842472e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200000147</td>\n",
" <td>129</td>\n",
" <td>129</td>\n",
" <td>1.000000</td>\n",
" <td>35957.851907</td>\n",
" <td>18047.3390</td>\n",
" <td>174703.188</td>\n",
" <td>8478.402</td>\n",
" <td>677424.2191</td>\n",
" <td>1.210845e+06</td>\n",
" <td>9386.398474</td>\n",
" <td>7523.0</td>\n",
" <td>13914.783110</td>\n",
" <td>1.775257e+00</td>\n",
" <td>8.769726e+00</td>\n",
" <td>1.150007e+00</td>\n",
" <td>7.862819e+00</td>\n",
" <td>13.162791</td>\n",
" <td>27</td>\n",
" <td>0.599433</td>\n",
" <td>-0.448172</td>\n",
" <td>0.013837</td>\n",
" <td>9.514697e-01</td>\n",
" <td>0.996847</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>78</td>\n",
" <td>5.279255e+11</td>\n",
" <td>6.892142e+11</td>\n",
" <td>6.858178e+11</td>\n",
" <td>596</td>\n",
" <td>619</td>\n",
" <td>21.769231</td>\n",
" <td>49</td>\n",
" <td>-0.026933</td>\n",
" <td>-0.010493</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.204637</td>\n",
" <td>-0.109646</td>\n",
" <td>1.129487e+00</td>\n",
" <td>2098.385472</td>\n",
" <td>-0.012011</td>\n",
" <td>-2.472128e+00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
"0 18872 130 130 1.000000 23477.224308 \n",
"1 200000076 130 119 0.915385 15840.000331 \n",
"2 200000082 71 71 1.000000 85194.200239 \n",
"3 200000146 130 130 1.000000 71298.603700 \n",
"4 200000147 129 129 1.000000 35957.851907 \n",
"\n",
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
"0 14880.4715 88818.372 67570.855 -45677.1480 \n",
"1 9272.4710 50732.461 44837.203 54791.9840 \n",
"2 25820.0550 316149.358 131158.471 14575.5560 \n",
"3 15953.6355 519508.539 519508.539 457533.3310 \n",
"4 18047.3390 174703.188 8478.402 677424.2191 \n",
"\n",
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
"0 1.244126e+06 9570.200015 1926.0 9832.357264 \n",
"1 2.314415e+05 1780.319492 518.0 2838.000232 \n",
"2 1.229616e+06 17318.539183 4807.0 13472.042652 \n",
"3 1.150546e+06 8850.350438 4774.0 10074.748210 \n",
"4 1.210845e+06 9386.398474 7523.0 13914.783110 \n",
"\n",
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
"0 6.382330e+10 5.151309e+11 -2.560792e+10 2.841988e+11 \n",
"1 1.457820e-01 2.457632e-01 -1.707090e-02 2.717209e-01 \n",
"2 4.056892e+11 2.421685e+12 -9.687862e+10 8.402113e+11 \n",
"3 4.770901e+00 2.930221e+01 3.780801e+00 2.870987e+01 \n",
"4 1.775257e+00 8.769726e+00 1.150007e+00 7.862819e+00 \n",
"\n",
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
"0 7.507692 26 0.429844 -0.576520 \n",
"1 4.700000 9 0.508681 -0.415876 \n",
"2 1.760563 4 0.438873 -0.588724 \n",
"3 6.684615 14 0.517815 -0.556667 \n",
"4 13.162791 27 0.599433 -0.448172 \n",
"\n",
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
"0 0.013723 2.392243e-01 1.000000 Switzerland \n",
"1 0.013723 1.162029e-01 0.949206 Spain \n",
"2 0.034282 5.851376e-01 1.000000 Italy \n",
"3 0.013723 1.887379e-15 0.999302 Italy \n",
"4 0.013837 9.514697e-01 0.996847 Italy \n",
"\n",
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
"0 Switzerland 107 4.063407e+11 8.956214e+11 \n",
"1 Spain 22 6.276897e+10 2.469731e+11 \n",
"2 Italy 18 1.147803e+12 1.251086e+12 \n",
"3 Italy 33 2.123548e+11 3.670050e+11 \n",
"4 Italy 78 5.279255e+11 6.892142e+11 \n",
"\n",
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
"0 8.915940e+11 310 344 \n",
"1 2.481822e+11 71 81 \n",
"2 1.333111e+12 100 101 \n",
"3 3.882699e+11 237 256 \n",
"4 6.858178e+11 596 619 \n",
"\n",
" avg_holding_months_per_isin max_holding_months_per_isin \\\n",
"0 9.121495 36 \n",
"1 27.772727 85 \n",
"2 6.944444 19 \n",
"3 26.333333 54 \n",
"4 21.769231 49 \n",
"\n",
" corr_flow_fund_lag3 corr_flow_fund_lag6 corr_flow_bench_lag3 \\\n",
"0 0.007825 0.008326 NaN \n",
"1 0.015278 0.096449 NaN \n",
"2 -0.019860 -0.020797 NaN \n",
"3 0.281071 -0.020188 NaN \n",
"4 -0.026933 -0.010493 NaN \n",
"\n",
" corr_flow_bench_lag6 corr_flow_rate_lag3 corr_flow_rate_lag6 \\\n",
"0 NaN 0.007546 0.014510 \n",
"1 NaN -0.025181 0.012844 \n",
"2 NaN 0.022861 -0.135696 \n",
"3 NaN -0.018482 -0.018833 \n",
"4 NaN -0.204637 -0.109646 \n",
"\n",
" flow_trend_12m aum_trend_12m drawdown_trend_12m beta_rate \n",
"0 -1.886348e-02 2920.070661 -0.024467 1.405196e+10 \n",
"1 1.789020e-03 548.538087 -0.003843 -1.283031e-01 \n",
"2 4.793703e+09 -10443.281371 0.033033 7.995257e+10 \n",
"3 -9.860558e-02 24136.047846 -0.049820 -4.842472e+00 \n",
"4 1.129487e+00 2098.385472 -0.012011 -2.472128e+00 "
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 338,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def compute_trend(y):\n",
"    \"\"\"Slope of a linear fit of y against its positions 0..len(y)-1.\n",
"\n",
"    Non-finite values are left out of the fit (their positions are skipped, not renumbered).\n",
"    Returns NaN when fewer than 4 finite values are available.\n",
"    \"\"\"\n",
"    values = np.asarray(y, dtype=float)\n",
"    if len(values) < 4:\n",
"        return np.nan\n",
"\n",
"    finite = np.isfinite(values)\n",
"    if finite.sum() < 4:\n",
"        return np.nan\n",
"\n",
"    # Single regressor = position in the input, shaped (n, 1) as LinearRegression expects.\n",
"    positions = np.arange(len(values)).reshape(-1, 1)\n",
"    model = LinearRegression()\n",
"    model.fit(positions[finite], values[finite])\n",
"    return model.coef_[0]\n",
"\n",
"def compute_beta(y, x):\n",
"    \"\"\"Slope of a linear regression of y on x (with intercept).\n",
"\n",
"    Only the positions where both y and x are finite enter the fit.\n",
"    Returns NaN when fewer than 6 such positions exist.\n",
"    \"\"\"\n",
"    target = np.asarray(y, dtype=float)\n",
"    driver = np.asarray(x, dtype=float)\n",
"\n",
"    usable = np.isfinite(target) & np.isfinite(driver)\n",
"    if usable.sum() < 6:\n",
"        return np.nan\n",
"\n",
"    model = LinearRegression()\n",
"    model.fit(driver[usable].reshape(-1, 1), target[usable])\n",
"    return model.coef_[0]\n",
"\n",
"# Per-client trends over the last 12 available monthly rows and flow sensitivity to rate changes.\n",
"rows = []\n",
"\n",
"for acc, g in df_month.groupby(ID_COL):\n",
"    # Chronological order, so that [-12:] selects the most recent rows.\n",
"    g = g.sort_values(\"month\")\n",
"\n",
"    flow = g[\"flow_to_aum_m\"].values\n",
"    aum = g[\"aum_qty\"].values\n",
"    delta_rate = g[\"delta_rate_m\"].values\n",
"    drawdown = g[\"aum_drawdown\"].values\n",
"\n",
"    rows.append({\n",
"        ID_COL: acc,\n",
"        \"flow_trend_12m\": compute_trend(flow[-12:]),\n",
"        \"aum_trend_12m\": compute_trend(aum[-12:]),\n",
"        \"drawdown_trend_12m\": compute_trend(drawdown[-12:]),\n",
"        # Slope of flow_to_aum_m regressed on delta_rate_m over the client's full history\n",
"        \"beta_rate\": compute_beta(flow, delta_rate)\n",
"    })\n",
"\n",
"df_beta = pd.DataFrame(rows)\n",
"\n",
"# Make the cell re-runnable: if a previous run already added these columns to df_client,\n",
"# drop them first so the merge replaces them instead of creating *_x / *_y duplicates.\n",
"stale_cols = [c for c in df_beta.columns if c != ID_COL and c in df_client.columns]\n",
"df_client = df_client.drop(columns=stale_cols).merge(df_beta, on=ID_COL, how=\"left\")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 339,
2026-04-07 12:31:16 +02:00
"id": "4e4ea46f-5c3d-4a4a-b79c-ff5ae8973bad",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"seg_2D\n",
"Highly active (high int, high freq) 137\n",
"Dormant (low int, low freq) 134\n",
"Small rebalancers (low int, high freq) 80\n",
"Occasional large movers (high int, low freq) 80\n",
"Name: count, dtype: int64\n",
"thr_int: 5.739688017572092 thr_freq: 0.8\n"
]
}
],
"source": [
"# Turnover proxy: total gross flow divided by the client's mean AUM (both in quantity).\n",
"df_client[\"rel_intensity_total\"] = df_client[\"gross_flow_qty_sum\"].div(df_client[\"aum_qty_mean\"])\n",
"# Share of active months, exposed under the name used by the 2D segmentation.\n",
"df_client[\"frequency\"] = df_client[\"flow_freq\"]\n",
"\n",
"# Median split on each axis (simple and explainable thresholds).\n",
"thr_freq = df_client[\"frequency\"].median()\n",
"thr_int = df_client[\"rel_intensity_total\"].median()\n",
"\n",
"def quadrant(row):\n",
"    \"\"\"Return the 2D segment label of one client row.\n",
"\n",
"    Reads row[\"rel_intensity_total\"] and row[\"frequency\"] and compares them with the\n",
"    notebook-level thresholds thr_int and thr_freq. A value is \"low\" only when it is\n",
"    strictly below its threshold; anything else (including NaN) falls on the \"high\" side.\n",
"    \"\"\"\n",
"    is_low_int = row[\"rel_intensity_total\"] < thr_int\n",
"    is_low_frq = row[\"frequency\"] < thr_freq\n",
"\n",
"    if is_low_int:\n",
"        if is_low_frq:\n",
"            return \"Dormant (low int, low freq)\"\n",
"        return \"Small rebalancers (low int, high freq)\"\n",
"\n",
"    if is_low_frq:\n",
"        return \"Occasional large movers (high int, low freq)\"\n",
"    return \"Highly active (high int, high freq)\"\n",
"\n",
"# Label every client; only the two columns that quadrant reads are passed to the row-wise apply.\n",
"df_client[\"seg_2D\"] = df_client[[\"rel_intensity_total\", \"frequency\"]].apply(quadrant, axis=1)\n",
"\n",
"# Segment sizes, then the two thresholds used for the split.\n",
"print(df_client[\"seg_2D\"].value_counts())\n",
"print(\"thr_int:\", thr_int, \"thr_freq:\", thr_freq)\n"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 340,
2026-04-07 12:31:16 +02:00
"id": "09943df7-8c78-4c51-b387-866c5cddd392",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApgAAAHHCAYAAAAbASh2AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA5ZhJREFUeJzs3XdYU9cbB/Bvwt6yEVkCLhQBBf1pqziLe89axd1W3NqqtXXWuuqqojjqqLUVt9bWSd3a1oG49wAVcSDgQEZyfn/QpITchKybAe/neXg0Jyf3nNwkN2/OFDDGGAghhBBCCNERoaErQAghhBBCyhYKMAkhhBBCiE5RgEkIIYQQQnSKAkxCCCGEEKJTFGASQgghhBCdogCTEEIIIYToFAWYhBBCCCFEpyjAJIQQQgghOkUBJiGEEEII0SmjDzCbNGmCWrVq6fSYAoEAw4cP1+kxVbF+/XoIBAI8ePBA72UDQP/+/REQEGCQsomsBw8eQCAQYP369Yauis7w9ZwCAgLQv39/nR7TmMvlQ5MmTdCkSRNDV0MnNm7ciOrVq8PCwgIVKlQwdHXKpLS0NFhbW+PUqVOGrgoxsP3798Pe3h7Pnz9X+7FqBZhnz57F8OHDUbNmTdjZ2cHPzw89evTArVu35PI2adIEAoEAAoEAQqEQjo6OqFatGvr27YtDhw6pXVFC9O3atWuYNm2aVj8IfvnlFyxevFhndSqrTp8+jWnTpiErK8vQVdGJ5cuXG/WPhydPnmDatGm4ePGioauilhs3bqB///4ICgrC6tWrsWrVKkNXqUyaMWMG6tevjw8++EBhnpYtWyptrPnxxx9Ro0YNWFtbo0qVKli6dClnvsePH6NHjx6oUKECHB0d0bFjR9y7d08nz6M0WVlZsLa2hkAgwPXr1/VSpiH88ccfmDZtmkaPbdWqFYKDgzF79mz1H8zU0LVrV+bl5cVGjBjBVq9ezWbOnMk8PT2ZnZ0du3z5skze6Oho5uPjwzZu3Mg2btzIEhIS2Pjx41lgYCADwHr06MHy8/NLLTM6OprVrFlTnWqWCgCLi4vT6TFVUVhYyHJzc5lYLNZ72YwxFhsby/z9/Q1StinaunUrA8COHDmi8THatm3Lec7FYjHLzc1lhYWFmlfQyNy/f58BYOvWrVP7sfPnz2cA2P379+Xue//+vUrXCl3TptyaNWuy6Oho3VZIC3l5eSwvL096++zZsxq/Voa0YsUKBoDdvn3b0FUps549e8YsLCzYL7/8ojDP9u3bmZ2dncLv0oSEBAaAde3ala1atYr17duXAWBz5syRyff69WtWpUoV5uHhwebOncsWLlzIfH19mY+PD3vx4oXOn1tJq1atYtbW1szLy4tNnjyZ9/IMJS4ujqkZ7slYvnw5s7W1ZTk5OWo9Tq0WzLFjx+Lhw4f44YcfMHjwYHz99dc4ceIECgsLMWfOHLn8Tk5O+OSTT/DJJ5/g008/xfz583Hr1i0MGzYMW7Zswddff61+RGzCzMzMpL+WdOHdu3c6OQ7RP4FAAGtra5iZmRm6KgqJxWK8f//e0NWAlZUVLCwsyk25fLC0tISlpaWhq6G1Z8+eAUCpXeOMMeTm5uqhRmXPzz//DHNzc7Rv357z/vfv32PcuHGYMGEC5/25ubmYPHky2rZti23btmHIkCH46aef0KdPH8ycOROvXr2S5l2+fDlu376NvXv34ssvv8SYMWNw8OBBpKenY8GCBUrrOW3aNK2HfP38889o06YNevfujV9++UWrY5VlXbt2RV5eHrZu3areAzUOaYupU6cOq1OnjkyaspbHwsJCFhISwmxtbVlWVpbSY0uOc+7cOdagQQNmbW3NAgIC2IoVK+Tyvn//nk2ZMoUFBQUxS0tL5uPjw7744gv2/v17mXz491fXzp07Wc2aNZmlpSULCQlh+/btk8n34MED9vnnn7OqVasya2tr5uLiwrp16ybTyiJpCVi/fr1cffbv38
8AsN9++40xxti6des4W2ni4+NZSEgIs7S0ZBUrVmTDhg1jr169UngeGjVqxGxsbNioUaMYY4zt2rWLtWnThlWsWJFZWlqywMBANmPGDLnWMVVbMM+ePcs++ugj5urqKj3fAwYMkMkjEonYokWLWEhICLOysmIeHh5s6NChLDMzUy7f1KlTWcWKFZmNjQ1r0qQJu3r1KvP392exsbHSfJJzc+LECTZixAjm5ubGnJyc2NChQ1leXh579eoV69u3L6tQoQKrUKEC++KLL+RaglWtk7+/P2vbti07ceIEi4qKYlZWVqxy5cpsw4YNcvUp+SdpzVTlnEdHR8s9XnL+FbX2JSUlsQ8//JDZ2toyJycn1qFDB3bt2jWZPFOnTpW24sTGxjInJyfm6OjI+vfvz96+fSuT9/nz5+z69ety6Vwkn4uff/6ZhYSEMHNzc7Zz507GGGOPHj1iAwYMYB4eHtLPy48//ijzeK7nlJKSwmJjY1nlypWZlZUV8/T0ZAMGDJBpnZA8n5J/ks9J8feKOp83VeutiKL36MmTJ9mYMWOYm5sbs7W1ZZ06dWLPnj2TeVzJ51K8NfPVq1ds1KhRzMfHh1laWrKgoCA2Z84cJhKJ5M7l/Pnz2cqVK1lgYCCztLRkkZGR7J9//pGpZ3p6Ouvfvz+rVKkSs7S0ZF5eXqxDhw4y15no6GhpHY4cOcJ5vtetW8emTJnCzM3NZZ6PxJAhQ5iTkxPLzc3lPF+SVugHDx7I3Tdx4kRmYWEh/SzeunWLdenShXl6ejIrKytWqVIl1rNnT6XfB1znderUqdL72rZty/bv38/q1q3LrKys2KJFi1Q+35J8sbGxzNHRkTk5ObF+/fqx5ORkufd08XNZHNf1VZfXpOL1HD16NPP392eWlpasUqVKrG/fvuz58+fs9evXzNbWlo0cOVLucWlpaUwoFLLvvvtO4TlmjLHGjRuzJk2aKLx/+vTpzM/Pj717946zBfP3339nANjvv/8uk3769GkGgG3cuFGaFhUVxaKiouTK+Oijj1hQUJDSek6dOlWrHrmHDx8ygUDAtmzZwv7++28GgJ06dYoz78aNG1lUVBSzsbFhFSpUYI0aNWIHDhyQyfPHH3+wxo0bM3t7e+bg4MAiIyPZpk2bZPJs2bKF1alTh1lbWzNXV1fWp08f9ujRI5k8qr6/VL1GxMbGcn7eJX799VdWp04dab1r1arFFi9eLFd+REQE69Chg8LzyUXrAFMsFrNKlSqxjz76SCa9tK7tmTNnMgBs7969So8fHR3NvL29mYeHBxs+fDj74Ycf2IcffsgAyHxRiEQi9tFHHzFbW1s2evRotnLlSjZ8+HBmbm7OOnbsKHNMACwsLIxVrFiRzZw5ky1evJgFBgYyW1tbmS++rVu3srCwMDZlyhS2atUq9tVXXzFnZ2fm7+8v84UdGBjI2rRpI1f3AQMGMGdnZ2k3G1eAKflybdGiBVu6dCkbPnw4MzMzY1FRUTLdc9HR0czLy4u5u7uzESNGsJUrV7Jdu3Yxxhjr1KkT69GjB5s/fz5bsWIF6969OwPAxo8fL1MfVQLMjIwM5uzszKpWrcrmz5/PVq9ezSZPnsxq1Kghk2/w4MHM3NycDRkyhCUkJLAJEyYwOzs7uXp/+eWXDABr3749W7ZsGRsyZAjz8fFhbm5unF/e4eHhrFWrViw+Pl7arfLll1+yDz/8kH388cds+fLlrF27dgyA3MVX1Tr5+/uzatWqMU9PT/bVV1+xZcuWsTp16jCBQMCuXLnCGGPs7t27bOTIkQwA++qrr6RDPZ4+faryOT948CALDw9nbm5u0sdLAjauYOzQoUPM3NycVa1alc2bN49Nnz6dubm5MWdnZ873TEREBOvSpQtbvnw5Gzx4sPRcFSfJq0o3PwBWo0YN5u7uzqZPn87i4+NZcnIye/r0KfPx8WG+vr5sxowZbMWKFaxDhw4MgPRLXNFz+v7771mjRo3YjBkz2KpVq9ioUa
OYjY0Nq1evnvQHQkpKCuvdu7f0eJJz9ebNG+nrVfy9ournTdV6K6IowIyIiGDNmjVjS5cuZePGjWNmZmasR48e0nw
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.style.use('default')\n",
"\n",
"# Scatter of the 2D segmentation: one colour per segment, dashed lines at the two thresholds.\n",
"fig, ax = plt.subplots()\n",
"for name, g in df_client.groupby(\"seg_2D\"):\n",
"    ax.scatter(g[\"frequency\"], g[\"rel_intensity_total\"], s=10, label=name)\n",
"\n",
"# Intensity is drawn on a log axis.\n",
"ax.set_yscale(\"log\")\n",
"ax.axvline(thr_freq, linestyle=\"--\")\n",
"ax.axhline(thr_int, linestyle=\"--\")\n",
"ax.set_xlabel(\"Activity frequency (share of active months)\")\n",
"ax.set_ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"ax.set_title(\"2D behavioral segmentation: relative intensity vs frequency (400+ Accounts)\")\n",
"ax.legend(markerscale=2)\n",
"plt.show()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 366,
2026-04-07 12:31:16 +02:00
"id": "9eb5fbb8-1a7b-434c-ba36-3c2a560b4cb1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nb clients = 404\n",
"Nb features = 35\n",
"['log_aum_qty_mean', 'flow_freq', 'gross_flow_to_aum', 'flow_to_aum_vol', 'activity_intensity', 'n_tx_total', 'avg_n_isin_held', 'n_isin_total', 'avg_holding_months_per_isin', 'exit_rate_per_isin', 'flow_direction_balance', 'aum_drawdown_last', 'corr_flow_fund_lag3', 'corr_flow_fund_lag6', 'corr_flow_rate_lag3', 'country_grp_France', 'country_grp_Germany', 'country_grp_Italy', 'country_grp_Luxembourg', 'country_grp_Monaco', 'country_grp_Other', 'country_grp_Spain', 'country_grp_Sweden', 'country_grp_Switzerland', 'country_grp_United Kingdom', 'region_grp_France', 'region_grp_Germany', 'region_grp_International', 'region_grp_Italy', 'region_grp_Luxembourg', 'region_grp_Nordics', 'region_grp_Other', 'region_grp_Spain', 'region_grp_Switzerland', 'region_grp_United Kingdom']\n"
]
}
],
"source": [
"# Work on a copy so the derived features below are not written into df_client.\n",
"dfc = df_client.copy()\n",
"\n",
"# Ratio features; EPS in the denominators guards against division by zero.\n",
"dfc[\"gross_flow_to_aum\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"aum_qty_mean\"].abs() + EPS)\n",
"dfc[\"avg_ticket\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"n_tx_total\"] + EPS)\n",
"dfc[\"flow_direction_balance\"] = dfc[\"net_flow_qty_sum\"] / (dfc[\"gross_flow_qty_sum\"] + EPS)\n",
"dfc[\"redemption_bias\"] = dfc[\"red_share_mean\"] - dfc[\"sub_share_mean\"]\n",
"dfc[\"activity_intensity\"] = dfc[\"n_tx_total\"] / (dfc[\"n_months\"] + EPS)\n",
"dfc[\"exit_rate_per_isin\"] = dfc[\"full_exit_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"entry_rate_per_isin\"] = dfc[\"entry_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"aum_final_to_peak\"] = dfc[\"aum_qty_last\"] / (dfc[\"aum_qty_max\"] + EPS)\n",
"\n",
"# log1p versions of the size-like variables; negative values are clipped to 0 first.\n",
"for col in [\"aum_qty_mean\", \"gross_flow_qty_sum\", \"n_tx_total\", \"avg_ticket\", \"gross_flow_qty_mean\"]:\n",
"    dfc[f\"log_{col}\"] = np.log1p(dfc[col].clip(lower=0))\n",
"\n",
"# Keep clients with at least 6 months of history and a strictly positive mean AUM.\n",
"dfc = dfc[(dfc[\"n_months\"] >= 6) & (dfc[\"aum_qty_mean\"] > 0)].copy()\n",
"\n",
"# Keep the 10 most frequent countries / regions as their own level; the rest becomes \"Other\".\n",
"top_countries = dfc[\"country\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"top_regions = dfc[\"region\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"\n",
"dfc[\"country_grp\"] = np.where(dfc[\"country\"].isin(top_countries), dfc[\"country\"], \"Other\")\n",
"dfc[\"region_grp\"] = np.where(dfc[\"region\"].isin(top_regions), dfc[\"region\"], \"Other\")\n",
"\n",
"# Numeric clustering features; commented-out entries are candidates currently excluded.\n",
"base_features = [\n",
"    \"log_aum_qty_mean\",\n",
"    \"flow_freq\",\n",
"    \"gross_flow_to_aum\",\n",
"    #\"turnover_vol\",\n",
"    \"flow_to_aum_vol\",\n",
"    \"activity_intensity\",\n",
"    \"n_tx_total\",\n",
"    \"avg_n_isin_held\",\n",
"    \"n_isin_total\",\n",
"    \"avg_holding_months_per_isin\",\n",
"    \"exit_rate_per_isin\",\n",
"    \"flow_direction_balance\",\n",
"    #\"redemption_bias\",\n",
"    \"aum_drawdown_last\",\n",
"    \"corr_flow_fund_lag3\",\n",
"    \"corr_flow_fund_lag6\",\n",
"    \"corr_flow_rate_lag3\",\n",
"    #\"corr_flow_rate_lag6\",\n",
"    #\"corr_flow_bench_lag3\",\n",
"    #\"corr_flow_bench_lag6\"\n",
"]\n",
"\n",
"# Alternative, smaller feature set (not used in this cell).\n",
"base_features2 = [\n",
"    \"log_aum_qty_mean\",\n",
"    \"log_gross_flow_qty_mean\",\n",
"    \"n_tx_total\",\n",
"    \"flow_freq\",\n",
"    \"gross_flow_to_aum\",\n",
"    \"net_flow_vol\",\n",
"    #\"avg_n_isin_held\",\n",
"    #\"flow_direction_balance\",\n",
"]\n",
"\n",
"# Silently ignore listed features that are absent from dfc.\n",
"base_features = [c for c in base_features if c in dfc.columns]\n",
"\n",
"# Turn +/-inf into NaN BEFORE computing the imputation medians, so that an infinite ratio\n",
"# can neither shift a column median nor end up being used as the fill value.\n",
"X_num = dfc[base_features].replace([np.inf, -np.inf], np.nan)\n",
"X_num = X_num.fillna(X_num.median())\n",
"X_cat = pd.get_dummies(dfc[[\"country_grp\", \"region_grp\"]].fillna(\"Unknown\"), drop_first=True)\n",
"\n",
"# Full design matrix (numeric + dummies), re-indexed 0..n-1 on both sides before concatenating.\n",
"X = pd.concat([X_num.reset_index(drop=True), X_cat.reset_index(drop=True)], axis=1)\n",
"\n",
"scaler = StandardScaler()\n",
"scaler2 = RobustScaler()\n",
"\n",
"# NOTE: both scalers are fitted on X_num only. The dummy columns are part of X (and of the\n",
"# feature count printed below) but not of X_scaled / X_scaled2.\n",
"X_scaled = scaler.fit_transform(X_num)\n",
"X_scaled2 = scaler2.fit_transform(X_num)\n",
"\n",
"print(\"Nb clients =\", X.shape[0])\n",
"print(\"Nb features =\", X.shape[1])\n",
"print(X.columns.tolist())"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 364,
2026-04-07 12:31:16 +02:00
"id": "5f006fc0-d0e7-47b2-94f0-7e3bbdf91097",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>k</th>\n",
" <th>inertia</th>\n",
" <th>silhouette</th>\n",
" <th>davies_bouldin</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>5178.843770</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>4741.629415</td>\n",
" <td>0.132598</td>\n",
" <td>2.166657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>4394.702026</td>\n",
" <td>0.124785</td>\n",
" <td>2.073192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>4115.441587</td>\n",
" <td>0.133249</td>\n",
" <td>1.787169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>3865.546167</td>\n",
" <td>0.127431</td>\n",
" <td>1.759628</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>7</td>\n",
" <td>3679.273300</td>\n",
" <td>0.135589</td>\n",
" <td>1.702516</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>8</td>\n",
" <td>3448.452307</td>\n",
" <td>0.139533</td>\n",
" <td>1.634761</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>9</td>\n",
" <td>3321.805201</td>\n",
" <td>0.121927</td>\n",
" <td>1.709083</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>10</td>\n",
" <td>3167.889248</td>\n",
" <td>0.128239</td>\n",
" <td>1.605403</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>11</td>\n",
" <td>3048.339811</td>\n",
" <td>0.134592</td>\n",
" <td>1.620711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>12</td>\n",
" <td>2931.256053</td>\n",
" <td>0.139258</td>\n",
" <td>1.550705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>13</td>\n",
" <td>2847.001592</td>\n",
" <td>0.144681</td>\n",
" <td>1.537896</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>14</td>\n",
" <td>2742.565943</td>\n",
" <td>0.152159</td>\n",
" <td>1.455955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>15</td>\n",
" <td>2647.758120</td>\n",
" <td>0.148589</td>\n",
" <td>1.469929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>16</td>\n",
" <td>2576.736131</td>\n",
" <td>0.132343</td>\n",
" <td>1.498820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>17</td>\n",
" <td>2520.993893</td>\n",
" <td>0.137837</td>\n",
" <td>1.491089</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>18</td>\n",
" <td>2443.613239</td>\n",
" <td>0.135204</td>\n",
" <td>1.450617</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>19</td>\n",
" <td>2368.363384</td>\n",
" <td>0.151232</td>\n",
" <td>1.428163</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>20</td>\n",
" <td>2348.022385</td>\n",
" <td>0.134130</td>\n",
" <td>1.432542</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" k inertia silhouette davies_bouldin\n",
"0 2 5178.843770 NaN NaN\n",
"1 3 4741.629415 0.132598 2.166657\n",
"2 4 4394.702026 0.124785 2.073192\n",
"3 5 4115.441587 0.133249 1.787169\n",
"4 6 3865.546167 0.127431 1.759628\n",
"5 7 3679.273300 0.135589 1.702516\n",
"6 8 3448.452307 0.139533 1.634761\n",
"7 9 3321.805201 0.121927 1.709083\n",
"8 10 3167.889248 0.128239 1.605403\n",
"9 11 3048.339811 0.134592 1.620711\n",
"10 12 2931.256053 0.139258 1.550705\n",
"11 13 2847.001592 0.144681 1.537896\n",
"12 14 2742.565943 0.152159 1.455955\n",
"13 15 2647.758120 0.148589 1.469929\n",
"14 16 2576.736131 0.132343 1.498820\n",
"15 17 2520.993893 0.137837 1.491089\n",
"16 18 2443.613239 0.135204 1.450617\n",
"17 19 2368.363384 0.151232 1.428163\n",
"18 20 2348.022385 0.134130 1.432542"
]
},
2026-04-09 16:46:41 +02:00
"execution_count": 364,
2026-04-07 12:31:16 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# K-Means diagnostics on X_scaled for k = 2..20: inertia, silhouette and Davies-Bouldin.\n",
"rows = []\n",
"\n",
"for k in range(2, 21):\n",
"    # Seeded with the notebook-wide RANDOM_STATE constant rather than a repeated literal.\n",
"    km = KMeans(n_clusters=k, n_init=30, random_state=RANDOM_STATE)\n",
"    labels = km.fit_predict(X_scaled)\n",
"\n",
"    row = {\n",
"        \"k\": k,\n",
"        \"inertia\": km.inertia_\n",
"    }\n",
"\n",
"    # The two separation scores are only reported from k = 3; the k = 2 row keeps NaN.\n",
"    if k >= 3:\n",
"        row[\"silhouette\"] = silhouette_score(X_scaled, labels)\n",
"        row[\"davies_bouldin\"] = davies_bouldin_score(X_scaled, labels)\n",
"    else:\n",
"        row[\"silhouette\"] = np.nan\n",
"        row[\"davies_bouldin\"] = np.nan\n",
"\n",
"    rows.append(row)\n",
"\n",
"df_kdiag = pd.DataFrame(rows)\n",
"df_kdiag"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 343,
2026-04-07 12:31:16 +02:00
"id": "0198c399-f532-44c5-91a7-d4e0a27887ec",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAAGGCAYAAAAzegNcAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA88RJREFUeJzs3XlcVHX3B/DPzAAzrCM7iAgIuCCK4EouuePappWluTylj6aW2WI8v9LMyjItW0hLK32k0uyxxTLN3FrEUBQVcUFkcWFH2WFg5v7+GGZ0ZIfZgM/79ZpXzZ3v3DmDyty5555zRIIgCCAiIiIiIiIiIiIiIjJzYlMHQERERERERERERERE1BhMahARERERERERERERUavApAYREREREREREREREbUKTGoQEREREREREREREVGrwKQGERERERERERERERG1CkxqEBERERERERERERFRq8CkBhERERERERERERERtQpMahARERERERERERERUavApAYREREREREREREREbUKTGoQEREREVGT+fr6Yvbs2dr7hw8fhkgkwuHDh7Xbhg8fjuDgYOMHR0RERK3Ga6+9BpFIZOowjGr27Nnw9fVtcF1qaipEIhG2bNmi3dYef15Ed2NSg6gVE4lEeO2117T3NR9subm5pguqHbj7505ERNTWnD17FlOnToWPjw9kMhm8vLwwZswYfPTRR6YOTe8SExPx2muvITU1tcZjn3zyic5JBCIiorZoy5YtEIlE2ptMJkPHjh0RERGBDz/8EEVFRaYOUe9mz56t854tLCzg7e2NadOmITEx0dThEVEDmNQgMjN3H0zcfTt27JipQ2yxjz76CHK5HJWVlXWuEYlEWLRokRGj0rVnzx4mLoiIqF06evQo+vXrh9OnT2Pu3Ln4+OOP8dRTT0EsFuODDz7Qrrt48SI2bdpkwkj1IzExEStXrmRSg4iI2r3XX38d27Ztw4YNG7B48WIAwJIlS9CrVy+cOXPGYK/7yiuvoKyszGD7r4tUKsW2bduwbds2bN68GbNnz8aBAwdwzz334MaNG0aPp7FM9fMiMicWpg6AiGr3+uuvw8/Pr8b2gIAAE0SjX7/88gvGjh0LS0tLU4dSpz179iAqKqrWxEZZWRksLPjrk4iI2qY333wTcrkcx48fR4cOHXQey87O1v6/VCo1cmRERERkSOPHj0e/fv209yMjI3Hw4EFMmjQJ9913H86fPw9ra2u9v66FhYVJvmNbWFhgxowZOtsGDRqESZMm4ZdffsHcuXONHlNjmOrnRWROWKlBZKbGjx+PGTNm1Li5uLiYOrQWKS0txZEjRzBx4kRTh1KrkpKSBtfIZDIeQBARUZuVnJyMnj171khoAICbm5v2/++eqVGfxMREjBgxAjY2NvDy8sKaNWtqrMnOzsaTTz4Jd3d3yGQyhISEYOvWrTprapvbAdTebxoALly4gKlTp8LJyQkymQz9+vXDTz/9pH18y5YtePjhhwEAI0aM0FbGHj58GL6+vjh37hyOHDmi3T58+HDtc2/duoUlS5bA29sbUqkUAQEBeOedd6BSqRr1MyEiImoNRo4ciVdffRVpaWmIjo4GAJw5cwazZ89Gly5dIJPJ4OHhgX/961/Iy8vTPu+7776DSCTCkSNHauzz008/hUgkQkJCAoC6Z0RER0ejb9++sLa2hpOTE6ZNm4arV6/qrElKSsKUKVPg4eEBmUyGTp06Ydq0aSgoKGjW+/Xw8ACAGt/5r1y5gocffhhOTk6wsbHBoEGD8Msvv+is0XTeuLv6s67jl7vdunULs2fPhlwuR4cOHTBr1izcunWrxrrafl6abhc//PADgoODIZVK0bNnT+zdu7dxb5yolWFSg6gNys3NxSOPPAIHBwc4Ozvj2WefRXl5uc6aqqoqrFq1Cv7+/pBKpfD19cV//vMfVFRUaNcsXboUzs7OEARBu23x4sUQiUT48MMPtduysrIgEomwYc
OGBmM7cOAAKioqMH78+Ca9J81BwLfffos333wTnTp1gkwmw6hRo3D58uUa6//55x+MGzcOcrkcNjY2uPfee/H333/rrNEcCCQmJuLxxx+Ho6MjhgwZgtmzZyMqKgoAdFp/adw9UyMtLQ1PP/00unXrBmtrazg7O+Phhx+utY0FERGRufPx8UFcXJz2RENL3bx5E+PGjUNISAjWrVuH7t27Y9myZfj111+1a8rKyjB8+HBs27YN06dPx7vvvgu5XI7Zs2frtLxqinPnzmHQoEE4f/48Xn75Zaxbtw62trZ44IEH8P333wMAhg0bhmeeeQYA8J///EfbgqJHjx5Yv349OnXqhO7du2u3/9///R8A9UUa9957L6KjozFz5kx8+OGHGDx4MCIjI7F06dIW/sSIiIjMyxNPPAEA+O233wAA+/fvx5UrVzBnzhx89NFHmDZtGrZv344JEyZozx9MnDgRdnZ2+Pbbb2vsb8eOHejZsyeCg4PrfM0333wTM2fORGBgIN577z0sWbIEBw4cwLBhw7Qn+hUKBSIiInDs2DEsXrwYUVFRmDdvHq5cuVJrMqA2ubm5yM3NRVZWFmJiYvDcc8/B2dkZkyZN0q7JysrCPffcg3379uHpp5/Gm2++ifLyctx3333aY4qWEgQB999/P7Zt24YZM2bgjTfewLVr1zBr1qxG7+Ovv/7C008/jWnTpmHNmjUoLy/HlClTdJJNRG2GQERm5csvvxQACL///ruQk5Ojc8vNzdVZC0BYsWKF9v6KFSsEAEKvXr2EyZMnCx9//LEwY8YMAYDwxBNP6Dx31qxZAgBh6tSpQlRUlDBz5kwBgPDAAw9o1+zatUsAIJw9e1a7LSQkRBCLxcLUqVO123bu3CkAEBISEhp8f/Pnzxf69evX4DoAwsKFC7X3Dx06JAAQQkNDhb59+wrvv/++8Nprrwk2NjbCgAEDdJ574MABwcrKSggPDxfWrVsnvP/++0Lv3r0FKysr4Z9//qnx8woKChLuv/9+4ZNPPhGioqKEo0ePCmPGjBEACNu2bdPe7oztzp/7zp07hZCQEGH58uXCZ599JvznP/8RHB0dBR8fH6GkpKTB90pERGROfvvtN0EikQgSiUQIDw8XXnrpJWHfvn2CQqHQWefj4yPMmjVLe1/zWX3o0CHttnvvvVcAIPz3v//VbquoqBA8PDyEKVOmaLetX79eACBER0drtykUCiE8PFyws7MTCgsL63wNQRCElJQUAYDw5ZdfareNGjVK6NWrl1BeXq7dplKphHvuuUcIDAzUbtMcx9y9T0EQhJ49ewr33ntvje2rVq0SbG1thUuXLulsf/nllwWJRCKkp6fXeA4REZG50pyHOH78eJ1r5HK5EBoaKgiCIJSWltZ4/JtvvhEACH/88Yd222OPPSa4ubkJVVVV2m0ZGRmCWCwWXn/9de02zXdzjdTUVEEikQhvvvmmzmucPXtWsLCw0G4/deqUAEDYuXNnE9/x7XMid9+8vLyEuLg4nbVLliwRAAh//vmndltRUZHg5+cn+Pr6CkqlUhCE2z/HlJQUnefXdvwya9YswcfHR3v/hx9+EAAIa9as0W6rqqoShg4dWuMY5+6flyCoz1NYWVkJly9f1m47ffq0AED46KOPmvrjITJ7rNQgMlOjR4+Gq6urzs3Ly6tRz/Xz88NPP/2EhQsXYtu2bXj66aexbds27WCv06dPY+vWrXjqqaewc+dOPP3009i6dSteeOEF/PDDDzh06BAAYMiQIQCAP//8EwBQUFCAs2fPYsqUKdptmsednJwQFBTUYGx79uxpUeup8vJyHD16FEuWLMGKFSuwevVqxMbGaq8mFQQB8+fPx4gRI/D3339j6dKlWLJkCY4dOwYvLy+88sorNfYZEhKCH374AQsWLMDTTz+N8PBwdO3aFQB0Wn/VZeLEiYiPj8fKlSsxd+5cvPnmm9izZw/S0tLwv//9r9nvlYiIyBTGjBmDmJgY3HfffT
h9+jTWrFmDiIgIeHl56bRuaiw7Ozudz1ErKysMGDAAV65c0W7bs2cPPDw88Nhjj2m3WVpa4plnnkFxcXGtrSvqk5+
"text/plain": [
"<Figure size 1600x400 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, axes = plt.subplots(1, 3, figsize=(16, 4))\n",
"\n",
"axes[0].plot(df_kdiag[\"k\"], df_kdiag[\"inertia\"], marker=\"o\")\n",
"axes[0].set_title(\"Elbow / Inertia\")\n",
"axes[0].set_xlabel(\"K\")\n",
"\n",
"axes[1].plot(df_kdiag[\"k\"], df_kdiag[\"silhouette\"], marker=\"o\")\n",
"axes[1].set_title(\"Silhouette\")\n",
"axes[1].set_xlabel(\"K\")\n",
"\n",
"axes[2].plot(df_kdiag[\"k\"], df_kdiag[\"davies_bouldin\"], marker=\"o\")\n",
"axes[2].set_title(\"Davies-Bouldin\")\n",
"axes[2].set_xlabel(\"K\")\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 344,
2026-04-07 12:31:16 +02:00
"id": "5ba1f3bf-7fd7-49aa-8b28-0ca0f2658bf0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"K=2 | silhouette=0.2357 | davies_bouldin=1.8611\n",
"K=4 | silhouette=0.1262 | davies_bouldin=2.0735\n",
"K=5 | silhouette=0.1332 | davies_bouldin=1.7872\n"
]
}
],
"source": [
"RESULTS = {}\n",
"\n",
"for k in [2, 4, 5]:\n",
" km = KMeans(n_clusters=k, n_init=50, random_state=42)\n",
" labels = km.fit_predict(X_scaled)\n",
" dfc[f\"cluster_k{k}\"] = labels\n",
"\n",
" RESULTS[k] = {\n",
" \"model\": km,\n",
" \"labels\": labels,\n",
" \"silhouette\": silhouette_score(X_scaled, labels),\n",
" \"davies_bouldin\": davies_bouldin_score(X_scaled, labels)\n",
" }\n",
"\n",
"for k in [2, 4, 5]:\n",
" print(f\"K={k} | silhouette={RESULTS[k]['silhouette']:.4f} | davies_bouldin={RESULTS[k]['davies_bouldin']:.4f}\")"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 345,
2026-04-07 12:31:16 +02:00
"id": "0052976f-e30f-4f84-b720-6fa4a9078aba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=2 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>327</td>\n",
" <td>21039.415608</td>\n",
" <td>5.516534</td>\n",
" <td>0.730769</td>\n",
" <td>469.0</td>\n",
" <td>2.347826</td>\n",
" <td>20.0</td>\n",
" <td>10.000000</td>\n",
" <td>1.416667</td>\n",
" <td>0.042861</td>\n",
" <td>-0.762943</td>\n",
" <td>0.818922</td>\n",
" <td>0.181078</td>\n",
" <td>0.002365</td>\n",
" <td>0.004122</td>\n",
" <td>0.000339</td>\n",
" <td>0.000122</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>77</td>\n",
" <td>133315.879515</td>\n",
" <td>10.375358</td>\n",
" <td>1.000000</td>\n",
" <td>8861.0</td>\n",
" <td>14.769231</td>\n",
" <td>56.0</td>\n",
" <td>34.765306</td>\n",
" <td>2.515152</td>\n",
" <td>0.026428</td>\n",
" <td>-1.057873</td>\n",
" <td>0.505165</td>\n",
" <td>0.494835</td>\n",
" <td>0.041515</td>\n",
" <td>0.023970</td>\n",
" <td>-0.045190</td>\n",
" <td>-0.046754</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k2 \n",
"0 327 21039.415608 5.516534 0.730769 \n",
"1 77 133315.879515 10.375358 1.000000 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k2 \n",
"0 469.0 2.347826 20.0 \n",
"1 8861.0 14.769231 56.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k2 \n",
"0 10.000000 1.416667 \n",
"1 34.765306 2.515152 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k2 \n",
"0 0.042861 -0.762943 \n",
"1 0.026428 -1.057873 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
"cluster_k2 \n",
"0 0.818922 0.181078 \n",
"1 0.505165 0.494835 \n",
"\n",
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
"cluster_k2 \n",
"0 0.002365 0.004122 \n",
"1 0.041515 0.023970 \n",
"\n",
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \n",
"cluster_k2 \n",
"0 0.000339 0.000122 \n",
"1 -0.045190 -0.046754 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=4 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k4</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>165</td>\n",
" <td>14436.315829</td>\n",
" <td>17.218849</td>\n",
" <td>0.949153</td>\n",
" <td>1252.0</td>\n",
" <td>2.527559</td>\n",
" <td>29.0</td>\n",
" <td>7.756757</td>\n",
" <td>3.093750</td>\n",
" <td>0.018779</td>\n",
" <td>-0.996486</td>\n",
" <td>0.916665</td>\n",
" <td>0.083335</td>\n",
" <td>0.000208</td>\n",
" <td>0.001381</td>\n",
" <td>-0.002783</td>\n",
" <td>-0.003603</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>131</td>\n",
" <td>28566.131306</td>\n",
" <td>0.894162</td>\n",
" <td>0.123077</td>\n",
" <td>14.0</td>\n",
" <td>1.650000</td>\n",
" <td>15.0</td>\n",
" <td>8.500000</td>\n",
" <td>0.619718</td>\n",
" <td>0.000000</td>\n",
" <td>-0.127257</td>\n",
" <td>0.792318</td>\n",
" <td>0.207682</td>\n",
" <td>-0.000988</td>\n",
" <td>0.000061</td>\n",
" <td>0.014765</td>\n",
" <td>0.015976</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>86</td>\n",
" <td>76209.164582</td>\n",
" <td>4.871121</td>\n",
" <td>1.000000</td>\n",
" <td>2339.0</td>\n",
" <td>11.116667</td>\n",
" <td>25.5</td>\n",
" <td>40.643704</td>\n",
" <td>0.750000</td>\n",
" <td>0.154712</td>\n",
" <td>-1.021555</td>\n",
" <td>0.216071</td>\n",
" <td>0.783929</td>\n",
" <td>0.030799</td>\n",
" <td>0.022152</td>\n",
" <td>-0.036992</td>\n",
" <td>-0.046754</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>22</td>\n",
" <td>335180.430008</td>\n",
" <td>15.652972</td>\n",
" <td>1.000000</td>\n",
" <td>20193.0</td>\n",
" <td>13.237500</td>\n",
" <td>81.0</td>\n",
" <td>19.072084</td>\n",
" <td>5.158364</td>\n",
" <td>0.028313</td>\n",
" <td>-1.100355</td>\n",
" <td>0.651444</td>\n",
" <td>0.348556</td>\n",
" <td>0.096447</td>\n",
" <td>0.077212</td>\n",
" <td>-0.029813</td>\n",
" <td>-0.031076</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k4 \n",
"3 165 14436.315829 17.218849 0.949153 \n",
"0 131 28566.131306 0.894162 0.123077 \n",
"2 86 76209.164582 4.871121 1.000000 \n",
"1 22 335180.430008 15.652972 1.000000 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k4 \n",
"3 1252.0 2.527559 29.0 \n",
"0 14.0 1.650000 15.0 \n",
"2 2339.0 11.116667 25.5 \n",
"1 20193.0 13.237500 81.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k4 \n",
"3 7.756757 3.093750 \n",
"0 8.500000 0.619718 \n",
"2 40.643704 0.750000 \n",
"1 19.072084 5.158364 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k4 \n",
"3 0.018779 -0.996486 \n",
"0 0.000000 -0.127257 \n",
"2 0.154712 -1.021555 \n",
"1 0.028313 -1.100355 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
"cluster_k4 \n",
"3 0.916665 0.083335 \n",
"0 0.792318 0.207682 \n",
"2 0.216071 0.783929 \n",
"1 0.651444 0.348556 \n",
"\n",
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
"cluster_k4 \n",
"3 0.000208 0.001381 \n",
"0 -0.000988 0.000061 \n",
"2 0.030799 0.022152 \n",
"1 0.096447 0.077212 \n",
"\n",
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \n",
"cluster_k4 \n",
"3 -0.002783 -0.003603 \n",
"0 0.014765 0.015976 \n",
"2 -0.036992 -0.046754 \n",
"1 -0.029813 -0.031076 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=5 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k5</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>168</td>\n",
" <td>12566.405685</td>\n",
" <td>16.253847</td>\n",
" <td>0.911577</td>\n",
" <td>1094.5</td>\n",
" <td>2.524180</td>\n",
" <td>28.5</td>\n",
" <td>8.166667</td>\n",
" <td>2.955196</td>\n",
" <td>0.036286</td>\n",
" <td>-0.970398</td>\n",
" <td>0.914496</td>\n",
" <td>0.085504</td>\n",
" <td>0.001715</td>\n",
" <td>0.003794</td>\n",
" <td>-0.000844</td>\n",
" <td>0.001543</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>111</td>\n",
" <td>37555.632000</td>\n",
" <td>0.661205</td>\n",
" <td>0.076923</td>\n",
" <td>6.0</td>\n",
" <td>1.511111</td>\n",
" <td>12.0</td>\n",
" <td>8.290323</td>\n",
" <td>0.586207</td>\n",
" <td>0.000000</td>\n",
" <td>-0.076923</td>\n",
" <td>0.818922</td>\n",
" <td>0.181078</td>\n",
" <td>-0.002355</td>\n",
" <td>-0.000290</td>\n",
" <td>0.011330</td>\n",
" <td>0.021365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>90</td>\n",
" <td>59767.645164</td>\n",
" <td>4.216773</td>\n",
" <td>0.995868</td>\n",
" <td>1725.5</td>\n",
" <td>10.811538</td>\n",
" <td>24.0</td>\n",
" <td>39.349432</td>\n",
" <td>0.708536</td>\n",
" <td>0.177072</td>\n",
" <td>-1.017685</td>\n",
" <td>0.181774</td>\n",
" <td>0.818226</td>\n",
" <td>0.029066</td>\n",
" <td>0.024737</td>\n",
" <td>-0.025887</td>\n",
" <td>-0.038057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>212211.195227</td>\n",
" <td>15.786289</td>\n",
" <td>1.000000</td>\n",
" <td>17459.5</td>\n",
" <td>9.768395</td>\n",
" <td>77.5</td>\n",
" <td>17.601779</td>\n",
" <td>5.770464</td>\n",
" <td>0.028313</td>\n",
" <td>-1.083467</td>\n",
" <td>0.691744</td>\n",
" <td>0.308256</td>\n",
" <td>0.062011</td>\n",
" <td>0.052249</td>\n",
" <td>-0.031718</td>\n",
" <td>-0.037537</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>114674.703704</td>\n",
" <td>7.607274</td>\n",
" <td>0.944444</td>\n",
" <td>1221.0</td>\n",
" <td>0.861111</td>\n",
" <td>12.0</td>\n",
" <td>3.800000</td>\n",
" <td>2.600000</td>\n",
" <td>-0.049426</td>\n",
" <td>-1.237358</td>\n",
" <td>0.999914</td>\n",
" <td>0.000086</td>\n",
" <td>-0.035506</td>\n",
" <td>-0.027489</td>\n",
" <td>-0.000900</td>\n",
" <td>-0.036517</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k5 \n",
"1 168 12566.405685 16.253847 0.911577 \n",
"3 111 37555.632000 0.661205 0.076923 \n",
"4 90 59767.645164 4.216773 0.995868 \n",
"0 30 212211.195227 15.786289 1.000000 \n",
"2 5 114674.703704 7.607274 0.944444 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k5 \n",
"1 1094.5 2.524180 28.5 \n",
"3 6.0 1.511111 12.0 \n",
"4 1725.5 10.811538 24.0 \n",
"0 17459.5 9.768395 77.5 \n",
"2 1221.0 0.861111 12.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k5 \n",
"1 8.166667 2.955196 \n",
"3 8.290323 0.586207 \n",
"4 39.349432 0.708536 \n",
"0 17.601779 5.770464 \n",
"2 3.800000 2.600000 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k5 \n",
"1 0.036286 -0.970398 \n",
"3 0.000000 -0.076923 \n",
"4 0.177072 -1.017685 \n",
"0 0.028313 -1.083467 \n",
"2 -0.049426 -1.237358 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
"cluster_k5 \n",
"1 0.914496 0.085504 \n",
"3 0.818922 0.181078 \n",
"4 0.181774 0.818226 \n",
"0 0.691744 0.308256 \n",
"2 0.999914 0.000086 \n",
"\n",
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
"cluster_k5 \n",
"1 0.001715 0.003794 \n",
"3 -0.002355 -0.000290 \n",
"4 0.029066 0.024737 \n",
"0 0.062011 0.052249 \n",
"2 -0.035506 -0.027489 \n",
"\n",
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \n",
"cluster_k5 \n",
"1 -0.000844 0.001543 \n",
"3 0.011330 0.021365 \n",
"4 -0.025887 -0.038057 \n",
"0 -0.031718 -0.037537 \n",
"2 -0.000900 -0.036517 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"profile_vars = [\n",
" \"aum_qty_mean\",\n",
" \"gross_flow_to_aum\",\n",
" \"flow_freq\",\n",
" \"n_tx_total\",\n",
" \"avg_n_isin_held\",\n",
" \"n_isin_total\",\n",
" \"avg_holding_months_per_isin\",\n",
" \"exit_rate_per_isin\",\n",
" \"flow_direction_balance\",\n",
" \"redemption_bias\",\n",
" \"aum_drawdown_last\",\n",
" \"aum_final_to_peak\",\n",
" \"corr_flow_fund_lag3\",\n",
" \"corr_flow_fund_lag6\",\n",
" \"corr_flow_rate_lag3\",\n",
" \"corr_flow_rate_lag6\",\n",
" #\"corr_flow_bench_lag3\",\n",
" #\"corr_flow_bench_lag6\"\n",
"]\n",
"\n",
"profile_vars2 = [\n",
" \"aum_qty_mean\",\n",
" \"gross_flow_to_aum\",\n",
" \"flow_freq\",\n",
" \"n_tx_total\",\n",
" \"log_gross_flow_qty_mean\",\n",
" \"net_flow_vol\",\n",
"]\n",
"\n",
"profile_vars = [c for c in profile_vars if c in dfc.columns]\n",
"\n",
"for k in [2, 4, 5]:\n",
" print(f\"\\n===== K={k} =====\")\n",
" prof = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" **{f\"{c}_med\": (c, \"median\") for c in profile_vars}\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
" display(prof)"
]
},
{
"cell_type": "code",
"execution_count": 237,
"id": "ff8bdf91-859c-419e-a2ea-eb4a5f44f0df",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAkyVJREFUeJzs3XdYk9fbB/BvCKAgiiwVERdKQIaA4ECEihvqwlUt8MOtVat2KVoHaou2jrpaceGs1SpuwL2LuFAc4FYEFwKKCgok5/2DN4+EBEwgISTcn+vykpycPLlzeJLcPGfxGGMMhBBCCCFVmI66AyCEEEIIUTdKiAghhBBS5VFCRAghhJAqjxIiQgghhFR5lBARQgghpMqjhIgQQgghVR4lRIQQQgip8ighIoQQQkiVRwkRIYQQQqo8SogqmaCgIHz55ZdKPaZAIMCcOXOUekx5REVFQSAQIDU1tcKfGwCmTp0KX19ftTw3kZSamgqBQICoqCh1h6I0qnpNvr6+mDp1qlKPWZmfVxWCgoIQFBSk7jCUYs+ePejevTscHBzg7u6u7nC0mq66A6jMEhMTsWfPHsTHxyMtLQ21a9dGy5YtMWnSJDRp0kSiblBQEC5cuAAA4PF4MDQ0hIWFBZydndGnTx+0b99eHS+BELndu3cPMTEx6Nu3Lxo0aFCmY+zfvx8ZGRkICQlRbnBa5sqVKzh37hz+97//oVatWuoOp9y2bt0KAwMDBAQEqDsUmV68eIEdO3agc+fOsLe3V3c4crt//z5CQ0PRoUMHjBo1CtWrV1d3SFqNEqJSrF27FleuXEH37t0hEAiQnp6OrVu3IiAgANu3b4etra1E/Xr16uG7774DAOTm5uLx48c4cuQI9u3bhx49euD333+Hnp6eOl6KWvTu3Rv+/v7Q19dXdyhEDvfu3cOKFSvQunXrMidEBw4cwN27d6USIisrKyQmJkJXlz5yACAhIQErVqxA3759pRKi2NhY8Hi8Co+pPM+7bds2mJiYVJqEaN26dRK3X758iRUrVsDKykqjEqILFy5AJBJh+vTpaNSokbrD0Xr06VSKkJAQLFy4UOIL3c/PDz179sTq1auxcOFCifo1a9ZE7969Jcp++OEHzJs3D3///TesrKzw448/VkjslQGfzwefz1fa8XJzc2FgYKC045GKw+PxUK1aNXWHUSqRSIT8/Hy1x6muPyC06Q8XbXktGRkZAAq/W0rDGMPHjx/pClI50RiiUri5uUm9sRo3bozmzZvjwYMHch2Dz+fj559/RrNmzbB161a8fftWrsfduHEDX331FZydneHr64tt27ZJ1cnLy8OyZcvQpUsXODo6wsfHB7/99hvy8vJkHvPo0aP48ssv4ejoCH9/f5w+fVri/rS0NMyePRvdunWDs7Mz2rRpg2+//VZiDND169chEAiwe/duqeOfOXMGAoEAJ06cAFDyGKKtW7fC398fjo6O8PLyQlhYGLKzsyXqiMdS3bhxA19//TVatmyJxYsXc69j1KhR8PLygqOjIzp37oyVK1dCKBTK0bLSrl+/juHDh6NNmzZce4eGhkrUEYlE2LBhA/z9/eHk5ARPT0/MnDkTb968kaq3fPlyeHl5oWXLlggKCsK9e/ekxmeI2+bSpUuYN28e2rZtC3d3d8ycORN5eXnIzs7GTz/9BA8PD3h4eOC3334DY6xMMfn6+mL06NG4dOkS+vfvDycnJ3Tq1Al79uyRiGfixIkAgODgYAgEAggEAsTHx8vd5kFBQTh58iTS0tK4x4vHcJU03iYuLg5DhgyBi4sL3N3dMXbsWNy/f1+izvLlyyEQCPD48WNMnToV7u7uaNWqFUJDQ5GbmytRNzMzE/fv35cql0U8tm7fvn1cG545cwZAYRdLaGgoPD09uffLzp07P3vM5ORkTJ06FZ06dYKTkxPat2+P0NBQZGVlSbye3377DQDQqVMnrq3E75Oi54oi77fyxF38eYFP5+
jly5cRHh6Otm3bwsXFBePGjUNmZqbE4+7evYsLFy5wr6Xo+J3s7Gz88ssv8PHxgaOjI7p06YLVq1dDJBJxdcTnx7p167B9+3Z07twZjo6O6NevHxITEyXiTE9PR2hoKLy9vbnPkLFjx0p8zhQdQxQfH4/+/fsDAEJDQ7kYo6KisGzZMjg4OEi8HrEZM2bA3d0dHz9+lNle69atg0AgQFpamtR9ixYtgqOjI/defPToESZMmID27dvDyckJ3t7emDx5cqnfB76+vli+fDkAoF27dhAIBNxt8Xv6zJkzCAgIgLOzM/755x+521tcb+rUqWjVqhXc3d0xZcoUJCUlSb1PSxqPJWuMpjI/k4rG+euvv8LX1xeOjo7w9vbGTz/9hMzMTLx//x4uLi6YN2+e1OOeP38Oe3t7RERElNjGxdEVIgUxxvDq1Ss0b95c7sfw+Xz4+/tj6dKluHz5Mr744otS67958wajRo1Cjx494O/vj5iYGMyePRt6enrcG1skEmHs2LG4fPkyBg4cCBsbG9y5cwcbN27Eo0eP8Oeff0oc8/Llyzh8+DCGDBmCGjVqYPPmzfj2229x4sQJmJiYACj88E1ISIC/vz/q1auHtLQ0bNu2DcHBwTh48CAMDAzg5OQEa2trbqxJUdHR0TA2NoaXl1eJr2358uVYsWIFPD09MXjwYDx8+BDbtm3D9evXsW3bNokuxdevX2PkyJHw9/dHr169YGZmBgDYvXs3DA0NMXToUBgaGuL8+fNYtmwZ3r17hylTpsj9ewEK/wIbPnw4TExMMGrUKNSqVQupqak4cuSIRL2ZM2di9+7dCAgIQFBQEFJTU7F161bcunVLIu5FixZh7dq16NixIzp06IDk5GQMHz68xA/VefPmwdzcHBMmTMC1a9ewfft21KxZEwkJCbC0tMTkyZNx+vRprFu3Dra2tujTp4/CMQHA48ePMXHiRPTv3x99+/bFrl27MHXqVDg4OKB58+bw8PBAUFAQNm/ejDFjxqBp06YAABsbG7nbfMyYMXj79i2eP3/OJZQ1atQose3/++8/jBw5Eg0aNMD48ePx4cMHbNmyBYMHD0ZUVJRUt92kSZPQoEEDfPfdd7h16xb+/fdfmJqaSlx13bp1K1asWIFNmzahTZs2pf7uAeD8+fOIiYnB119/DRMTE1hZWeHVq1cYOHAgeDwevv76a5iamuL06dOYPn063r17V+r4qP/++w9PnjxBQEAALCwscPfuXezYsQP37t3Djh07wOPx0KVLFzx69AgHDhxAaGgo9/4zNTWVOp4i77fyxF2aefPmoVatWhg/fjzS0tKwceNGzJkzB3/88QcAYNq0aZg7dy4MDQ0xZswYAIC5uTmAwqu6gYGBePHiBb766itYWloiISEBixcvRnp6OqZPny7xXAcOHMD79+8xaNAg8Hg8rF27FhMmTMDRo0e583nChAm4d+8eAgMDYWVlhczMTJw7dw7Pnj2T2dVrY2ODb7/9FsuWLcOgQYPQqlUrAIV/8LZq1QorV65EdHQ0AgMDucfk5eXh0KFD6Nq1a4lXDMXDIGJiYjBixAiJ+2JiYtC+fXsYGxsjLy8Pw4cPR15eHgIDA2Fubo4XL17g5MmTyM7OLvHqz7Rp07Bnzx4cOXIEs2fPhqGhIQQCAXf/w4cP8f3332PQoEEYOHAgmjRpInd7M8bwzTff4PLly/jqq69gY2ODI0eOKPz5WZwyP5MA4P379/j6669x//599OvXDy1atEBWVhaOHz+OFy9ewN7eHp07d0ZMTAxCQ0MleiQOHDgAxhh69uwp/wtgRCF79uxhtra27N9//5UoDwwMZP7+/iU+7siRI8zW1pZt3Lix1OMHBgYyW1tbtn79eq7s48ePrHfv3qxdu3YsLy+Pi8POzo5dvHhR4vHbtm1jtra27PLly1yZra0tc3BwYI8fP+bKkpKSmK2tLdu8eTNXlpubKxVPQkICs7W1Zbt37+bKFi1axBwcHNjr16
8lYnR3d2ehoaFc2a5du5itrS178uQJY4yxjIwM5uDgwIYNG8aEQiFXb8uWLczW1pbt3LlTqh22bdsmFZOsOGfMmMF
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Analyse graphique des clusters : features2\n",
"sns.set_style(\"whitegrid\")\n",
"thr_int = dfc[\"gross_flow_to_aum\"].median()\n",
"thr_freq = dfc[\"flow_freq\"].median()\n",
"\n",
"plt.figure()\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([2, 4, 6.0])].groupby(\"cluster_k5\"):\n",
" plt.scatter(g[\"flow_freq\"], g[\"gross_flow_to_aum\"], s=10, label=f\"Cluster {int(name)}\")\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_freq, linestyle=\"--\")\n",
"plt.axhline(thr_int, linestyle=\"--\")\n",
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"plt.legend(markerscale=2)\n",
"plt.ylim(0.1,1000)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 238,
"id": "0bb325e2-a490-465c-9c8f-2121694f9b92",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAl4VJREFUeJzs3XdcU9f7B/BPCCDgYKOo4GbIEBAHiKC4cQKOaoW6R9VWO5y1raOF9ldttdo6655VcBW07omIihtwoDJUVEAcoEByf3/wTUpIArnhZpHn/Xr5kpzc3Pvk3tybJ+ecew6PYRgGhBBCCCF6zEDTARBCCCGEaBolRIQQQgjRe5QQEUIIIUTvUUJECCGEEL1HCREhhBBC9B4lRIQQQgjRe5QQEUIIIUTvUUJECCGEEL1HCREhhBBC9B4lRFomIiIC/fr143Sdzs7OWLhwIafrVERMTAycnZ2RlZWl9m0DwOzZsxEcHKyRbRNJWVlZcHZ2RkxMjKZD4Yyq3lNwcDBmz57N6Tq1ebuqEBERgYiICE2HwYl9+/ahd+/ecHNzg6+vr6bDqdEMNR2ANrtx4wb27duHxMREZGdnw8LCAm3atMH06dPRrFkziWUjIiJw6dIlAACPx4OZmRlsbW3h6emJQYMGoVOnTpp4C4Qo7P79+4iPj0doaCgaN26s1DoOHjyI3NxcjBo1itvgapirV6/i/Pnz+OSTT1CvXj1Nh1Nt27Ztg6mpKcLCwjQdikw5OTnYvXs3unfvDldXV02Ho7AHDx5gzpw56Ny5MyZMmAATExNNh1SjUUJUiXXr1uHq1avo3bs3nJ2d8eLFC2zbtg1hYWHYtWsXnJycJJZv0KABvvjiCwBAUVERHj9+jKNHj+LAgQPo06cP/u///g9GRkaaeCsaMXDgQPTt2xfGxsaaDoUo4P79+1ixYgXat2+vdEJ06NAh3Lt3TyohatSoEW7cuAFDQ7rkAEBycjJWrFiB0NBQqYTo8OHD4PF4ao+pOtvdsWMHLC0ttSYhWr9+vcTj58+fY8WKFWjUqJFOJUSXLl2CUCjEvHnz0KRJE02HU+PR1akSo0aNwi+//CLxhR4SEoL+/ftjzZo1+OWXXySWr1u3LgYOHChR9tVXX2Hx4sXYvn07GjVqhK+//lotsWsDPp8PPp/P2fqKiopgamrK2fqI+vB4PNSqVUvTYVRKKBSipKRE43Fq6gdETfrhUlPeS25uLoCy75bKMAyDDx8+UA1SNVEfokr4+PhInVhNmzZFq1atkJ6ertA6+Hw+vvnmG7Rs2RLbtm3DmzdvFHrdrVu38NFHH8HT0xPBwcHYsWOH1DLFxcVYvnw5evToAXd3dwQFBeHnn39GcXGxzHUeO3YM/fr1g7u7O/r27YszZ85IPJ+dnY3vv/8evXr1gqenJzp06IDPPvtMog/QzZs34ezsjNjYWKn1nz17Fs7Ozjh58iQA+X2Itm3bhr59+8Ld3R0BAQFYsGABXr9+LbGMqC/VrVu38PHHH6NNmzZYunSp+H1MmDABAQEBcHd3R/fu3bFy5UoIBAIF9qy0mzdvYuzYsejQoYN4f8+ZM0diGaFQiI0bN6Jv377w8PCAv78/vv32WxQUFEgt9/vvvyMgIABt2rRBREQE7t+/L9U/Q7RvLl++jMWLF6Njx47w9fXFt99+i+LiYrx+/RozZ85Eu3bt0K5dO/z8889gGEapmIKDgzFx4kRcvnwZgwcPhoeHB7p164Z9+/ZJxPP5558DACIjI+Hs7AxnZ2ckJiYqvM8jIiJw6tQpZGdni18v6sMlr79NQkICRowYAS8vL/j6+mLy5Ml48OCBxDK///47nJ2d8fjxY8yePRu+vr5o27Yt5syZg6KiIoll8/Ly8ODBA6lyWUR96w4cOCDeh2fPngVQ1sQyZ84c+Pv7i8+XPXv2VLnO1NRUzJ49G926dYOHhwc6deqEOXPmID8/X+L9/PzzzwCAbt26ifeV6Dwp/1
lhc75VJ+6K2wX++4xeuXIFUVFR6NixI7y8vDBlyhTk5eVJvO7evXu4dOmS+L2U77/z+vVr/PDDDwgKCoK7uzt69OiBNWvWQCgUipcRfT7Wr1+PXbt2oXv37nB3d0d4eDhu3LghEeeLFy8wZ84cBAYGiq8hkydPlrjOlO9DlJiYiMGDBwMA5syZI44xJiYGy5cvh5ubm8T7EZk/fz58fX3x4cMHmftr/fr1cHZ2RnZ2ttRzS5Ysgbu7u/hcfPToEaZNm4ZOnTrBw8MDgYGBmDFjRqXfB8HBwfj9998BAH5+fnB2dhY/Fp3TZ8+eRVhYGDw9PbFz506F97doudmzZ6Nt27bw9fXFrFmzkJKSInWeyuuPJauPJpfXpPJx/vjjjwgODoa7uzsCAwMxc+ZM5OXl4d27d/Dy8sLixYulXvfs2TO4urpi9erVcvdxRVRDxBLDMHj58iVatWql8Gv4fD769u2LZcuW4cqVK+jSpUulyxcUFGDChAno06cP+vbti/j4eHz//fcwMjISn9hCoRCTJ0/GlStXMHToULRo0QJ3797Fpk2b8OjRI/zxxx8S67xy5Qr+/fdfjBgxArVr18aWLVvw2Wef4eTJk7C0tARQdvFNTk5G37590aBBA2RnZ2PHjh2IjIzEP//8A1NTU3h4eMDBwUHc16S8uLg4mJubIyAgQO57+/3337FixQr4+/tj+PDhePjwIXbs2IGbN29ix44dEk2Kr169wvjx49G3b18MGDAA1tbWAIDY2FiYmZlh9OjRMDMzw8WLF7F8+XK8ffsWs2bNUvi4AGW/wMaOHQtLS0tMmDAB9erVQ1ZWFo4ePSqx3LfffovY2FiEhYUhIiICWVlZ2LZtG+7cuSMR95IlS7Bu3Tp07doVnTt3RmpqKsaOHSv3orp48WLY2Nhg2rRpuH79Onbt2oW6desiOTkZ9vb2mDFjBs6cOYP169fDyckJgwYNYh0TADx+/Biff/45Bg8ejNDQUOzduxezZ8+Gm5sbWrVqhXbt2iEiIgJbtmzBpEmT0Lx5cwBAixYtFN7nkyZNwps3b/Ds2TNxQlm7dm25+/7ChQsYP348GjdujKlTp+L9+/fYunUrhg8fjpiYGKlmu+nTp6Nx48b44osvcOfOHfz999+wsrKSqHXdtm0bVqxYgc2bN6NDhw6VHnsAuHjxIuLj4/Hxxx/D0tISjRo1wsuXLzF06FDweDx8/PHHsLKywpkzZzBv3jy8ffu20v5RFy5cQGZmJsLCwmBra4t79+5h9+7duH//Pnbv3g0ej4cePXrg0aNHOHToEObMmSM+/6ysrKTWx+Z8q07clVm8eDHq1auHqVOnIjs7G5s2bcLChQvx22+/AQDmzp2LRYsWwczMDJMmTQIA2NjYACir1R05ciRycnLw0Ucfwd7eHsnJyVi6dClevHiBefPmSWzr0KFDePfuHYYNGwYej4d169Zh2rRpOHbsmPjzPG3aNNy/fx8jR45Eo0aNkJeXh/Pnz+Pp06cym3pbtGiBzz77DMuXL8ewYcPQtm1bAGU/eNu2bYuVK1ciLi4OI0eOFL+muLgYR44cQc+ePeXWGIq6QcTHx2PcuHESz8XHx6NTp04wNzdHcXExxo4di+LiYowcORI2NjbIycnBqVOn8Pr1a7m1P3PnzsW+fftw9OhRfP/99zAzM4Ozs7P4+YcPH+LLL7/EsGHDMHToUDRr1kzh/c0wDD799FNcuXIFH330EVq0aIGjR4+yvn5WxOU1CQDevXuHjz/+GA8ePEB4eDhat26N/Px8nDhxAjk5OXB1dUX37t0RHx+POXPmSLRIHDp0CAzDoH///oq/AYawsm/fPsbJyYn5+++/JcpHjhzJ9O3bV+7rjh49yjg5OTGbNm2qdP0jR45knJycmL/++ktc9uHDB2bgwIGMn58fU1xcLI7DxcWFSUpKknj9jh07GCcnJ+bKlSviMicnJ8bNzY15/PixuCwlJYVxcnJitmzZIi4rKiqSii
c5OZlxcnJiYmNjxWVLlixh3NzcmFevXknE6Ovry8yZM0dctnfvXsbJyYnJzMxkGIZhcnNzGTc3N2bMmDGMQCAQL7d
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Analyse graphique des clusters : features2\n",
"\n",
"thr_int = dfc[\"gross_flow_to_aum\"].median()\n",
"thr_freq = dfc[\"flow_freq\"].median()\n",
"\n",
"plt.figure()\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([1,3,0, 4, 6.0])].groupby(\"cluster_k5\"):\n",
" plt.scatter(g[\"flow_freq\"], g[\"gross_flow_to_aum\"], s=10, label=f\"Cluster {int(name)}\")\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_freq, linestyle=\"--\")\n",
"plt.axhline(thr_int, linestyle=\"--\")\n",
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"plt.legend(markerscale=2)\n",
"plt.ylim(0.1,1000)\n",
"plt.show()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 346,
2026-04-07 12:31:16 +02:00
"id": "4d04e670-51ae-482d-a5c5-93fe8263cfeb",
"metadata": {},
"outputs": [],
"source": [
"# Analyse : features\n",
"\n",
"labels_map = {\n",
" 0: \"Cluster 0 (30): Large and highly active movers\",\n",
" 1: \"Cluster 1 (168): Occasional large movers\",\n",
" 3: \"Cluster 3 (111): Dormant profiles\",\n",
" 4: \"Cluster 4 (90): Loyal clients\"\n",
"}"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 347,
2026-04-07 12:31:16 +02:00
"id": "6ebe0025-0532-4e51-acb2-81aa786a164b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAHqCAYAAADVi/1VAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdYU9cbB/BvWGEPBQeCogxFVFBEVLRoHbhw1lV/DFet4l6oVQFHFbdVnG1ddddZrQNxL5w4UXHgxK0ooIzk/P5IcyVkkEBCAryf5+GB3Jx777knN5ebN+e8h8cYYyCEEEIIIYQQQgghpAjpabsChBBCCCGEEEIIIaT0oaAUIYQQQgghhBBCCClyFJQihBBCCCGEEEIIIUWOglKEEEIIIYQQQgghpMhRUIoQQgghhBBCCCGEFDkKShFCCCGEEEIIIYSQIkdBKUIIIYQQQgghhBBS5CgoRQghhBBCCCGEEEKKHAWlCCGEEEIIIYQQQkiRo6BUMdWsWTPUqlVLrdvk8XgYOnSoWrepjLVr14LH4yE5ObnI9w0AoaGhcHJy0sq+iaTk5GTweDysXbtW21VRG00dk5OTE0JDQ9W6TV3eryY0a9YMzZo103Y11GLDhg2oUaMGDA0NYW1tre3qEFKq0D2Z+tA9me4oifdkuioyMhI8Hq9A66rzXsbJyQkdOnRQy7aKu8K8JupWku5X5aGglBpcvHgRQ4cOhYeHB8zMzFC5cmX06NED9+7dkyrbrFkz8Hg88Hg86OnpwdLSEtWrV0dQUBBiY2O1UHtCVHP79m1ERkYW6oZ106ZNWLRokdrqVFKdPXsWkZGR+Pjxo7arohbLli3T6ZvbFy9eIDIyEgkJCdquikru3LmD0NBQODs7Y/Xq1Vi1apW2q0SI1tA9GSlN6J6scNTRfsrKyMhAZGQkjh8/rvF9kZKtuN6vKmKg7QqUBNHR0Thz5gy6d++OOnXq4OXLl1i6dCnq1auH8+fPS3175uDggFmzZgEA0tPTcf/+fezcuRN//fUXevTogb/++guGhobaOBStCAoKQq9evcDn87VdFaKE27dvIyoqCs2aNSvwt5mbNm3CzZs3MXLkSInlVapUwZcvX0rV+a/I2bNnERUVhdDQUKneL3fv3oWeXtF/r1CY/S5btgy2trY609Pq8OHDEo9fvHiBqKgoODk5wcvLSzuVKoDjx49DKBRi8eLFcHFx0XZ1CNEquicrHLonK17onqxw1NF+ysrIyEBUVBQASPV6mTx5MiZMmKDR/ZOSo7jerypCQSk1GD16NDZt2gQjIyNuWc+ePVG7dm3Mnj0bf/31l0R5Kysr/O9//5NYNnv2bAwfPhzLli2Dk5MToqOji6TuukBfXx/6+vpq215GRgZMTU3Vtj1SdHg8HoyNjbVdDYWEQiGysrK0Xk9tfWAoSR9Ucl+zi7PXr18DQL7D9hhj+Pr1K0xMTIqgVoRoB92TFQ7dkxGx4nBPVlIYGBjAwKB0fCynexEiCw3fU4PGjRtLfbhxdXWFh4cHEhMTldqGvr4+fvvtN9SsWRNLly5FamqqUutdvnwZjRs3homJCapWrYoVK1ZIlcnMzERERARcXFzA5/Ph6OiI8ePHIzMzU+Y2d+/ejVq1aoHP58PDwwMHDx6UeP7x48cYMmQIqlevDhMTE5QtWxbdu3eX6Pp66dIl8Hg8rFu3Tmr7hw4dAo/Hw759+wDIz1+wbNkyeHh4gM/nw97eHmFhYVLDmMR5HC5fvozvvvsOpqammDRpEgBgz549aN++Pezt7cHn8+Hs7Izp06dDIBDk16wyXbp0CQEBAbC1teXau1+/fhJlhEIhFi1aBA8PDxgbG6N8+fIYNGgQPnz4IFUuMjIS9vb2MDU1RfPmzXH79m2pfD3itjl9+jSGDx8OOzs7WFtbY9CgQcjKysLHjx8RHBwMGxsb2NjYYPz48WCMFahO4nHkp0+fRoMGDWBsbIxq1aph/fr1Ev
Xp3r07AKB58+bcsAdxV2Rl2rxZs2bYv38/Hj9+zK0v/nZKXv6Co0ePomnTpjAzM4O1tTU6deok9d4Sj/2+f/8+17PIysoKffv2RUZGhkTZt2/f4s6dO1LLZRHn9di4cSN3PorfE8+fP0e/fv1Qvnx57v3y559/5rvN69evIzQ0FNWqVYOxsTEqVKiAfv364d27dxLHM27cOABA1apVubYSv09ynyuqvN8KU++8+wW+naNnzpzB6NGjYWdnBzMzM3Tp0gVv3ryRWO/WrVs4ceIEdyy5vyn8+PEjRo4cCUdHR/D5fLi4uCA6OhpCoZArIz4/5s2bh1WrVsHZ2Rl8Ph8+Pj64ePGiRD1fvnyJvn37wsHBAXw+HxUrVkSnTp0krjO5x+gfP34cPj4+AIC+fftydVy7di0iIiJgaGgocTxiP/30E6ytrfH161eZ7TVv3jzweDw8fvxY6rmJEyfCyMiIey8mJSWhW7duqFChAoyNjeHg4IBevXop/H/g5OSEiIgIAICdnR14PB4iIyO55zp06IBDhw6hfv36MDExwcqVK5Vub3G50NBQWFlZwdraGiEhIUhISJB6n8rLdyArP4w6r0m56zlq1Cg4OTmBz+fDwcEBwcHBePv2LdLS0mBmZoYRI0ZIrffs2TPo6+tzPWVI8Uf3ZHRPJkb3ZCX7nqx69eowNjaGt7c3Tp48KVX26tWraNu2LSwtLWFubo4WLVrg/PnzSrcfABw4cIA7VgsLC7Rv3x63bt2S2E9oaCjMzc3x/PlzdO7cGebm5rCzs8PYsWO5tk5OToadnR0AICoqituX+P+1rPxFa9aswffff49y5cqBz+ejZs2aWL58eb5tpMhff/2FBg0awNTUFDY2Nvjuu++keo0DyPf/rrx8S7KuH/LuRY4fPw4ej4dt27Zh5syZcHBwgLGxMVq0aIH79+8rPI6///4bPB4PJ06ckHpu5cqV4PF4uHnzJgDl7geVlZOTg+nTp3P3n05OTpg0aZLE9TskJAS2trbIzs6WWr9169aoXr0697ggr7Gm7le1jhGNEAqFrFKlSqx169YSy/39/ZmHh4fc9aZPn84AsH379incvr+/P7O3t2flypVjQ4cOZb/99htr0qQJA8D++OMPrpxAIGCtW7dmpqambOTIkWzlypVs6NChzMDAgHXq1ElimwCYp6cnq1ixIps+fTpbtGgRq1atGjM1NWVv377lym3fvp15enqyqVOnslWrVrFJkyYxGxsbVqVKFZaens6Vq1atGmvXrp1U3fv27ctsbGxYVlYWY4yxNWvWMADs0aNHXJmIiAgGgLVs2ZItWbKEDR06lOnr6zMfHx9uPXE7VKhQgdnZ2bFhw4axlStXst27dzPGGOvcuTPr0aMHmzt3Llu+fDnr3r07A8DGjh0rUZ+QkBBWpUoVhe396tUrZmNjw9zc3NjcuXPZ6tWr2S+//MLc3d0lyg0YMIAZGBiwgQMHshUrVrDw8HBmZmYmVe/x48czACwwMJAtXbqUDRw4kDk4ODBbW1sWEhLClRO3jZeXF2vTpg2LiYlhQUFBDAAbP348a9KkCfvxxx/ZsmXLWIcOHRgAtm7dugLVqUqVKqx69eqsfPnybNKkSWzp0qWsXr16jMfjsZs3bzLGGHvw4AEbPnw4A8AmTZrENmzYwDZs2MBevnypdJsfPnyYeXl5MVtbW279Xbt2McYYe/ToEQPA1qxZw5WPjY1lBgYGzM3Njc2ZM4dFRUUxW1tbZmNjI/OcqVu3LuvatStbtmwZGzBgANdWuYnLHjt2TOHrzpjofeHu7s7s7OxYVFQUi4mJYVevXmUvX75kDg4OzNHRkU2bNo0tX76cdezYkQFgCxcu5NaXdUzz5s1jTZs2ZdOmTWOrVq1iI0aMYCYmJqxBgwZMKBQyxhi7du0a6927N7c9cVulpaVxr1fuc0XZ95uy9ZYn737F52jdunXZ999/z5YsWcLGjBnD9PX1WY8ePbhyu3btYg4ODq
xGjRrcsRw+fJgxxlh6ejqrU6cOK1u2LJs0aRJbsWIFCw4OZjwej40YMUKqLevWrctcXFxYdHQ0mzNnDrO1tWUODg4
"text/plain": [
"<Figure size 1200x500 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, axes = plt.subplots(1, 2, figsize=(12,5), sharey=False)\n",
"\n",
"thr_log_tx = dfc[\"log_n_tx_total\"].median()\n",
"# --- Graphique 1 ---\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([2,4])].groupby(\"cluster_k5\"):\n",
" axes[0].scatter(g[\"log_n_tx_total\"], g[\"gross_flow_to_aum\"], s=10, label=labels_map.get(int(name), \"Cluster {}\".format(int(name))))\n",
"\n",
"axes[0].set_yscale(\"log\")\n",
"axes[0].axvline(thr_log_tx, linestyle=\"--\")\n",
"axes[0].axhline(thr_int, linestyle=\"--\")\n",
"axes[0].set_xlabel(\"Activity frequency (log_n_tx_total)\")\n",
"axes[0].set_ylabel(\"Gross flow / mean AUM\")\n",
"axes[0].set_title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"axes[0].set_ylim(0.1,1000)\n",
"axes[0].legend(markerscale=2)\n",
"\n",
"# --- Graphique 2 ---\n",
"thr_churn = dfc[\"aum_drawdown_last\"].median()\n",
"thr_hold = dfc[\"avg_holding_months_per_isin\"].median()\n",
"\n",
"color_map = {\n",
" 1: \"#ff7f0e\",\n",
" 4: \"red\"\n",
"}\n",
"\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([0,2,3])].groupby(\"cluster_k5\"):\n",
" axes[1].scatter(\n",
" g[\"avg_holding_months_per_isin\"], g[\"aum_drawdown_last\"],\n",
" s=10,\n",
" color= color_map.get(int(name), \"gray\"),\n",
" label=labels_map.get(int(name), \"Cluster {}\".format(int(name)))\n",
" )\n",
"\n",
"axes[1].set_yscale(\"log\")\n",
"axes[1].axvline(thr_hold, linestyle=\"--\")\n",
"axes[1].axhline(thr_churn, linestyle=\"--\")\n",
"axes[1].set_xlabel(\"avg_holding_months_per_isin\")\n",
"axes[1].set_ylabel(\"aum_drawdown_last\")\n",
"axes[1].set_title(\"2D behavioral segmentation: potential churn vs loyalty\")\n",
"axes[1].legend(markerscale=2)\n",
"axes[1].set_ylim(0.001,1.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 348,
2026-04-07 12:31:16 +02:00
"id": "5b3c5228-c176-4f1c-8edb-5b5d093df8a9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjtFJREFUeJzt3XdYU2cbB+BfAoQlSxAQ2bjAAShqceFAKVWs2s9VqzjbKm5r1Q5nrataq+Ksq9aBWrV2OHErrahAXeBCcaGiDAVlJO/3B80pIQnkQEIS8tzX5SU5OTnnOS8h58k7BYwxBkIIIYQQAybUdgCEEEIIIdpGCREhhBBCDB4lRIQQQggxeJQQEUIIIcTgUUJECCGEEINHCREhhBBCDB4lRIQQQggxeJQQEUIIIcTgUUJECCGEEINHCZEO6tChAxo3bqzWYwoEAowZM0atx1TF5s2bIRAIcO/evSo/NwAMGTIEnp6eWjk3kXXv3j0IBAJs3rxZ26GojaauydPTE0OGDFHrMXX5vJrQoUMHdOjQQdthqMXWrVvRsGFDmJiYwNbWVtvhVFuUEJUjPj4eY8aMQaNGjWBpaQl3d3f07dsXN2/elNu3Q4cOEAgEEAgEEAqFsLa2RoMGDTBo0CAcPXpUC9ETws/169cxa9asSiWw27dvx7Jly9QWU3V1/vx5zJo1C1lZWdoORS1WrVql08nu48ePMWvWLCQmJmo7FF6Sk5MxZMgQ+Pj4YP369Vi3bp22Q6q2jLUdgK5buHAhzp07hz59+qBp06ZIT0/HypUr0axZM/z1119yNTmurq6YP38+ACA3Nxe3b9/G3r178fPPP6Nv3774+eefYWJioo1L0YpBgwahf//+MDU11XYoRAXXr1/H7Nmz0aFDhwrXrG3fvh1Xr17FhAkTZLZ7eHjgzZs3BvX+L8v58+cxe/ZsDBkyRO5bf0pKCoTCqv++Wpnzrlq1Cg4ODjpTw3TkyBGZx48fP8bs2bPh6emJgIAA7QRVASdPnoREIsEPP/yAunXrajucao0SonJMmjQJ27dvh0gk4rb169cPTZo0wYIFC/Dzzz/L7G9jY4OPPvpIZtuCBQswbtw4rFq1Cp6enli4cGGVxK4LjIyMYGRkpLbj5eXlwcLCQm3HI1VHIBDAzMxM22GUSSKRoKCgQOtxausLRHX64lLyM1ufPXv2DADKbSpjjOHt27cwNzevgqiqKUYqpFmzZqxZs2Yy20JCQlijRo0U7l9UVMT8/PyYhYUFy8rKKvPY0uNcvHiRBQcHMzMzM+bp6clWr14tt+/bt2/ZjBkzmI+PDxOJRMzV1ZVNmTKFvX37VmY/ACwqKort27ePNWrUiIlEIubn58cOHjwos9+9e/fYqFGjWP369ZmZmRmrWbMm+9///sdSU1O5feLj4xkAtnnzZrl4Dh06xACw3377jTHG2KZNmxgAmdczxlh0dDTz8/NjIpGI1a5dm40ePZplZmYqLYd27doxc3NzNn78eMYYY/v372fvvfceq127NhOJRMzb25vNmTOHFRUVyRwjMjKSeXh4lFHa/11T165dmb29PVfeQ4cOldlHLBaz77//nvn5+TFTU1Pm6OjIPv74Y/by5Uu5/WbOnMlq167NzM3NWYcOHdi1a9eYh4cHi4yM5PaTls2ZM2fY2LFjmYODA7OxsWEff/wxy8/PZ5mZmWzQoEHM1taW2drasilTpjCJRFKhmDw8PFi3bt3YmTNnWIsWLZipqSnz8vJiW7ZskYun9L8TJ06oXOYhISFyr5eWf2pqKgPANm3aJBNbbGwsa9u2LbOwsGA2NjasR48e7Pr16zL7zJw5kwFgt27dYpGRkczGxoZZW1uzIUOGsNzcXJl9nz9/zm7cuCG3XRHp38XPP//M/Pz8mLGxMdu3bx9jjLGHDx+yoUOHMkdHR+7vZcOGDTKvV3RNSUlJLDIyknl5eTFTU1Pm5OTEhg4dyjIyMuSup/Q/6d9Jyf
cKn783VeNWRtl79OzZs2zixInMwcGBWVhYsJ49e7Jnz57JvK70tYSEhHDPZ2ZmsvHjxzNXV1cmEomYj48PW7BgAROLxXJluXjxYrZ27Vrm7e3NRCIRCwoKYhcuXJCJ88mTJ2zIkCGsTp06TCQSMWdnZ9ajRw+Zz5mQkBAuhhMnTigs702bNrEZM2YwY2NjmeuRGjlyJLOxsWFv3rxRWF6LFy9mANi9e/fknps2bRozMTHh/hZv3rzJevfuzZycnJipqSmrU6cO69evX5n3A0XlOnPmTO65bt26sUOHDrHmzZszU1NT9v3336tc3tL9IiMjmbW1NbOxsWGDBw9mCQkJcu/pkmVZkqLPV3V+JpWMc8KECczDw4OJRCJWp04dNmjQIPb8+XP26tUrZmFhwcaNGyf3ugcPHjChUMi+/fZbpWVcEiVEFSCRSFidOnVY165dZbaXlRAxxtjcuXMZAPb777+XefyQkBDm4uLCHB0d2ZgxY9jy5ctZ27ZtGQCZDzaxWMy6du3KLCws2IQJE9jatWvZmDFjmLGxMXv//fdljgmA+fv7s9q1a7O5c+eyZcuWMW9vb2ZhYSHzQb17927m7+/PZsyYwdatW8e++OILZmdnxzw8PGRuMN7e3uy9996Ti33o0KHMzs6OFRQUMMYUJ0TSm0FoaChbsWIFGzNmDDMyMmItWrTgXictB2dnZ1arVi02duxYtnbtWrZ//37GGGM9e/Zkffv2ZYsXL2arV69mffr0YQDYZ599JhOPKgnR06dPmZ2dHatfvz5bvHgxW79+Pfvyyy+Zr6+vzH4jRoxgxsbGbOTIkWzNmjVs6tSpzNLSUi7uzz//nAFgERERbOXKlWzkyJHM1dWVOTg4KLzZBAQEsHfffZdFR0ezQYMGMQDs888/Z23btmUffvghW7VqFevevTsDIPdhoWpMHh4erEGDBszJyYl98cUXbOXKlaxZs2ZMIBCwq1evMsYYu3PnDhs3bhwDwL744gu2detWtnXrVpaenq5ymR85coQFBAQwBwcH7vXSBENR8nD06FFmbGzM6tevzxYtWsRmz57NHBwcmJ2dncL3TGBgIOvduzdbtWoVGzFiBFdWJUn3lSZyZQHAfH19Wa1atdjs2bNZdHQ0S0hIYOnp6czV1ZW5ubmxOXPmsNWrV7MePXowANxNR9k1fffdd6xdu3Zszpw5bN26dWz8+PHM3NyctWzZkktok5KS2IABA7jjScvq9evX3O+r5HtF1b83VeNWRllCFBgYyDp16sRWrFjBJk+ezIyMjFjfvn25/fbt28dcXV1Zw4YNuWs5cuQIY4yx3Nxc1rRpU2Zvb8+++OILtmbNGjZ48GAmEAi4LzglyzIwMJDVrVuXLVy4kC1atIg5ODgwV1dXmfdz69atmY2NDfvqq6/Yjz/+yL799lvWsWNHdurUKW6fkjfx9PR0NmfOHAaAffzxx1yMd+7cYbdu3WIA2IoVK2TKIj8/n9nZ2bFhw4YpLa/79+8zgUDAFi1aJPect7c369atG3csLy8v5uLiwr755hv2448/stmzZ7MWLVooTKZKlmuvXr0YALZ69Wq2detWlpSUxP2u6taty+zs7Ni0adPYmjVr2IkTJ1Qub4lEwtq3b8+EQiEbPXo0W7FiBevUqRNr2rRppRIidX4mMcbYq1evWOPGjZmRkREbOXIkW716NZs7dy5r0aIFS0hIYIwxNnDgQObk5CT3hXjRokVMIBCw+/fvKy3jkighqoCtW7fKJSeMlZ8Q7du3jwFgP/zwQ5nHl37LXrJkCbctPz+fBQQEMEdHR+5NtXXrViYUCtmZM2dkXr9mzRoGgJ07d47bBoCJRCJ2+/ZtbltSUpLcB0FeXp5cPHFxcQwA++mnn7ht06dPl/n2I43R1tZW5gOkdEL07NkzJhKJWNeuXWW+raxcuZIBYBs3bpQrhzVr1sjFpCjOTz75hFlYWMjUjqmSEEl/L/Hx8Ur3OXPmDAPAtm3bJrNd+g
1duj09PZ0ZGxuznj17yuw3a9YsBkDhzSYsLEym5ic4OJgJBAL26aefctuKioqYq6urzIeSqjEx9t83zdOnT3Pbnj1
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Profil fidele (avg_holding_months_per_isin_med) vs churn (aum_drawdown_last_med)\n",
"\n",
"thr_churn = dfc[\"aum_drawdown_last\"].median()\n",
"thr_hold = dfc[\"avg_holding_months_per_isin\"].median()\n",
"\n",
"plt.figure()\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([0,2,3])].groupby(\"cluster_k5\"):\n",
" plt.scatter(g[\"avg_holding_months_per_isin\"], g[\"aum_drawdown_last\"], s=10, label=name)\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_hold, linestyle=\"--\")\n",
"plt.axhline(thr_churn, linestyle=\"--\")\n",
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"plt.legend(markerscale=2)\n",
"plt.ylim(0.001,1.1)\n",
"plt.show()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 349,
2026-04-07 12:31:16 +02:00
"id": "5071c36c-0176-460c-aeb7-ed7c4fb35ce5",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMoAAAGGCAYAAACKUW2JAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA4BBJREFUeJzs3Xd4TOnbB/D7aAkiiShBiN6DIGqQEJIgrGCVLFZdvQuil7V2dVbvffXeW3S7elm9rtWDFIIg833/yDvnN5MidjfJZE6+n+uai5xzZuY+z5x6n6coACBEREREREREREQpXCpTB0BERERERERERJQcMFFGREREREREREQkTJQRERERERERERGJCBNlREREREREREREIsJEGRERERERERERkYgwUUZERERERERERCQiTJQRERERERERERGJCBNlREREREREREREIsJEGRERERERERERkYgwUUZERGRW8uXLJ23btjV1GElq6dKloiiKPHjwwNShEBEREZHGMVFGRESUDNy9e1c6d+4sBQoUEEtLS7G2thZXV1eZPn26vH//PkliePfunYwaNUoOHz6cJN9nrlavXi3Tpk0zdRjJjqIo0qNHjxjTf/rpJ1EURdq3by86ne5ff/6DBw9EUZRYX2vWrPkvoRMRERGp0pg6ACIiopRu586d8u2334qFhYW0adNGnJyc5OPHj3L8+HHx9/eXq1evyvz58xM9jnfv3sno0aNFRMTd3T3Rv+9rtW7dWlq0aCEWFhamDkVEohJlf/75p/Tp08fUoSR7P//8swwdOlS+//57WbhwoaRK9d+f0bZs2VLq1atnNK1KlSr/+XOJiIiIRJgoIyIiMqn79+9LixYtJG/evHLo0CHJmTOnOq979+5y584d2blzpwkj/O/Cw8MlY8aM//r9qVOnltSpUydgRMnTu3fvJEOGDKYOI8FMnDhRAgICpE2bNrJ48eIESZKJiJQrV05atWqVIJ9FREREFB2bXhIREZnQhAkT5O3bt7Jo0SKjJJleoUKFpHfv3nG+f9SoUaIoSozpsfXrdfbsWfHy8pKsWbNK+vTpJX/+/NK+fXsRiWrWli1bNhERGT16tNqkbdSoUer7b9y4IU2bNhU7OzuxtLQUFxcX2bZtW6zfe+TIEenWrZtkz55dcufO/cUy+PXXX6VkyZKSIUMGyZw5s7i4uMjq1au/uC46nU5GjRoluXLlkgwZMkjNmjXl2rVrMfpw07/3xIkT0q9fP8mWLZtkzJhRfH19JSgoyCiOrVu3Sv369SVXrlxiYWEhBQsWlLFjx0pkZKS6jLu7u+zcuVP++usvtYzy5csXZ5wiIocPHxZFUYyatLq7u4uTk5OcO3dOatSoIRkyZJAhQ4aIiEhERISMHDlSChUqJBYWFpInTx4ZOHCgREREGH3u/v37pVq1amJraytWVlZStGhR9TNMbcqUKTJw4EBp1aqVLFmyJMGSZHrh4eHy8ePHBP1MIiIiIhHWKCMiIjKp7du3S4ECBaRq1aqJ+j0vXrwQT09PyZYtmwwePFhsbW3lwYMHsmnTJhERyZYtm8yZM0e6du0qvr6+0rhxYxERKV26tIiIXL16VVxdXcXBwUEGDx4sGTNmlHXr1kmjRo1k48aN4uvra/R93bp1k2zZssmIESMkPDw8zrgWLFggvXr1kqZNm0rv3r3lw4cPcvnyZfnjjz/Ez88vzvcFBATIhAkTpEGDBuLl5SWXLl0SLy8v+fDhQ6zL9+zZUzJnziwjR46UBw8eyLRp06RHjx6ydu1adZmlS5eKlZWV9OvXT6ysrOTQoUMyYsQICQsLk4kTJ4qIyNChQyU0NFQePXokU6dOFRERKyur+Io/Vq9evZK6detKixYtpFWrVmJvby86nU4aNmwox48flx9++EGKFy8uV65ckalTp8qtW7dky5YtIhL1e/j4+Ejp0qVlzJgxYmFhIXfu3JETJ078q1gS0vTp06V///7i5+
cnS5cujTVJ9vLly6/6rEyZMsVocjt69Gjx9/cXRVGkfPnyMm7cOPH09EyQ2ImIiIiYKCMiIjKRsLAwefz4sXzzzTeJ/l0nT56U4OBg2bdvn7i4uKjTf/zxRxERyZgxozRt2lS6du0qpUuXjtG0rXfv3uLo6ChnzpxRExfdunWTatWqyaBBg2Ikyuzs7OTgwYPxNpncuXOnlCxZUtavX//V6/L8+XOZMmWKNGrUSDZv3qxOHz16tFENOENZsmSRffv2qbXvdDqdzJgxQ0JDQ8XGxkZEovoeS58+vfqeLl26SJcuXWT27Nny448/ioWFhdSpU0ccHBwkODj4Pzf/e/bsmcydO1c6d+6sTlu5cqUcOHBAjhw5ItWqVVOnOzk5SZcuXeTkyZNStWpV2b9/v3z8+FF2794tWbNm/U9xJKQdO3bIX3/9JS1btpTly5fH+fvray/GZ8mSJWoNwVSpUomnp6f4+vqKg4OD3Lt3T6ZMmSJ169aVbdu2Sf369RNqNYiIiCgFY6KMiIjIRMLCwkQkqtZMYrO1tRWRqERGmTJlJG3atF/93tevX8uhQ4dkzJgx8ubNG3nz5o06z8vLS0aOHCmPHz8WBwcHdXqnTp2+ql8xW1tbefTokZw5c0YqVKjwVfEcPHhQPn/+LN26dTOa3rNnzzgTZT/88INRE9Xq1avL1KlT5a+//lJrzRkmyd68eSMRERFSvXp1mTdvnty4cUPKlCnzVfF9LQsLC2nXrp3RtPXr10vx4sWlWLFiRrWuatWqJSIigYGBUrVqVfX33Lp1q7Rr1y7Bmzb+W8+fPxcRkfz583/x99+/f/9XfV7JkiXV/zs6OsrevXuN5rdu3VpKlCgh/fv3Z6KMiIiIEgQTZURERCZibW0tImKUeEosbm5u0qRJExk9erRMnTpV3N3dpVGjRuLn5xfvaJJ37twRADJ8+HAZPnx4rMu8ePHCKFGWP3/+r4pr0KBBcuDAAalYsaIUKlRIPD09xc/PT1xdXeN8z19//SUiUf23GbKzs5PMmTPH+h5HR0ejv/XLBQcHq9OuXr0qw4YNk0OHDqlJTL3Q0NCvWp9/wsHBQdKlS2c07fbt23L9+vU4a1y9ePFCRESaN28uCxculI4dO8rgwYPFw8NDGjduLE2bNv1i0uz169f/um8vOzu7GPFG9/3338uTJ0/kp59+kqxZs0rfvn1jXa527dr/KobYYmrXrp38/PPP8ujRo3j7wyMiIiKKDxNlREREJmJtbS25cuWSP//8819/Rmwd+YuIUQf0+uU2bNggv//+u2zfvl327t0r7du3l8mTJ8vvv//+xX62dDqdiIgMGDBAvLy8Yl0metLKsHbWlxQvXlxu3rwpO3bskD179sjGjRtl9uzZMmLECBk9evRXfcbXiKt2EwAREQkJCRE3NzextraWMWPGSMGCBcXS0lLOnz8vgwYNUsvgS772t9CLrYx0Op2UKlVKpkyZEut78uTJo7736NGjEhgYKDt37pQ9e/bI2rVrpVatWrJv374417dx48Zy5MiReNclNoGBgeLu7v7FZdKkSSPr1q0Tb29v6d+/v9ja2saoNScS1ez0a9jY2MS7LenL5PXr10yUERER0X/GRBkREZEJ+fj4yPz58+XUqVNSpUqVf/x+fc2okJAQtTmeyP9qXUVXuXJlqVy5sowbN05Wr14t3333naxZs0Y6duwYZ6KnQIECIiKSNm3aBKsJZChjxozSvHlzad68uXz8+FEaN24s48aNk4CAALG0tIyxfN68eUUkqqabYc21V69eGdUQ+ycOHz4sr169kk2bNkmNGjXU6ffv34+xbFzlZPhbGIrrt4hNwYIF5dKlS+Lh4RHn9+ilSpVKPDw8xMPDQ6ZMmSI//fSTDB06VAIDA+P8nSZPnvyvy+hrm55aWlrKtm3bpGbNmtKpUyextbWN0YddbCO8xsawj7K43Lt3T0S+vt8zIiIioi9hooyIiMiEBg4cKKtWrZKOHTvKoU
OHxN7e3mj+3bt3ZceOHdK7d+9Y31+wYEERETl69Kg0bNhQRETCw8Nl2bJlRssFBweLra2tUfLF2dlZREQiIiJERCR
"text/plain": [
"<Figure size 1400x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#heatmap\n",
"def robust_zscore_col(s):\n",
" med = np.nanmedian(s)\n",
" mad = np.nanmedian(np.abs(s - med))\n",
" if mad == 0 or np.isnan(mad):\n",
" return np.zeros(len(s))\n",
" return (s - med) / (1.4826 * mad)\n",
"\n",
"for k in [5]:\n",
" prof = dfc.groupby(f\"cluster_k{k}\")[profile_vars].median()\n",
" prof_z = prof.copy()\n",
"\n",
" for c in prof.columns:\n",
" # prof_z[c] = robust_zscore_col(prof[c].values)\n",
" prof_z[c] = (prof[c] - prof[c].mean()) / (prof[c].std() + 1e-12)\n",
" prof_z[c] = prof_z[c].fillna(0)\n",
"\n",
" plt.figure(figsize=(14, 4))\n",
" sns.heatmap(prof_z, cmap=\"RdBu_r\", center=0)\n",
" plt.xticks(rotation=45, ha='right') # incline les noms à 45°, alignés à droite\n",
" plt.title(f\"Cluster signatures — K={k}\")\n",
" plt.ylabel(\"Clusters\")\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 350,
2026-04-07 12:31:16 +02:00
"id": "72393182-7c5b-4484-b0e0-770bff771d4c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuEAAAKyCAYAAAB7WgDLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs/Xm4bVV1Joy/c87V7X6f/txzey7tpReQiIhih9hENKkYq2ywfSqlyWOMiVCJIpqIVqIVk7K0Kn6RfPrlF5OoFSomqEGQJIgNSt/f/t7Tn7PP7lc75++POdac+wgqGhSI632eC/vsvZrZr7nGeMc7mFJKoUCBAgUKFChQoECBAj8z8Ce6AAUKFChQoECBAgUK/Lyh2IQXKFCgQIECBQoUKPAzRrEJL1CgQIECBQoUKFDgZ4xiE16gQIECBQoUKFCgwM8YxSa8QIECBQoUKFCgQIGfMYpNeIECBQoUKFCgQIECP2MUm/ACBQoUKFCgQIECBX7GKDbhBQoUKFCgQIECBQr8jFFswgsUKFCgQIECBQoU+Bmj2IQXKPBzjPe9731gjD3RxSjwQ3D55Zdj165dT3QxChQoUKDA44xiE16gwL8TXHvttWCMmX9BEGBubg6XXHIJ/uRP/gTdbvdxuc/8/Dze97734fbbb39crvfzgsFggPe973246aabnuii/FTAGMPb3/72R3z/wQ9+EIwxvPGNb4SU8ie+/sGDBzeN79F/f/VXf/VvKXqBAgUKPCFwnugCFChQ4PHF+9//fuzevRtJkmBxcRE33XQT3vGOd+CjH/0orrvuOpxxxhnm2N/7vd/DFVdc8WNdf35+HldffTV27dqFs84663Eu/b9fDAYDXH311QCA5zznOY/5vD/7sz/7N21en0h86EMfwu/+7u/i9a9/PT71qU+B83+73efVr341XvziF2/67hnPeMa/+boFChQo8LNGsQkvUODfGS699FKce+655u8rr7wSX/va1/DSl74Uv/iLv4j77rsPpVIJAOA4DhynWAaejOj3+6hUKnBd94kuyk+EP/zDP8SVV16J173udfjzP//zx2UDDgBPe9rT8JrXvOZxuVaBAgUKPJEo6CgFCvwc4LnPfS7e85734NChQ/jsZz9rvn80TvhXv/pVXHjhhWg2m6hWqzjppJPwX//rfwUA3HTTTTjvvPMAAG94wxsMHeDaa68FAPzzP/8z/sN/+A/YsWMHfN/H9u3b8Zu/+ZsYDoeb7nH55ZejWq3i2LFjuOyyy1CtVjE1NYV3vetdyLJs07FSSnzsYx/D6aefjiAIMDU1hRe96EX4zne+s+m4z372szjnnHNQKpUwPj6OX/3VX8WRI0d+ZNvkbfDggw/iNa95DRqNBqampvCe97wHSikcOXIEL3/5y1Gv1zE7O4uPfOQjm86P4xjvfe97cc4556DRaKBSqeBZz3oWbrzxRnPMwYMHMTU1BQC4+uqrTbu9733v29Qe+/btw4tf/GLUajX8p//0n8xvo5zwq666Cpxz3HDDDZvK8da3vhWe5+GOO+74kXX+aeOjH/0ofud3fgevec1r8OlPf/px24Dn6Pf7iOP4cb1mgQIFCvysUWzCCxT4OcFrX/taAMBXvvKVH3jMPffcg5e+9KWIogjvf//78ZGPfAS/+Iu/iH/9138FAJxyyil4//vfD0Bv+j7zmc/gM5/5DC666CIAwN/8zd9gMBjg137t1/Cnf/qnuOSSS/Cnf/qneN3rXveIe2VZhksuuQQTExP4oz/6Izz72c/GRz7yEfzv//2/Nx33pje9Ce94xzuwfft2fPjDH8YVV1yBIAhw6623mmP+4A/+AK973etwwgkn4KMf/Sje8Y534IYbbsBFF12EjY2Nx9Q+r3rVqyClxIc+9CGcf/75+P3f/3388R//MV7wghdg69at+PCHP4zjjz8e73rXu3DzzTeb8zqdDj71qU/hOc95Dj784Q/jfe97H1ZWVnDJJZ
cY3vzU1BQ+8YlPAABe8YpXmHZ75Stfaa6TpikuueQSTE9P44/+6I/wS7/0S49azt/7vd/DWWedhTe96U2G5//lL38Zf/Znf4b3vve9OPPMMx9TfX9a+NjHPobf+q3fwn/8j/8R11577aNuwFdXVx/TvyiKHnHu1VdfjWq1iiAIcN555/3Q8VygQIECT2qoAgUK/LvApz/9aQVAffvb3/6BxzQaDXX22Webv6+66io1ugz89//+3xUAtbKy8gOv8e1vf1sBUJ/+9Kcf8dtgMHjEd9dcc41ijKlDhw6Z717/+tcrAOr973//pmPPPvtsdc4555i/v/a1rykA6jd+4zcecV0ppVJKqYMHDyohhPqDP/iDTb/fddddynGcR3z//cjb4K1vfav5Lk1TtW3bNsUYUx/60IfM961WS5VKJfX6179+07FRFG26ZqvVUjMzM+qNb3yj+W5lZUUBUFddddUjypC3xxVXXPGov+3cufMRdfM8T735zW9WrVZLbd26VZ177rkqSZIfWtefJgConTt3KgDq1a9+tUrT9Ice+1j+jY6xQ4cOqRe+8IXqE5/4hLruuuvUH//xH6sdO3Yozrn6+7//+59BDQsUKFDg8UVBBi1Q4OcI1Wr1h6qkNJtNAMDf/d3f4Q1veMOPTSPIueaApgwMh0NccMEFUErhe9/7Hnbs2LHp+P/8n//zpr+f9axn4TOf+Yz5+/Of/zwYY7jqqqseca+cRvOFL3wBUkr8yq/8ClZXV83vs7OzOOGEE3DjjTcaOs0Pw5vf/GbzWQiBc889F0ePHsWb3vQm832z2cRJJ52E/fv3bzpWCAFAU2c2NjYgpcS5556L7373uz/yvqP4tV/7tcd03GmnnYarr74aV155Je68806srq7iK1/5yhPO719aWgIA7N6927TJo+GrX/3qY7reqaeeaj7v2LEDX/7ylzf9/trXvhZ79+7Fb/3Wb+ElL3nJT1DiAgUKFHjiUGzCCxT4OUKv18P09PQP/P1Vr3oVPvWpT+HNb34zrrjiCjzvec/DK1/5SvzyL//yY9qQHz58GO9973tx3XXXodVqbfqt3W5v+jvnd49ibGxs03n79u3D3NwcxsfHf+A9H3roISilcMIJJzzq7481sPH7XxAajQaCIMDk5OQjvl9bW9v03V/8xV/gIx/5CO6//34kSWK+371792O6N6CDZLdt2/aYj//t3/5t/NVf/RW+9a1v4YMf/CD27t37I89ZX1//ibnU4+Pj8Dzvhx7z+te/HvPz8/jgBz+IyclJ/OZv/uajHvf85z//JyrDo5XpDW94Az70oQ/h6NGjP1b7FShQoMATjWITXqDAzwmOHj2KdruN448//gceUyqVcPPNN+PGG2/El770JVx//fX43Oc+h+c+97n4yle+8kOtm1mW4QUveAHW19fx7ne/GyeffDIqlQqOHTuGyy+//BEyez/sWj8OpJRgjOEf//EfH/Wa1Wr1MV3n0c79QWVUSpnPn/3sZ3H55Zfjsssuw2//9m9jenoaQghcc8012Ldv32OsBeD7/o/ledi/fz8eeughAMBdd931mM555Stfia9//euP+R6juPHGG3+ktKLjOPjrv/5rvOhFL8Jv/dZvodls4g1veMMjjltcXHxM92w0Gpu8K4+G7du3A9AvGMUmvECBAk8lFJvwAgV+TpDTPC655JIfehznHM973vPwvOc9Dx/96EfxwQ9+EL/7u7+LG2+8Ec9//vN/YIbNu+66Cw8++CD+4i/+YlMg5mOlHjwa9uzZgy9/+ctYX1//gdbwPXv2QCmF3bt348QTT/yJ7/WT4m//9m9x3HHH4Qtf+MKmtvl+Cs3jmZlUSonLL78c9Xod73jHO/DBD34Qv/zLv7wp0PPR8JGPfOQRHorHisca8BkEAa677jpcfPHFeMtb3oJms4lXvOIVm47ZsmXLY7rWpz/9aVx++eU/9JicGvT9XpUCBQoUeLKj2IQXKPBzgK997Wv4wAc+gN
27dxvpu0fDo21284Q8uVJFpVIBgEeojuRW41ErsVIKH/vYx37icv/SL/0SPv7xj+Pqq69+xHWUUmCM4ZWvfCWuvPJ
"text/plain": [
"<Figure size 800x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.metrics import pairwise_distances\n",
"\n",
"def plot_distance_matrix_sorted(X_scaled, labels, max_points=400, title=\"Distance matrix\"):\n",
" \"\"\"\n",
" Trace la matrice de distance triée par cluster avec des lignes séparatrices.\n",
" \n",
" Parameters\n",
" ----------\n",
" X_scaled : np.array ou pd.DataFrame\n",
" Les données numériques standardisées (n_samples x n_features)\n",
" labels : array-like\n",
" Les labels de cluster pour chaque point\n",
" max_points : int, optional\n",
" Nombre maximum de points à afficher pour éviter des matrices trop grandes\n",
" title : str, optional\n",
" Titre de la figure\n",
" \"\"\"\n",
" n = X_scaled.shape[0]\n",
" idx = np.arange(n)\n",
"\n",
" # Sous-échantillonnage si nécessaire\n",
" if n > max_points:\n",
" rng = np.random.default_rng(42)\n",
" idx = rng.choice(idx, size=max_points, replace=False)\n",
"\n",
" X_sub = X_scaled[idx]\n",
" labels_sub = np.asarray(labels)[idx]\n",
"\n",
" # Tri par cluster\n",
" order = np.lexsort((np.arange(len(labels_sub)), labels_sub))\n",
" X_sub = X_sub[order]\n",
" labels_sub = labels_sub[order]\n",
"\n",
" # Matrice de distances\n",
" D = pairwise_distances(X_sub)\n",
"\n",
" # Figure\n",
" plt.figure(figsize=(8, 7))\n",
" sns.heatmap(D, cmap=\"viridis\")\n",
" \n",
" # Lignes séparatrices entre clusters\n",
" unique_labels, counts = np.unique(labels_sub, return_counts=True)\n",
" boundaries = np.cumsum(counts)\n",
" for b in boundaries[:-1]: # on ignore la dernière limite\n",
" plt.axhline(b, color='red', linewidth=2)\n",
" plt.axvline(b, color='red', linewidth=2)\n",
"\n",
" plt.title(title)\n",
" plt.tight_layout()\n",
" plt.show()\n",
"\n",
"for k in [5]:\n",
" plot_distance_matrix_sorted(\n",
" X_scaled,\n",
" dfc[f\"cluster_k{k}\"].values,\n",
" title=f\"Distance matrix — K={k}\"\n",
" )\n",
"\n",
"# Cluster 0 très distant des autres"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 351,
2026-04-07 12:31:16 +02:00
"id": "a5f006c5-55a8-475f-b58d-fc26886c0aba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"churn_hard 0.361386\n",
"churn_soft 0.603960\n",
"churn_warning 0.344059\n",
"dtype: float64\n",
"\n",
"===== CHURN PAR CLUSTER K=2 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>churn_hard_rate</th>\n",
" <th>churn_soft_rate</th>\n",
" <th>churn_warning_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>327</td>\n",
" <td>0.409786</td>\n",
" <td>0.642202</td>\n",
" <td>0.336391</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>77</td>\n",
" <td>0.155844</td>\n",
" <td>0.441558</td>\n",
" <td>0.376623</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
"cluster_k2 \n",
"0 327 0.409786 0.642202 0.336391\n",
"1 77 0.155844 0.441558 0.376623"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== CHURN PAR CLUSTER K=5 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>churn_hard_rate</th>\n",
" <th>churn_soft_rate</th>\n",
" <th>churn_warning_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k5</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>168</td>\n",
" <td>0.541667</td>\n",
" <td>0.797619</td>\n",
" <td>0.416667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>111</td>\n",
" <td>0.396396</td>\n",
" <td>0.648649</td>\n",
" <td>0.306306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>90</td>\n",
" <td>0.000000</td>\n",
" <td>0.166667</td>\n",
" <td>0.211111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.233333</td>\n",
" <td>0.600000</td>\n",
" <td>0.433333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>0.800000</td>\n",
" <td>1.000000</td>\n",
" <td>0.600000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
"cluster_k5 \n",
"1 168 0.541667 0.797619 0.416667\n",
"3 111 0.396396 0.648649 0.306306\n",
"4 90 0.000000 0.166667 0.211111\n",
"0 30 0.233333 0.600000 0.433333\n",
"2 5 0.800000 1.000000 0.600000"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Analyse churn\n",
"\n",
"dfc[\"churn_hard\"] = (dfc[\"aum_final_to_peak\"] < 0.10).astype(int)\n",
"\n",
"dfc[\"churn_soft\"] = (\n",
" (dfc[\"aum_final_to_peak\"] < 0.40) &\n",
" (dfc[\"aum_drawdown_last\"] > 0.40)\n",
").astype(int)\n",
"\n",
"dfc[\"churn_warning\"] = (\n",
" (dfc[\"flow_direction_balance\"] < 0) &\n",
" (dfc[\"aum_drawdown_last\"] > 0.20)\n",
").astype(int)\n",
"\n",
"print(dfc[[\"churn_hard\", \"churn_soft\", \"churn_warning\"]].mean())\n",
"\n",
"for k in [2, 5]:\n",
" out = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" churn_hard_rate=(\"churn_hard\", \"mean\"),\n",
" churn_soft_rate=(\"churn_soft\", \"mean\"),\n",
" churn_warning_rate=(\"churn_warning\", \"mean\")\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
" print(f\"\\n===== CHURN PAR CLUSTER K={k} =====\")\n",
" display(out)"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 352,
2026-04-07 12:31:16 +02:00
"id": "b8b4940e-4ab5-4123-a59a-e99d5f1fc5b6",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAQNBJREFUeJzt3Xt8zvX/x/HntdmuHdgQNjTN2TAji5xCrVS+Skn0DSOUWGGEJccwKSKnJUTFVyfp4GxMXyGnlJzKYeZbbSZf5rjN9vn94ef6drWD8dm1a7PH/Xa7brdd78/n8/68PpdP3+/1vD7v9+djMQzDEAAAAACY4OLsAgAAAAAUfQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwC4SRaLRREREc4uI0969uypkiVLOrsMSVKbNm3Upk0bZ5cBAHAQggUA/L+jR4/qhRdeULVq1eTh4SEfHx+1aNFCM2bM0OXLl51dHm5g0qRJWrFihbPLMC0+Pl4Wi0VvvfWWXbthGHrhhRdksVg0duxYU/uIi4uTxWLJ9rV9+3ZTfQMovko4uwAAKAxWrlypzp07y2q1qkePHqpfv77S0tK0ZcsWvfLKK9q/f7/mzZvn7DKRi0mTJumpp55Sx44dnV1KvjMMQ/3799e8efM0atQo08Hiupdffln33HOPXVuNGjXypW8AxQ/BAkCxd/z4cXXt2lV33XWXNm7cqIoVK9qWDRgwQEeOHNHKlSsLtKbMzEylpaXJw8OjQPcLe1euXJG7u7tcXJx7gf+ll15STEyMRo4cqfHjx+dbv61atdJTTz2Vb/0BKN4YCgWg2JsyZYouXLigBQsW2IWK62rUqKGBAwdmaV+xYoXq168vq9WqevXqac2aNXbLe/bsqcDAwCzbjR07VhaLxa7t+ryNJUuWqF69erJarVqzZo0WLVoki8Wi7777TpGRkSpfvry8vb31xBNPKDk5Oc/HeOzYMbVr107e3t6qVKmSxo8fL8MwJF37NTwwMFCPP/54lu2uXLkiX19fvfDCCzfcx0cffaQmTZrIy8tLZcqU0X333ad169bluP71Y4uPj7drvz5MJy4uztb266+/qlOnTvL395eHh4fuvPNOde3aVefOnZN07fO7ePGiFi9ebBvS07NnT9v2v/32m5577jn5+fnZ/r0WLlyY7X6XLVum1157TZUrV5aXl5dSUlJueOyONHDgQM2ePVtRUVGaMGFCvvd//vx5Xb16Nd/7BVD8cMUCQLH39ddfq1q1amrevHmet9myZYuWL1+u/v37q1SpUnrnnXfUqVMnJSQk6I477rilOjZu3KhPPvlEERERKleunAIDA7V3715J136xLlOmjMaMGaP4+HhNnz5dERER+vjjj2/Yb0ZGhh5++GHde++9mjJlitasWaMxY8bo6tWrGj9+vCwWi7p166YpU6bozJkzKlu2rG3br7/+WikpKerWrVuu+xg3bpzGjh2r5s2ba/z48XJ3d9f333+vjRs36qGHHrqlz+O6tLQ0tWvXTqmpqXrppZfk7++v3377Td98843Onj0rX19fffjhh+rTp4+aNGmi559/XpJUvXp1SVJSUpLuvfdeW3grX768Vq9erd69eyslJUWDBg2y29/rr78ud3d3DR06VKmpqXJ3dzdVvxmDBw/WO++8o+HDh2vSpElZlmdmZurMmTN56svX11dubm52bb169dKFCxfk6uqqVq1a6c0331RoaGi+1A6gGDIAoBg7d+6cIcl4/PHH87yNJMPd3d04cuSIre3HH380JBkzZ860tYWHhxt33XVXlu3HjBlj/P1/fiUZLi4uxv79++3a33//fUOSERYWZmRmZtraBw8ebLi6uhpnz57Ntdbw8HBDkvHSSy/Z2jIzM4327dsb7u7uRnJysmEYhnH48GFDkjF37ly77R977DEjMDDQbt9/9+
uvvxouLi7GE088YWRkZNgt++t2rVu3Nlq3bp3l2I4fP263zaZNmwxJxqZNmwzDMIwffvjBkGR8+umnuR6rt7e3ER4enqW9d+/eRsWKFY3Tp0/btXft2tXw9fU1Ll26ZLffatWq2dqc4fjx44Yk46677jIkGa+88soN183L6/rnaRiG8d133xmdOnUyFixYYHz55ZdGdHS0cccddxgeHh7Gnj17CuAoAdyOuGIBoFi7PsylVKlSN7VdWFiY7RdxSWrQoIF8fHx07NixW66ldevWqlu3brbLnn/+ebvhU61atdLbb7+tEydOqEGDBjfs+6+3x73+y/3KlSu1YcMGde3aVbVq1VLTpk21ZMkS9evXT5J05swZrV69WsOGDcsydOuvVqxYoczMTI0ePTrLXITctssrX19fSdLatWv16KOPysvLK8/bGoahzz//XE8//bQMw9Dp06dty9q1a6dly5Zpz549atGiha09PDxcnp6epus2KykpSZJUq1atHNfx9/fX+vXr89RfSEiI7e/mzZvbXaF77LHH9NRTT6lBgwaKiorKMqwPAPKCYAGgWPPx8ZF0bZz5zahSpUqWtjJlyui///3vLddStWrVPO+vTJkykpSn/bm4uKhatWp2bde/rP51fkOPHj0UERGhEydO6K677tKnn36q9PR0de/ePdf+jx49KhcXlxxDkVlVq1ZVZGSkpk2bpiVLlqhVq1Z67LHH1K1bN1voyElycrLOnj2refPm5XhXr1OnTmXZX14kJycrIyMjbwfxN+XLl5erq2uu6wwfPlyrVq3SCy+8oNKlS2c7ydrDw0NhYWG3VMPf1ahRQ48//riWL1+ujIyMG9YHAH9HsABQrPn4+KhSpUr6+eefb2q7nL50Gf8/IVrK+df6nL6M5vYreV72Z1bXrl01ePBgLVmyRK+++qo++ugjhYaGqnbt2vm2j7+6mc9n6tSp6tmzp7788kutW7dOL7/8sqKjo7V9+3bdeeedOe4jMzNTktStWzeFh4dnu87fr/jk9WrFPffcoxMnTuRp3b87fvx4thP7/6pkyZJavXq17rvvPj377LPy8fHJMl8lIyMjz5P4y5Yte8P5IgEBAUpLS9PFixdtoRsA8opgAaDY+8c//qF58+Zp27ZtatasWb71W6ZMGZ09ezZL+61+Gb1VmZmZOnbsmN2Qml9++UWS7L7cli1bVu3bt9eSJUv07LPP6rvvvtP06dNv2H/16tWVmZmpAwcOqGHDhnmu6/pVl79/Rjl9PsHBwQoODtZrr72mrVu3qkWLFoqJibHdKSm7oFK+fHmVKlVKGRkZ+fbL/nVLliy55Qcn+vv752m9O+64Q+vWrVOLFi305JNPav369Xbn6MmTJ/N8hWXTpk03fPL5sWPH5OHhUWie1g6gaCFYACj2hg0bpiVLlqhPnz7auHGj/Pz87JYfPXpU33zzTba3nM1N9erVde7cOf3000+2X8X/+OMPffHFF/lWe17NmjVL77zzjqRrVzlmzZolNzc3PfDAA3brde/eXU8++aReeeUVubq6qmvXrjfsu2PHjho+fLjGjx+vzz77zG6ehWEYOV6ZuD5H5dtvv7UFkoyMjCxDllJSUuTl5aUSJf73f1nBwcFycXFRamqqrc3b2ztLSHF1dVWnTp20dOlS/fzzz6pfv77d8uTkZJUvX/6Gx5idv87LcKTKlStr/fr1atmypdq3b6/NmzcrODhY0q3PscjuuH/88Ud99dVXeuSRR5z+3A4ARRPBAkCxV716dS1dulRdunRRUFCQ3ZO3t27dqk8//dTumQh51bVrVw0fPlxPPPGEXn75ZV26dElz585VrVq1tGfPnvw/kBx4eHhozZo1Cg8PV9OmTbV69WqtXLlSr776apYvl+3bt9cdd9yhTz/9VI888ogqVKhww/5r1KihkSNH6vXXX1erVq305JNPymq1aufOnapUqZKio6Oz3a5evXq69957FRUVZbvN7bJly7I8U2Hjxo
2KiIhQ586dVatWLV29elUffvihLTRc17hxY23YsEHTpk1TpUqVVLVqVTVt2lSTJ0/Wpk2b1LRpU/Xt21d169bVmTN
"text/plain": [
"<Figure size 800x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart, per cluster, of the mean of each churn flag (churn_hard, churn_soft,\n",
"# churn_warning). The loop currently covers K=5 only.\n",
2026-04-09 16:46:41 +02:00
"for k in [5]:\n",
2026-04-07 12:31:16 +02:00
" tmp = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" churn_hard=(\"churn_hard\", \"mean\"),\n",
" churn_soft=(\"churn_soft\", \"mean\"),\n",
" churn_warning=(\"churn_warning\", \"mean\")\n",
" )\n",
" )\n",
"\n",
" tmp.plot(kind=\"bar\", figsize=(8, 4))\n",
" plt.title(f\"Churn by cluster — K={k}\")\n",
" plt.ylabel(\"Rate\")\n",
" plt.xlabel(\"Clusters\")\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
2026-04-09 16:46:41 +02:00
"execution_count": 356,
2026-04-07 12:31:16 +02:00
"id": "a0370454-561e-48c5-ad3b-28a356a2abac",
"metadata": {},
2026-04-09 16:46:41 +02:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Filtered clients: (246, 55)\n",
"Filtered clients: (317, 55)\n",
"Filtered clients: (317, 55)\n",
"Filtered clients: (327, 55)\n"
]
}
],
"source": [
"# Analyse temporelle\n",
"\n",
"def corr_lag(x, y, lag):\n",
" x = np.asarray(x, dtype=float)\n",
" y = np.asarray(y, dtype=float)\n",
" \n",
" mask = np.isfinite(x) & np.isfinite(y)\n",
" x, y = x[mask], y[mask]\n",
" \n",
" if len(x) <= lag + 3:\n",
" return np.nan\n",
" \n",
" return pd.Series(x[lag:]).corr(pd.Series(y[:-lag]))\n",
"\n",
"# 1. Définir les fenêtres temporelles (ex: 2 ans glissants)\n",
"windows = [\n",
" (\"2017-10-31\", \"2019-10-31\"),\n",
" (\"2019-10-31\", \"2021-10-31\"),\n",
" (\"2021-10-31\", \"2023-10-31\"),\n",
" (\"2023-10-31\", \"2025-10-31\")\n",
"]\n",
"\n",
"for start, end in windows:\n",
" # FILTRAGE : On recalcule les variables sur la période (simulation)\n",
" df_month_copy = df_month[(df_month['month'] > start) & (df_month['month'] <= end)].copy()\n",
" df_rel_m_copy = df_rel_m[(df_rel_m['month'] > start) & (df_rel_m['month'] <= end)].copy()\n",
" \n",
" tmp_copy = df_rel_m_copy.sort_values([ID_COL, ISIN_COL, \"month\"]).copy()\n",
" tmp_copy[\"prev_aum\"] = tmp_copy.groupby([ID_COL, ISIN_COL])[\"aum_qty\"].shift(1)\n",
" tmp_copy[\"full_exit_event\"] = ((tmp_copy[\"prev_aum\"] > 0) & (tmp_copy[\"aum_qty\"] <= 0)).astype(int)\n",
" tmp_copy[\"entry_event\"] = ((tmp_copy[\"prev_aum\"].fillna(0) <= 0) & (tmp_copy[\"aum_qty\"] > 0)).astype(int)\n",
"\n",
" df_rel_feat_copy = (\n",
" tmp_copy.groupby([ID_COL, ISIN_COL], as_index=False)\n",
" .agg(\n",
" rel_n_months=(\"month\", \"nunique\"),\n",
" rel_active_months=(\"active_rel_month\", \"sum\"),\n",
" rel_holding_months=(\"holding_rel_month\", \"sum\"),\n",
" rel_aum_mean=(\"aum_qty\", \"mean\"),\n",
" rel_turnover_mean=(\"turnover_rel\", \"mean\"),\n",
" rel_turnover_vol=(\"turnover_rel\", \"std\"),\n",
" rel_flow_to_aum_vol=(\"flow_to_aum_rel\", \"std\"),\n",
" rel_n_tx=(\"n_tx\", \"sum\"),\n",
" rel_full_exit_count=(\"full_exit_event\", \"sum\"),\n",
" rel_entry_count=(\"entry_event\", \"sum\")\n",
" )\n",
" )\n",
"\n",
" df_rel_client_copy = (\n",
" df_rel_feat_copy\n",
" .groupby(ID_COL, as_index=False)\n",
" .agg(\n",
" n_isin_total=(ISIN_COL, \"nunique\"),\n",
" rel_turnover_mean_avg=(\"rel_turnover_mean\", \"mean\"),\n",
" rel_turnover_vol_avg=(\"rel_turnover_vol\", \"mean\"),\n",
" rel_flow_to_aum_vol_avg=(\"rel_flow_to_aum_vol\", \"mean\"),\n",
" full_exit_count=(\"rel_full_exit_count\", \"sum\"),\n",
" entry_count=(\"rel_entry_count\", \"sum\"),\n",
" avg_holding_months_per_isin=(\"rel_holding_months\", \"mean\"),\n",
" max_holding_months_per_isin=(\"rel_holding_months\", \"max\")\n",
" )\n",
" )\n",
"\n",
" df_client_copy = (\n",
" df_month_copy\n",
" .groupby(ID_COL, as_index=False)\n",
" .agg(\n",
" n_months=(\"month\", \"nunique\"),\n",
" n_active_months=(\"active_month\", \"sum\"),\n",
" flow_freq=(\"active_month\", \"mean\"),\n",
"\n",
" aum_qty_mean=(\"aum_qty\", \"mean\"),\n",
" aum_qty_median=(\"aum_qty\", \"median\"),\n",
" aum_qty_max=(\"aum_qty\", \"max\"),\n",
" aum_qty_last=(\"aum_qty\", \"last\"),\n",
"\n",
" net_flow_qty_sum=(\"net_flow_qty\", \"sum\"),\n",
" gross_flow_qty_sum=(\"gross_flow_qty\", \"sum\"),\n",
" gross_flow_qty_mean=(\"gross_flow_qty\", \"mean\"),\n",
" n_tx_total=(\"n_tx\", \"sum\"),\n",
"\n",
" net_flow_vol=(\"net_flow_qty\", \"std\"),\n",
" turnover_mean=(\"turnover_m\", \"mean\"),\n",
" turnover_vol=(\"turnover_m\", \"std\"),\n",
" flow_to_aum_mean=(\"flow_to_aum_m\", \"mean\"),\n",
" flow_to_aum_vol=(\"flow_to_aum_m\", \"std\"),\n",
"\n",
" avg_n_isin_held=(\"n_isin_held\", \"mean\"),\n",
" max_n_isin_held=(\"n_isin_held\", \"max\"),\n",
"\n",
" sub_share_mean=(\"sub_share_m\", \"mean\"),\n",
" red_share_mean=(\"red_share_m\", \"mean\"),\n",
"\n",
" delta_rate_mean=(\"delta_rate_m\", \"mean\"),\n",
" aum_drawdown_last=(\"aum_drawdown\", \"last\"),\n",
" aum_drawdown_max=(\"aum_drawdown\", \"max\"),\n",
"\n",
" region=(\"region\", \"last\"),\n",
" country=(\"country\", \"last\")\n",
" )\n",
" )\n",
"\n",
" df_client_copy = df_client_copy.merge(df_rel_client_copy, on=ID_COL, how=\"left\")\n",
"\n",
" #Variables de corrélations entre performance et flux\n",
"\n",
" rows = []\n",
"\n",
" for acc, g in df_month_copy.groupby(ID_COL):\n",
" g = g.sort_values(\"month\")\n",
" \n",
" flow = g[\"flow_to_aum_m\"].values\n",
" ret_fund = g[\"ret_fund_m\"].values\n",
" ret_bench = g[\"ret_bench_m\"].values\n",
" rate = g[\"delta_rate_m\"].values\n",
" \n",
" rows.append({\n",
" ID_COL: acc,\n",
" \n",
" # 👇 Corrélations perf vs flux\n",
" \"corr_flow_fund_lag3\": corr_lag(flow, ret_fund, 3),\n",
" \"corr_flow_fund_lag6\": corr_lag(flow, ret_fund, 6),\n",
" \n",
" \"corr_flow_bench_lag3\": corr_lag(flow, ret_bench, 3),\n",
" \"corr_flow_bench_lag6\": corr_lag(flow, ret_bench, 6),\n",
" \n",
" # 👇 Corrélation taux vs flux\n",
" \"corr_flow_rate_lag3\": corr_lag(flow, rate, 3),\n",
" \"corr_flow_rate_lag6\": corr_lag(flow, rate, 6),\n",
" })\n",
"\n",
" df_corr_copy = pd.DataFrame(rows)\n",
"\n",
" df_client_copy = df_client_copy.merge(df_corr_copy, on=ID_COL, how=\"left\")\n",
"\n",
" dfc_copy = df_client_copy.copy()\n",
"\n",
" dfc_copy[\"gross_flow_to_aum\"] = dfc_copy[\"gross_flow_qty_sum\"] / (dfc_copy[\"aum_qty_mean\"].abs() + EPS)\n",
" dfc_copy[\"avg_ticket\"] = dfc_copy[\"gross_flow_qty_sum\"] / (dfc_copy[\"n_tx_total\"] + EPS)\n",
" dfc_copy[\"flow_direction_balance\"] = dfc_copy[\"net_flow_qty_sum\"] / (dfc_copy[\"gross_flow_qty_sum\"] + EPS)\n",
" dfc_copy[\"redemption_bias\"] = dfc_copy[\"red_share_mean\"] - dfc_copy[\"sub_share_mean\"]\n",
" dfc_copy[\"activity_intensity\"] = dfc_copy[\"n_tx_total\"] / (dfc_copy[\"n_months\"] + EPS)\n",
" dfc_copy[\"exit_rate_per_isin\"] = dfc_copy[\"full_exit_count\"] / (dfc_copy[\"n_isin_total\"] + EPS)\n",
" dfc_copy[\"entry_rate_per_isin\"] = dfc_copy[\"entry_count\"] / (dfc_copy[\"n_isin_total\"] + EPS)\n",
" dfc_copy[\"aum_final_to_peak\"] = dfc_copy[\"aum_qty_last\"] / (dfc_copy[\"aum_qty_max\"] + EPS)\n",
"\n",
" for col in [\"aum_qty_mean\", \"gross_flow_qty_sum\", \"n_tx_total\", \"avg_ticket\", \"gross_flow_qty_mean\"]:\n",
" dfc_copy[f\"log_{col}\"] = np.log1p(dfc_copy[col].clip(lower=0))\n",
"\n",
" dfc_copy = dfc_copy[(dfc_copy[\"n_months\"] >= 6) & (dfc_copy[\"aum_qty_mean\"] > 0)].copy()\n",
"\n",
" top_countries = dfc_copy[\"country\"].fillna(\"Unknown\").value_counts().head(10).index\n",
" top_regions = dfc_copy[\"region\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"\n",
" dfc_copy[\"country_grp\"] = np.where(dfc_copy[\"country\"].isin(top_countries), dfc_copy[\"country\"], \"Other\")\n",
" dfc_copy[\"region_grp\"] = np.where(dfc_copy[\"region\"].isin(top_regions), dfc_copy[\"region\"], \"Other\")\n",
"\n",
" \n",
" if start == \"2017-10-31\":\n",
" df_2017 = dfc_copy.copy()\n",
" if start == \"2019-10-31\":\n",
" df_2019 = dfc_copy.copy()\n",
" if start == \"2021-10-31\":\n",
" df_2021 = dfc_copy.copy()\n",
" if start == \"2023-10-31\":\n",
" df_2023 = dfc_copy.copy()\n",
" print(\"Filtered clients:\", dfc_copy.shape)\n",
"\n",
"# Moins de codes à mesure que l'on remonte dans le temps"
]
},
{
"cell_type": "code",
"execution_count": 367,
"id": "d4a05629-4dac-48a2-bd7f-b7a885d7e47a",
"metadata": {},
"outputs": [],
"source": [
"base_features = [\n",
" \"log_aum_qty_mean\",\n",
" \"flow_freq\",\n",
" \"gross_flow_to_aum\",\n",
" #\"turnover_vol\",\n",
" \"flow_to_aum_vol\",\n",
" \"activity_intensity\",\n",
" \"n_tx_total\",\n",
" \"avg_n_isin_held\",\n",
" \"n_isin_total\",\n",
" \"avg_holding_months_per_isin\",\n",
" \"exit_rate_per_isin\",\n",
" \"flow_direction_balance\",\n",
" #\"redemption_bias\",\n",
" \"aum_drawdown_last\",\n",
" \"corr_flow_fund_lag3\",\n",
" \"corr_flow_fund_lag6\",\n",
" \"corr_flow_rate_lag3\",\n",
" #\"corr_flow_rate_lag6\",\n",
" #\"corr_flow_bench_lag3\",\n",
" #\"corr_flow_bench_lag6\"\n",
" \n",
"]\n",
"\n",
"base_features2 = [\n",
" \"log_aum_qty_mean\",\n",
" \"log_gross_flow_qty_mean\",\n",
" \"n_tx_total\",\n",
" \"flow_freq\",\n",
" \"gross_flow_to_aum\",\n",
" \"net_flow_vol\",\n",
" #\"avg_n_isin_held\",\n",
" #\"flow_direction_balance\",\n",
"]\n",
"\n",
"windows = [\n",
" (\"2017-10-31\", \"2019-10-31\"),\n",
" (\"2019-10-31\", \"2021-10-31\"),\n",
" (\"2021-10-31\", \"2023-10-31\"),\n",
" (\"2023-10-31\", \"2025-10-31\")\n",
"]\n",
"\n",
"stability_results = []\n",
"\n",
"X_temp_2017 = df_2017[base_features].replace([np.inf, -np.inf], np.nan).fillna(df_2017[base_features].median())\n",
"X_temp_2019 = df_2019[base_features].replace([np.inf, -np.inf], np.nan).fillna(df_2019[base_features].median())\n",
"X_temp_2021 = df_2021[base_features].replace([np.inf, -np.inf], np.nan).fillna(df_2021[base_features].median())\n",
"X_temp_2023 = df_2023[base_features].replace([np.inf, -np.inf], np.nan).fillna(df_2023[base_features].median())\n",
"\n",
"X_temp_2017_scaled = scaler.transform(X_temp_2017)\n",
"X_temp_2019_scaled = scaler.transform(X_temp_2019)\n",
"X_temp_2021_scaled = scaler.transform(X_temp_2021)\n",
"X_temp_2023_scaled = scaler.transform(X_temp_2023)\n",
"\n",
"km = RESULTS[5][\"model\"]\n",
"\n",
"labels_2017 = km.predict(X_temp_2017_scaled)\n",
"labels_2019 = km.predict(X_temp_2019_scaled)\n",
"labels_2021 = km.predict(X_temp_2021_scaled)\n",
"labels_2023 = km.predict(X_temp_2023_scaled)\n",
"\n",
"df_2017[\"cluster_kmeans\"] = labels_2017\n",
"df_2019[\"cluster_kmeans\"] = labels_2019\n",
"df_2021[\"cluster_kmeans\"] = labels_2021\n",
"df_2023[\"cluster_kmeans\"] = labels_2023\n"
]
},
{
"cell_type": "code",
"execution_count": 368,
"id": "79428e73-9401-4d39-9fad-20d06a4d1f2b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>aum_qty_max</th>\n",
" <th>aum_qty_last</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>n_tx_total</th>\n",
" <th>net_flow_vol</th>\n",
" <th>turnover_mean</th>\n",
" <th>turnover_vol</th>\n",
" <th>flow_to_aum_mean</th>\n",
" <th>flow_to_aum_vol</th>\n",
" <th>avg_n_isin_held</th>\n",
" <th>max_n_isin_held</th>\n",
" <th>sub_share_mean</th>\n",
" <th>red_share_mean</th>\n",
" <th>delta_rate_mean</th>\n",
" <th>aum_drawdown_last</th>\n",
" <th>aum_drawdown_max</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>n_isin_total</th>\n",
" <th>rel_turnover_mean_avg</th>\n",
" <th>rel_turnover_vol_avg</th>\n",
" <th>rel_flow_to_aum_vol_avg</th>\n",
" <th>full_exit_count</th>\n",
" <th>entry_count</th>\n",
" <th>avg_holding_months_per_isin</th>\n",
" <th>max_holding_months_per_isin</th>\n",
" <th>corr_flow_fund_lag3</th>\n",
" <th>corr_flow_fund_lag6</th>\n",
" <th>corr_flow_bench_lag3</th>\n",
" <th>corr_flow_bench_lag6</th>\n",
" <th>corr_flow_rate_lag3</th>\n",
" <th>corr_flow_rate_lag6</th>\n",
" <th>gross_flow_to_aum</th>\n",
" <th>avg_ticket</th>\n",
" <th>flow_direction_balance</th>\n",
" <th>redemption_bias</th>\n",
" <th>activity_intensity</th>\n",
" <th>exit_rate_per_isin</th>\n",
" <th>entry_rate_per_isin</th>\n",
" <th>aum_final_to_peak</th>\n",
" <th>log_aum_qty_mean</th>\n",
" <th>country_grp</th>\n",
" <th>region_grp</th>\n",
" <th>log_gross_flow_qty_sum</th>\n",
" <th>log_n_tx_total</th>\n",
" <th>log_avg_ticket</th>\n",
" <th>log_gross_flow_qty_mean</th>\n",
" <th>cluster_kmeans</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>1.000000</td>\n",
" <td>13099.683542</td>\n",
" <td>12313.2330</td>\n",
" <td>39201.929</td>\n",
" <td>15362.431</td>\n",
" <td>-86610.542</td>\n",
" <td>145870.662</td>\n",
" <td>6077.944250</td>\n",
" <td>347.0</td>\n",
" <td>4405.421469</td>\n",
" <td>6.866620e-01</td>\n",
" <td>6.476563e-01</td>\n",
" <td>-4.346513e-01</td>\n",
" <td>4.760756e-01</td>\n",
" <td>5.666667</td>\n",
" <td>9</td>\n",
" <td>0.182835</td>\n",
" <td>-0.820120</td>\n",
" <td>-0.004667</td>\n",
" <td>0.796705</td>\n",
" <td>0.965250</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>70</td>\n",
" <td>2.868232e+11</td>\n",
" <td>4.008490e+11</td>\n",
" <td>4.992860e+11</td>\n",
" <td>50</td>\n",
" <td>71</td>\n",
" <td>1.942857</td>\n",
" <td>7</td>\n",
" <td>-0.098882</td>\n",
" <td>0.103718</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.621462</td>\n",
" <td>0.312713</td>\n",
" <td>11.135434</td>\n",
" <td>420.376548</td>\n",
" <td>-0.593749</td>\n",
" <td>-1.002955</td>\n",
" <td>14.458333</td>\n",
" <td>0.714286</td>\n",
" <td>1.014286</td>\n",
" <td>0.391879</td>\n",
" <td>9.480420</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>11.890482</td>\n",
" <td>5.852202</td>\n",
" <td>6.043527</td>\n",
" <td>8.712586</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200000076</td>\n",
" <td>24</td>\n",
" <td>23</td>\n",
" <td>0.958333</td>\n",
" <td>19721.920083</td>\n",
" <td>9272.4710</td>\n",
" <td>46591.535</td>\n",
" <td>5705.213</td>\n",
" <td>-958.588</td>\n",
" <td>79325.796</td>\n",
" <td>3305.241500</td>\n",
" <td>131.0</td>\n",
" <td>4205.535841</td>\n",
" <td>2.448698e-01</td>\n",
" <td>4.034074e-01</td>\n",
" <td>-1.119577e-01</td>\n",
" <td>4.487544e-01</td>\n",
" <td>3.750000</td>\n",
" <td>7</td>\n",
" <td>0.408252</td>\n",
" <td>-0.556621</td>\n",
" <td>-0.004667</td>\n",
" <td>0.877548</td>\n",
" <td>0.911153</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>10</td>\n",
" <td>7.402653e+10</td>\n",
" <td>1.892627e+11</td>\n",
" <td>1.897906e+11</td>\n",
" <td>11</td>\n",
" <td>16</td>\n",
" <td>9.000000</td>\n",
" <td>16</td>\n",
" <td>0.079104</td>\n",
" <td>-0.154260</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.154608</td>\n",
" <td>-0.043408</td>\n",
" <td>4.022215</td>\n",
" <td>605.540427</td>\n",
" <td>-0.012084</td>\n",
" <td>-0.964873</td>\n",
" <td>5.458333</td>\n",
" <td>1.100000</td>\n",
" <td>1.600000</td>\n",
" <td>0.122452</td>\n",
" <td>9.889537</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>11.281331</td>\n",
" <td>4.882802</td>\n",
" <td>6.407771</td>\n",
" <td>8.103567</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200000082</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>1.000000</td>\n",
" <td>94110.153222</td>\n",
" <td>9912.7690</td>\n",
" <td>316149.358</td>\n",
" <td>168684.609</td>\n",
" <td>-54092.803</td>\n",
" <td>170248.173</td>\n",
" <td>18916.463667</td>\n",
" <td>334.0</td>\n",
" <td>15896.897315</td>\n",
" <td>1.676878e+01</td>\n",
" <td>4.067557e+01</td>\n",
" <td>-1.331691e+01</td>\n",
" <td>3.939035e+01</td>\n",
" <td>1.333333</td>\n",
" <td>2</td>\n",
" <td>0.322361</td>\n",
" <td>-0.688804</td>\n",
" <td>-0.021889</td>\n",
" <td>0.466440</td>\n",
" <td>0.999064</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>5</td>\n",
" <td>3.729108e+12</td>\n",
" <td>2.856665e+12</td>\n",
" <td>3.471936e+12</td>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" <td>2.400000</td>\n",
" <td>4</td>\n",
" <td>-0.202170</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.207860</td>\n",
" <td>NaN</td>\n",
" <td>1.809031</td>\n",
" <td>509.725069</td>\n",
" <td>-0.317729</td>\n",
" <td>-1.011165</td>\n",
" <td>37.111111</td>\n",
" <td>2.200000</td>\n",
" <td>2.200000</td>\n",
" <td>0.533560</td>\n",
" <td>11.452232</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>12.045018</td>\n",
" <td>5.814131</td>\n",
" <td>6.235831</td>\n",
" <td>9.847841</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200000146</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>1.000000</td>\n",
" <td>74707.740583</td>\n",
" <td>106267.2235</td>\n",
" <td>196139.292</td>\n",
" <td>196139.292</td>\n",
" <td>-59221.624</td>\n",
" <td>211436.064</td>\n",
" <td>8809.836000</td>\n",
" <td>1132.0</td>\n",
" <td>5513.516693</td>\n",
" <td>1.256966e+00</td>\n",
" <td>2.328463e+00</td>\n",
" <td>-1.348461e-01</td>\n",
" <td>4.600613e-01</td>\n",
" <td>5.458333</td>\n",
" <td>9</td>\n",
" <td>0.382985</td>\n",
" <td>-0.717218</td>\n",
" <td>-0.004667</td>\n",
" <td>0.074580</td>\n",
" <td>0.996116</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>20</td>\n",
" <td>3.031166e+11</td>\n",
" <td>2.822949e+11</td>\n",
" <td>2.429317e+11</td>\n",
" <td>42</td>\n",
" <td>53</td>\n",
" <td>6.550000</td>\n",
" <td>11</td>\n",
" <td>0.279563</td>\n",
" <td>0.344789</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.141305</td>\n",
" <td>0.256302</td>\n",
" <td>2.830176</td>\n",
" <td>186.780975</td>\n",
" <td>-0.280092</td>\n",
" <td>-1.100203</td>\n",
" <td>47.166667</td>\n",
" <td>2.100000</td>\n",
" <td>2.650000</td>\n",
" <td>1.000000</td>\n",
" <td>11.221352</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>12.261683</td>\n",
" <td>7.032624</td>\n",
" <td>5.235276</td>\n",
" <td>9.083738</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200000147</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>1.000000</td>\n",
" <td>26636.948500</td>\n",
" <td>29692.2925</td>\n",
" <td>48287.032</td>\n",
" <td>7512.370</td>\n",
" <td>-7789.010</td>\n",
" <td>45214.622</td>\n",
" <td>1883.942583</td>\n",
" <td>826.0</td>\n",
" <td>1158.765419</td>\n",
" <td>8.937726e-02</td>\n",
" <td>6.399876e-02</td>\n",
" <td>-2.717045e-02</td>\n",
" <td>6.214485e-02</td>\n",
" <td>19.000000</td>\n",
" <td>24</td>\n",
" <td>0.375099</td>\n",
" <td>-0.637097</td>\n",
" <td>-0.004667</td>\n",
" <td>0.956999</td>\n",
" <td>0.956999</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>60</td>\n",
" <td>2.801077e+10</td>\n",
" <td>6.331226e+10</td>\n",
" <td>6.213826e+10</td>\n",
" <td>107</td>\n",
" <td>138</td>\n",
" <td>7.600000</td>\n",
" <td>17</td>\n",
" <td>0.215711</td>\n",
" <td>0.376121</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.130792</td>\n",
" <td>0.108078</td>\n",
" <td>1.697440</td>\n",
" <td>54.739252</td>\n",
" <td>-0.172268</td>\n",
" <td>-1.012196</td>\n",
" <td>34.416667</td>\n",
" <td>1.783333</td>\n",
" <td>2.300000</td>\n",
" <td>0.155577</td>\n",
" <td>10.190092</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>10.719198</td>\n",
" <td>6.717805</td>\n",
" <td>4.020685</td>\n",
" <td>7.541653</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>266</th>\n",
" <td>422444</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>1.000000</td>\n",
" <td>12267.997958</td>\n",
" <td>9950.5330</td>\n",
" <td>32977.443</td>\n",
" <td>4877.824</td>\n",
" <td>-15441.419</td>\n",
" <td>32108.803</td>\n",
" <td>1337.866792</td>\n",
" <td>631.0</td>\n",
" <td>1041.528882</td>\n",
" <td>2.150940e-01</td>\n",
" <td>2.769692e-01</td>\n",
" <td>-1.143586e-01</td>\n",
" <td>2.638958e-01</td>\n",
" <td>11.166667</td>\n",
" <td>15</td>\n",
" <td>0.320475</td>\n",
" <td>-0.697638</td>\n",
" <td>-0.004667</td>\n",
" <td>0.856627</td>\n",
" <td>0.947579</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>34</td>\n",
" <td>3.093744e+10</td>\n",
" <td>6.664333e+10</td>\n",
" <td>6.710298e+10</td>\n",
" <td>71</td>\n",
" <td>89</td>\n",
" <td>7.882353</td>\n",
" <td>14</td>\n",
" <td>-0.065606</td>\n",
" <td>-0.151279</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.097411</td>\n",
" <td>-0.114743</td>\n",
" <td>2.617281</td>\n",
" <td>50.885583</td>\n",
" <td>-0.480909</td>\n",
" <td>-1.018113</td>\n",
" <td>26.291667</td>\n",
" <td>2.088235</td>\n",
" <td>2.617647</td>\n",
" <td>0.147914</td>\n",
" <td>9.414831</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>10.376917</td>\n",
" <td>6.448889</td>\n",
" <td>3.949041</td>\n",
" <td>7.199579</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>267</th>\n",
" <td>422445</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>1.000000</td>\n",
" <td>69421.793833</td>\n",
" <td>55546.6050</td>\n",
" <td>163966.050</td>\n",
" <td>14277.014</td>\n",
" <td>-251075.901</td>\n",
" <td>347147.887</td>\n",
" <td>14464.495292</td>\n",
" <td>685.0</td>\n",
" <td>17441.410733</td>\n",
" <td>3.217610e-01</td>\n",
" <td>4.302206e-01</td>\n",
" <td>-2.075494e-01</td>\n",
" <td>3.112958e-01</td>\n",
" <td>10.375000</td>\n",
" <td>14</td>\n",
" <td>0.137291</td>\n",
" <td>-0.873971</td>\n",
" <td>-0.004667</td>\n",
" <td>0.938989</td>\n",
" <td>0.948780</td>\n",
" <td>United Kingdom</td>\n",
" <td>United Kingdom</td>\n",
" <td>37</td>\n",
" <td>2.100063e+11</td>\n",
" <td>4.459683e+11</td>\n",
" <td>3.365609e+11</td>\n",
" <td>59</td>\n",
" <td>74</td>\n",
" <td>6.729730</td>\n",
" <td>17</td>\n",
" <td>0.059350</td>\n",
" <td>0.090918</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.165629</td>\n",
" <td>-0.029560</td>\n",
" <td>5.000561</td>\n",
" <td>506.785236</td>\n",
" <td>-0.723253</td>\n",
" <td>-1.011262</td>\n",
" <td>28.541667</td>\n",
" <td>1.594595</td>\n",
" <td>2.000000</td>\n",
" <td>0.087073</td>\n",
" <td>11.147971</td>\n",
" <td>United Kingdom</td>\n",
" <td>United Kingdom</td>\n",
" <td>12.757509</td>\n",
" <td>6.530878</td>\n",
" <td>6.230059</td>\n",
" <td>9.579521</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>268</th>\n",
" <td>422554</td>\n",
" <td>24</td>\n",
" <td>6</td>\n",
" <td>0.250000</td>\n",
" <td>1789.997458</td>\n",
" <td>2021.5270</td>\n",
" <td>2518.899</td>\n",
" <td>554.000</td>\n",
" <td>-2115.826</td>\n",
" <td>2115.826</td>\n",
" <td>88.159417</td>\n",
" <td>9.0</td>\n",
" <td>296.399564</td>\n",
" <td>1.193556e-01</td>\n",
" <td>5.157004e-01</td>\n",
" <td>-1.193556e-01</td>\n",
" <td>5.157004e-01</td>\n",
" <td>5.333333</td>\n",
" <td>6</td>\n",
" <td>0.000000</td>\n",
" <td>-0.250000</td>\n",
" <td>-0.004667</td>\n",
" <td>0.795396</td>\n",
" <td>0.795396</td>\n",
" <td>Spain</td>\n",
" <td>Portugal</td>\n",
" <td>8</td>\n",
" <td>2.166681e+09</td>\n",
" <td>1.061451e+10</td>\n",
" <td>1.061451e+10</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>16.000000</td>\n",
" <td>24</td>\n",
" <td>-0.032409</td>\n",
" <td>0.252735</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.002680</td>\n",
" <td>-0.019555</td>\n",
" <td>1.182027</td>\n",
" <td>235.091778</td>\n",
" <td>-1.000000</td>\n",
" <td>-0.250000</td>\n",
" <td>0.375000</td>\n",
" <td>0.375000</td>\n",
" <td>0.750000</td>\n",
" <td>0.219937</td>\n",
" <td>7.490528</td>\n",
" <td>Other</td>\n",
" <td>Spain</td>\n",
" <td>7.657673</td>\n",
" <td>2.302585</td>\n",
" <td>5.464221</td>\n",
" <td>4.490426</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>269</th>\n",
" <td>422691</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>1.000000</td>\n",
" <td>39831.571833</td>\n",
" <td>34980.0330</td>\n",
" <td>84299.823</td>\n",
" <td>36354.458</td>\n",
" <td>-38718.541</td>\n",
" <td>77652.421</td>\n",
" <td>3235.517542</td>\n",
" <td>324.0</td>\n",
" <td>2235.473089</td>\n",
" <td>8.638039e-02</td>\n",
" <td>8.322480e-02</td>\n",
" <td>-3.706567e-02</td>\n",
" <td>6.696282e-02</td>\n",
" <td>14.041667</td>\n",
" <td>18</td>\n",
" <td>0.308259</td>\n",
" <td>-0.719980</td>\n",
" <td>-0.004667</td>\n",
" <td>0.568748</td>\n",
" <td>0.864511</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>42</td>\n",
" <td>4.960864e+10</td>\n",
" <td>1.547882e+11</td>\n",
" <td>1.513444e+11</td>\n",
" <td>40</td>\n",
" <td>57</td>\n",
" <td>8.023810</td>\n",
" <td>19</td>\n",
" <td>0.302523</td>\n",
" <td>-0.131632</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.015668</td>\n",
" <td>-0.430534</td>\n",
" <td>1.949519</td>\n",
" <td>239.667966</td>\n",
" <td>-0.498613</td>\n",
" <td>-1.028239</td>\n",
" <td>13.500000</td>\n",
" <td>0.952381</td>\n",
" <td>1.357143</td>\n",
" <td>0.431252</td>\n",
" <td>10.592440</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>11.260011</td>\n",
" <td>5.783825</td>\n",
" <td>5.483418</td>\n",
" <td>8.082253</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>270</th>\n",
" <td>422874</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>1.000000</td>\n",
" <td>2136.818542</td>\n",
" <td>727.0000</td>\n",
" <td>9560.044</td>\n",
" <td>612.299</td>\n",
" <td>-3167.353</td>\n",
" <td>85045.949</td>\n",
" <td>3543.581208</td>\n",
" <td>321.0</td>\n",
" <td>4353.942068</td>\n",
" <td>2.763833e+11</td>\n",
" <td>1.157257e+12</td>\n",
" <td>-8.166042e+09</td>\n",
" <td>3.001015e+10</td>\n",
" <td>1.666667</td>\n",
" <td>4</td>\n",
" <td>0.411331</td>\n",
" <td>-0.611609</td>\n",
" <td>-0.004667</td>\n",
" <td>0.967122</td>\n",
" <td>1.000000</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>26</td>\n",
" <td>3.481410e+11</td>\n",
" <td>5.321386e+11</td>\n",
" <td>5.878530e+11</td>\n",
" <td>14</td>\n",
" <td>23</td>\n",
" <td>1.538462</td>\n",
" <td>5</td>\n",
" <td>0.021567</td>\n",
" <td>-0.332525</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.046606</td>\n",
" <td>0.015242</td>\n",
" <td>39.800267</td>\n",
" <td>264.940651</td>\n",
" <td>-0.037243</td>\n",
" <td>-1.022940</td>\n",
" <td>13.375000</td>\n",
" <td>0.538462</td>\n",
" <td>0.884615</td>\n",
" <td>0.064048</td>\n",
" <td>7.667541</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>11.350959</td>\n",
" <td>5.774552</td>\n",
" <td>5.583273</td>\n",
" <td>8.173175</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>246 rows × 56 columns</p>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq \\\n",
"0 18872 24 24 1.000000 \n",
"1 200000076 24 23 0.958333 \n",
"2 200000082 9 9 1.000000 \n",
"3 200000146 24 24 1.000000 \n",
"4 200000147 24 24 1.000000 \n",
".. ... ... ... ... \n",
"266 422444 24 24 1.000000 \n",
"267 422445 24 24 1.000000 \n",
"268 422554 24 6 0.250000 \n",
"269 422691 24 24 1.000000 \n",
"270 422874 24 24 1.000000 \n",
"\n",
" aum_qty_mean aum_qty_median aum_qty_max aum_qty_last \\\n",
"0 13099.683542 12313.2330 39201.929 15362.431 \n",
"1 19721.920083 9272.4710 46591.535 5705.213 \n",
"2 94110.153222 9912.7690 316149.358 168684.609 \n",
"3 74707.740583 106267.2235 196139.292 196139.292 \n",
"4 26636.948500 29692.2925 48287.032 7512.370 \n",
".. ... ... ... ... \n",
"266 12267.997958 9950.5330 32977.443 4877.824 \n",
"267 69421.793833 55546.6050 163966.050 14277.014 \n",
"268 1789.997458 2021.5270 2518.899 554.000 \n",
"269 39831.571833 34980.0330 84299.823 36354.458 \n",
"270 2136.818542 727.0000 9560.044 612.299 \n",
"\n",
" net_flow_qty_sum gross_flow_qty_sum gross_flow_qty_mean n_tx_total \\\n",
"0 -86610.542 145870.662 6077.944250 347.0 \n",
"1 -958.588 79325.796 3305.241500 131.0 \n",
"2 -54092.803 170248.173 18916.463667 334.0 \n",
"3 -59221.624 211436.064 8809.836000 1132.0 \n",
"4 -7789.010 45214.622 1883.942583 826.0 \n",
".. ... ... ... ... \n",
"266 -15441.419 32108.803 1337.866792 631.0 \n",
"267 -251075.901 347147.887 14464.495292 685.0 \n",
"268 -2115.826 2115.826 88.159417 9.0 \n",
"269 -38718.541 77652.421 3235.517542 324.0 \n",
"270 -3167.353 85045.949 3543.581208 321.0 \n",
"\n",
" net_flow_vol turnover_mean turnover_vol flow_to_aum_mean \\\n",
"0 4405.421469 6.866620e-01 6.476563e-01 -4.346513e-01 \n",
"1 4205.535841 2.448698e-01 4.034074e-01 -1.119577e-01 \n",
"2 15896.897315 1.676878e+01 4.067557e+01 -1.331691e+01 \n",
"3 5513.516693 1.256966e+00 2.328463e+00 -1.348461e-01 \n",
"4 1158.765419 8.937726e-02 6.399876e-02 -2.717045e-02 \n",
".. ... ... ... ... \n",
"266 1041.528882 2.150940e-01 2.769692e-01 -1.143586e-01 \n",
"267 17441.410733 3.217610e-01 4.302206e-01 -2.075494e-01 \n",
"268 296.399564 1.193556e-01 5.157004e-01 -1.193556e-01 \n",
"269 2235.473089 8.638039e-02 8.322480e-02 -3.706567e-02 \n",
"270 4353.942068 2.763833e+11 1.157257e+12 -8.166042e+09 \n",
"\n",
" flow_to_aum_vol avg_n_isin_held max_n_isin_held sub_share_mean \\\n",
"0 4.760756e-01 5.666667 9 0.182835 \n",
"1 4.487544e-01 3.750000 7 0.408252 \n",
"2 3.939035e+01 1.333333 2 0.322361 \n",
"3 4.600613e-01 5.458333 9 0.382985 \n",
"4 6.214485e-02 19.000000 24 0.375099 \n",
".. ... ... ... ... \n",
"266 2.638958e-01 11.166667 15 0.320475 \n",
"267 3.112958e-01 10.375000 14 0.137291 \n",
"268 5.157004e-01 5.333333 6 0.000000 \n",
"269 6.696282e-02 14.041667 18 0.308259 \n",
"270 3.001015e+10 1.666667 4 0.411331 \n",
"\n",
" red_share_mean delta_rate_mean aum_drawdown_last aum_drawdown_max \\\n",
"0 -0.820120 -0.004667 0.796705 0.965250 \n",
"1 -0.556621 -0.004667 0.877548 0.911153 \n",
"2 -0.688804 -0.021889 0.466440 0.999064 \n",
"3 -0.717218 -0.004667 0.074580 0.996116 \n",
"4 -0.637097 -0.004667 0.956999 0.956999 \n",
".. ... ... ... ... \n",
"266 -0.697638 -0.004667 0.856627 0.947579 \n",
"267 -0.873971 -0.004667 0.938989 0.948780 \n",
"268 -0.250000 -0.004667 0.795396 0.795396 \n",
"269 -0.719980 -0.004667 0.568748 0.864511 \n",
"270 -0.611609 -0.004667 0.967122 1.000000 \n",
"\n",
" region country n_isin_total rel_turnover_mean_avg \\\n",
"0 Switzerland Switzerland 70 2.868232e+11 \n",
"1 Spain Spain 10 7.402653e+10 \n",
"2 Italy Italy 5 3.729108e+12 \n",
"3 Italy Italy 20 3.031166e+11 \n",
"4 Italy Italy 60 2.801077e+10 \n",
".. ... ... ... ... \n",
"266 Italy Italy 34 3.093744e+10 \n",
"267 United Kingdom United Kingdom 37 2.100063e+11 \n",
"268 Spain Portugal 8 2.166681e+09 \n",
"269 Spain Spain 42 4.960864e+10 \n",
"270 Spain Spain 26 3.481410e+11 \n",
"\n",
" rel_turnover_vol_avg rel_flow_to_aum_vol_avg full_exit_count \\\n",
"0 4.008490e+11 4.992860e+11 50 \n",
"1 1.892627e+11 1.897906e+11 11 \n",
"2 2.856665e+12 3.471936e+12 11 \n",
"3 2.822949e+11 2.429317e+11 42 \n",
"4 6.331226e+10 6.213826e+10 107 \n",
".. ... ... ... \n",
"266 6.664333e+10 6.710298e+10 71 \n",
"267 4.459683e+11 3.365609e+11 59 \n",
"268 1.061451e+10 1.061451e+10 3 \n",
"269 1.547882e+11 1.513444e+11 40 \n",
"270 5.321386e+11 5.878530e+11 14 \n",
"\n",
" entry_count avg_holding_months_per_isin max_holding_months_per_isin \\\n",
"0 71 1.942857 7 \n",
"1 16 9.000000 16 \n",
"2 11 2.400000 4 \n",
"3 53 6.550000 11 \n",
"4 138 7.600000 17 \n",
".. ... ... ... \n",
"266 89 7.882353 14 \n",
"267 74 6.729730 17 \n",
"268 6 16.000000 24 \n",
"269 57 8.023810 19 \n",
"270 23 1.538462 5 \n",
"\n",
" corr_flow_fund_lag3 corr_flow_fund_lag6 corr_flow_bench_lag3 \\\n",
"0 -0.098882 0.103718 NaN \n",
"1 0.079104 -0.154260 NaN \n",
"2 -0.202170 NaN NaN \n",
"3 0.279563 0.344789 NaN \n",
"4 0.215711 0.376121 NaN \n",
".. ... ... ... \n",
"266 -0.065606 -0.151279 NaN \n",
"267 0.059350 0.090918 NaN \n",
"268 -0.032409 0.252735 NaN \n",
"269 0.302523 -0.131632 NaN \n",
"270 0.021567 -0.332525 NaN \n",
"\n",
" corr_flow_bench_lag6 corr_flow_rate_lag3 corr_flow_rate_lag6 \\\n",
"0 NaN 0.621462 0.312713 \n",
"1 NaN 0.154608 -0.043408 \n",
"2 NaN -0.207860 NaN \n",
"3 NaN -0.141305 0.256302 \n",
"4 NaN -0.130792 0.108078 \n",
".. ... ... ... \n",
"266 NaN -0.097411 -0.114743 \n",
"267 NaN 0.165629 -0.029560 \n",
"268 NaN 0.002680 -0.019555 \n",
"269 NaN 0.015668 -0.430534 \n",
"270 NaN 0.046606 0.015242 \n",
"\n",
" gross_flow_to_aum avg_ticket flow_direction_balance redemption_bias \\\n",
"0 11.135434 420.376548 -0.593749 -1.002955 \n",
"1 4.022215 605.540427 -0.012084 -0.964873 \n",
"2 1.809031 509.725069 -0.317729 -1.011165 \n",
"3 2.830176 186.780975 -0.280092 -1.100203 \n",
"4 1.697440 54.739252 -0.172268 -1.012196 \n",
".. ... ... ... ... \n",
"266 2.617281 50.885583 -0.480909 -1.018113 \n",
"267 5.000561 506.785236 -0.723253 -1.011262 \n",
"268 1.182027 235.091778 -1.000000 -0.250000 \n",
"269 1.949519 239.667966 -0.498613 -1.028239 \n",
"270 39.800267 264.940651 -0.037243 -1.022940 \n",
"\n",
" activity_intensity exit_rate_per_isin entry_rate_per_isin \\\n",
"0 14.458333 0.714286 1.014286 \n",
"1 5.458333 1.100000 1.600000 \n",
"2 37.111111 2.200000 2.200000 \n",
"3 47.166667 2.100000 2.650000 \n",
"4 34.416667 1.783333 2.300000 \n",
".. ... ... ... \n",
"266 26.291667 2.088235 2.617647 \n",
"267 28.541667 1.594595 2.000000 \n",
"268 0.375000 0.375000 0.750000 \n",
"269 13.500000 0.952381 1.357143 \n",
"270 13.375000 0.538462 0.884615 \n",
"\n",
" aum_final_to_peak log_aum_qty_mean country_grp region_grp \\\n",
"0 0.391879 9.480420 Switzerland Switzerland \n",
"1 0.122452 9.889537 Spain Spain \n",
"2 0.533560 11.452232 Italy Italy \n",
"3 1.000000 11.221352 Italy Italy \n",
"4 0.155577 10.190092 Italy Italy \n",
".. ... ... ... ... \n",
"266 0.147914 9.414831 Italy Italy \n",
"267 0.087073 11.147971 United Kingdom United Kingdom \n",
"268 0.219937 7.490528 Other Spain \n",
"269 0.431252 10.592440 Spain Spain \n",
"270 0.064048 7.667541 Spain Spain \n",
"\n",
" log_gross_flow_qty_sum log_n_tx_total log_avg_ticket \\\n",
"0 11.890482 5.852202 6.043527 \n",
"1 11.281331 4.882802 6.407771 \n",
"2 12.045018 5.814131 6.235831 \n",
"3 12.261683 7.032624 5.235276 \n",
"4 10.719198 6.717805 4.020685 \n",
".. ... ... ... \n",
"266 10.376917 6.448889 3.949041 \n",
"267 12.757509 6.530878 6.230059 \n",
"268 7.657673 2.302585 5.464221 \n",
"269 11.260011 5.783825 5.483418 \n",
"270 11.350959 5.774552 5.583273 \n",
"\n",
" log_gross_flow_qty_mean cluster_kmeans \n",
"0 8.712586 1 \n",
"1 8.103567 1 \n",
"2 9.847841 1 \n",
"3 9.083738 4 \n",
"4 7.541653 1 \n",
".. ... ... \n",
"266 7.199579 1 \n",
"267 9.579521 1 \n",
"268 4.490426 3 \n",
"269 8.082253 1 \n",
"270 8.173175 1 \n",
"\n",
"[246 rows x 56 columns]"
]
},
"execution_count": 368,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_2017"
]
},
{
"cell_type": "code",
"execution_count": 379,
"id": "80300015-8c96-4c68-8572-b005fe3008cb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=5 (2017) =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_kmeans</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>111</td>\n",
" <td>16968.521150</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.291667</td>\n",
" <td>15.0</td>\n",
" <td>3.800000</td>\n",
" <td>0.083333</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.920330</td>\n",
" <td>0.112701</td>\n",
" <td>0.029610</td>\n",
" <td>-0.028331</td>\n",
" <td>0.026860</td>\n",
" <td>0.009551</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>91</td>\n",
" <td>15224.049625</td>\n",
" <td>4.040687</td>\n",
" <td>1.0</td>\n",
" <td>347.0</td>\n",
" <td>2.772727</td>\n",
" <td>19.0</td>\n",
" <td>4.000000</td>\n",
" <td>1.175000</td>\n",
" <td>-0.326316</td>\n",
" <td>-1.013060</td>\n",
" <td>0.919243</td>\n",
" <td>0.111327</td>\n",
" <td>0.028616</td>\n",
" <td>-0.021581</td>\n",
" <td>-0.000388</td>\n",
" <td>-0.003074</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>27</td>\n",
" <td>88889.763125</td>\n",
" <td>1.307204</td>\n",
" <td>1.0</td>\n",
" <td>1132.0</td>\n",
" <td>14.666667</td>\n",
" <td>32.0</td>\n",
" <td>15.380952</td>\n",
" <td>0.196429</td>\n",
" <td>-0.290942</td>\n",
" <td>-1.051078</td>\n",
" <td>0.322512</td>\n",
" <td>0.677488</td>\n",
" <td>0.186117</td>\n",
" <td>0.079017</td>\n",
" <td>-0.033758</td>\n",
" <td>0.104930</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>12</td>\n",
" <td>536154.854937</td>\n",
" <td>2.704868</td>\n",
" <td>1.0</td>\n",
" <td>4320.0</td>\n",
" <td>22.479167</td>\n",
" <td>54.0</td>\n",
" <td>8.628205</td>\n",
" <td>2.010311</td>\n",
" <td>-0.483334</td>\n",
" <td>-1.148256</td>\n",
" <td>0.655400</td>\n",
" <td>0.411812</td>\n",
" <td>0.150284</td>\n",
" <td>0.115993</td>\n",
" <td>0.067313</td>\n",
" <td>0.094253</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>72055.041304</td>\n",
" <td>12.515495</td>\n",
" <td>1.0</td>\n",
" <td>347.0</td>\n",
" <td>1.000000</td>\n",
" <td>10.0</td>\n",
" <td>1.916667</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-1.251187</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.054633</td>\n",
" <td>-0.100003</td>\n",
" <td>0.002018</td>\n",
" <td>0.026134</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med \\\n",
"cluster_kmeans \n",
"3 111 16968.521150 0.000000 \n",
"1 91 15224.049625 4.040687 \n",
"4 27 88889.763125 1.307204 \n",
"0 12 536154.854937 2.704868 \n",
"2 5 72055.041304 12.515495 \n",
"\n",
" flow_freq_med n_tx_total_med avg_n_isin_held_med \\\n",
"cluster_kmeans \n",
"3 0.0 0.0 2.291667 \n",
"1 1.0 347.0 2.772727 \n",
"4 1.0 1132.0 14.666667 \n",
"0 1.0 4320.0 22.479167 \n",
"2 1.0 347.0 1.000000 \n",
"\n",
" n_isin_total_med avg_holding_months_per_isin_med \\\n",
"cluster_kmeans \n",
"3 15.0 3.800000 \n",
"1 19.0 4.000000 \n",
"4 32.0 15.380952 \n",
"0 54.0 8.628205 \n",
"2 10.0 1.916667 \n",
"\n",
" exit_rate_per_isin_med flow_direction_balance_med \\\n",
"cluster_kmeans \n",
"3 0.083333 0.000000 \n",
"1 1.175000 -0.326316 \n",
"4 0.196429 -0.290942 \n",
"0 2.010311 -0.483334 \n",
"2 1.000000 0.000000 \n",
"\n",
" redemption_bias_med aum_drawdown_last_med \\\n",
"cluster_kmeans \n",
"3 0.000000 0.920330 \n",
"1 -1.013060 0.919243 \n",
"4 -1.051078 0.322512 \n",
"0 -1.148256 0.655400 \n",
"2 -1.251187 1.000000 \n",
"\n",
" aum_final_to_peak_med corr_flow_fund_lag3_med \\\n",
"cluster_kmeans \n",
"3 0.112701 0.029610 \n",
"1 0.111327 0.028616 \n",
"4 0.677488 0.186117 \n",
"0 0.411812 0.150284 \n",
"2 0.000000 0.054633 \n",
"\n",
" corr_flow_fund_lag6_med corr_flow_rate_lag3_med \\\n",
"cluster_kmeans \n",
"3 -0.028331 0.026860 \n",
"1 -0.021581 -0.000388 \n",
"4 0.079017 -0.033758 \n",
"0 0.115993 0.067313 \n",
"2 -0.100003 0.002018 \n",
"\n",
" corr_flow_rate_lag6_med \n",
"cluster_kmeans \n",
"3 0.009551 \n",
"1 -0.003074 \n",
"4 0.104930 \n",
"0 0.094253 \n",
"2 0.026134 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=5 (2019) =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_kmeans</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>158</td>\n",
" <td>9633.246229</td>\n",
" <td>4.266741</td>\n",
" <td>1.000000</td>\n",
" <td>329.5</td>\n",
" <td>2.638587</td>\n",
" <td>20.0</td>\n",
" <td>2.923188</td>\n",
" <td>1.066964</td>\n",
" <td>0.034443</td>\n",
" <td>-1.006633</td>\n",
" <td>0.876492</td>\n",
" <td>0.221628</td>\n",
" <td>-0.009886</td>\n",
" <td>-0.002589</td>\n",
" <td>-0.021260</td>\n",
" <td>0.004922</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>74</td>\n",
" <td>17463.171534</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1.764493</td>\n",
" <td>11.0</td>\n",
" <td>3.765625</td>\n",
" <td>0.119658</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.730164</td>\n",
" <td>0.486056</td>\n",
" <td>-0.015831</td>\n",
" <td>-0.054000</td>\n",
" <td>0.029791</td>\n",
" <td>0.012218</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>68</td>\n",
" <td>61413.995862</td>\n",
" <td>1.242228</td>\n",
" <td>1.000000</td>\n",
" <td>512.5</td>\n",
" <td>10.750000</td>\n",
" <td>19.0</td>\n",
" <td>14.662500</td>\n",
" <td>0.084118</td>\n",
" <td>0.284410</td>\n",
" <td>-1.022922</td>\n",
" <td>0.024953</td>\n",
" <td>1.000000</td>\n",
" <td>0.036290</td>\n",
" <td>0.005350</td>\n",
" <td>-0.063025</td>\n",
" <td>-0.070005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>15</td>\n",
" <td>315716.091250</td>\n",
" <td>4.836881</td>\n",
" <td>1.000000</td>\n",
" <td>4596.0</td>\n",
" <td>13.250000</td>\n",
" <td>62.0</td>\n",
" <td>5.065217</td>\n",
" <td>1.952381</td>\n",
" <td>0.114974</td>\n",
" <td>-1.104636</td>\n",
" <td>0.767660</td>\n",
" <td>0.442852</td>\n",
" <td>0.024923</td>\n",
" <td>0.080512</td>\n",
" <td>0.001555</td>\n",
" <td>-0.089721</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>77921.342500</td>\n",
" <td>4.689483</td>\n",
" <td>0.666667</td>\n",
" <td>3060.0</td>\n",
" <td>2.583333</td>\n",
" <td>32.5</td>\n",
" <td>1.420635</td>\n",
" <td>0.250000</td>\n",
" <td>0.610875</td>\n",
" <td>-0.710315</td>\n",
" <td>0.000641</td>\n",
" <td>0.999359</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.048991</td>\n",
" <td>0.069475</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med \\\n",
"cluster_kmeans \n",
"1 158 9633.246229 4.266741 \n",
"3 74 17463.171534 0.000000 \n",
"4 68 61413.995862 1.242228 \n",
"0 15 315716.091250 4.836881 \n",
"2 2 77921.342500 4.689483 \n",
"\n",
" flow_freq_med n_tx_total_med avg_n_isin_held_med \\\n",
"cluster_kmeans \n",
"1 1.000000 329.5 2.638587 \n",
"3 0.000000 0.0 1.764493 \n",
"4 1.000000 512.5 10.750000 \n",
"0 1.000000 4596.0 13.250000 \n",
"2 0.666667 3060.0 2.583333 \n",
"\n",
" n_isin_total_med avg_holding_months_per_isin_med \\\n",
"cluster_kmeans \n",
"1 20.0 2.923188 \n",
"3 11.0 3.765625 \n",
"4 19.0 14.662500 \n",
"0 62.0 5.065217 \n",
"2 32.5 1.420635 \n",
"\n",
" exit_rate_per_isin_med flow_direction_balance_med \\\n",
"cluster_kmeans \n",
"1 1.066964 0.034443 \n",
"3 0.119658 0.000000 \n",
"4 0.084118 0.284410 \n",
"0 1.952381 0.114974 \n",
"2 0.250000 0.610875 \n",
"\n",
" redemption_bias_med aum_drawdown_last_med \\\n",
"cluster_kmeans \n",
"1 -1.006633 0.876492 \n",
"3 0.000000 0.730164 \n",
"4 -1.022922 0.024953 \n",
"0 -1.104636 0.767660 \n",
"2 -0.710315 0.000641 \n",
"\n",
" aum_final_to_peak_med corr_flow_fund_lag3_med \\\n",
"cluster_kmeans \n",
"1 0.221628 -0.009886 \n",
"3 0.486056 -0.015831 \n",
"4 1.000000 0.036290 \n",
"0 0.442852 0.024923 \n",
"2 0.999359 NaN \n",
"\n",
" corr_flow_fund_lag6_med corr_flow_rate_lag3_med \\\n",
"cluster_kmeans \n",
"1 -0.002589 -0.021260 \n",
"3 -0.054000 0.029791 \n",
"4 0.005350 -0.063025 \n",
"0 0.080512 0.001555 \n",
"2 NaN 0.048991 \n",
"\n",
" corr_flow_rate_lag6_med \n",
"cluster_kmeans \n",
"1 0.004922 \n",
"3 0.012218 \n",
"4 -0.070005 \n",
"0 -0.089721 \n",
"2 0.069475 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=5 (2021) =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_kmeans</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>176</td>\n",
" <td>13730.289442</td>\n",
" <td>3.314366</td>\n",
" <td>1.000000</td>\n",
" <td>303.5</td>\n",
" <td>3.260870</td>\n",
" <td>20.0</td>\n",
" <td>3.661905</td>\n",
" <td>1.242647</td>\n",
" <td>-0.321897</td>\n",
" <td>-1.013108</td>\n",
" <td>0.889758</td>\n",
" <td>0.203476</td>\n",
" <td>-0.055453</td>\n",
" <td>-0.035168</td>\n",
" <td>0.034440</td>\n",
" <td>0.061284</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>65</td>\n",
" <td>32116.345125</td>\n",
" <td>0.349545</td>\n",
" <td>0.125000</td>\n",
" <td>1.0</td>\n",
" <td>1.666667</td>\n",
" <td>7.0</td>\n",
" <td>5.500000</td>\n",
" <td>0.272727</td>\n",
" <td>0.000000</td>\n",
" <td>-0.125000</td>\n",
" <td>0.517941</td>\n",
" <td>0.565745</td>\n",
" <td>-0.023467</td>\n",
" <td>-0.004682</td>\n",
" <td>0.159795</td>\n",
" <td>0.081149</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>61</td>\n",
" <td>113501.940000</td>\n",
" <td>0.950099</td>\n",
" <td>1.000000</td>\n",
" <td>907.0</td>\n",
" <td>14.833333</td>\n",
" <td>20.0</td>\n",
" <td>19.128205</td>\n",
" <td>0.166667</td>\n",
" <td>-0.125148</td>\n",
" <td>-1.024417</td>\n",
" <td>0.214815</td>\n",
" <td>0.805057</td>\n",
" <td>-0.017808</td>\n",
" <td>0.013349</td>\n",
" <td>0.020689</td>\n",
" <td>0.045295</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>14</td>\n",
" <td>432637.663188</td>\n",
" <td>4.529472</td>\n",
" <td>1.000000</td>\n",
" <td>4802.0</td>\n",
" <td>21.062500</td>\n",
" <td>65.0</td>\n",
" <td>7.035629</td>\n",
" <td>1.286394</td>\n",
" <td>-0.114398</td>\n",
" <td>-1.076190</td>\n",
" <td>0.649860</td>\n",
" <td>0.405715</td>\n",
" <td>0.071547</td>\n",
" <td>0.023572</td>\n",
" <td>0.175785</td>\n",
" <td>0.120807</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>502831.492923</td>\n",
" <td>1.175835</td>\n",
" <td>0.923077</td>\n",
" <td>31.0</td>\n",
" <td>0.692308</td>\n",
" <td>4.0</td>\n",
" <td>2.250000</td>\n",
" <td>0.500000</td>\n",
" <td>-0.075935</td>\n",
" <td>-0.923077</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-0.239729</td>\n",
" <td>-0.165593</td>\n",
" <td>-0.603793</td>\n",
" <td>0.022203</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med \\\n",
"cluster_kmeans \n",
"1 176 13730.289442 3.314366 \n",
"3 65 32116.345125 0.349545 \n",
"4 61 113501.940000 0.950099 \n",
"0 14 432637.663188 4.529472 \n",
"2 1 502831.492923 1.175835 \n",
"\n",
" flow_freq_med n_tx_total_med avg_n_isin_held_med \\\n",
"cluster_kmeans \n",
"1 1.000000 303.5 3.260870 \n",
"3 0.125000 1.0 1.666667 \n",
"4 1.000000 907.0 14.833333 \n",
"0 1.000000 4802.0 21.062500 \n",
"2 0.923077 31.0 0.692308 \n",
"\n",
" n_isin_total_med avg_holding_months_per_isin_med \\\n",
"cluster_kmeans \n",
"1 20.0 3.661905 \n",
"3 7.0 5.500000 \n",
"4 20.0 19.128205 \n",
"0 65.0 7.035629 \n",
"2 4.0 2.250000 \n",
"\n",
" exit_rate_per_isin_med flow_direction_balance_med \\\n",
"cluster_kmeans \n",
"1 1.242647 -0.321897 \n",
"3 0.272727 0.000000 \n",
"4 0.166667 -0.125148 \n",
"0 1.286394 -0.114398 \n",
"2 0.500000 -0.075935 \n",
"\n",
" redemption_bias_med aum_drawdown_last_med \\\n",
"cluster_kmeans \n",
"1 -1.013108 0.889758 \n",
"3 -0.125000 0.517941 \n",
"4 -1.024417 0.214815 \n",
"0 -1.076190 0.649860 \n",
"2 -0.923077 1.000000 \n",
"\n",
" aum_final_to_peak_med corr_flow_fund_lag3_med \\\n",
"cluster_kmeans \n",
"1 0.203476 -0.055453 \n",
"3 0.565745 -0.023467 \n",
"4 0.805057 -0.017808 \n",
"0 0.405715 0.071547 \n",
"2 0.000000 -0.239729 \n",
"\n",
" corr_flow_fund_lag6_med corr_flow_rate_lag3_med \\\n",
"cluster_kmeans \n",
"1 -0.035168 0.034440 \n",
"3 -0.004682 0.159795 \n",
"4 0.013349 0.020689 \n",
"0 0.023572 0.175785 \n",
"2 -0.165593 -0.603793 \n",
"\n",
" corr_flow_rate_lag6_med \n",
"cluster_kmeans \n",
"1 0.061284 \n",
"3 0.081149 \n",
"4 0.045295 \n",
"0 0.120807 \n",
"2 0.022203 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=5 (2023) =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_kmeans</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>172</td>\n",
" <td>1.357394e+04</td>\n",
" <td>4.150092</td>\n",
" <td>1.000000</td>\n",
" <td>310.0</td>\n",
" <td>2.848485</td>\n",
" <td>20.0</td>\n",
" <td>3.227273</td>\n",
" <td>1.108732</td>\n",
" <td>0.130341</td>\n",
" <td>-1.011453</td>\n",
" <td>0.906305</td>\n",
" <td>0.246636</td>\n",
" <td>-0.021698</td>\n",
" <td>-0.048594</td>\n",
" <td>-0.010974</td>\n",
" <td>-0.042960</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>82</td>\n",
" <td>8.636871e+04</td>\n",
" <td>1.389136</td>\n",
" <td>1.000000</td>\n",
" <td>770.5</td>\n",
" <td>12.895833</td>\n",
" <td>27.0</td>\n",
" <td>14.025264</td>\n",
" <td>0.275518</td>\n",
" <td>0.237140</td>\n",
" <td>-1.024236</td>\n",
" <td>0.125418</td>\n",
" <td>0.953249</td>\n",
" <td>-0.000646</td>\n",
" <td>0.010373</td>\n",
" <td>-0.102000</td>\n",
" <td>0.070727</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>55</td>\n",
" <td>6.469700e+04</td>\n",
" <td>0.486343</td>\n",
" <td>0.125000</td>\n",
" <td>4.0</td>\n",
" <td>1.041667</td>\n",
" <td>3.0</td>\n",
" <td>8.000000</td>\n",
" <td>0.222222</td>\n",
" <td>0.000000</td>\n",
" <td>-0.132403</td>\n",
" <td>0.355167</td>\n",
" <td>0.812196</td>\n",
" <td>0.048014</td>\n",
" <td>-0.013428</td>\n",
" <td>0.146384</td>\n",
" <td>0.189835</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>16</td>\n",
" <td>5.487127e+05</td>\n",
" <td>3.956302</td>\n",
" <td>1.000000</td>\n",
" <td>5915.5</td>\n",
" <td>21.229167</td>\n",
" <td>68.5</td>\n",
" <td>6.473287</td>\n",
" <td>1.152933</td>\n",
" <td>0.407132</td>\n",
" <td>-1.158641</td>\n",
" <td>0.487722</td>\n",
" <td>0.921376</td>\n",
" <td>-0.063857</td>\n",
" <td>0.034445</td>\n",
" <td>-0.126342</td>\n",
" <td>-0.203116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>1.312765e+06</td>\n",
" <td>6.604496</td>\n",
" <td>0.708333</td>\n",
" <td>314.0</td>\n",
" <td>1.267857</td>\n",
" <td>8.0</td>\n",
" <td>2.375000</td>\n",
" <td>1.541667</td>\n",
" <td>-0.426440</td>\n",
" <td>-0.843028</td>\n",
" <td>0.995909</td>\n",
" <td>0.010642</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.206573</td>\n",
" <td>0.199750</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med \\\n",
"cluster_kmeans \n",
"1 172 1.357394e+04 4.150092 \n",
"4 82 8.636871e+04 1.389136 \n",
"3 55 6.469700e+04 0.486343 \n",
"0 16 5.487127e+05 3.956302 \n",
"2 2 1.312765e+06 6.604496 \n",
"\n",
" flow_freq_med n_tx_total_med avg_n_isin_held_med \\\n",
"cluster_kmeans \n",
"1 1.000000 310.0 2.848485 \n",
"4 1.000000 770.5 12.895833 \n",
"3 0.125000 4.0 1.041667 \n",
"0 1.000000 5915.5 21.229167 \n",
"2 0.708333 314.0 1.267857 \n",
"\n",
" n_isin_total_med avg_holding_months_per_isin_med \\\n",
"cluster_kmeans \n",
"1 20.0 3.227273 \n",
"4 27.0 14.025264 \n",
"3 3.0 8.000000 \n",
"0 68.5 6.473287 \n",
"2 8.0 2.375000 \n",
"\n",
" exit_rate_per_isin_med flow_direction_balance_med \\\n",
"cluster_kmeans \n",
"1 1.108732 0.130341 \n",
"4 0.275518 0.237140 \n",
"3 0.222222 0.000000 \n",
"0 1.152933 0.407132 \n",
"2 1.541667 -0.426440 \n",
"\n",
" redemption_bias_med aum_drawdown_last_med \\\n",
"cluster_kmeans \n",
"1 -1.011453 0.906305 \n",
"4 -1.024236 0.125418 \n",
"3 -0.132403 0.355167 \n",
"0 -1.158641 0.487722 \n",
"2 -0.843028 0.995909 \n",
"\n",
" aum_final_to_peak_med corr_flow_fund_lag3_med \\\n",
"cluster_kmeans \n",
"1 0.246636 -0.021698 \n",
"4 0.953249 -0.000646 \n",
"3 0.812196 0.048014 \n",
"0 0.921376 -0.063857 \n",
"2 0.010642 NaN \n",
"\n",
" corr_flow_fund_lag6_med corr_flow_rate_lag3_med \\\n",
"cluster_kmeans \n",
"1 -0.048594 -0.010974 \n",
"4 0.010373 -0.102000 \n",
"3 -0.013428 0.146384 \n",
"0 0.034445 -0.126342 \n",
"2 NaN 0.206573 \n",
"\n",
" corr_flow_rate_lag6_med \n",
"cluster_kmeans \n",
"1 -0.042960 \n",
"4 0.070727 \n",
"3 0.189835 \n",
"0 -0.203116 \n",
"2 0.199750 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"print(f\"\\n===== K=5 (2017) =====\")\n",
"prof = (\n",
" df_2017.groupby(f\"cluster_kmeans\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" **{f\"{c}_med\": (c, \"median\") for c in profile_vars}\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
"display(prof)\n",
"\n",
"print(f\"\\n===== K=5 (2019) =====\")\n",
"\n",
"prof = (\n",
" df_2019.groupby(f\"cluster_kmeans\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" **{f\"{c}_med\": (c, \"median\") for c in profile_vars}\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
"display(prof)\n",
"\n",
"print(f\"\\n===== K=5 (2021) =====\")\n",
"\n",
"prof = (\n",
" df_2021.groupby(f\"cluster_kmeans\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" **{f\"{c}_med\": (c, \"median\") for c in profile_vars}\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
"display(prof)\n",
"\n",
"print(f\"\\n===== K=5 (2023) =====\")\n",
"\n",
"prof = (\n",
" df_2023.groupby(f\"cluster_kmeans\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" **{f\"{c}_med\": (c, \"median\") for c in profile_vars}\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
"display(prof)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2895cf20-79e0-4b98-b0c2-107425f99b60",
"metadata": {},
"outputs": [],
"source": [
"labels_map = {\n",
" 0: \"Cluster 0 (30): Large and highly active movers\",\n",
" 1: \"Cluster 1 (168): Occasional large movers\",\n",
" 3: \"Cluster 3 (111): Dormant profiles\",\n",
" 4: \"Cluster 4 (90): Loyal clients\"\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 381,
"id": "0040629e-2d5a-4e61-ba50-a1e1cce790e1",
"metadata": {},
"outputs": [],
"source": [
"# Merge\n",
"\n",
"df_evo = (\n",
" df_2017[[ID_COL, \"cluster_kmeans\"]]\n",
" .rename(columns={\"cluster_kmeans\": \"cluster_2017\"})\n",
" .merge(\n",
" df_2019[[ID_COL, \"cluster_kmeans\"]]\n",
" .rename(columns={\"cluster_kmeans\": \"cluster_2019\"}),\n",
" on=ID_COL\n",
" )\n",
" .merge(\n",
" df_2021[[ID_COL, \"cluster_kmeans\"]]\n",
" .rename(columns={\"cluster_kmeans\": \"cluster_2021\"}),\n",
" on=ID_COL\n",
" )\n",
" .merge(\n",
" df_2023[[ID_COL, \"cluster_kmeans\"]]\n",
" .rename(columns={\"cluster_kmeans\": \"cluster_2023\"}),\n",
" on=ID_COL\n",
" )\n",
")\n",
"\n",
"df_evo = df_evo.isin(clusters_keep = [1, 2, 3])\n"
]
},
{
"cell_type": "code",
"execution_count": 382,
"id": "6fb52dd0-e5ee-443f-a155-913caaf4bbe1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>cluster_2019</th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_2017</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NaN</td>\n",
" <td>64.0</td>\n",
" <td>3.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2.0</td>\n",
" <td>30.0</td>\n",
" <td>31.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"cluster_2019 0 1 3 4\n",
"cluster_2017 \n",
"0 9.0 NaN NaN 1.0\n",
"1 NaN 64.0 3.0 8.0\n",
"2 NaN 3.0 1.0 NaN\n",
"3 2.0 30.0 31.0 5.0\n",
"4 1.0 5.0 1.0 19.0"
]
},
"execution_count": 382,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_evo.groupby([\"cluster_2017\", \"cluster_2019\"]).size().unstack()"
]
},
{
"cell_type": "code",
"execution_count": 383,
"id": "7bad62fa-d161-4c6e-ad80-d0c8a75c06c2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>cluster_2021</th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_2019</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>96.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>1.0</td>\n",
" <td>22.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>9.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"cluster_2021 0 1 2 3 4\n",
"cluster_2019 \n",
"0 9.0 1.0 NaN NaN 2.0\n",
"1 1.0 96.0 NaN 2.0 3.0\n",
"3 NaN 12.0 1.0 22.0 1.0\n",
"4 1.0 9.0 NaN 1.0 22.0"
]
},
"execution_count": 383,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_evo.groupby([\"cluster_2019\", \"cluster_2021\"]).size().unstack()"
]
},
{
"cell_type": "code",
"execution_count": 384,
"id": "8f135625-24b3-417e-9414-d2cd0dc32cfe",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>cluster_2023</th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_2021</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>11.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>97.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>18.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"cluster_2023 0 1 2 3 4\n",
"cluster_2021 \n",
"0 11.0 NaN NaN NaN NaN\n",
"1 1.0 97.0 NaN 2.0 18.0\n",
"2 NaN NaN 1.0 NaN NaN\n",
"3 NaN 13.0 NaN 12.0 NaN\n",
"4 2.0 4.0 NaN NaN 22.0"
]
},
"execution_count": 384,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_evo.groupby([\"cluster_2021\", \"cluster_2023\"]).size().unstack()"
]
},
{
"cell_type": "code",
"execution_count": 391,
"id": "73db7626-a489-49f1-9c3b-bfde6001e193",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoUAAAIjCAYAAAB1bGEnAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAfGRJREFUeJzt3XdYFFfbBvB7l7L0LqCIoKAoNhQFsWFBscTeorEESxJjDdFEbGBFYwkW7CXWaKzxtWDBFmMHsYtdE5UqghQBd+f7w4+NKwuyyLIg9y/XXFc4c2bm2V2Bh+ecOSMSBEEAEREREZVpYk0HQERERESax6SQiIiIiJgUEhERERGTQiIiIiICk0IiIiIiApNCIiIiIgKTQiIiIiICk0IiIiIiApNCIiIiIgKTQiqgoKAgiEQiTYcBAPjtt98gEonw+PFjTYdS7FT5HEri+yQSiRAUFKTpMHIpqXEVh++//x5t2rTRdBj0CcLCwmBkZIT4+HhNh0KlHJPCEibnF7lIJMKZM2dy7RcEAfb29hCJRPjiiy8KdY3Zs2dj7969nxjp5+PWrVsICgoqUcmTKvh5lmzPnz9HUFAQoqKiNB1KLo8ePcKaNWswceJEeds///yDadOmwcPDA+bm5rCyskKLFi1w7Ngxped49eoVvvnmG5QrVw6GhoZo2bIlIiMjc/Xbvn07+vfvj6pVq0IkEqFFixb5xhYZGYnOnTvDwsICBgYGqFWrFhYvXvxJr1dV4eHhGDx4MKpVqwYDAwNUqVIFQ4cOxYsXL5T2P3v2LJo2bQoDAwPY2tpi9OjRSE1NVeiTmpqKwMBAtGvXDhYWFhCJRPjtt9/yjGHp0qWoUaMGJBIJ7Ozs4O/vj7S0NIU+7dq1g7OzM4KDgz/5NVMZJ1CJsn79egGAoKenJwwfPjzX/hMnTggABIlEInTs2LFQ1zA0NBQGDRqk0jHZ2dlCRkZGoa5X1HLeo0ePHhXJ+Xbs2CEAEE6cOFEk51MnZZ9DXp/n27dvhYyMDEEmkxVTdB8HQAgMDNR0GLmoM65Lly4JAIT169er5fyfYsyYMUK1atUU2pYsWSLo6+sLffv2FZYuXSqEhIQI9evXFwAI69atU+grlUqFxo0bC4aGhkJQUJCwdOlSwdXVVTA2Nhbu3r2r0Nfb21swMjISWrZsKZibmwve3t55xnX48GFBV1dX8PT0FBYuXCisWrVK+Pnnn4Xx48cX2WsvCHd3d6Fy5crCTz/9JKxevVoICAgQjI2NBRsbG+HFixcKfa9cuSLo6ekJ9erVE5YvXy5MmjRJkEgkQrt27RT6PXr0SAAgVKpUSWjRokW+/zZ++uknAYDQs2dPYfny5cKoUaMEbW1toW3btrn6Llu2TDAwMBBSUlKK7PVT2cOksITJSXi6d+8uWFlZCdnZ2Qr7hw0bJri7uwsODg7FkhSmpqYW6hrqpMmkUCaTCenp6UVy3aJSmCRfU5gUFp1P/d7MysoSrKyshMmTJyu037hxQ4iPj1doe/PmjVC9enWhYsWKCu3bt28XAAg7duyQt8XFxQlmZmZC3759Ffo+ffpUkEqlgiAIQs2aNfNMCpOTkwUbGxuhW7du8v6acurUqVwxnDp1SgAgTJo0SaG9ffv2Qvny5YXk5GR52+rVqwUAwuHDh+Vtb968kSeU+f3beP78uaCtrS0MGDBAoX3JkiUCAGHfvn0K7bGxsYKWlpawdu3aQr1WIkEQBA4fl1B9+/ZFYmIijh49Km/LysrCzp070a9fP6XHzJ8/H40bN4alpSX09fXh7u6OnTt3KvQRiURIS0vDhg0b5MPUX3/9NYD/5qvdunUL/fr1g7m5OZo2baqw70ObN2+Gh4cHDAwMYG5ujubNm+PIkSMKfQ4dOoRmzZrB0NAQxsbG6NixI27evFmg9+HmzZto1aoV9PX1UbFiRcycORMymUxp38Jc57fffkOvXr0AAC1btpS/JydPngQAODo64osvvs
Dhw4fRoEED6OvrY+XKlQCA9evXo1WrVrC2toZEIoGrqyuWL1+e6xo55zhz5gw8PDygp6eHKlWqYOPGjQr9srOzMW3aNFStWhV6enqwtLRE06ZNFf4NfPg55Pd55jWncNmyZahZsyYkEgkqVKiAESNG4NWrVwp9WrRogVq1auHWrVto2bIlDAwMYGdnh19++SXf9zNHZmYmfvjhB5QrVw7Gxsbo3Lkz/v33X6V9nz17hsGDB8PGxgYSiQQ1a9bEunXrcvVbsmQJatasKf+31qBBA2zduvWjsbx58wZBQUGoVq0a9PT0UL58eXTv3h0PHjzI85ivv/4ajo6OudqVfR8cPXoUTZs2hZmZGYyMjODi4iIfjj158iQaNmwIAPDz85N/Ru8PF164cAHt2rWDqakpDAwM4O3tjb///lvpdZV9b8bExMDPzw8VK1aERCJB+fLl0aVLl49Ohzhz5gwSEhLg4+Oj0F6zZk1YWVkptEkkEnTo0AH//vsvXr9+LW/fuXMnbGxs0L17d3lbuXLl0Lt3b/z555/IzMyUt9vb20Ms/vivnK1btyI2NhazZs2CWCxGWlpant/z6ta8efNcMTdv3hwWFha4ffu2vC0lJQVHjx5F//79YWJiIm8fOHAgjIyM8Mcff8jbJBIJbG1tP3rtc+fO4e3bt/jyyy8V2nO+3rZtm0K7tbU16tSpgz///LPgL5DoA0wKSyhHR0d4eXnh999/l7cdOnQIycnJuX5I5Fi0aBHq1auH6dOnY/bs2dDW1kavXr1w4MABeZ9NmzZBIpGgWbNm2LRpEzZt2oRvv/1W4Ty9evVCeno6Zs+ejWHDhuUZ47Rp0zBgwADo6Ohg+vTpmDZtGuzt7XH8+HGF63Xs2BFGRkaYO3cupkyZglu3bqFp06Yf/aUVExODli1bIioqChMmTMDYsWOxceNGLFq0KFffwl6nefPmGD16NABg4sSJ8vekRo0a8j7R0dHo27cv2rRpg0WLFsHNzQ0AsHz5cjg4OGDixIlYsGAB7O3t8f333yM0NDTXde7fv4+ePXuiTZs2WLBgAczNzfH1118rJK1BQUGYNm0aWrZsiaVLl2LSpEmoVKmS0vlZ77/uj32e7wsKCsKIESNQoUIFLFiwAD169MDKlSvRtm1bZGdnK/RNSkpCu3btULduXSxYsADVq1fHzz//jEOHDuV5/hxDhw5FSEgI2rZtizlz5kBHRwcdO3bM1S82NhaNGjXCsWPHMHLkSCxatAjOzs4YMmQIQkJC5P1Wr16N0aNHw9XVFSEhIZg2bRrc3Nxw4cKFfOOQSqX44osvMG3aNLi7u2PBggUYM2YMkpOTcePGjY++jo+5efMmvvjiC2RmZmL69OlYsGABOnfuLE/qatSogenTpwMAvvnmG/ln1Lx5cwDA8ePH0bx5c6SkpCAwMBCzZ8/Gq1ev0KpVK1y8eDHX9ZR9b/bo0QN79uyBn58fli1bhtGjR+P169d4+vRpvrGfPXsWIpEI9erVK9BrjYmJgYGBAQwMDORtV65cQf369XMlTh4eHkhPT8fdu3cLdO73HTt2DCYmJnj27BlcXFxgZGQEExMTDB8+HG/evFH5fO978+YNevbsicuXLxf6HKmpqUhNTVVInK9fv463b9+iQYMGCn11dXXh5uaGK1euqHydnIRaX19foT3n/Y+IiMh1jLu7O86ePavytYjkNF2qJEU5Q6OXLl0Sli5dKhgbG8uHK3v16iW0bNlSEARB6fDxh8OaWVlZQq1atYRWrVoptOc13BgYGCgAyDXs8/6+HPfu3RPEYrHSIZ6cOWyvX78WzMzMhGHDhinsj4mJEUxNTXO1f2js2LECAOHChQvytri4OMHU1FRh+PhTr5Pf8LGDg4MAQAgLC8u1T9kwsq+vr1ClShWl5zh9+rTC65BIJMKPP/4ob6tbt+5HpwR8+DkIQt6f54fD7HFxcYKurq7Qtm1bhc9s6dKlueaLeXt7CwCEjRs3ytsyMzMFW1
tboUePHvnGGBUVJQAQvv/+e4X2fv365RqmHTJkiFC+fHkhISFBoe+XX34pmJqayt/jLl26CDVr1sz3usqsW7dOACA
"text/plain": [
"<Figure size 800x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Transition matrices between consecutive clustering windows, plus a heatmap of the first one.\n",
"# `sns` and `plt` come from the import cell at the top of the notebook.\n",
"\n",
"EXCLUDED_CLUSTER = 2  # cluster id dropped from every window before computing transitions\n",
"\n",
"# Keep only the rows that are never assigned to the excluded cluster.\n",
"df_filtered = df_evo[\n",
"    (df_evo[\"cluster_2017\"] != EXCLUDED_CLUSTER) &\n",
"    (df_evo[\"cluster_2019\"] != EXCLUDED_CLUSTER) &\n",
"    (df_evo[\"cluster_2021\"] != EXCLUDED_CLUSTER) &\n",
"    (df_evo[\"cluster_2023\"] != EXCLUDED_CLUSTER)\n",
"]\n",
"\n",
"# Transition counts: rows are the origin window's cluster, columns the next window's cluster.\n",
"transition1 = df_filtered.groupby([\"cluster_2017\", \"cluster_2019\"]).size().unstack(fill_value=0)\n",
"transition2 = df_filtered.groupby([\"cluster_2019\", \"cluster_2021\"]).size().unstack(fill_value=0)\n",
"transition3 = df_filtered.groupby([\"cluster_2021\", \"cluster_2023\"]).size().unstack(fill_value=0)\n",
"\n",
"# Row-normalise the counts so that each origin cluster sums to 1.\n",
"transition_pct1 = transition1.div(transition1.sum(axis=1), axis=0)\n",
"transition_pct2 = transition2.div(transition2.sum(axis=1), axis=0)\n",
"transition_pct3 = transition3.div(transition3.sum(axis=1), axis=0)\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"sns.heatmap(transition_pct1, annot=True, fmt=\".2f\", cmap=\"Blues\", ax=ax)\n",
"\n",
"# Window names match the cluster_2017 -> cluster_2019 data plotted here\n",
"# (the previous labels read 2016-2019 / 2019-2022).\n",
"ax.set_ylabel(\"Cluster 2017-2019\")\n",
"ax.set_xlabel(\"Cluster 2019-2021\")\n",
"ax.set_title(\"Matrice de transition des clusters (2017-2019 → 2019-2021)\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 393,
"id": "47edb04f-e57c-48de-8696-af968b5614d7",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABUQAAAJOCAYAAAB/QA2/AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAxRdJREFUeJzs3Xd0VEUbx/HfJqTRSxoECL2EFggQehNBOiiCojQRFQjFSFW6CFJEkC4KIkUQxPIq0sFKkyq9gyIJJPSWhOS+f8SsLNmEBDa7gXw/nnuOmTs7+9zdy+6zc2fmmgzDMAQAAAAAAAAAGYCTowMAAAAAAAAAAHuhQxQAAAAAAABAhkGHKAAAAAAAAIAMgw5RAAAAAAAAABkGHaIAAAAAAAAAMgw6RAEAAAAAAABkGHSIAgAAAAAAAMgw6BAFAAAAAAAAkGHQIQoAAAAAAAAgw6BDFHbz2WefyWQy6fTp0w+su3nzZplMJm3evDnN40qpQoUKqUuXLo4OI5H0Gpc9TJgwQaVKlVJcXJyjQwEeW9WqVdPAgQMdHQYApBly0LSRXuOyB3JQ4NENHjxYwcHBjg4DGRgdoo8pk8mUoi09JXPWzJw5U5999pmjw0ASbt26pZEjR6bL8+jatWsaP368Bg0aJCen+I+yyMhITZw4UXXq1JGXl5dy5sypatWqadmyZVbbiIqK0qBBg5QvXz55eHgoODhY69atS1Rv7dq16tatm8qWLStnZ2cVKlTIansjR45M9t/jb7/9luwx3bp1SzNmzFCjRo2UN29eZcuWTRUrVtSsWbMUGxubqH5cXJwmTJigwoULy93dXeXLl9cXX3yRqN727dvVs2dPBQUFycXFRSaTKckYwsPD1bVrV3l7e8vDw0OVKlXS8uXLk407LRw+fFgDBw5UYGCgsmXLprx586pZs2b6448/rNY/d+6c2rVrp5w5cyp79uxq1aqVTp48majerFmz9Pzzz6tgwYIymUzJ/pBbt26datWqpcyZMytXrlxq27Ztin5MS9Jff/2lUaNGqWrVqsqVK5c8PT1Vr149rV+/3mr9K1eu6LXXXpOXl5eyZMmi+vXra9euXRZ1UnN+37hxQyNGjNAzzzyj3Llzy2QyJflZO2jQIM2YMUNhYWEpOjYAGRs5KOyBHDReSnNQSTp+/Ljatm2rXLlyKXPmzKpVq5Y2bdqUomNKixw0Li5On332mVq2bKkCBQooS5YsKlu2rMaMGaM7d+4kajM1OVpaepxyOGsc/V7evn3bfM7myJFDWbNmVYUKFTR16lTFxMRY1O3Xr5/27t2r7777LsXHB9iUgcfSwoULLbann37akJSoPCwszNGhmt29e9e4ffu2ERcXZy4rU6aMUbdu3UR1Y2Njjdu3bxuxsbF2jDB5/v7+RufOnR0dRiJpGdfFixcNScaIESPSpP1H8eGHHxrZs2c3bt++bS773//+Z7i4uBitWrUypkyZYkyfPt2oX7++IckYPnx4ojZeeOEFI1OmTEb//v2NOXPmGNWrVzcyZcpk/PLLLxb1OnfubLi7uxs1atQw8ufPb/j7+1uNae/evYn+DS5cuNAoUKCAkStXLiMqKirZY/rzzz8Nk8lkNGzY0JgwYYIxe/Zso02bNoYko1OnTonqDx482JBkdO/e3fj444+NZs2aGZKML774wqLeiBEjDBcXFyMoKMgoUaKEkdRH/9WrV41ixYoZ2bJlM4YOHWpMnz7dqFOnjiHJWLx4cbKx29pbb71l5MyZ0+jWrZsxZ84cY8KECUbRokUNZ2dnY926dRZ1r1+/bhQvXtzw9vY2xo8fb0yePNkoUKCAkT9/fiMiIsKirr+/v5E7d27jmWeeMTJlypTkv53//e9/hpOTk1G5cmVj6tSpxrvvvmt4enoafn5+xoULFx4Y/7Rp0wwPDw/jxRdfNKZPn25MmTLFqFSpkiHJmDdvnkXd2NhYo0aNGkaWLFmMkSNHGtOnTzcCAg
KMbNmyGUePHrWIKaXn96lTpwxJRsGCBY169eoZkoz58+dbjTU2Ntbw9fU1hg0b9sDjAgByUPsjB01f0mMOevbsWcPT09Pw8fEx3nvvPWPKlClGhQoVjEyZMhk//fTTA48pLXLQ69evG5KMatWqGWPGjDE+/vhjo2vXroaTk5NRr149i3+PhpHyHC2tPU45nDWOfi8jIyON4OBgY8CAAcaMGTOMWbNmGR07djRMJpPx4osvJnr+du3aGbVr107x8QG2RIfoE6JXr15JdnLc6+bNm3aIJuWSSkbTI5JR27lx48Yjt1G+fHnj5Zdftig7efKkcfr0aYuyuLg4o0GDBoabm5vF827bts2QZEycONFcdvv2baNo0aJG9erVLdo4d+6cER0dbRiGYTRr1izJZNSas2fPGiaTyejevfsD6168eNHYv39/ovKuXbsakoxjx46Zy/7++2/DxcXF6NWrl8Wx1q5d28ifP79x9+5dc3lYWJhx69YtwzCS/6yYMGGCIcnYsGGDuSw2NtaoUqWK4evr+8AOXVv6448/jOvXr1uURUREGF5eXkbNmjUtysePH29IMrZv324uO3TokOHs7GwMGTLEou7p06fNSVuWLFmS/LcTEBBgFCtWzOKY9+zZYzg5ORmhoaEPjH///v3GxYsXLcru3LljlCpVysifP79F+bJlywxJxvLly81lFy5cMHLmzGmROKbm/L5z545x/vx5wzAMY8eOHQ9MpkNCQgx/f/9EP04A4EHIQdMeOajtPKk5aM+ePY1MmTIZhw8fNpfdvHnTKFCggFGpUqUHHlNa5KBRUVHGb7/9lqjNUaNGGZISXeBOaY6W1h63HO5+6eG9tCYkJMSQZD62BCtWrDBMJpNx4sSJFB8jYCtMmX+C1atXT2XLltXOnTtVp04dZc6cWW+//bYk6dtvv1WzZs2UL18+ubm5qWjRonr33XcTDaNPaOPgwYOqX7++MmfOLD8/P02YMCHR802bNk1lypQxTy+tXLmylixZYt5///pNhQoV0oEDB/TTTz+Zp1fVq1dPUtLrNy1fvlxBQUHy8PCQp6enXn75ZZ07d86iTpcuXZQ1a1adO3dOrVu3VtasWeXl5aX+/ftbnSZwP8MwNGbMGOXPn1+ZM2dW/fr1deDAAat1r1y5on79+qlAgQJyc3NTsWLFNH78+ETrCS1dulRBQUHKli2bsmfPrnLlymnq1KkPjCUuLk5Tp05VuXLl5O7uLi8vLz3zzDNJThuW/pu2fT9r62f98ccfaty4sTw9PeXh4aHChQvrlVdekSSdPn1aXl5ekqRRo0aZ36ORI0eaH3/48GG1bdtWuXPnlru7uypXrpxoykPC8/7000/q2bOnvL29lT9/fknS9evX1a9fPxUqVEhubm7y9vbW008/nWiayf1OnTqlffv2qWHDhhblhQsXlr+/v0WZyWRS69atFRUVZTGFesWKFXJ2dtZrr71mLnN3d1e3bt20ZcsW/fXXX+byfPnyycXFJdmYkvLFF1/IMAy99NJLD6zr6empMmXKJCpv06aNJOnQoUPmsm+//VYxMTHq2bOnucxkMqlHjx76+++/tWXLFnO5j4+PPDw8Hvj8v/zyi7y8vNSgQQNzmZOTk9q1a6ewsDD99NNPD2zDVoKCgpQ1a1aLsjx58qh27doWr4MU/15WqVJFVapUMZeVKlVKTz31lL788kuLuv7+/skuGSBJly5d0sGDB9WmTRu5urqayytUqKDSpUtr6dKlD4y/TJky8vT0tChzc3NT06ZN9ffff+v69esW8fv4+OjZZ581l3l5ealdu3b69ttvFRUVJSl157ebm5t8fX0fGGeCp59+WmfOnNGePXtS/BgASAo5KDnovchB0z4H/eWXX1SxYkWVLFnSXJY5c2a1bNlSu3bt0rFjx5J9fFrkoK6urqpRo0aK2pRSlqPZw+OWw90vPbyX1iQs93DlyhWL8oR/S99+++0D2wBsjQ7RJ1xkZKSaNGmiwM
BATZkyRfXr15cUnyBkzZpVoaGhmjp1qoKCgjR8+HANHjw4URuXL1/WM888owoVKuiDDz5QqVKlNGjQIP3444/mOnP
"text/plain": [
"<Figure size 1400x600 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Side-by-side heatmaps of the first two row-normalised transition matrices.\n",
"# Each entry: (matrix, y-axis label, x-axis label, panel title).\n",
"panel_specs = [\n",
"    (\n",
"        transition_pct1,\n",
"        \"Cluster 2017-2019\",\n",
"        \"Cluster 2019-2021\",\n",
"        \"Transition des clusters (2017-2019 → 2019-2021)\",\n",
"    ),\n",
"    (\n",
"        transition_pct2,\n",
"        \"Cluster 2019-2021\",\n",
"        \"Cluster 2021-2023\",\n",
"        \"Transition des clusters (2019-2021 → 2021-2023)\",\n",
"    ),\n",
"]\n",
"\n",
"fig, axes = plt.subplots(1, 2, figsize=(14,6), sharey=True)\n",
"\n",
"# Draw one panel per spec, in left-to-right order.\n",
"for ax, (matrix, origin_label, target_label, panel_title) in zip(axes, panel_specs):\n",
"    sns.heatmap(matrix, annot=True, fmt=\".2f\", cmap=\"Blues\", ax=ax)\n",
"    ax.set_xlabel(target_label)\n",
"    ax.set_ylabel(origin_label)\n",
"    ax.set_title(panel_title)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 394,
"id": "24ff33bb-3658-4c36-bcd8-0a0982577314",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABuMAAAJOCAYAAACtJnoWAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA+ihJREFUeJzs3Xd0FFUbx/HfJqTSEhISOkgnlARCB+lSpSlFQXqTokhEIVJCFRFFUJrSq6CIyitIVWw06dJFEKQkkNBbEpJ5/4gsLNlNQkyyCXw/58w57J27d57ZYXafzJ17x2QYhiEAAAAAAAAAAAAAKc7B3gEAAAAAAAAAAAAATyo64wAAAAAAAAAAAIBUQmccAAAAAAAAAAAAkErojAMAAAAAAAAAAABSCZ1xAAAAAAAAAAAAQCqhMw4AAAAAAAAAAABIJXTGAQAAAAAAAAAAAKmEzjgAAAAAAAAAAAAgldAZBwAAAAAAAAAAAKQSOuOQri1YsEAmk0l///13onW3bNkik8mkLVu2pHpc1sTGxqpMmTIaP368XbYPPAkOHz6sTJky6eDBg/YOBQDSPfIk4OkSHR2t/Pnza8aMGfYOBQAyBHIl4OlCroT0js64p4jJZErSYq/EI6lmzJihBQsW2DuMeD7//HP9888/GjBggLns999/14ABA1S6dGllzpxZBQoUULt27XT8+HGrbRw5ckSNGzdWlixZlCNHDnXq1EmXLl2KV2/8+PFq0aKFfH19ZTKZNGrUKKvtFSpUyOZxLlasWKL7dPToUb399tsKCAhQ1qxZlTt3bjVr1ky7du2yWv/cuXNq166dPDw8lC1bNrVs2VInT56MV2/mzJlq27atChQoIJPJpK5du9qMYePGjapZs6bc3d3l6empNm3aJCmRTmmrVq1S+/btVbhwYbm7u6tEiRJ68803dfXqVav1V69erQoVKsjV1VUFChRQSEiI7t27Z1HnwoULGjp0qOrWrausWbMmeP5FR0dr9OjRKly4sFxcXFS4cGGNGzcuXpu2bN68Wd27d1fx4sXl7u6uwoULq2fPnrpw4YLV+lu3bjV/7rly5dLrr7+umzdvWtR5nP/fO3fuVL9+/RQYGCgnJyeZTCar2/Xz81OzZs00cuTIJO0XAKQU8qTUlR7zJElavny5+fc6Z86c6tGjh8LDw5O0T6mRJ/3zzz8aPXq0KleuLE9PT3l7e6tOnTratGlTvPYeJ49IbRklz7DF3sfy559/VosWLZQ/f365uroqV65caty4sX777TeLek5OTgoKCtL48eN19+7dx9pHAPivyJVSV1rlSo/zm3fs2DENGjRI1atXl6ura5I7Nu97nN9CSbp69ap69+6tnDlzKnPmzKpbt6727NljUSciIkKTJk1SrVq1lDNnTnl4eKhq1apasWJFvPYOHTqktm3bmq/jeHt7q1atWvrf//6X5H1IKfY+ll9//bUaNWqkPHnyyMXFRfny5VObNm2SfCO0vY/l/c5ya8v27dvN9ciVkO4ZeGosXrzYYnnuuecMSfHKQ0ND7R2q2b1794w7d+4YsbGx5rLSpUsbtWvXjlc3JibGuHPnjhETE5OGET7g7+9v9O7d26LsxRdfNHLlymW89tprxuzZs42xY8cavr6+RubMmY0//vjDou4///xjeHt7G0WKFDGmTp1qjB8/3vD09DT8/f2NyMhIi7qSjFy5chmNGjUyJBkhISFWY/r666/jHd9x48YZkox+/foluk9vvvmm4eHhYfTo0cP49NNPjffff98oUqSI4ejoaGzcuNGi7o0bN4xixYoZPj4+xsSJE43Jkycb+fPnN/Lly2eEh4db1C1YsKCRI0cOo3HjxkamTJmMLl26WN3+//73P8PBwcGoWLGiMXXqVGPs2LGGt7e3kTdvXuPixYuJxp+SvLy8jLJlyxojRowwZs+ebbz++uuGs7OzUbJkSeP27dsWddeuXWuYTCajbt26xmeffWa89t
prhoODg/Hqq69a1Pvxxx8NSUaxYsWMatWqGZKMH3/80er227VrZ5hMJqNHjx7GzJkzjS5duhiSjF69eiUp/sDAQOOZZ54x3n77bWP27NlGcHCwkTVrVsPX19e4cOGCRd29e/carq6uRvny5Y2ZM2caw4YNM1xcXIzGjRtb1Huc/98hISGGk5OTERgYaBQvXtxI6Odn7dq1hiTjxIkTSdo3AEgJ5EmpKz3mSTNmzDAkGfXr1zemT59uBAcHG+7u7ka5cuWMO3fuJLpPqZEnffLJJ4abm5vx8ssvG9OmTTOmTJliVKhQwZBkzJs3z6LNx8kjUltGyjOssfexnD17ttGyZUtj3Lhxxpw5c4xJkyYZ/v7+hoODg/H9999b1L1y5Yrh7OxszJ0797H2EQD+K3Kl1JVWudLj/ObNnz/fcHBwMMqUKWMEBAQYkoxTp04leZ8e57cwJibGqF69upE5c2Zj1KhRxrRp0ww/Pz8ja9asxvHjx831/ve//xlOTk5Gy5YtjSlTphjTpk0z6tata0gyRo4cadHmmjVrjEaNGhmjRo0yPvvsM2PKlCnGs88+a0gyPv300yTvR0qw97EcPXq00b59e+O9994z5syZY4wbN84oXLiw4ebmZuzbty/R+O19LO/nva+//nq875xLly5Z1CVXQnpGZ9xTrH///kn6Q/XWrVtpEE3S2Uqc7GnPnj2GJGPTpk0W5b/99lu8C0THjx83XFxcjI4dO1qU9+3b13BzczNOnz5tLtu4caPVJOF+8nPp0qUELzJZM3bsWEOS8dtvvyVad9euXcaNGzcsysLDw42cOXMaNWrUsCifOHGiIcnYuXOnuezIkSOGo6OjERwcbFH377//NifDmTNnttkZ5+fnZxQtWtTiM9y3b5/h4OBgBAUFJRp/SrJ2cWvhwoWGJGP27NkW5X5+foa/v78RHR1tLhs2bJhhMpmMI0eOmMuuX79uREREGIZhGF9++aXNi2g7d+40JBkjRoywKH/zzTcNk8lk7N+/P9H4f/rpp3h/VPz000+GJGPYsGEW5U2aNDFy585tXLt2zVw2e/ZsQ5Kxfv16c9nj/P8ODQ01d1om9t0TFRVleHp6xttfAEhL5EkpJz3mSZGRkYaHh4dRq1Ytiwt0//vf/wxJxscff5zofqVGnnTw4MF4FxTu3r1rlCxZ0siXL59FeVLziLSQkfIMa+x9LK25deuW4evrazRq1Cjeuueff9549tlnk7RvAJBayJVSTlrmSo/zmxcREWFcv37dMAzDmDRp0mN3xj3Ob+GKFSsMScaXX35pLrt48aLh4eFhvPzyy+aykydPGn///bfFe2NjY4169eoZLi4uxs2bNxOM6d69e4a/v79RokSJJO9HSrD3sbQmNDTUyJQpk9GnT59E69r7WN7vjHu4zYSQKyG9ojPuKWYtcapdu7ZRunRpY9euXcazzz5ruLm5GQMHDjQMwzC++eYbo2nTpkbu3LkNZ2dno3DhwsaYMWOMe/fuWW3j0KFDRp06dQw3NzcjT548xsSJE+PF8PHHHxt+fn6Gm5ub4eHhYQQGBhpLly41r58/f77Fj33BggUNSRbL/STq/hfzoxchvvjiC6NChQqGq6ur4eXlZXTs2NE4e/asRZ0uXboYmTNnNs6ePWu0bNnSyJw5s+Ht7W28+eab8fbPmpEjRxrOzs5GVFRUonUNwzAqVKhgVKhQwaLMx8fHaNu2bby6xYsXN+rXr2+1neR0xpUqVcp45plnklzfmhdeeMHIkSOHRVmlSpWMSpUqxavbsGFDo0iRIjbbstUZFxERYUgy3nrrrXjrSpcubeTJk+fxA09h169fNyRZdAweOnTIkGRMnz7dou65c+cMScbYsWOttpXQRbQPP/zQkGQcOnTIovz33383JBnvvPNOsvchR44cxgsvvGB+fe3aNSNTpkzxPvfIyEgjS5YsRo8ePRJt09r/74cl5Y+21q1bG+XKlUt0WwCQWsiTHngS86Tdu3
db/b02DMPIkiWLUb169STFak1K5kn3BQUFGZLMF8QeZe/OOFvSa56RVPY4lg8rU6aMUaVKlXjlU6dONUwmk7kzFgD
"text/plain": [
"<Figure size 1800x600 with 6 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Row-normalised transition heatmaps for the three consecutive window pairs.\n",
"# Each entry: (matrix, origin-window name, destination-window name); the axis\n",
"# labels and the title of a panel are all derived from these two names.\n",
"transition_panels = [\n",
"    (transition_pct1, \"2017-2019\", \"2019-2021\"),\n",
"    (transition_pct2, \"2019-2021\", \"2021-2023\"),\n",
"    (transition_pct3, \"2021-2023\", \"2023-2025\"),\n",
"]\n",
"\n",
"# NOTE(review): sharey=True presumes the three matrices list the same origin\n",
"# clusters in the same row order - confirm before comparing rows across panels.\n",
"fig, axes = plt.subplots(1, len(transition_panels), figsize=(18,6), sharey=True)\n",
"\n",
"for ax, (matrix, origin, destination) in zip(axes, transition_panels):\n",
"    sns.heatmap(matrix, annot=True, fmt=\".2f\", cmap=\"Blues\", ax=ax)\n",
"    ax.set_xlabel(f\"Cluster {destination}\")\n",
"    ax.set_ylabel(f\"Cluster {origin}\")\n",
"    ax.set_title(f\"Transition ({origin} → {destination})\")\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 410,
"id": "f50429f5-12ea-4056-8568-d27d005835ca",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA9sAAAJOCAYAAACnVRSYAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAzz9JREFUeJzs3XV4FMcbwPHv3uUuLkACQQIJ7trihWKFYoVCgeIOxb24FCseXIu0hRYtUoq7FSiFUvnh7lAggcjp/v645MjFSCAQoO/nefI0tzs7O7t3ufLuzDujqKqqIoQQQgghhBBCiBSjSe0GCCGEEEIIIYQQ7xoJtoUQQgghhBBCiBQmwbYQQgghhBBCCJHCJNgWQgghhBBCCCFSmATbQgghhBBCCCFECpNgWwghhBBCCCGESGESbAshhBBCCCGEEClMgm0hhBBCCCGEECKFSbAthBBCCCGEEEKkMAm2hRDiP0BRFEaOHJmidS5duhRFUbhy5UqK1pvSJk2aRPbs2dFqtRQtWvSl6ho5ciSKoqRMw0Sqad26NYGBgandjHeW/J0IIYSNBNtCCPGaRAenCf38+uuvqd3EeI0bN47169endjNeyPbt2xkwYADlypVjyZIljBs3LrWblKgVK1YQHByc2s14J9y6dYuRI0dy6tSp1G7KOyk8PJyRI0eyd+/e1G6KEEK8sZxSuwFCCPFf89VXXxEUFBRne86cOVOhNc83btw4GjZsSL169Ry2t2jRgiZNmuDs7Jw6DUuC3bt3o9Fo+Oabb9Dr9andnOdasWIFf/31F7169Urtprz1bt26xahRowgMDIwzomHhwoVYrdbUadg7Ijw8nFGjRgHw4YcfOuwbOnQoAwcOTIVWCSHEm0WCbSGEeM0+/vhj3nvvvdRuxkvTarVotdrUbkai7t27h6ur61sRaL8qVqsVo9GIi4tLitcdFhaGu7t7itf7Msxm83MDaZ1O95pa83aLjIxEr9ej0SRvIKSTkxNOTvJPTCGEkGHkQgjxBjGZTKRNm5Y2bdrE2RcaGoqLiwv9+vWzb7t37x7t2rUjQ4YMuLi4UKRIEZYtW/bc8ySUsxo711JRFMLCwli2bJl9uHvr1q2BhHO258yZQ4ECBXB2diZTpkx07dqVx48fO5T58MMPKViwIP/88w+VKlXCzc2NzJkzM3HixOe2HWwB1ejRo8mRIwfOzs4EBgYyePBgDAaDQ9uXLFlCWFiYve1Lly5NtN6jR49Ss2ZN0qRJg7u7O4ULF2b69OkJlr9y5UqC9cbOk3/y5Am9evUiMDAQZ2dn0qdPT7Vq1fj999/t92Tz5s1cvXrV3t6Y75HBYGDEiBHkzJkTZ2dnAgICGDBggMM1R5+3W7duLF++3P4+bN26NcFrCAwMpHbt2mzfvp2iRYvi4uJC/vz5WbdunUO56Pd73759dOnShfTp05MlSxb7/uS87ydOnKBs2bK4uroSFBTEvHnz4rQrKZ/t6Ps/efJkgoOD7Z+HOXPm8P777wPQpk2bOO9/fJ//sLAw+vbtS0BAAM7OzuTJk4fJkyejqmq893f9+vUULFgQZ2dnChQoEOceP+/9TszJkyf5+OOP8fLywsPDgypVqjikmfz2228oihLv3/q2bdtQFIWff/7Zvu3mzZu0bduWDBky2Nu7ePFih+P27t2Loij8+OOPDB06lMyZM+Pm5kZoaGicc1y5cgU/Pz8ARo0aZb+/0Z/3+HK2o+/b6tWryZ8/P66urpQpU4Y///wTgPnz55MzZ05cXFz48MMP450L4ujRo9SoUQNvb2/c3NyoWLEihw4deu79FEKI1CKPHYUQ4jULCQnhwYMHDtsURSFdunTodDrq16/PunXrmD9/vkOP7Pr16zEYDDRp0gSAiIgIPvzwQy5cuEC3bt0ICgpi9erVtG7dmsePH9OzZ8+Xbut3331H+/btKVmyJB07dgQgR44cCZYfOXIko0aNomrVqnzxxRecPXuWuX
Pncvz4cQ4dOuTQo/jo0SNq1KjBp59+SqNGjVizZg1ffvklhQoV4uOPP060Xe3bt2fZsmU0bNiQvn37cvToUcaPH8///vc/fvrpJ3vbFyxYwLFjx1i0aBEAZcuWTbDOHTt2ULt2bTJmzEjPnj3x9/fnf//7Hz///HOK3MvOnTuzZs0aunXrRv78+fn33385ePAg//vf/yhevDhDhgwhJCSEGzduMG3aNAA8PDwAW+903bp1OXjwIB07diRfvnz8+eefTJs2jXPnzsXJqd+9ezerVq2iW7du+Pr6PncysPPnz9O4cWM6d+5Mq1atWLJkCZ999hlbt26lWrVqDmW7dOmCn58fw4cPJywsDEj++16zZk0aNWrE559/zqpVq/jiiy/Q6/W0bdsWSP5ne8mSJURGRtKxY0ecnZ2pX78+T548Yfjw4XTs2JEPPvgASPj9V1WVunXrsmfPHtq1a0fRokXZtm0b/fv35+bNm/b3I9rBgwdZt24dXbp0wdPTkxkzZtCgQQOuXbtGunTpkvR+J+Tvv//mgw8+wMvLiwEDBqDT6Zg/fz4ffvgh+/bto1SpUrz33ntkz56dVatW0apVK4fjV65cSZo0aahevToAd+/epXTp0vZg18/Pjy1bttCuXTtCQ0PjpCyMHj0avV5Pv379MBgM8Y4K8fPzY+7cuXzxxRfUr1+fTz/9FIDChQsneF0ABw4cYOPGjXTt2hWA8ePHU7t2bQYMGMCcOXPo0qULjx49YuLEibRt25bdu3fbj929ezcff/wxJUqUYMSIEWg0GpYsWULlypU5cOAAJUuWTPTcQgiRKlQhhBCvxZIlS1Qg3h9nZ2d7uW3btqmAumnTJofja9asqWbPnt3+Ojg4WAXU77//3r7NaDSqZcqUUT08PNTQ0FD7dkAdMWKE/XWrVq3UbNmyxWnjiBEj1Nj/a3B3d1dbtWqV4PVcvnxZVVVVvXfvnqrX69WPPvpItVgs9nKzZs1SAXXx4sX2bRUrVlQB9dtvv7VvMxgMqr+/v9qgQYM454rp1KlTKqC2b9/eYXu/fv1UQN29e7fDdbq7uydan6qqqtlsVoOCgtRs2bKpjx49cthntVrtv8e+P5cvX1YBdcmSJXHqjH3Pvb291a5duybajlq1asX7vnz33XeqRqNRDxw44LB93rx5KqAeOnTI4bwajUb9+++/Ez1XtGzZsqmAunbtWvu2kJAQNWPGjGqxYsXs26Lf7/Lly6tms9m+/UXe9ylTpti3GQwGtWjRomr69OlVo9GoqmrSP9vR99/Ly0u9d++ew3UdP348wfcm9ud//fr1KqCOGTPGoVzDhg1VRVHUCxcu2LcBql6vd9j2xx9/qIA6c+ZM+7akvN/xqVevnqrX69WLFy/at926dUv19PRUK1SoYN82aNAgVafTqQ8fPrRvMxgMqo+Pj9q2bVv7tnbt2qkZM2ZUHzx44HCeJk2aqN7e3mp4eLiqqqq6Z88eFVCzZ89u35aY+/fvx/mMR4vveyT6ey76+0JVVXX+/PkqoPr7+zt8Xw0aNMjhu8Vqtaq5cuVSq1ev7vD3GB4ergYFBanVqlV7bnuFECI1yDByIYR4zWbPns2OHTscfrZs2WLfX7lyZXx9fVm5cqV926NHj9ixYweNGze2b/vll1/w9/fn888/t2/T6XT06NGDp0+fsm/fvtdzQVF27tyJ0WikV69eDjmeHTp0wMvLi82bNzuU9/DwoHnz5vbXer2ekiVLcunSpUTP88svvwDQp08fh+19+/YFiHOepDh58iSXL1+mV69e+Pj4OOxLqSWMfHx8OHr0KLdu3Ur2satXryZfvnzkzZuXBw8e2H8qV64MwJ49exzKV6xYkfz58ye5/kyZMlG/fn37ay8vL1q2bMnJkye5c+eOQ9kOHTo45Oon9313cnKiU6dO9td6vZ5OnTpx7949Tpw4AST/s92gQQP7sOYX8csvv6DVaunRo4fD9r59+6KqqsPfJ0DVqlUdRngULl
wYLy8vh8/ui7zfFouF7du3U69ePbJnz27fnjFjRpo2bcrBgwftw7obN26MyWRyGO6/fft2Hj9+bP+eUFWVtWvXUqd
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA9EAAAJOCAYAAACwd4RRAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAoyZJREFUeJzs3XdcleX/x/H3AWXLcICLGCpO3FqKpaaGmZqmaWY5UnPv2bfcGjlz5K60TFNzVeLGlaMyFTUHTqTh3jhA4P794Y+TR0DPMRTM1/PxOI8497juz32fW+J97uu+bpNhGIYAAAAAAMBD2WV0AQAAAAAAPC0I0QAAAAAAWIkQDQAAAACAlQjRAAAAAABYiRANAAAAAICVCNEAAAAAAFiJEA0AAAAAgJUI0QAAAAAAWIkQDQAAAACAlQjRwBPi7++vVq1aZXQZj92mTZtkMpm0adMm87RWrVrJ39//iWz//uM8Z84cmUwm/fbbb09k+9WqVVO1atWeyLbu9aT380kxmUwaMmRIurWXfJyio6PTrU0AAPBsIUQD/9Lx48fVvn17BQYGysnJSe7u7goJCdHEiRN169atJ1LDzZs3NWTIEIvg+rQ7ePCghgwZkinDTmauDQAAAI9XlowuAHiahYeH680335Sjo6NatGihEiVKKD4+Xlu3blXfvn114MABzZw587HXcfPmTQ0dOlSSMuQq6MPMmjVLSUlJNq1z8OBBDR06VNWqVbPpKnZUVJTs7B7v94MPqm3t2rWPddvPmlu3bilLFv5XBQAAMg/+MgEe0cmTJ/XWW2/Jz89PGzZsUJ48eczzOnfurGPHjik8PDwDK/z3bty4IVdX13/dTtasWdOhmrQZhqHbt2/L2dlZjo6Oj3VbD+Pg4JCh2/8vSEpKUnx8vJycnOTk5JTR5QAAAFigOzfwiEaPHq3Y2Fh98cUXFgE6WcGCBdW9e/c01x8yZIhMJlOK6ands/nbb78pNDRUOXPmlLOzswICAvTee+9JkqKjo5UrVy5J0tChQ2UymVLcR3r48GE1btxY2bNnl5OTk8qXL68ffvgh1e1u3rxZnTp1kre3t/Lnz//AY/Dnn3+qQYMGcnV1lbe3t3r27Km4uLgUy6V2T/SCBQtUrlw5ZcuWTe7u7goODtbEiRPNtbz55puSpOrVq5v3Kbm7ur+/v+rWras1a9aofPnycnZ21owZM8zzUrv3/ObNm2rfvr1y5Mghd3d3tWjRQpcvX7ZYJq37b+9t82G1pXZP9Llz59SmTRv5+PjIyclJpUqV0ldffWWxTHR0tEwmk8aOHauZM2eqQIECcnR0VIUKFbRz584UNaXlYfvZsmVL5cyZU3fu3Emx7iuvvKLChQs/sP1q1aqpRIkS2rVrlypXrmw+H6dPn55i2bi4OA0ePFgFCxaUo6OjfH191a9fvxTniMlkUpcuXTRv3jwVL15cjo6OWr16tXne/Z/Jnj179Oqrr8rd3V1ubm6qUaOGfv755xTbP3DggF5++WU5Ozsrf/78GjFihM09IgAAAO7HlWjgEf34448KDAxU5cqVH+t2zp07p1deeUW5cuXSgAED5OnpqejoaC1dulSSlCtXLk2bNk0dO3ZUw4YN9cYbb0iSSpYsKelukAgJCVG+fPk0YMAAubq6atGiRWrQoIGWLFmihg0bWmyvU6dOypUrlwYNGqQbN26kWdetW7dUo0YNxcTEqFu3bsqbN6/mzp2rDRs2PHSf1q1bp2bNmqlGjRoaNWqUJOnQoUPatm2bunfvrpdeekndunXTpEmT9L///U9FixaVJPN/pbvdtps1a6b27durXbt2Dw1/Xbp0kaenp4YMGaKoqChNmzZNp06dMg+EZi1rarvXrVu3VK1aNR07dkxdunRRQECAvvvuO7Vq1UpXrlxJ8UXL/Pnzdf36dbVv314mk0mjR4/WG2+8oRMnTlh1Rf9h+/nuu+/q66+/1po1a1S3bl3zemfOnNGGDR
s0ePDgh27j8uXLqlOnjpo0aaJmzZpp0aJF6tixoxwcHMxf7iQlJal+/fraunWr3n//fRUtWlT79+/Xp59+qiNHjmj58uUWbW7YsEGLFi1Sly5dlDNnzjS78B84cEAvvvii3N3d1a9fP2XNmlUzZsxQtWrVtHnzZj3//PPm/alevboSEhLM5/3MmTPl7Oz80P0DAAB4IAOAza5evWpIMl5//XWr1/Hz8zNatmxpfj948GAjtX+Cs2fPNiQZJ0+eNAzDMJYtW2ZIMnbu3Jlm2+fPnzckGYMHD04xr0aNGkZwcLBx+/Zt87SkpCSjcuXKRqFChVJst0qVKkZCQsJD92fChAmGJGPRokXmaTdu3DAKFixoSDI2btxont6yZUvDz8/P/L579+6Gu7v7A7fz3XffpWgnmZ+fnyHJWL16darz7j3OyftVrlw5Iz4+3jx99OjRhiTj+++/N09L6xje3+aDaqtatapRtWpV8/vk4/TNN9+Yp8XHxxuVKlUy3NzcjGvXrhmGYRgnT540JBk5cuQwLl26ZF72+++/NyQZP/74Y4pt3cva/UxMTDTy589vNG3a1GL98ePHGyaTyThx4sQDt1O1alVDkjFu3DjztLi4OKN06dKGt7e3edtz58417OzsjJ9++sli/enTpxuSjG3btpmnSTLs7OyMAwcOpNje/Z9JgwYNDAcHB+P48ePmaX///beRLVs246WXXjJP69GjhyHJ+OWXX8zTzp07Z3h4eFj8+wIAALAV3bmBR3Dt2jVJUrZs2R77tjw9PSVJK1asSLUL7oNcunRJGzZsUJMmTXT9+nVduHBBFy5c0MWLFxUaGqqjR4/qr7/+slinXbt2sre3f2jbK1euVJ48edS4cWPzNBcXF73//vtW7dONGze0bt06m/bnXgEBAQoNDbV6+ffff9/iSm7Hjh2VJUsWrVy58pFrsMbKlSuVO3duNWvWzDwta9as6tatm2JjY7V582aL5Zs2bSovLy/z+xdffFGSdOLECau297D9tLOzU/PmzfXDDz/o+vXr5uXmzZunypUrKyAg4KHbyJIli9q3b29+7+DgoPbt2+vcuXPatWuXJOm7775T0aJFVaRIEfN5d+HCBb388suSpI0bN1q0WbVqVRUrVuyB201MTNTatWvVoEEDBQYGmqfnyZNHb7/9trZu3Wr+t7ly5Uq98MILqlixonm5XLlyqXnz5g/dPwAAgAchRAOPwN3dXZIsQsjjUrVqVTVq1EhDhw5Vzpw59frrr2v27Nmp3nt8v2PHjskwDA0cOFC5cuWyeCV32z137pzFOtaEKEk6deqUChYsmKIr9MO6VUt3u4wHBQXp1VdfVf78+fXee++Z74G1lrV1JitUqJDFezc3N+XJk+exP6bq1KlTKlSoUIoRw5O7f586dcpi+nPPPWfxPjlQ33//dlqs2c8WLVro1q1bWrZsmaS7XeN37dqld99916pt5M2bN8WAc0FBQZJk3s7Ro0d14MCBFOdd8nKPct6dP39eN2/eTPUcK1q0qJKSkvTHH39I+ue438+a8xMAAOBBuCcaeATu7u7Kmzevfv/990duI637cBMTE1Mst3jxYv3888/68ccftWbNGr333nsaN26cfv75Z7m5uaW5jeRBlPr06ZPmVduCBQtavH8S94x6e3srMjJSa9as0apVq7Rq1SrNnj1bLVq0SDHgVlqe5L2t938mj1NavQAMw0i3bRQrVkzlypXTN998oxYtWuibb76Rg4ODmjRpkm7bSEpKUnBwsMaPH5/qfF9fX4v33KsMAACeFoRo4BHVrVtXM2fO1I4dO1SpUiWb10++wnjlyhVzl20p5ZXJZC+88IJeeOEFjRw5UvPnz1fz5s21YMECtW3bNs1AntzlNWvWrKpZs6bNNT6In5+ffv/9dxmGYbH9qKgoq9Z3cHBQvXr1VK9ePSUlJalTp06aMWOGBg4cmOoV7n/r6NGjql69uvl9bGysTp8+rTp16pineXl56c
qVKxbrxcfH6/Tp0xbTbKnNz89P+/btU1JSksXV6MOHD5vnpydr9lO6ezW6V69eOn36tObPn6/XXnvNohv5g/z9998
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Share of each cluster per two-year window, drawn as a stacked area chart and a stacked bar chart.\n",
"# `pd` and `plt` come from the import cell at the top of the notebook.\n",
"\n",
"# Readable label for each cluster id.\n",
"labels_map_new = {\n",
"    0: \"Cluster 0: Large and highly active movers\",\n",
"    1: \"Cluster 1: Occasional large movers\",\n",
"    2: \"Cluster 2: Extreme\",\n",
"    3: \"Cluster 3: Dormant profiles\",\n",
"    4: \"Cluster 4: Loyal clients\"\n",
"}\n",
"\n",
"# One fixed colour per cluster label, shared by both charts.\n",
"colors_map = {\n",
"    \"Cluster 0: Large and highly active movers\" : \"#0000FF\",  # blue\n",
"    \"Cluster 1: Occasional large movers\": \"#FFA500\",  # orange\n",
"    \"Cluster 2: Extreme\": \"#800080\",  # purple\n",
"    \"Cluster 3: Dormant profiles\": \"#008000\",  # green\n",
"    \"Cluster 4: Loyal clients\": \"#FF0000\"  # red\n",
"}\n",
"FALLBACK_COLOR = \"#808080\"  # grey, for any cluster label that has no entry in colors_map\n",
"\n",
"# --- 1. Stack the four window frames, tagging each with its period ---\n",
"df_long = pd.concat([\n",
"    df_2017.assign(period=\"2017-2019\"),\n",
"    df_2019.assign(period=\"2019-2021\"),\n",
"    df_2021.assign(period=\"2021-2023\"),\n",
"    df_2023.assign(period=\"2023-2025\")\n",
"])\n",
"\n",
"# --- 2. Count rows per period and cluster ---\n",
"counts = (\n",
"    df_long.groupby([\"period\", \"cluster_kmeans\"])\n",
"    .size()\n",
"    .unstack(fill_value=0)\n",
")\n",
"\n",
"# --- 3. Convert the counts to per-period proportions (each row sums to 1) ---\n",
"props = counts.div(counts.sum(axis=1), axis=0)\n",
"\n",
"# Order the columns by cluster id, then swap the ids for readable labels.\n",
"props = props.sort_index(axis=1)\n",
"props.columns = [labels_map_new.get(int(c), f\"Cluster {int(c)}\") for c in props.columns]\n",
"\n",
"# Resolve the colours once so the area and bar charts agree; unmapped labels\n",
"# fall back to grey instead of raising KeyError.\n",
"colors_list = [colors_map.get(c, FALLBACK_COLOR) for c in props.columns]\n",
"\n",
"# =========================\n",
"# Chart 1: stacked area\n",
"# =========================\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"props.plot(\n",
"    kind=\"area\",\n",
"    stacked=True,\n",
"    ax=ax,\n",
"    color=colors_list\n",
")\n",
"\n",
"ax.set_ylabel(\"Proportion of clients\")\n",
"ax.set_xlabel(\"Period\")\n",
"ax.set_title(\"Evolution of cluster proportions over time\")\n",
"ax.set_ylim(0, 1)\n",
"ax.legend(title=\"Cluster\", loc=\"upper left\")\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# =========================\n",
"# Chart 2: stacked bar\n",
"# =========================\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"props.plot(\n",
"    kind=\"bar\",\n",
"    stacked=True,\n",
"    ax=ax,\n",
"    color=colors_list\n",
")\n",
"\n",
"# `ax` is the current axes here, so plt.xticks acts on this chart.\n",
"plt.xticks(rotation=45, ha=\"right\")\n",
"\n",
"ax.set_ylabel(\"Proportion of clients\")\n",
"ax.set_xlabel(\"Period\")\n",
"ax.set_title(\"Cluster distribution by period\")\n",
"ax.set_ylim(0, 1)\n",
"# Legend sits outside the plotting area, to the right.\n",
"ax.legend(title=\"Cluster\", bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f8e871a-1cb7-499d-b45f-de714b15cd15",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}