Project_Carmignac/.ipynb_checkpoints/temporal clustering-checkpoint.ipynb

4891 lines
1.4 MiB
Plaintext
Raw Permalink Normal View History

2026-04-07 12:31:16 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 290,
"id": "2fee3a54-847b-432f-bda5-3d6a9aa9020c",
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"from sklearn.preprocessing import StandardScaler, RobustScaler\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.mixture import GaussianMixture\n",
"from sklearn.metrics import silhouette_score, davies_bouldin_score, pairwise_distances\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.neighbors import kneighbors_graph\n",
"from sklearn.manifold import MDS\n",
"\n",
"# Plot theme, then the same 200-item display limit for both columns and rows.\n",
"sns.set_style(\"whitegrid\")\n",
"for _display_option in (\"display.max_columns\", \"display.max_rows\"):\n",
"    pd.set_option(_display_option, 200)\n",
"\n",
"# Notebook-wide constants.\n",
"# NOTE(review): presumably EPS guards divisions and RANDOM_STATE seeds the models - confirm against the cells that use them.\n",
"EPS = 1e-9\n",
"RANDOM_STATE = 42"
]
},
{
"cell_type": "code",
"execution_count": 291,
"id": "1f95b6b6-03b8-4f23-b236-5c71beedea04",
"metadata": {},
"outputs": [],
"source": [
"# Read the AUM_repaired.csv extract from S3 (comma-separated, which is the pandas default).\n",
"PATH_aum = \"s3://projet-bdc-carmignac-g3/paco/AUM_repaired.csv\"\n",
"df_aum_repaired = pd.read_csv(PATH_aum)"
]
},
{
"cell_type": "code",
"execution_count": 292,
"id": "cab4432f-d7e5-4c18-ab86-19fe6759eed6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fichiers Flows : ['projet-bdc-data/carmignac/Flows ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv']\n",
"Fichiers AUM : ['projet-bdc-data/carmignac/AUM ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv']\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - AUM</th>\n",
" <th>Value - AUM CCY</th>\n",
" <th>Value - AUM €</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>35.368</td>\n",
" <td>24648.6666</td>\n",
" <td>24648.6666</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22413.0553</td>\n",
" <td>22413.0553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-12-31</td>\n",
" <td>35.368</td>\n",
" <td>22051.2406</td>\n",
" <td>22051.2406</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-03-31</td>\n",
" <td>35.368</td>\n",
" <td>21626.1173</td>\n",
" <td>21626.1173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22489.4502</td>\n",
" <td>22489.4502</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200000647 France \n",
"1 200000647 France \n",
"2 200000647 France \n",
"3 200000647 France \n",
"4 200000647 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Diversified Patrimoine \n",
"1 France Diversified Patrimoine \n",
"2 France Diversified Patrimoine \n",
"3 France Diversified Patrimoine \n",
"4 France Diversified Patrimoine \n",
"\n",
" Product - Legal Status Product - Is Dedie ? Product - Fund \\\n",
"0 FCP NO Carmignac Patrimoine \n",
"1 FCP NO Carmignac Patrimoine \n",
"2 FCP NO Carmignac Patrimoine \n",
"3 FCP NO Carmignac Patrimoine \n",
"4 FCP NO Carmignac Patrimoine \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n",
"0 A EUR FR0010135103 \n",
"1 A EUR FR0010135103 \n",
"2 A EUR FR0010135103 \n",
"3 A EUR FR0010135103 \n",
"4 A EUR FR0010135103 \n",
"\n",
" Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n",
"0 2015-03-31 35.368 24648.6666 24648.6666 \n",
"1 2015-11-30 35.368 22413.0553 22413.0553 \n",
"2 2015-12-31 35.368 22051.2406 22051.2406 \n",
"3 2016-03-31 35.368 21626.1173 21626.1173 \n",
"4 2016-11-30 35.368 22489.4502 22489.4502 "
]
},
"execution_count": 292,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the raw Flows and AUM CSV exports from S3\n",
"\n",
"import os\n",
"import s3fs\n",
"import pandas as pd\n",
"\n",
"s3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': s3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc-data\"\n",
"carmignac_path = f\"{BUCKET}/carmignac\"\n",
"\n",
"\n",
"def _read_csv_group(filesystem, paths):\n",
"    \"\"\"Read every ';'-separated CSV in `paths` through `filesystem`.\n",
"\n",
"    Returns a dict mapping each file's basename to its DataFrame.\n",
"    \"\"\"\n",
"    frames = {}\n",
"    for path in paths:\n",
"        with filesystem.open(path, 'r') as handle:\n",
"            frames[os.path.basename(path)] = pd.read_csv(handle, sep=';', low_memory=False)\n",
"    return frames\n",
"\n",
"\n",
"# List the folder once, then split the CSV exports by family.\n",
"all_files = fs.ls(carmignac_path)\n",
"flows_files = [f for f in all_files if \"Flows\" in f and f.endswith(\".csv\")]\n",
"aum_files = [f for f in all_files if \"AUM\" in f and f.endswith(\".csv\")]\n",
"\n",
"# Same print / read order as before so the cell output is unchanged.\n",
"print(\"Fichiers Flows :\", flows_files)\n",
"flows_data = _read_csv_group(fs, flows_files)\n",
"\n",
"print(\"Fichiers AUM :\", aum_files)\n",
"aum_data = _read_csv_group(fs, aum_files)\n",
"\n",
"# Work on the V2 extracts: df holds AUM, dg holds flows.\n",
"df = aum_data['AUM ENSAE V2 -20251105.csv']\n",
"dg = flows_data['Flows ENSAE V2 -20251105.csv']\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 293,
"id": "232e399b-64dc-4943-9c15-793a268ee896",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - Subscription</th>\n",
" <th>Quantity - Redemption</th>\n",
" <th>Quantity - NetFlows</th>\n",
" <th>Value Ccy - Subscription</th>\n",
" <th>Value Ccy - Redemption</th>\n",
" <th>Value Ccy - NetFlows</th>\n",
" <th>Value € - Subscription</th>\n",
" <th>Value € - Redemption</th>\n",
" <th>Value € - NetFlows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200127202</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Investissement</td>\n",
" <td>F</td>\n",
" <td>EUR</td>\n",
" <td>LU0992625839</td>\n",
" <td>2020-11-05</td>\n",
" <td>1636.00</td>\n",
" <td>0.000</td>\n",
" <td>1636.000</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-09</td>\n",
" <td>144.69</td>\n",
" <td>0.000</td>\n",
" <td>144.690</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2016-10-26</td>\n",
" <td>0.00</td>\n",
" <td>-8.321</td>\n",
" <td>-8.321</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2018-10-18</td>\n",
" <td>0.00</td>\n",
" <td>-22.083</td>\n",
" <td>-22.083</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2019-04-08</td>\n",
" <td>0.00</td>\n",
" <td>-465.992</td>\n",
" <td>-465.992</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200127202 France \n",
"1 406533 France \n",
"2 406533 France \n",
"3 406533 France \n",
"4 406533 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Equity Investissement \n",
"1 France Diversified Patrimoine \n",
"2 France Equity Investissement \n",
"3 France Equity Investissement \n",
"4 France Equity Investissement \n",
"\n",
" Product - Legal Status Product - Is Dedie ? \\\n",
"0 SICAV NO \n",
"1 FCP NO \n",
"2 FCP NO \n",
"3 FCP NO \n",
"4 FCP NO \n",
"\n",
" Product - Fund Product - Shareclass Type \\\n",
"0 Carmignac Portfolio Investissement F \n",
"1 Carmignac Patrimoine A \n",
"2 Carmignac Investissement A \n",
"3 Carmignac Investissement A \n",
"4 Carmignac Investissement A \n",
"\n",
" Product - Shareclass Currency Product - Isin Centralisation Date \\\n",
"0 EUR LU0992625839 2020-11-05 \n",
"1 EUR FR0010135103 2015-03-09 \n",
"2 EUR FR0010148981 2016-10-26 \n",
"3 EUR FR0010148981 2018-10-18 \n",
"4 EUR FR0010148981 2019-04-08 \n",
"\n",
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
"0 1636.00 0.000 1636.000 \n",
"1 144.69 0.000 144.690 \n",
"2 0.00 -8.321 -8.321 \n",
"3 0.00 -22.083 -22.083 \n",
"4 0.00 -465.992 -465.992 \n",
"\n",
" Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
"\n",
" Value € - Subscription Value € - Redemption Value € - NetFlows \n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 "
]
},
"execution_count": 293,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the flows extract.\n",
"dg.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 294,
"id": "e19e970c-d1dc-4608-9f6f-73dd3e282ba6",
"metadata": {},
"outputs": [],
"source": [
"# Merge the repaired AUM extract with the raw AUM extract.\n",
"# Accounts present in the repaired extract take all their rows from it;\n",
"# every other account keeps its raw rows.\n",
"\n",
"# 1. Account IDs covered by the repaired extract\n",
"ids_repaired = df_aum_repaired[\"Registrar Account - ID\"].unique()\n",
"\n",
"# 2. Raw rows whose account is absent from the repaired extract\n",
"# NOTE(review): isin() matches exact values, so both frames must store the ID with the same dtype - confirm.\n",
"df_only = df.loc[~df[\"Registrar Account - ID\"].isin(ids_repaired)]\n",
"\n",
"# 3. Stack both sources under a fresh 0..n-1 index\n",
"df_merged = pd.concat((df_aum_repaired, df_only), axis=0, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 295,
"id": "79c732d4-8d4d-4f7d-9a46-2e89cf2b213d",
"metadata": {},
"outputs": [],
"source": [
"# Parse the date columns and filter out the technical accounts\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Labels in the account-ID column that are excluded from both AUM and flows.\n",
"TECHNICAL_ACCOUNTS = ['Off Distribution', 'Private Clients', 'Private Client']\n",
"\n",
"# Convert the date column of each frame to datetime, in place.\n",
"for _frame in (df, df_aum_repaired, dg):\n",
"    _frame['Centralisation Date'] = pd.to_datetime(_frame['Centralisation Date'])\n",
"\n",
"df = df[~df['Registrar Account - ID'].isin(TECHNICAL_ACCOUNTS)]\n",
"dg = dg[~dg['Registrar Account - ID'].isin(TECHNICAL_ACCOUNTS)]"
]
},
{
"cell_type": "code",
"execution_count": 296,
"id": "f7f7242c-051e-4d7d-9a76-b46523089e49",
"metadata": {},
"outputs": [],
"source": [
"# Reference date and selection of the accounts kept for the study:\n",
"# every account whose total AUM on the reference date exceeds 5,000,000.\n",
"\n",
"ref_date = pd.Timestamp('2025-10-31')\n",
"\n",
"# AUM rows dated exactly on the reference date\n",
"df_ref = df.loc[df['Centralisation Date'] == ref_date]\n",
"\n",
"# Total AUM per account, largest first, restricted to accounts above the threshold\n",
"aum_account = (\n",
"    df_ref\n",
"    .groupby('Registrar Account - ID')['Value - AUM €']\n",
"    .sum()\n",
"    .reset_index()\n",
"    .sort_values(by='Value - AUM €', ascending=False)\n",
"    .loc[lambda totals: totals['Value - AUM €'] > 5_000_000]\n",
")\n",
"selected_accounts = aum_account['Registrar Account - ID']\n",
"\n",
"# Independent copies restricted to the selected accounts\n",
"df_aum = df_merged.loc[df_merged['Registrar Account - ID'].isin(selected_accounts)].copy()\n",
"df_flows = dg.loc[dg['Registrar Account - ID'].isin(selected_accounts)].copy()"
]
},
{
"cell_type": "code",
"execution_count": 297,
"id": "91ea0342-607a-420e-af0d-178d063da761",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(31709, 6)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>n_tx</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>2015-01-31</td>\n",
" <td>11819.680</td>\n",
" <td>-1524.010</td>\n",
" <td>15230.010</td>\n",
" <td>32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>2015-02-28</td>\n",
" <td>5705.000</td>\n",
" <td>7247.100</td>\n",
" <td>18571.880</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>2015-03-31</td>\n",
" <td>70038.905</td>\n",
" <td>3655.380</td>\n",
" <td>9754.040</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>2015-04-30</td>\n",
" <td>70324.489</td>\n",
" <td>-218.394</td>\n",
" <td>12840.950</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>2015-05-31</td>\n",
" <td>75567.276</td>\n",
" <td>-4782.849</td>\n",
" <td>6332.849</td>\n",
" <td>24</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty net_flow_qty gross_flow_qty \\\n",
"0 18872 2015-01-31 11819.680 -1524.010 15230.010 \n",
"1 18872 2015-02-28 5705.000 7247.100 18571.880 \n",
"2 18872 2015-03-31 70038.905 3655.380 9754.040 \n",
"3 18872 2015-04-30 70324.489 -218.394 12840.950 \n",
"4 18872 2015-05-31 75567.276 -4782.849 6332.849 \n",
"\n",
" n_tx \n",
"0 32 \n",
"1 38 \n",
"2 47 \n",
"3 39 \n",
"4 24 "
]
},
"execution_count": 297,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Monthly variables per account\n",
"\n",
"ID_COL = \"Registrar Account - ID\"\n",
"FLOW_COL = \"Quantity - NetFlows\"\n",
"AUM_COL = \"Quantity - AUM\"\n",
"\n",
"# Parse dates (unparseable values become NaT) and derive the month key.\n",
"# Flows are daily while AUM is monthly, so both need a common granularity.\n",
"for _table in (df_flows, df_aum):\n",
"    _table[\"Centralisation Date\"] = pd.to_datetime(_table[\"Centralisation Date\"], errors=\"coerce\")\n",
"for _table in (df_flows, df_aum):\n",
"    _table[\"month\"] = _table[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"\n",
"# 1) Monthly aggregation of flows (the study is monthly because AUM is a monthly value)\n",
"flows_valid = df_flows.dropna(subset=[ID_COL, \"month\", FLOW_COL])\n",
"flows_valid = flows_valid.assign(gross_flow_qty=flows_valid[FLOW_COL].abs())  # absolute quantity moved\n",
"df_flows_m = flows_valid.groupby([ID_COL, \"month\"], as_index=False).agg(\n",
"    net_flow_qty=(FLOW_COL, \"sum\"),              # net quantity change over the month\n",
"    gross_flow_qty=(\"gross_flow_qty\", \"sum\"),    # total traded quantity (activity intensity)\n",
"    n_tx=(FLOW_COL, \"size\"),                     # number of flow rows in the month\n",
")\n",
"\n",
"# 2) Monthly aggregation of AUM: quantity summed over all rows of the account-month\n",
"aum_valid = df_aum.dropna(subset=[ID_COL, \"month\", AUM_COL])\n",
"df_aum_m = aum_valid.groupby([ID_COL, \"month\"], as_index=False).agg(aum_qty=(AUM_COL, \"sum\"))\n",
"\n",
"# 3) Left join on AUM: only account-months that have an AUM row are kept\n",
"df_month0 = pd.merge(df_aum_m, df_flows_m, how=\"left\", on=[ID_COL, \"month\"])\n",
"\n",
"# 4) Account-months without any flow row get zero flows\n",
"for _flow_col in (\"net_flow_qty\", \"gross_flow_qty\"):\n",
"    df_month0[_flow_col] = df_month0[_flow_col].fillna(0.0)\n",
"df_month0[\"n_tx\"] = df_month0[\"n_tx\"].fillna(0).astype(int)\n",
"\n",
"print(df_month0.shape)\n",
"df_month0.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "aa176fed-7867-4fb7-9d27-991eeaafaae1",
"metadata": {},
"outputs": [],
"source": [
"eps = EPS  # reuse the notebook-wide epsilon (1e-9) instead of redefining the literal\n",
"\n",
"# 1) Active-month flag: 1 when the client traded during the month.\n",
"#    Many zero months -> stable / dormant client; active almost every month -> active rebalancer / institutional.\n",
"df_month0[\"active_month\"] = (df_month0[\"gross_flow_qty\"] > 0).astype(int)\n",
"\n",
"# 2) Monthly relative intensity (turnover proxy in quantity terms): traded quantity scaled by holdings,\n",
"#    so that small and large clients can be compared.\n",
"# NOTE(review): in a month where aum_qty is 0 but flows are not, the eps denominator makes this ratio\n",
"# enormous and it will dominate the per-client mean below - confirm this is intended.\n",
"df_month0[\"rel_intensity_m\"] = df_month0[\"gross_flow_qty\"] / (df_month0[\"aum_qty\"].abs() + eps)\n",
"\n",
"# 3) Monthly net-flow ratio: captures the direction of the change.\n",
"df_month0[\"netflow_to_aum_m\"] = df_month0[\"net_flow_qty\"] / (df_month0[\"aum_qty\"].abs() + eps)\n",
"\n",
"# 4) Aggregate to client-level features (one row per client)\n",
"df_client_feat0 = (\n",
"    df_month0.groupby(ID_COL, as_index=False)\n",
"    .agg(\n",
"        # Coverage / activity\n",
"        n_months=(\"month\", \"nunique\"),\n",
"        n_active_months=(\"active_month\", \"sum\"),\n",
"        flow_freq=(\"active_month\", \"mean\"),\n",
"\n",
"        # Size in quantity terms\n",
"        aum_qty_mean=(\"aum_qty\", \"mean\"),\n",
"        aum_qty_median=(\"aum_qty\", \"median\"),\n",
"\n",
"        # Flows in quantity terms\n",
"        net_flow_qty_sum=(\"net_flow_qty\", \"sum\"),\n",
"        gross_flow_qty_sum=(\"gross_flow_qty\", \"sum\"),\n",
"        gross_flow_qty_mean=(\"gross_flow_qty\", \"mean\"),\n",
"\n",
"        # Dispersion / volatility proxy\n",
"        net_flow_qty_vol=(\"net_flow_qty\", \"std\"),\n",
"        rel_intensity=(\"rel_intensity_m\", \"mean\"),\n",
"        netflow_to_aum=(\"netflow_to_aum_m\", \"mean\"),\n",
"\n",
"        # Trading frequency proxy\n",
"        n_tx_total=(\"n_tx\", \"sum\"),\n",
"    )\n",
")\n",
"\n",
"# 5) The sample std (ddof=1) is NaN for a client with a single observed month\n",
"#    (a constant series gives 0, not NaN); treat that case as zero volatility.\n",
"df_client_feat0[\"net_flow_qty_vol\"] = df_client_feat0[\"net_flow_qty_vol\"].fillna(0.0)\n",
"\n",
"# 6) Log transforms (useful because the distributions are heavy-tailed); negatives are clipped to 0 first\n",
"df_client_feat0[\"log_aum_qty_mean\"] = np.log1p(df_client_feat0[\"aum_qty_mean\"].clip(lower=0))\n",
"df_client_feat0[\"log_gross_flow_qty_mean\"] = np.log1p(df_client_feat0[\"gross_flow_qty_mean\"].clip(lower=0))\n",
"\n",
"# 7) Global turnover proxy: total traded quantity over mean holdings\n",
"df_client_feat0[\"gross_flow_to_aum\"] = df_client_feat0[\"gross_flow_qty_sum\"] / (df_client_feat0[\"aum_qty_mean\"].abs() + eps)\n",
"\n",
"# Minimal filters (adjust if needed): at least 6 observed months and non-zero mean holdings.\n",
"# Filtering, copying and adding the alias columns in one chain writes the new columns to an\n",
"# independent frame instead of a filtered slice (no chained assignment / SettingWithCopyWarning).\n",
"dfc0 = (\n",
"    df_client_feat0\n",
"    .loc[lambda d: (d[\"n_months\"] >= 6) & (d[\"aum_qty_mean\"].abs() > 0)]\n",
"    .copy()\n",
"    .assign(\n",
"        frequency=lambda d: d[\"flow_freq\"],\n",
"        rel_intensity_total=lambda d: d[\"gross_flow_to_aum\"],\n",
"    )\n",
")\n",
"\n",
"# Compact, interpretable feature set (quantity-based)\n",
"features0 = [\n",
"    \"log_aum_qty_mean\",          # size (log)\n",
"    \"log_gross_flow_qty_mean\",   # activity intensity (log)\n",
"    \"frequency\",                 # activity frequency\n",
"    \"rel_intensity_total\",       # turnover proxy\n",
"    \"net_flow_qty_vol\",          # volatility of net flows\n",
"    \"n_tx_total\",                # total number of transactions\n",
"]\n",
"\n",
"# Build X: rows containing NaN or +/-inf in any feature are dropped\n",
"X0 = (\n",
"    dfc0[features0]\n",
"    .replace([np.inf, -np.inf], np.nan)\n",
"    .dropna()\n",
"    .copy()\n",
")\n",
"\n",
"# Client IDs for exactly the rows kept in X0 (aligned on the index)\n",
"ids = dfc0.loc[X0.index, ID_COL].copy()\n",
"\n",
"# Standardize (critical for distance-based clustering)\n",
"scaler = StandardScaler()\n",
"X_scaled0 = scaler.fit_transform(X0)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "1c616e7c-a1c3-4b9a-9e06-8b942f837266",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAfzdJREFUeJzt3XlcVNX7wPHPzACisskmAq4goAICoiZhprmVWamVW2pllpZppZlbuaZUWmqaGpXlUmalpbmUtmqKOwqJK7iirMoq2zC/P/gxXydAZxSYgXner5cvnXvPvfM8XGAe7zn3HIVGo9EghBBCCGHGlMYOQAghhBDC2KQgEkIIIYTZk4JICCGEEGZPCiIhhBBCmD0piIQQQghh9qQgEkIIIYTZk4JICCGEEGZPCiIhhBBCmD0piIQQQghh9qQgEsKE+fr68vHHH2tff/zxx/j6+pKenm7EqMxHTk4O06ZN4/7778fX15d3333X2CEJIaqIFERCVLONGzfi6+tb4Z/o6Ghjhyj+38qVK9m0aRODBw/m/fff5/HHH6/S9+vWrRsvvfRSme0//vgjrVq1YuTIkeTn51dpDEKYKwtjByCEuRo3bhyenp5ltjdp0sQI0YjyREVF0bZtW8aOHWu0GDZv3syUKVMICwvjk08+oU6dOkaLRYjaTAoiIYzkgQceICAgwNhh1Ci5ubnUq1ev2t4vLS0Nb2/vSjtfUVERxcXFWFlZ6dV+69atTJ48mfvuu0+KISGqmHSZCVEDXb9+nfHjxxMSEkLHjh2ZO3duma6UoqIili1bRvfu3fH396dbt258+OGHFBQUaNvMnz+fjh07otFotNvmzJmDr68vq1ev1m5LTU3F19eXr7/++o6x/fTTTzz55JO0bduW9u3bM3ToUPbs2aPd/99xUaW6devG5MmTta9LuxYPHDjAzJkz6dSpE126dGHHjh3a7f+1fv16fH19OX36tHbbuXPnGDduHB06dCAgIID+/fvz22+/3TaH/fv34+vry+XLl/nzzz+13ZmXL18GSgqlqVOnEhYWRkBAAI899hibNm3SOcfly5fx9fXl888/58svv6R79+4EBARw7ty5O34NAbZt28abb75Jhw4dWL58+R2LIV9fX2bPns327dt55JFHCAwMZODAgZw6dUr7tenRowcBAQEMGzZMm8utjh07xsiRI2nXrh1t27blmWee4fDhwzptrly5wsyZM+nVqxeBgYF07NiRcePGlTlf6fU7fPgw8+fP57777iMoKIhXXnmlzBi4mJgYRo4cSceOHQkMDKRbt25MmTJFr6+TEJVF7hAJYSTZ2dllPhgUCgUNGjS447GvvfYaHh4eTJgwgejoaNasWUNmZibvv/++ts306dPZtGkTvXr14rnnnuP48eOsXLmSc+fOsWzZMgBCQ0P58ssvOXPmDD4+PgAcOnQIpVLJoUOHGD58uHYbQPv27W8b19KlS/n4448JDg5m3LhxWFpacuzYMaKioggPD9f/i3OLWbNm4ejoyCuvvEJubi4PPvgg9erVY/v27XTo0EGn7bZt22jZsqU2lzNnzjB48GAaNmzIqFGjtMe98sorfPzxx/To0aPc9/Ty8uL9999n/vz5uLm58dxzzwHg6OhIXl4ew4YN4+LFiwwdOhRPT0927NjB5MmTyczMZMSIETrn2rhxI/n5+Tz99NNYWVlhb29/x5x/+eUX3nzzTUJDQ1mxYgXW1tZ6fa0OHTrE77//zpAhQwD49NNPGT16NC+88AJff/01Q4YMISMjg88++4ypU6fqFL379u1j1KhR+Pv7M3bsWBQKBRs3bmTEiBF8/fXXBAYGAiXFy9GjR+nTpw9ubm5cuXKFb775huHDh7N161bq1q2rE9PcuXOxs7Nj7NixXLlyha+++orZs2ezaNEioKS4HDlyJA0aNODFF1/Ezs6Oy5cvs3PnTr1yFqLSaIQQ1eqHH37Q+Pj4lPvH399fp62Pj4
9myZIl2tdLlizR+Pj4aEaPHq3TbubMmRofHx9NXFycRqPRaOLi4jQ+Pj6aadOm6bSLiIjQ+Pj4aPbt26fRaDSatLQ0jY+Pj2bdunUajUajyczM1Pj5+WnGjRunCQsL0x43Z84cTYcOHTTFxcUV5nX+/HmNn5+f5pVXXtGo1Wqdfbce99+cSnXt2lXz1ltvlfk6DR48WFNUVKTT9o033tB06tRJZ3tycrLGz89Ps3TpUu22ESNGaB599FFNfn6+TiwDBw7U9OzZs8Jcbo3pxRdf1Nn25Zdfanx8fDQ//fSTdltBQYFm4MCBmqCgIE1WVpZGo9FoLl26pPHx8dGEhIRo0tLS7vhepe8XHh6uad26teaZZ57R5Obm6nWcRqPRfv9cunRJu239+vUaHx8fzf3336+NS6PRaBYuXKjx8fHRti0uLtb07NlT8/zzz+tcq5s3b2q6deumee6553S2/dfRo0c1Pj4+mk2bNmm3lV6/Z599Vuec8+bN07Rq1UqTmZmp0Wg0mp07d2p8fHw0x48f1ztXIaqCdJkJYSTvvPMOq1at0vkTGRmp17FDhw7Vef3MM88A8PfffwPw119/AWjvbJR6/vnndfY7OjrSokUL7R2gI0eOoFKpGDlyJKmpqZw/fx6Aw4cPExISgkKhqDCmXbt2UVxczCuvvIJSqfur5XbH3cnTTz+NSqXS2fbwww+Tlpam0232yy+/UFxczCOPPALAjRs3iIqK4uGHH9bejUtPT+f69euEh4dz/vx5kpKSDI7n77//xsXFhUcffVS7zdLSkmHDhpGbm8vBgwd12vfs2RNHR0e9z5+RkUFRURFubm563xkq1alTJ52B+m3bttXGYGNjo91eerfn0qVLAMTFxXH+/Hn69u3L9evXtV+r3NxcOnXqxMGDBykuLgbQiamwsJDr16/TpEkT7OzsOHHiRJmYnn76aZ3rHxoailqt5sqVKwDY2toC8Oeff1JYWGhQvkJUJukyE8JIAgMD73pQddOmTXVeN2nSBKVSqR3HceXKFZRKZZkn1lxcXLCzs9N+GEHJB1RpgXTo0CH8/f0JCAjAwcGBQ4cO4ezszMmTJ3UKgPJcvHgRpVKJl5fXXeVUkfKexHvggQewtbVl27ZtdOrUCSjpLmvVqhXNmzfXxqPRaFi8eDGLFy8u99xpaWk0bNjQoHiuXLlC06ZNyxR9pXknJibeMf7b6dSpE40aNeKbb77B3t6e6dOna/dlZWWRl5enfW1paYmDg4P2daNGjXTOVVoEubm56WwvLUIyMzMBtIXvW2+9VWFcWVlZ2Nvbk5eXx8qVK9m4cSNJSUk648+ysrLKHOfu7q7z2s7OTue9O3ToQK9evVi6dClffvklHTp0oHv37vTt21fvwedCVAYpiISoBSq6A6PPnZl27dqxYcMGLl26xKFDh2jXrh0KhYKQkBAOHz6Mq6srxcXFhIaGVnbYOtRqdbnbyxtMbGVlRffu3dm5cyczZswgLS2NI0eO8MYbb2jblN7ReP755+ncuXO5566OKQ4MvcsDJXcPMzMzWbNmDfb29rz66qsAvPvuuzqDtzt06MCaNWu0r/97J+1O20uLmdK/J02aRKtWrcptW/p035w5c7Rji4KCgrC1tUWhUPD666/rFEel/ls4/ve9FQoFS5YsITo6mj/++IPdu3czdepUVq1axbfffkv9+vXLPV6IyiYFkRA10IULF2jcuLHO6+LiYu3dCA8PD4qLi7lw4YLOHZvU1FQyMzPx8PDQbmvXrh0A//zzDzExMbz44otAyQDqb775BldXV+rVq0ebNm1uG1OTJk0oLi7m3LlzFX6oAtjb22vvDpQqKCggJSVFz+xLPPzww2zatIl9+/Zx7tw5NBoNDz/8sHZ/6dfH0tKSsLAwg859Ox4eHpw6dYri4mKdD/v4+Hig7B2Ru6FUKnnvvffIyspi6dKl2NvbM3z4cF544QUee+wxbbvSuy33qvRrZWNjc8ev1S+//MITTz
yh80Rgfn5+uXeHDBEUFERQUBCvv/46W7ZsYeLEiWzbto2nnnrqns4rhL5kDJEQNdC6det0Xq9duxYo6UoC6NKlCwB
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAHHCAYAAABXx+fLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjoJJREFUeJzt3Xl4U1X6wPFvku77zl6gxZZCC2UtlEVBEAZFFAT9Kat1Y1wRRMVxsI4KM6gzIg6DoAi4oWyCIiKuqGUH2Rfbshe670va5P7+KAmtXdMmTdq8n+fxmcnNzb3vaULz9pz3nKNSFEVBCCGEEMKOqK0dgBBCCCFEU5MESAghhBB2RxIgIYQQQtgdSYCEEEIIYXckARJCCCGE3ZEESAghhBB2RxIgIYQQQtgdSYCEEEIIYXckARJCCCGE3ZEESAgzGT58OM8995zx8e7duwkPD2f37t3GY1OmTOG2226zRniiiaxYsYKbb76ZiIgIxo0bZ+1whBA1kARIiDqcOnWKJ554gmHDhhEVFcWQIUOYMWMGa9assXZoFvHHH3/w9ttvc/HixSrPffTRR2zYsMEKUTUPv/zyC4sWLaJ3794sWLCAp59+2qL3e+655+jVq1eV4ydPniQmJobhw4dX+z4KIcDB2gEIYcsOHDjA1KlTadu2LRMnTiQwMJCUlBR+//13Vq9ezZQpU4znbtu2DZVKZcVozeOPP/5gyZIl9O/fn/bt21d67pNPPsHX15fx48dbKTrbtmvXLtRqNa+++ipOTk5WieH06dNMnz4dNzc3Vq1aVeU9FEKUkwRIiFr873//w9PTk3Xr1uHl5VXpuYyMjEqPrfWFJxpPURRKSkpwcXFp1HUyMjJwcXEx22fB1LjOnDnDtGnTcHFxYfXq1XTo0MEscQjREskQmBC1OH/+PF26dKmS/AD4+/tXevznGqDa/PHHH0yZMoWePXsyZMgQli9fXuWcjIwM5s2bR2xsLFFRUdx+++1s3Lix0jnV1RkBXLx4kfDw8CrDVYmJiTzxxBP079+fqKgoxo8fz3fffWd8fsOGDTz55JMATJ06lfDwcOP1hw8fzpkzZ9izZ4/xeMUesNzcXF599VVuvPFGIiMjGTlyJO+++y56vb7On8eRI0eIi4sjJiaGHj16MHz4cJ5//vlK5+j1elatWsXYsWOJiopiwIABxMXFceTIEeM5ZWVlvPPOO4wYMYLIyEiGDx/Om2++iVarrXSt4cOH8/DDD7Nz507Gjx9Pjx49+PTTTxvVDsPPu7Cw0PjzMfz8zRFXXRITE5k+fTpOTk71Sn4M9WgnT55k8uTJ9OzZk5EjR7Jt2zYA9uzZw8SJE+nRowejRo3it99+q3KNq1ev8vzzzxMbG0tkZCS33nor69atq3SOVqvlrbfeYvz48fTp04fo6Gjuvfdedu3aVek8w2f2vffeY+3atcaf1YQJEzh8+HClc9PS0nj++ecZOnQokZGRDB48mJkzZ8pwnzCJ9AAJUYt27dpx8OBBTp8+TVhYmFmumZOTwwMPPMDIkSP5y1/+wjfffMPrr79OWFgYN954IwDFxcVMmTKF8+fPc99999G+fXu2bdvGc889R25uLtOmTTP5vmfOnOH//u//aNWqFQ8++CBubm58/fXXPProo7z99tuMHDmSfv36MWXKFNasWcMjjzxCSEgIAKGhocybN49//OMfuLm58cgjjwAQEBAAQFFREZMnT+bq1avcc889tGnThoMHD/Lmm2+SlpbGCy+8UGNcGRkZxMXF4evry0MPPYSXlxcXL17k22+/rXTeCy+8wIYNGxg6dCh33XUXOp2Offv28fvvvxMVFQXA3/72NzZu3MioUaOYMWMGhw8fZtmyZSQmJvLOO+9Uul5ycjKzZ8/m7rvvZtKkSXTu3LlR7fjXv/7FZ599xuHDh3nllVcA6N27t1niqktSUhLTpk1Do9GwevVqgoOD63wNlH8WH3nkEcaMGcPo0a
P55JNPePrpp9Hr9bz22mvcc8893Hbbbbz33ns88cQT/Pjjj3h4eACQnp7OpEmTUKlU3Hffffj5+fHzzz/zwgsvkJ+fz/Tp0wHIz8/n888/57bbbmPixIkUFBSwbt06HnjgAT7//HMiIiIqxfTll19SUFDA3XffjUqlYsWKFTz++OPs2LEDR0dHAB5//HH++OMPJk+eTLt27cjMzOTXX38lJSVFhvxE/SlCiBr98ssvSkREhBIREaHcfffdyr/+9S9l586dilarrXLusGHDlGeffdb4eNeuXUpYWJiya9cu47HJkycrYWFhysaNG43HSkpKlEGDBimPP/648dgHH3yghIWFKV988YXxmFarVe6++24lOjpaycvLq/EeiqIoFy5cUMLCwpT169cbj02bNk257bbblJKSEuMxvV6v3H333cott9xiPPb1119Xe01FUZRbb71VmTx5cpXj77zzjhIdHa0kJydXOv76668rERERyuXLl6u8xuDbb79VwsLClMOHD9d4TkJCghIWFqb84x//qPKcXq9XFEVRTpw4oYSFhSkvvPBCpecXLlyohIWFKQkJCcZjw4YNU8LCwpSff/7ZbO1QFEV59tlnlejo6ErHzBFXbffr3r27MmjQIGXw4MFV4q6N4bO4ZcsW47HExEQlLCxM6dq1q3Lo0CHj8Z07d1b5PM2bN08ZNGiQkpmZWem6s2bNUvr06aMUFRUpiqIoZWVllT5ziqIoOTk5SmxsrPL8888bjxk+s/3791eys7ONx3fs2KGEhYUp33//vfG1YWFhyooVK+rdViGqI0NgQtRi0KBBfPrppwwfPpyTJ0+yYsUK4uLiGDp0aKWhI1O4ublVmh7t5OREVFQUFy5cMB77+eefCQwMrDRl3tHRkSlTplBYWMjevXtNumd2dja7du3iL3/5C/n5+WRmZpKZmUlWVhaDBw/m7NmzXL16tUHtgfIC8D59+uDl5WW8dmZmJrGxseh0ulrj9fT0BODHH3+ktLS02nO2b9+OSqXiscceq/KcofD8p59+AmDGjBmVnr///vsrPW/Qvn17hgwZYrZ21MQccdVGp9ORnZ2Nj48Pvr6+JsXm5ubGrbfeanwcEhKCl5cXoaGh9OzZ03jc8P8Nn1FFUdi+fTvDhw9HUZRKP6vBgweTl5fHsWPHANBoNMaaKL1eT3Z2NmVlZURGRnL8+PEqMY0ZMwZvb2/j4759+1a6t4uLC46OjuzZs4ecnByT2itERTIEJkQdevTowZIlS9BqtZw8eZIdO3bwwQcf8OSTT7Jp0ya6dOli0vVat25dZbaYt7c3p06dMj6+dOkSHTt2RK2u/DdKaGgoAJcvXzbpnufPn0dRFN566y3eeuutas/JyMigVatWJl3X4Ny5c5w6dYqBAwdW+3xmZmaNr+3fvz+jRo1iyZIlfPDBB/Tv358RI0YwduxY4xfn+fPnCQoKwsfHp8brXLp0CbVaXWX4JzAwEC8vLy5dulTpeHVDJY1phyXjqo2LiwuvvPIKc+bM4eGHH+b999/Hzc0NKB9KzcvLq3Jfg+o+i56enrRu3brKMSivj4Lyn0Nubi5r165l7dq11cZV8We1ceNG3n//fZKTkysludW1tU2bNpUeG5Ihw72dnJyYM2cO//znPxk0aBA9e/bkpptu4o477qjUNiHqIgmQEPXk5OREjx496NGjB506deL5559n27Zt1fZK1Eaj0Zgtppqm3f+5YNfw+P7776+xd6G+dSM13W/QoEE88MAD1T7fqVOnGl+rUqlYvHgxhw4d4ocffmDnzp3MmzePlStXsnbtWtzd3U2Kpb5LEVQ3s6ox7bBkXHW59dZbycnJIT4+nscff5ylS5fi5OTE1q1bqxSTV0y0a/os1nRcURTg+ufp9ttv584776z23PDwcAC++OILnnvuOUaMGEFcXBz+/v5oNBqWLVtWqdezvvcGmD59OsOHD2fHjh388ssvvPXWW7z77rusWrWKbt26Vft6If
5MEiAhGiAyMhKA1NRUi1y/Xbt2nDp1Cr1eX6kXKCkpCYC2bdsCGGen/fmv/D/3KhhmBDk6OhIbG1vrvWv7oq7pueD
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best K by silhouette: 5\n"
]
}
],
"source": [
"# 1er clustering\n",
"\n",
"k_range = range(2, 21)\n",
"inertias = []\n",
"silhouettes = []\n",
"\n",
"for k in k_range:\n",
" km = KMeans(n_clusters=k, n_init=30, random_state=42)\n",
" labels = km.fit_predict(X_scaled0)\n",
" inertias.append(km.inertia_)\n",
" silhouettes.append(silhouette_score(X_scaled0, labels))\n",
"\n",
"# Elbow plot\n",
"plt.figure()\n",
"plt.plot(list(k_range), inertias, marker=\"o\")\n",
"plt.xlabel(\"Number of clusters K\")\n",
"plt.ylabel(\"Inertia (within-cluster SSE)\")\n",
"plt.title(\"Elbow curve for K-means\")\n",
"plt.show()\n",
"\n",
"# Silhouette plot\n",
"plt.figure()\n",
"plt.plot(list(k_range), silhouettes, marker=\"o\")\n",
"plt.xlabel(\"Number of clusters K\")\n",
"plt.ylabel(\"Silhouette score\")\n",
"plt.title(\"Silhouette score for K-means\")\n",
"plt.show()\n",
"\n",
"best_k = list(k_range)[int(np.argmax(silhouettes))]\n",
"print(\"Best K by silhouette:\", best_k)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "8f52f288-17fe-4f70-9703-d24e50b6a7a1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_med</th>\n",
" <th>freq_med</th>\n",
" <th>rel_int_med</th>\n",
" <th>gross_flow_med</th>\n",
" <th>n_tx_med</th>\n",
" <th>vol_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_kmeans</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1.0</th>\n",
" <td>242</td>\n",
" <td>2.464422e+04</td>\n",
" <td>0.953846</td>\n",
" <td>8.355143</td>\n",
" <td>2.813915e+03</td>\n",
" <td>1210.0</td>\n",
" <td>3.297591e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2.0</th>\n",
" <td>102</td>\n",
" <td>2.360580e+04</td>\n",
" <td>0.059419</td>\n",
" <td>0.465813</td>\n",
" <td>1.149596e+02</td>\n",
" <td>4.0</td>\n",
" <td>3.228290e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.0</th>\n",
" <td>48</td>\n",
" <td>3.351804e+05</td>\n",
" <td>1.000000</td>\n",
" <td>10.719448</td>\n",
" <td>4.247979e+04</td>\n",
" <td>11538.0</td>\n",
" <td>3.695062e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3.0</th>\n",
" <td>12</td>\n",
" <td>8.769226e+03</td>\n",
" <td>0.968254</td>\n",
" <td>127.337410</td>\n",
" <td>1.484639e+04</td>\n",
" <td>2458.0</td>\n",
" <td>1.675758e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4.0</th>\n",
" <td>1</td>\n",
" <td>2.559419e+07</td>\n",
" <td>1.000000</td>\n",
" <td>2.433395</td>\n",
" <td>1.112157e+06</td>\n",
" <td>4410.0</td>\n",
" <td>4.349047e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_med freq_med rel_int_med \\\n",
"cluster_kmeans \n",
"1.0 242 2.464422e+04 0.953846 8.355143 \n",
"2.0 102 2.360580e+04 0.059419 0.465813 \n",
"0.0 48 3.351804e+05 1.000000 10.719448 \n",
"3.0 12 8.769226e+03 0.968254 127.337410 \n",
"4.0 1 2.559419e+07 1.000000 2.433395 \n",
"\n",
" gross_flow_med n_tx_med vol_med \n",
"cluster_kmeans \n",
"1.0 2.813915e+03 1210.0 3.297591e+03 \n",
"2.0 1.149596e+02 4.0 3.228290e+02 \n",
"0.0 4.247979e+04 11538.0 3.695062e+04 \n",
"3.0 1.484639e+04 2458.0 1.675758e+04 \n",
"4.0 1.112157e+06 4410.0 4.349047e+06 "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"km = KMeans(n_clusters=5, n_init=50, random_state=42)\n",
"labels_km = km.fit_predict(X_scaled0)\n",
"\n",
"dfc0.loc[X0.index, \"cluster_kmeans\"] = labels_km\n",
"\n",
"# Profiling table (medians = robust to outliers)\n",
"k_profile = (\n",
" dfc0.loc[X0.index]\n",
" .groupby(\"cluster_kmeans\")\n",
" .agg(n_clients=(ID_COL, \"count\"),\n",
" aum_qty_med=(\"aum_qty_mean\", \"median\"),\n",
" freq_med=(\"frequency\", \"median\"),\n",
" rel_int_med=(\"rel_intensity_total\", \"median\"),\n",
" gross_flow_med=(\"gross_flow_qty_mean\", \"median\"),\n",
" n_tx_med=(\"n_tx_total\", \"median\"),\n",
" vol_med=(\"net_flow_qty_vol\", \"median\"),\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
")\n",
"\n",
"k_profile"
]
},
{
"cell_type": "code",
"execution_count": 298,
"id": "8caa4710-c7d5-4397-9d90-82f756499016",
"metadata": {},
"outputs": [],
"source": [
"# Ajout de variables\n",
"\n",
"#external data projet-bdc-data /carmignac /Data Modélisation /Nav\n",
"PATH_NAV = \"s3://projet-bdc-data/carmignac/Data Modélisation/Nav/NAV_Bench_data.csv\" #Cest la table de valorisation / performance du produit.\n",
"PATH_RATES = \"s3://projet-bdc-data/carmignac/Data Modélisation/market data/esterRates.csv\"\n",
"\n",
"# optional competitors\n",
"PATH_COMP_FLOWS = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/daily_estimated_flows.csv\"\n",
"PATH_COMP_PERF = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/weekly_perf_full.csv\"\n",
"PATH_PEERS = \"s3://projet-bdc-carmignac-g3/peers/CAD_peers.csv\"\n",
"\n",
"df_nav = pd.read_csv(PATH_NAV, sep=\";\") \n",
"# Une base de suivi de performance de fonds dans le temps, \n",
"# Price (TF PartPrice) : prix de la part du fond\n",
"# AUM Eur (Assets Under Management) : Taille du fonds en euros\n",
"\n",
"df_rates = pd.read_csv(PATH_RATES,sep=\";\")\n",
"# df_rates : évolution dans le temps dun taux de rendement obligataire (YTM)\n",
"\n",
"df_comp_flows = pd.read_csv(PATH_COMP_FLOWS,sep=\";\")\n",
"# Estimated Fund-level Net Flow (Daily) : Flux nets estimés du fonds\n",
"\n",
"df_comp_perf = pd.read_csv(PATH_COMP_PERF,sep=\";\")\n",
"# perfPeriod : Horizon de performance\n",
"# return : Performance du fonds sur la période donnée\n",
"# percentile : Position du fonds par rapport à ses pairs\n",
"# 0 → top performer\n",
"# 100 → mauvais performer\n",
"\n",
"df_peers = pd.read_csv(PATH_PEERS,sep=\"|\")\n",
"# Global Broad Category Group : grande classe dactifs\n",
"# Global Category : catégorie plus précise et Morningstar Category\n",
"# Index Fund : fonds indiciel (passif)\n",
"# Enhanced Index → quasi-passif (légère surperformance recherchée)\n",
"# Inception Date → date de création de la part\n",
"# Inception Date of Fund's Oldest Share Class → âge réel du fonds\n",
"# Domicile : pays de domiciliation du fonds"
]
},
{
"cell_type": "code",
"execution_count": 299,
"id": "fe081e43-092b-4429-813a-67417e39fd07",
"metadata": {},
"outputs": [],
"source": [
"ID_COL = \"Registrar Account - ID\"\n",
"ISIN_COL = \"Product - Isin\"\n",
"\n",
"FLOW_DATE_COL = \"Centralisation Date\"\n",
"AUM_DATE_COL = \"Centralisation Date\"\n",
"\n",
"FLOW_QTY_COL = \"Quantity - NetFlows\"\n",
"FLOW_SUB_COL = \"Quantity - Subscription\"\n",
"FLOW_RED_COL = \"Quantity - Redemption\"\n",
"\n",
"AUM_QTY_COL = \"Quantity - AUM\"\n",
"AUM_VAL_COL = \"Value - AUM €\"\n",
"\n",
"REGION_COL = \"Registrar Account - Region\"\n",
"COUNTRY_COL = \"RegistrarAccount - Country\"\n",
"\n",
"NAV_DATE_COL = \"Dat\"\n",
"NAV_ISIN_COL = \"Isin\"\n",
"NAV_PRICE_COL = \"Price (TF PartPrice)\"\n",
"NAV_BENCH_COL = \"PriceBench\"\n",
"\n",
"RATE_DATE_COL = \"Date\"\n",
"RATE_VAL_COL = \"Yld to Maturity\""
]
},
{
"cell_type": "code",
"execution_count": 300,
"id": "b2a1cdce-1b1c-45d9-9c74-93f826bd65fd",
"metadata": {},
"outputs": [],
"source": [
"for df, date_col in [\n",
" (df_flows, FLOW_DATE_COL),\n",
" (df_aum, AUM_DATE_COL),\n",
" (df_nav, NAV_DATE_COL),\n",
" (df_rates, RATE_DATE_COL),\n",
"]:\n",
" df[date_col] = pd.to_datetime(df[date_col], errors=\"coerce\")\n",
"\n",
"df_flows[\"month\"] = df_flows[FLOW_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"df_aum[\"month\"] = df_aum[AUM_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"df_nav[\"month\"] = df_nav[NAV_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"df_rates[\"month\"] = df_rates[RATE_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"\n",
"for col in [FLOW_QTY_COL, FLOW_SUB_COL, FLOW_RED_COL]:\n",
" df_flows[col] = pd.to_numeric(df_flows[col], errors=\"coerce\")\n",
"\n",
"for col in [AUM_QTY_COL, AUM_VAL_COL]:\n",
" df_aum[col] = pd.to_numeric(df_aum[col], errors=\"coerce\")\n",
"\n",
"for col in [NAV_PRICE_COL, NAV_BENCH_COL]:\n",
" df_nav[col] = pd.to_numeric(df_nav[col], errors=\"coerce\")\n",
"\n",
"df_rates[RATE_VAL_COL] = pd.to_numeric(df_rates[RATE_VAL_COL], errors=\"coerce\")\n",
"\n",
"for df, col in [(df_flows, ISIN_COL), (df_aum, ISIN_COL)]:\n",
" df[col] = df[col].astype(str).str.strip()\n",
"\n",
"df_nav[NAV_ISIN_COL] = df_nav[NAV_ISIN_COL].astype(str).str.strip()"
]
},
{
"cell_type": "code",
"execution_count": 301,
"id": "e10eb2ef-04cd-4186-b188-72d760b4d778",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(492920, 18)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>region_flow</th>\n",
" <th>country_flow</th>\n",
" <th>active_rel_month</th>\n",
" <th>holding_rel_month</th>\n",
" <th>flow_to_aum_rel</th>\n",
" <th>turnover_rel</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>673.990</td>\n",
" <td>956.01</td>\n",
" <td>859.990</td>\n",
" <td>-186.000</td>\n",
" <td>9.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6.739900e+11</td>\n",
" <td>9.560100e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-02-28</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>988.000</td>\n",
" <td>1712.00</td>\n",
" <td>1350.000</td>\n",
" <td>-362.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.880000e+11</td>\n",
" <td>1.712000e+12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>9.710</td>\n",
" <td>1447.71</td>\n",
" <td>785.710</td>\n",
" <td>-776.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.710000e+09</td>\n",
" <td>1.447710e+12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-04-30</td>\n",
" <td>50219.393</td>\n",
" <td>3.452433e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>-123.234</td>\n",
" <td>1708.19</td>\n",
" <td>853.478</td>\n",
" <td>-976.712</td>\n",
" <td>11.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-2.453913e-03</td>\n",
" <td>3.401455e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-05-31</td>\n",
" <td>53685.393</td>\n",
" <td>3.699729e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>121.000</td>\n",
" <td>529.00</td>\n",
" <td>325.000</td>\n",
" <td>-204.000</td>\n",
" <td>6.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2.253872e-03</td>\n",
" <td>9.853705e-03</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
"0 18872 FR0010135103 2015-01-31 0.000 0.000000e+00 \n",
"1 18872 FR0010135103 2015-02-28 0.000 0.000000e+00 \n",
"2 18872 FR0010135103 2015-03-31 0.000 0.000000e+00 \n",
"3 18872 FR0010135103 2015-04-30 50219.393 3.452433e+07 \n",
"4 18872 FR0010135103 2015-05-31 53685.393 3.699729e+07 \n",
"\n",
" region country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
"0 Switzerland Switzerland 673.990 956.01 859.990 -186.000 \n",
"1 Switzerland Switzerland 988.000 1712.00 1350.000 -362.000 \n",
"2 Switzerland Switzerland 9.710 1447.71 785.710 -776.000 \n",
"3 Switzerland Switzerland -123.234 1708.19 853.478 -976.712 \n",
"4 Switzerland Switzerland 121.000 529.00 325.000 -204.000 \n",
"\n",
" n_tx region_flow country_flow active_rel_month holding_rel_month \\\n",
"0 9.0 Switzerland Switzerland 1 0 \n",
"1 12.0 Switzerland Switzerland 1 0 \n",
"2 12.0 Switzerland Switzerland 1 0 \n",
"3 11.0 Switzerland Switzerland 1 1 \n",
"4 6.0 Switzerland Switzerland 1 1 \n",
"\n",
" flow_to_aum_rel turnover_rel \n",
"0 6.739900e+11 9.560100e+11 \n",
"1 9.880000e+11 1.712000e+12 \n",
"2 9.710000e+09 1.447710e+12 \n",
"3 -2.453913e-03 3.401455e-02 \n",
"4 2.253872e-03 9.853705e-03 "
]
},
"execution_count": 301,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_flows_rel_m = (\n",
" df_flows\n",
" .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
" .assign(\n",
" gross_flow_qty=lambda x: x[FLOW_QTY_COL].abs(),\n",
" sub_qty=lambda x: x[FLOW_SUB_COL].fillna(0),\n",
" red_qty=lambda x: x[FLOW_RED_COL].fillna(0)\n",
" )\n",
" .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
" .agg(\n",
" net_flow_qty=(FLOW_QTY_COL, \"sum\"),\n",
" gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
" sub_qty=(\"sub_qty\", \"sum\"),\n",
" red_qty=(\"red_qty\", \"sum\"),\n",
" n_tx=(FLOW_QTY_COL, \"size\"),\n",
" region=(REGION_COL, \"last\"),\n",
" country=(COUNTRY_COL, \"last\")\n",
" )\n",
")\n",
"\n",
"df_aum_rel_m = (\n",
" df_aum\n",
" .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
" .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
" .agg(\n",
" aum_qty=(AUM_QTY_COL, \"sum\"),\n",
" aum_val=(AUM_VAL_COL, \"sum\"),\n",
" region=(REGION_COL, \"last\"),\n",
" country=(COUNTRY_COL, \"last\")\n",
" )\n",
")\n",
"\n",
"keys = pd.concat([\n",
" df_flows_rel_m[[ID_COL, ISIN_COL, \"month\"]],\n",
" df_aum_rel_m[[ID_COL, ISIN_COL, \"month\"]]\n",
"]).drop_duplicates()\n",
"\n",
"df_rel_m = (\n",
" keys\n",
" .merge(df_aum_rel_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\", suffixes=(\"\", \"_aum\"))\n",
" .merge(df_flows_rel_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\", suffixes=(\"\", \"_flow\"))\n",
")\n",
"\n",
"for c in [\"aum_qty\", \"aum_val\", \"net_flow_qty\", \"gross_flow_qty\", \"sub_qty\", \"red_qty\", \"n_tx\"]:\n",
" df_rel_m[c] = df_rel_m[c].fillna(0)\n",
"\n",
"df_rel_m[\"region\"] = df_rel_m[\"region\"].fillna(df_rel_m.get(\"region_flow\"))\n",
"df_rel_m[\"country\"] = df_rel_m[\"country\"].fillna(df_rel_m.get(\"country_flow\"))\n",
"\n",
"df_rel_m[\"active_rel_month\"] = (df_rel_m[\"gross_flow_qty\"] > 0).astype(int)\n",
"df_rel_m[\"holding_rel_month\"] = (df_rel_m[\"aum_qty\"] > 0).astype(int)\n",
"df_rel_m[\"flow_to_aum_rel\"] = df_rel_m[\"net_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + EPS)\n",
"df_rel_m[\"turnover_rel\"] = df_rel_m[\"gross_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + EPS)\n",
"\n",
"print(df_rel_m.shape)\n",
"df_rel_m.head()"
]
},
{
"cell_type": "code",
"execution_count": 302,
"id": "321b09ab-90f0-4add-a670-0d8c74046e03",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>region_flow</th>\n",
" <th>country_flow</th>\n",
" <th>active_rel_month</th>\n",
" <th>holding_rel_month</th>\n",
" <th>flow_to_aum_rel</th>\n",
" <th>turnover_rel</th>\n",
" <th>ret_fund_m</th>\n",
" <th>ret_bench_m</th>\n",
" <th>active_return_m</th>\n",
" <th>delta_rate_m</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>673.990</td>\n",
" <td>956.01</td>\n",
" <td>859.990</td>\n",
" <td>-186.000</td>\n",
" <td>9.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6.739900e+11</td>\n",
" <td>9.560100e+11</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.058</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-02-28</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>988.000</td>\n",
" <td>1712.00</td>\n",
" <td>1350.000</td>\n",
" <td>-362.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.880000e+11</td>\n",
" <td>1.712000e+12</td>\n",
" <td>0.121368</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>9.710</td>\n",
" <td>1447.71</td>\n",
" <td>785.710</td>\n",
" <td>-776.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.710000e+09</td>\n",
" <td>1.447710e+12</td>\n",
" <td>0.068598</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-04-30</td>\n",
" <td>50219.393</td>\n",
" <td>3.452433e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>-123.234</td>\n",
" <td>1708.19</td>\n",
" <td>853.478</td>\n",
" <td>-976.712</td>\n",
" <td>11.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-2.453913e-03</td>\n",
" <td>3.401455e-02</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.077</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-05-31</td>\n",
" <td>53685.393</td>\n",
" <td>3.699729e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>121.000</td>\n",
" <td>529.00</td>\n",
" <td>325.000</td>\n",
" <td>-204.000</td>\n",
" <td>6.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2.253872e-03</td>\n",
" <td>9.853705e-03</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.053</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
"0 18872 FR0010135103 2015-01-31 0.000 0.000000e+00 \n",
"1 18872 FR0010135103 2015-02-28 0.000 0.000000e+00 \n",
"2 18872 FR0010135103 2015-03-31 0.000 0.000000e+00 \n",
"3 18872 FR0010135103 2015-04-30 50219.393 3.452433e+07 \n",
"4 18872 FR0010135103 2015-05-31 53685.393 3.699729e+07 \n",
"\n",
" region country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
"0 Switzerland Switzerland 673.990 956.01 859.990 -186.000 \n",
"1 Switzerland Switzerland 988.000 1712.00 1350.000 -362.000 \n",
"2 Switzerland Switzerland 9.710 1447.71 785.710 -776.000 \n",
"3 Switzerland Switzerland -123.234 1708.19 853.478 -976.712 \n",
"4 Switzerland Switzerland 121.000 529.00 325.000 -204.000 \n",
"\n",
" n_tx region_flow country_flow active_rel_month holding_rel_month \\\n",
"0 9.0 Switzerland Switzerland 1 0 \n",
"1 12.0 Switzerland Switzerland 1 0 \n",
"2 12.0 Switzerland Switzerland 1 0 \n",
"3 11.0 Switzerland Switzerland 1 1 \n",
"4 6.0 Switzerland Switzerland 1 1 \n",
"\n",
" flow_to_aum_rel turnover_rel ret_fund_m ret_bench_m active_return_m \\\n",
"0 6.739900e+11 9.560100e+11 0.000000 0.0 0.0 \n",
"1 9.880000e+11 1.712000e+12 0.121368 0.0 0.0 \n",
"2 9.710000e+09 1.447710e+12 0.068598 0.0 0.0 \n",
"3 -2.453913e-03 3.401455e-02 0.000000 0.0 0.0 \n",
"4 2.253872e-03 9.853705e-03 0.000000 0.0 0.0 \n",
"\n",
" delta_rate_m \n",
"0 -0.058 \n",
"1 -0.022 \n",
"2 -0.014 \n",
"3 -0.077 \n",
"4 -0.053 "
]
},
"execution_count": 302,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Ajout\n",
"\n",
"df_nav_m = (\n",
" df_nav\n",
" .dropna(subset=[NAV_ISIN_COL, \"month\", NAV_PRICE_COL])\n",
" .sort_values([NAV_ISIN_COL, \"month\"])\n",
" .groupby([NAV_ISIN_COL, \"month\"], as_index=False)\n",
" .tail(1)\n",
" .copy()\n",
")\n",
"\n",
"df_nav_m[\"ret_fund_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].pct_change()\n",
"df_nav_m[\"ret_bench_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_BENCH_COL].pct_change()\n",
"df_nav_m[\"active_return_m\"] = df_nav_m[\"ret_fund_m\"] - df_nav_m[\"ret_bench_m\"]\n",
"\n",
"df_nav_m = df_nav_m.rename(columns={NAV_ISIN_COL: ISIN_COL})\n",
"df_nav_m = df_nav_m[[ISIN_COL, \"month\", \"ret_fund_m\", \"ret_bench_m\", \"active_return_m\"]]\n",
"\n",
"df_rates_m = (\n",
" df_rates\n",
" .dropna(subset=[\"month\", RATE_VAL_COL])\n",
" .sort_values(RATE_DATE_COL)\n",
" .groupby(\"month\", as_index=False)\n",
" .tail(1)\n",
" .copy()\n",
")\n",
"\n",
"df_rates_m[\"delta_rate_m\"] = df_rates_m[RATE_VAL_COL].diff()\n",
"df_rates_m = df_rates_m[[\"month\", RATE_VAL_COL, \"delta_rate_m\"]]\n",
"\n",
"\n",
" \n",
"df_rel_m = df_rel_m.merge(\n",
" df_nav_m,\n",
" on=[ISIN_COL, \"month\"],\n",
" how=\"left\"\n",
")\n",
"\n",
"df_rel_m = df_rel_m.merge(\n",
" df_rates_m[[\"month\", \"delta_rate_m\"]],\n",
" on=\"month\",\n",
" how=\"left\"\n",
")\n",
"\n",
"for c in [\"ret_fund_m\", \"ret_bench_m\", \"active_return_m\", \"delta_rate_m\"]:\n",
" df_rel_m[c] = df_rel_m[c].fillna(0)\n",
"\n",
"df_rel_m.head()"
]
},
{
"cell_type": "code",
"execution_count": 303,
"id": "614bf72b-7afa-4633-ba09-22540a441459",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(31709, 23)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>n_isin_held</th>\n",
" <th>n_isin_active</th>\n",
" <th>delta_rate_m</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>ret_fund_m</th>\n",
" <th>ret_bench_m</th>\n",
" <th>active_month</th>\n",
" <th>flow_to_aum_m</th>\n",
" <th>turnover_m</th>\n",
" <th>sub_share_m</th>\n",
" <th>red_share_m</th>\n",
" <th>aum_peak_to_date</th>\n",
" <th>aum_drawdown</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>2015-01-31</td>\n",
" <td>11819.680</td>\n",
" <td>1.694553e+06</td>\n",
" <td>-1524.010</td>\n",
" <td>15230.010</td>\n",
" <td>6897.990</td>\n",
" <td>-8422.000</td>\n",
" <td>32.0</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" <td>-0.058</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.013100</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>-0.128938</td>\n",
" <td>1.288530</td>\n",
" <td>0.452921</td>\n",
" <td>-0.552987</td>\n",
" <td>11819.680</td>\n",
" <td>8.459899e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>2015-02-28</td>\n",
" <td>5705.000</td>\n",
" <td>7.008600e+05</td>\n",
" <td>7247.100</td>\n",
" <td>18571.880</td>\n",
" <td>13219.490</td>\n",
" <td>-5972.390</td>\n",
" <td>38.0</td>\n",
" <td>3</td>\n",
" <td>13</td>\n",
" <td>-0.022</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.079848</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1.270307</td>\n",
" <td>3.255369</td>\n",
" <td>0.711801</td>\n",
" <td>-0.321582</td>\n",
" <td>11819.680</td>\n",
" <td>5.173304e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>2015-03-31</td>\n",
" <td>70038.905</td>\n",
" <td>1.503549e+07</td>\n",
" <td>3655.380</td>\n",
" <td>9754.040</td>\n",
" <td>6767.710</td>\n",
" <td>-3112.330</td>\n",
" <td>47.0</td>\n",
" <td>4</td>\n",
" <td>14</td>\n",
" <td>-0.014</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.005051</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0.052191</td>\n",
" <td>0.139266</td>\n",
" <td>0.693837</td>\n",
" <td>-0.319081</td>\n",
" <td>70038.905</td>\n",
" <td>1.432188e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>2015-04-30</td>\n",
" <td>70324.489</td>\n",
" <td>3.928292e+07</td>\n",
" <td>-218.394</td>\n",
" <td>12840.950</td>\n",
" <td>6384.278</td>\n",
" <td>-6602.672</td>\n",
" <td>39.0</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" <td>-0.077</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>-0.003106</td>\n",
" <td>0.182596</td>\n",
" <td>0.497181</td>\n",
" <td>-0.514189</td>\n",
" <td>70324.489</td>\n",
" <td>1.432188e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>2015-05-31</td>\n",
" <td>75567.276</td>\n",
" <td>3.987712e+07</td>\n",
" <td>-4782.849</td>\n",
" <td>6332.849</td>\n",
" <td>775.000</td>\n",
" <td>-5557.849</td>\n",
" <td>24.0</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>-0.053</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>-0.063293</td>\n",
" <td>0.083804</td>\n",
" <td>0.122378</td>\n",
" <td>-0.877622</td>\n",
" <td>75567.276</td>\n",
" <td>1.332268e-14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty aum_val net_flow_qty \\\n",
"0 18872 2015-01-31 11819.680 1.694553e+06 -1524.010 \n",
"1 18872 2015-02-28 5705.000 7.008600e+05 7247.100 \n",
"2 18872 2015-03-31 70038.905 1.503549e+07 3655.380 \n",
"3 18872 2015-04-30 70324.489 3.928292e+07 -218.394 \n",
"4 18872 2015-05-31 75567.276 3.987712e+07 -4782.849 \n",
"\n",
" gross_flow_qty sub_qty red_qty n_tx n_isin_held n_isin_active \\\n",
"0 15230.010 6897.990 -8422.000 32.0 4 13 \n",
"1 18571.880 13219.490 -5972.390 38.0 3 13 \n",
"2 9754.040 6767.710 -3112.330 47.0 4 14 \n",
"3 12840.950 6384.278 -6602.672 39.0 4 13 \n",
"4 6332.849 775.000 -5557.849 24.0 7 9 \n",
"\n",
" delta_rate_m region country ret_fund_m ret_bench_m \\\n",
"0 -0.058 Switzerland Switzerland 0.013100 0.0 \n",
"1 -0.022 Switzerland Switzerland 0.079848 0.0 \n",
"2 -0.014 Switzerland Switzerland 0.005051 0.0 \n",
"3 -0.077 Switzerland Switzerland 0.000000 0.0 \n",
"4 -0.053 Switzerland Switzerland 0.000000 0.0 \n",
"\n",
" active_month flow_to_aum_m turnover_m sub_share_m red_share_m \\\n",
"0 1 -0.128938 1.288530 0.452921 -0.552987 \n",
"1 1 1.270307 3.255369 0.711801 -0.321582 \n",
"2 1 0.052191 0.139266 0.693837 -0.319081 \n",
"3 1 -0.003106 0.182596 0.497181 -0.514189 \n",
"4 1 -0.063293 0.083804 0.122378 -0.877622 \n",
"\n",
" aum_peak_to_date aum_drawdown \n",
"0 11819.680 8.459899e-14 \n",
"1 11819.680 5.173304e-01 \n",
"2 70038.905 1.432188e-14 \n",
"3 70324.489 1.432188e-14 \n",
"4 75567.276 1.332268e-14 "
]
},
"execution_count": 303,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Données agrégées sur les ISIN\n",
"\n",
"# =========================\n",
"# ULTRA LIGHT VERSION\n",
"# =========================\n",
"\n",
"tmp = df_rel_m.copy()\n",
"tmp[\"isin_held_flag\"] = (tmp[\"aum_qty\"] > 0).astype(int)\n",
"tmp[\"isin_active_flag\"] = (tmp[\"gross_flow_qty\"] > 0).astype(int)\n",
"\n",
"tmp[\"aum_total\"] = tmp.groupby([ID_COL, \"month\"])[\"aum_qty\"].transform(\"sum\")\n",
"tmp[\"w\"] = tmp[\"aum_qty\"] / (tmp[\"aum_total\"] + 1e-12)\n",
"tmp[\"ret_fund_w\"] = tmp[\"w\"] * tmp[\"ret_fund_m\"]\n",
"tmp[\"ret_bench_w\"] = tmp[\"w\"] * tmp[\"ret_bench_m\"]\n",
"\n",
"df_month = (\n",
" tmp.groupby([ID_COL, \"month\"], as_index=False)\n",
" .agg(\n",
" aum_qty=(\"aum_qty\", \"sum\"),\n",
" aum_val=(\"aum_val\", \"sum\"),\n",
" net_flow_qty=(\"net_flow_qty\", \"sum\"),\n",
" gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
" sub_qty=(\"sub_qty\", \"sum\"),\n",
" red_qty=(\"red_qty\", \"sum\"),\n",
" n_tx=(\"n_tx\", \"sum\"),\n",
" n_isin_held=(\"isin_held_flag\", \"sum\"),\n",
" n_isin_active=(\"isin_active_flag\", \"sum\"),\n",
" delta_rate_m=(\"delta_rate_m\", \"first\"),\n",
" region=(\"region\", \"first\"),\n",
" country=(\"country\", \"first\"),\n",
" ret_fund_m=(\"ret_fund_w\", \"sum\"),\n",
" ret_bench_m=(\"ret_bench_w\", \"sum\")\n",
" )\n",
" .sort_values([ID_COL, \"month\"])\n",
" .reset_index(drop=True)\n",
")\n",
"\n",
"\n",
"df_month[\"active_month\"] = (df_month[\"gross_flow_qty\"] > 0).astype(int)\n",
"df_month[\"flow_to_aum_m\"] = df_month[\"net_flow_qty\"] / (df_month[\"aum_qty\"].abs() + EPS)\n",
"df_month[\"turnover_m\"] = df_month[\"gross_flow_qty\"] / (df_month[\"aum_qty\"].abs() + EPS)\n",
"df_month[\"sub_share_m\"] = df_month[\"sub_qty\"] / (df_month[\"gross_flow_qty\"] + EPS)\n",
"df_month[\"red_share_m\"] = df_month[\"red_qty\"] / (df_month[\"gross_flow_qty\"] + EPS)\n",
"\n",
"df_month[\"aum_peak_to_date\"] = df_month.groupby(ID_COL)[\"aum_qty\"].cummax()\n",
"df_month[\"aum_drawdown\"] = 1 - (df_month[\"aum_qty\"] / (df_month[\"aum_peak_to_date\"] + EPS))\n",
"df_month = df_month[df_month[\"month\"] <= '2025-10-31']\n",
"\n",
"key_cols = [\"Registrar Account - ID\", \"month\"]\n",
"\n",
"df_month = df_month.merge(\n",
" df_month0[key_cols].drop_duplicates(),\n",
" on=key_cols,\n",
" how=\"inner\"\n",
")\n",
"print(df_month.shape)\n",
"df_month.head()"
]
},
{
"cell_type": "code",
"execution_count": 304,
"id": "2e01fa4f-ba89-4c8a-8cbb-528d89bc811c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>rel_n_months</th>\n",
" <th>rel_active_months</th>\n",
" <th>rel_holding_months</th>\n",
" <th>rel_aum_mean</th>\n",
" <th>rel_turnover_mean</th>\n",
" <th>rel_turnover_vol</th>\n",
" <th>rel_flow_to_aum_vol</th>\n",
" <th>rel_n_tx</th>\n",
" <th>rel_full_exit_count</th>\n",
" <th>rel_entry_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>98</td>\n",
" <td>91</td>\n",
" <td>26</td>\n",
" <td>2519.829520</td>\n",
" <td>5.898325e+11</td>\n",
" <td>9.652436e+11</td>\n",
" <td>9.242856e+11</td>\n",
" <td>382.0</td>\n",
" <td>12</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>FR0010147603</td>\n",
" <td>17</td>\n",
" <td>8</td>\n",
" <td>10</td>\n",
" <td>695.058824</td>\n",
" <td>1.685294e+11</td>\n",
" <td>3.805578e+11</td>\n",
" <td>3.805578e+11</td>\n",
" <td>9.0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>FR0010148981</td>\n",
" <td>81</td>\n",
" <td>66</td>\n",
" <td>28</td>\n",
" <td>831.906963</td>\n",
" <td>6.628200e+10</td>\n",
" <td>1.140022e+11</td>\n",
" <td>1.212644e+11</td>\n",
" <td>149.0</td>\n",
" <td>16</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>FR0010149112</td>\n",
" <td>19</td>\n",
" <td>12</td>\n",
" <td>5</td>\n",
" <td>885.208737</td>\n",
" <td>5.886253e+11</td>\n",
" <td>1.251992e+12</td>\n",
" <td>1.273644e+12</td>\n",
" <td>13.0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>FR0010149120</td>\n",
" <td>99</td>\n",
" <td>79</td>\n",
" <td>36</td>\n",
" <td>425.655010</td>\n",
" <td>1.673836e+11</td>\n",
" <td>6.287132e+11</td>\n",
" <td>6.316077e+11</td>\n",
" <td>152.0</td>\n",
" <td>14</td>\n",
" <td>15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin rel_n_months rel_active_months \\\n",
"0 18872 FR0010135103 98 91 \n",
"1 18872 FR0010147603 17 8 \n",
"2 18872 FR0010148981 81 66 \n",
"3 18872 FR0010149112 19 12 \n",
"4 18872 FR0010149120 99 79 \n",
"\n",
" rel_holding_months rel_aum_mean rel_turnover_mean rel_turnover_vol \\\n",
"0 26 2519.829520 5.898325e+11 9.652436e+11 \n",
"1 10 695.058824 1.685294e+11 3.805578e+11 \n",
"2 28 831.906963 6.628200e+10 1.140022e+11 \n",
"3 5 885.208737 5.886253e+11 1.251992e+12 \n",
"4 36 425.655010 1.673836e+11 6.287132e+11 \n",
"\n",
" rel_flow_to_aum_vol rel_n_tx rel_full_exit_count rel_entry_count \n",
"0 9.242856e+11 382.0 12 13 \n",
"1 3.805578e+11 9.0 4 4 \n",
"2 1.212644e+11 149.0 16 17 \n",
"3 1.273644e+12 13.0 3 3 \n",
"4 6.316077e+11 152.0 14 15 "
]
},
"execution_count": 304,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per (account, ISIN) relationship features, aggregated over months\n",
"rel_keys = [ID_COL, ISIN_COL]\n",
"tmp = df_rel_m.sort_values(rel_keys + [\"month\"]).copy()\n",
"\n",
"# AUM on the previous row of the same relationship (NaN on its first row)\n",
"tmp[\"prev_aum\"] = tmp.groupby(rel_keys)[\"aum_qty\"].shift(1)\n",
"\n",
"# Exit: positive AUM on the previous row and none on the current one\n",
"exited = (tmp[\"prev_aum\"] > 0) & (tmp[\"aum_qty\"] <= 0)\n",
"# Entry: no (or missing) AUM on the previous row and positive AUM now\n",
"entered = (tmp[\"prev_aum\"].fillna(0) <= 0) & (tmp[\"aum_qty\"] > 0)\n",
"tmp[\"full_exit_event\"] = exited.astype(int)\n",
"tmp[\"entry_event\"] = entered.astype(int)\n",
"\n",
"# Output column -> (input column, aggregation); dict order is the column order\n",
"rel_agg_spec = {\n",
"    \"rel_n_months\": (\"month\", \"nunique\"),\n",
"    \"rel_active_months\": (\"active_rel_month\", \"sum\"),\n",
"    \"rel_holding_months\": (\"holding_rel_month\", \"sum\"),\n",
"    \"rel_aum_mean\": (\"aum_qty\", \"mean\"),\n",
"    \"rel_turnover_mean\": (\"turnover_rel\", \"mean\"),\n",
"    \"rel_turnover_vol\": (\"turnover_rel\", \"std\"),\n",
"    \"rel_flow_to_aum_vol\": (\"flow_to_aum_rel\", \"std\"),\n",
"    \"rel_n_tx\": (\"n_tx\", \"sum\"),\n",
"    \"rel_full_exit_count\": (\"full_exit_event\", \"sum\"),\n",
"    \"rel_entry_count\": (\"entry_event\", \"sum\"),\n",
"}\n",
"df_rel_feat = tmp.groupby(rel_keys, as_index=False).agg(**rel_agg_spec)\n",
"\n",
"df_rel_feat.head()"
]
},
{
"cell_type": "code",
"execution_count": 305,
"id": "2d81b4fd-f82d-42f1-ba03-8460706fea0d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(431, 40)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>aum_qty_max</th>\n",
" <th>aum_qty_last</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>n_tx_total</th>\n",
" <th>net_flow_vol</th>\n",
" <th>turnover_mean</th>\n",
" <th>turnover_vol</th>\n",
" <th>flow_to_aum_mean</th>\n",
" <th>flow_to_aum_vol</th>\n",
" <th>avg_n_isin_held</th>\n",
" <th>max_n_isin_held</th>\n",
" <th>sub_share_mean</th>\n",
" <th>red_share_mean</th>\n",
" <th>delta_rate_mean</th>\n",
" <th>aum_drawdown_last</th>\n",
" <th>aum_drawdown_max</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>n_isin_total</th>\n",
" <th>rel_turnover_mean_avg</th>\n",
" <th>rel_turnover_vol_avg</th>\n",
" <th>rel_flow_to_aum_vol_avg</th>\n",
" <th>full_exit_count</th>\n",
" <th>entry_count</th>\n",
" <th>avg_holding_months_per_isin</th>\n",
" <th>max_holding_months_per_isin</th>\n",
" <th>corr_flow_fund_lag3</th>\n",
" <th>corr_flow_fund_lag6</th>\n",
" <th>corr_flow_bench_lag3</th>\n",
" <th>corr_flow_bench_lag6</th>\n",
" <th>corr_flow_rate_lag3</th>\n",
" <th>corr_flow_rate_lag6</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>23477.224308</td>\n",
" <td>14880.4715</td>\n",
" <td>88818.372</td>\n",
" <td>67570.855</td>\n",
" <td>-45677.1480</td>\n",
" <td>1.244126e+06</td>\n",
" <td>9570.200015</td>\n",
" <td>1926.0</td>\n",
" <td>9832.357264</td>\n",
" <td>6.382330e+10</td>\n",
" <td>5.151309e+11</td>\n",
" <td>-2.560792e+10</td>\n",
" <td>2.841988e+11</td>\n",
" <td>7.507692</td>\n",
" <td>26</td>\n",
" <td>0.429844</td>\n",
" <td>-0.576520</td>\n",
" <td>0.013723</td>\n",
" <td>2.392243e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>107</td>\n",
" <td>4.063407e+11</td>\n",
" <td>8.956214e+11</td>\n",
" <td>8.915940e+11</td>\n",
" <td>310</td>\n",
" <td>344</td>\n",
" <td>9.121495</td>\n",
" <td>36</td>\n",
" <td>0.007825</td>\n",
" <td>0.008326</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.007546</td>\n",
" <td>0.014510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200000076</td>\n",
" <td>130</td>\n",
" <td>119</td>\n",
" <td>0.915385</td>\n",
" <td>15840.000331</td>\n",
" <td>9272.4710</td>\n",
" <td>50732.461</td>\n",
" <td>44837.203</td>\n",
" <td>54791.9840</td>\n",
" <td>2.314415e+05</td>\n",
" <td>1780.319492</td>\n",
" <td>518.0</td>\n",
" <td>2838.000232</td>\n",
" <td>1.457820e-01</td>\n",
" <td>2.457632e-01</td>\n",
" <td>-1.707090e-02</td>\n",
" <td>2.717209e-01</td>\n",
" <td>4.700000</td>\n",
" <td>9</td>\n",
" <td>0.508681</td>\n",
" <td>-0.415876</td>\n",
" <td>0.013723</td>\n",
" <td>1.162029e-01</td>\n",
" <td>0.949206</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>22</td>\n",
" <td>6.276897e+10</td>\n",
" <td>2.469731e+11</td>\n",
" <td>2.481822e+11</td>\n",
" <td>71</td>\n",
" <td>81</td>\n",
" <td>27.772727</td>\n",
" <td>85</td>\n",
" <td>0.015278</td>\n",
" <td>0.096449</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.025181</td>\n",
" <td>0.012844</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200000082</td>\n",
" <td>71</td>\n",
" <td>71</td>\n",
" <td>1.000000</td>\n",
" <td>85194.200239</td>\n",
" <td>25820.0550</td>\n",
" <td>316149.358</td>\n",
" <td>131158.471</td>\n",
" <td>14575.5560</td>\n",
" <td>1.229616e+06</td>\n",
" <td>17318.539183</td>\n",
" <td>4807.0</td>\n",
" <td>13472.042652</td>\n",
" <td>4.056892e+11</td>\n",
" <td>2.421685e+12</td>\n",
" <td>-9.687862e+10</td>\n",
" <td>8.402113e+11</td>\n",
" <td>1.760563</td>\n",
" <td>4</td>\n",
" <td>0.438873</td>\n",
" <td>-0.588724</td>\n",
" <td>0.034282</td>\n",
" <td>5.851376e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>18</td>\n",
" <td>1.147803e+12</td>\n",
" <td>1.251086e+12</td>\n",
" <td>1.333111e+12</td>\n",
" <td>100</td>\n",
" <td>101</td>\n",
" <td>6.944444</td>\n",
" <td>19</td>\n",
" <td>-0.019860</td>\n",
" <td>-0.020797</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.022861</td>\n",
" <td>-0.135696</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200000146</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>71298.603700</td>\n",
" <td>15953.6355</td>\n",
" <td>519508.539</td>\n",
" <td>519508.539</td>\n",
" <td>457533.3310</td>\n",
" <td>1.150546e+06</td>\n",
" <td>8850.350438</td>\n",
" <td>4774.0</td>\n",
" <td>10074.748210</td>\n",
" <td>4.770901e+00</td>\n",
" <td>2.930221e+01</td>\n",
" <td>3.780801e+00</td>\n",
" <td>2.870987e+01</td>\n",
" <td>6.684615</td>\n",
" <td>14</td>\n",
" <td>0.517815</td>\n",
" <td>-0.556667</td>\n",
" <td>0.013723</td>\n",
" <td>1.887379e-15</td>\n",
" <td>0.999302</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>33</td>\n",
" <td>2.123548e+11</td>\n",
" <td>3.670050e+11</td>\n",
" <td>3.882699e+11</td>\n",
" <td>237</td>\n",
" <td>256</td>\n",
" <td>26.333333</td>\n",
" <td>54</td>\n",
" <td>0.281071</td>\n",
" <td>-0.020188</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.018482</td>\n",
" <td>-0.018833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200000147</td>\n",
" <td>129</td>\n",
" <td>129</td>\n",
" <td>1.000000</td>\n",
" <td>35957.851907</td>\n",
" <td>18047.3390</td>\n",
" <td>174703.188</td>\n",
" <td>8478.402</td>\n",
" <td>677424.2191</td>\n",
" <td>1.210845e+06</td>\n",
" <td>9386.398474</td>\n",
" <td>7523.0</td>\n",
" <td>13914.783110</td>\n",
" <td>1.775257e+00</td>\n",
" <td>8.769726e+00</td>\n",
" <td>1.150007e+00</td>\n",
" <td>7.862819e+00</td>\n",
" <td>13.162791</td>\n",
" <td>27</td>\n",
" <td>0.599433</td>\n",
" <td>-0.448172</td>\n",
" <td>0.013837</td>\n",
" <td>9.514697e-01</td>\n",
" <td>0.996847</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>78</td>\n",
" <td>5.279255e+11</td>\n",
" <td>6.892142e+11</td>\n",
" <td>6.858178e+11</td>\n",
" <td>596</td>\n",
" <td>619</td>\n",
" <td>21.769231</td>\n",
" <td>49</td>\n",
" <td>-0.026933</td>\n",
" <td>-0.010493</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.204637</td>\n",
" <td>-0.109646</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
"0 18872 130 130 1.000000 23477.224308 \n",
"1 200000076 130 119 0.915385 15840.000331 \n",
"2 200000082 71 71 1.000000 85194.200239 \n",
"3 200000146 130 130 1.000000 71298.603700 \n",
"4 200000147 129 129 1.000000 35957.851907 \n",
"\n",
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
"0 14880.4715 88818.372 67570.855 -45677.1480 \n",
"1 9272.4710 50732.461 44837.203 54791.9840 \n",
"2 25820.0550 316149.358 131158.471 14575.5560 \n",
"3 15953.6355 519508.539 519508.539 457533.3310 \n",
"4 18047.3390 174703.188 8478.402 677424.2191 \n",
"\n",
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
"0 1.244126e+06 9570.200015 1926.0 9832.357264 \n",
"1 2.314415e+05 1780.319492 518.0 2838.000232 \n",
"2 1.229616e+06 17318.539183 4807.0 13472.042652 \n",
"3 1.150546e+06 8850.350438 4774.0 10074.748210 \n",
"4 1.210845e+06 9386.398474 7523.0 13914.783110 \n",
"\n",
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
"0 6.382330e+10 5.151309e+11 -2.560792e+10 2.841988e+11 \n",
"1 1.457820e-01 2.457632e-01 -1.707090e-02 2.717209e-01 \n",
"2 4.056892e+11 2.421685e+12 -9.687862e+10 8.402113e+11 \n",
"3 4.770901e+00 2.930221e+01 3.780801e+00 2.870987e+01 \n",
"4 1.775257e+00 8.769726e+00 1.150007e+00 7.862819e+00 \n",
"\n",
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
"0 7.507692 26 0.429844 -0.576520 \n",
"1 4.700000 9 0.508681 -0.415876 \n",
"2 1.760563 4 0.438873 -0.588724 \n",
"3 6.684615 14 0.517815 -0.556667 \n",
"4 13.162791 27 0.599433 -0.448172 \n",
"\n",
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
"0 0.013723 2.392243e-01 1.000000 Switzerland \n",
"1 0.013723 1.162029e-01 0.949206 Spain \n",
"2 0.034282 5.851376e-01 1.000000 Italy \n",
"3 0.013723 1.887379e-15 0.999302 Italy \n",
"4 0.013837 9.514697e-01 0.996847 Italy \n",
"\n",
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
"0 Switzerland 107 4.063407e+11 8.956214e+11 \n",
"1 Spain 22 6.276897e+10 2.469731e+11 \n",
"2 Italy 18 1.147803e+12 1.251086e+12 \n",
"3 Italy 33 2.123548e+11 3.670050e+11 \n",
"4 Italy 78 5.279255e+11 6.892142e+11 \n",
"\n",
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
"0 8.915940e+11 310 344 \n",
"1 2.481822e+11 71 81 \n",
"2 1.333111e+12 100 101 \n",
"3 3.882699e+11 237 256 \n",
"4 6.858178e+11 596 619 \n",
"\n",
" avg_holding_months_per_isin max_holding_months_per_isin \\\n",
"0 9.121495 36 \n",
"1 27.772727 85 \n",
"2 6.944444 19 \n",
"3 26.333333 54 \n",
"4 21.769231 49 \n",
"\n",
" corr_flow_fund_lag3 corr_flow_fund_lag6 corr_flow_bench_lag3 \\\n",
"0 0.007825 0.008326 NaN \n",
"1 0.015278 0.096449 NaN \n",
"2 -0.019860 -0.020797 NaN \n",
"3 0.281071 -0.020188 NaN \n",
"4 -0.026933 -0.010493 NaN \n",
"\n",
" corr_flow_bench_lag6 corr_flow_rate_lag3 corr_flow_rate_lag6 \n",
"0 NaN 0.007546 0.014510 \n",
"1 NaN -0.025181 0.012844 \n",
"2 NaN 0.022861 -0.135696 \n",
"3 NaN -0.018482 -0.018833 \n",
"4 NaN -0.204637 -0.109646 "
]
},
"execution_count": 305,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Client-level features: relationship features aggregated over ISINs,\n",
"# and monthly account data aggregated over months\n",
"rel_client_spec = {\n",
"    \"n_isin_total\": (ISIN_COL, \"nunique\"),\n",
"    \"rel_turnover_mean_avg\": (\"rel_turnover_mean\", \"mean\"),\n",
"    \"rel_turnover_vol_avg\": (\"rel_turnover_vol\", \"mean\"),\n",
"    \"rel_flow_to_aum_vol_avg\": (\"rel_flow_to_aum_vol\", \"mean\"),\n",
"    \"full_exit_count\": (\"rel_full_exit_count\", \"sum\"),\n",
"    \"entry_count\": (\"rel_entry_count\", \"sum\"),\n",
"    \"avg_holding_months_per_isin\": (\"rel_holding_months\", \"mean\"),\n",
"    \"max_holding_months_per_isin\": (\"rel_holding_months\", \"max\"),\n",
"}\n",
"df_rel_client = df_rel_feat.groupby(ID_COL, as_index=False).agg(**rel_client_spec)\n",
"\n",
"# Output column -> (input column, aggregation); dict order is the column order.\n",
"# The \"last\" aggregations take the last row of each account in df_month's row\n",
"# order - presumably chronological; verify df_month is sorted by month.\n",
"client_spec = {\n",
"    # history length and activity\n",
"    \"n_months\": (\"month\", \"nunique\"),\n",
"    \"n_active_months\": (\"active_month\", \"sum\"),\n",
"    \"flow_freq\": (\"active_month\", \"mean\"),\n",
"    # AUM level\n",
"    \"aum_qty_mean\": (\"aum_qty\", \"mean\"),\n",
"    \"aum_qty_median\": (\"aum_qty\", \"median\"),\n",
"    \"aum_qty_max\": (\"aum_qty\", \"max\"),\n",
"    \"aum_qty_last\": (\"aum_qty\", \"last\"),\n",
"    # flow volumes\n",
"    \"net_flow_qty_sum\": (\"net_flow_qty\", \"sum\"),\n",
"    \"gross_flow_qty_sum\": (\"gross_flow_qty\", \"sum\"),\n",
"    \"gross_flow_qty_mean\": (\"gross_flow_qty\", \"mean\"),\n",
"    \"n_tx_total\": (\"n_tx\", \"sum\"),\n",
"    # flow variability and ratios to AUM\n",
"    \"net_flow_vol\": (\"net_flow_qty\", \"std\"),\n",
"    \"turnover_mean\": (\"turnover_m\", \"mean\"),\n",
"    \"turnover_vol\": (\"turnover_m\", \"std\"),\n",
"    \"flow_to_aum_mean\": (\"flow_to_aum_m\", \"mean\"),\n",
"    \"flow_to_aum_vol\": (\"flow_to_aum_m\", \"std\"),\n",
"    # breadth of holdings\n",
"    \"avg_n_isin_held\": (\"n_isin_held\", \"mean\"),\n",
"    \"max_n_isin_held\": (\"n_isin_held\", \"max\"),\n",
"    # subscription / redemption mix\n",
"    \"sub_share_mean\": (\"sub_share_m\", \"mean\"),\n",
"    \"red_share_mean\": (\"red_share_m\", \"mean\"),\n",
"    # rates and drawdown\n",
"    \"delta_rate_mean\": (\"delta_rate_m\", \"mean\"),\n",
"    \"aum_drawdown_last\": (\"aum_drawdown\", \"last\"),\n",
"    \"aum_drawdown_max\": (\"aum_drawdown\", \"max\"),\n",
"    # geography\n",
"    \"region\": (\"region\", \"last\"),\n",
"    \"country\": (\"country\", \"last\"),\n",
"}\n",
"df_client = df_month.groupby(ID_COL, as_index=False).agg(**client_spec)\n",
"\n",
"# Left join: every account of df_month is kept even without relationship features\n",
"df_client = df_client.merge(df_rel_client, on=ID_COL, how=\"left\")\n",
"\n",
"# Lagged correlations between flows and performance / rates\n",
"def corr_lag(x, y, lag):\n",
"    \"\"\"Pearson correlation between x[t] and y[t - lag].\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x, y : array-like\n",
"        Series of equal length in the same time order.\n",
"    lag : int\n",
"        Number of positions by which y leads x; must be >= 0.\n",
"\n",
"    Returns\n",
"    -------\n",
"    float\n",
"        The correlation, or NaN when fewer than 4 finite pairs are available.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If lag is negative or x and y differ in shape.\n",
"\n",
"    The series are shifted first and non-finite pairs are dropped afterwards,\n",
"    so a missing value removes only its own pair instead of moving every later\n",
"    observation onto the wrong partner.\n",
"    \"\"\"\n",
"    x = np.asarray(x, dtype=float)\n",
"    y = np.asarray(y, dtype=float)\n",
"\n",
"    if lag < 0:\n",
"        raise ValueError(\"lag must be >= 0\")\n",
"    if x.shape != y.shape:\n",
"        raise ValueError(\"x and y must have the same shape\")\n",
"    if len(x) <= lag:\n",
"        return np.nan\n",
"\n",
"    # Pair x[t] with y[t - lag]. The stop index is written as len(y) - lag\n",
"    # because y[:-lag] would be empty for lag == 0.\n",
"    x_lagged = x[lag:]\n",
"    y_lagged = y[:len(y) - lag]\n",
"\n",
"    mask = np.isfinite(x_lagged) & np.isfinite(y_lagged)\n",
"    x_lagged, y_lagged = x_lagged[mask], y_lagged[mask]\n",
"\n",
"    # Same minimum as before: at least 4 usable pairs\n",
"    if len(x_lagged) <= 3:\n",
"        return np.nan\n",
"\n",
"    return pd.Series(x_lagged).corr(pd.Series(y_lagged))\n",
"\n",
"rows = []\n",
"\n",
"# One record per account: correlation of the monthly flow/AUM ratio with\n",
"# fund return, benchmark return and rate change, at lags 3 and 6\n",
"for acc, g in df_month.groupby(ID_COL):\n",
"    g = g.sort_values(\"month\")\n",
"\n",
"    flow = g[\"flow_to_aum_m\"].values\n",
"    ret_fund = g[\"ret_fund_m\"].values\n",
"    ret_bench = g[\"ret_bench_m\"].values\n",
"    rate = g[\"delta_rate_m\"].values\n",
"\n",
"    # (output column, driver series, lag); list order is the column order of df_corr\n",
"    corr_spec = [\n",
"        (\"corr_flow_fund_lag3\", ret_fund, 3),\n",
"        (\"corr_flow_fund_lag6\", ret_fund, 6),\n",
"        (\"corr_flow_bench_lag3\", ret_bench, 3),\n",
"        (\"corr_flow_bench_lag6\", ret_bench, 6),\n",
"        (\"corr_flow_rate_lag3\", rate, 3),\n",
"        (\"corr_flow_rate_lag6\", rate, 6),\n",
"    ]\n",
"\n",
"    record = {ID_COL: acc}\n",
"    record.update({name: corr_lag(flow, driver, lag) for name, driver, lag in corr_spec})\n",
"    rows.append(record)\n",
"\n",
"df_corr = pd.DataFrame(rows)\n",
"\n",
"df_client = df_client.merge(df_corr, on=ID_COL, how=\"left\")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
]
},
{
"cell_type": "code",
"execution_count": 306,
"id": "8c1a0491-a0bb-4165-b073-41f81637466b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(431, 44)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>aum_qty_max</th>\n",
" <th>aum_qty_last</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>n_tx_total</th>\n",
" <th>net_flow_vol</th>\n",
" <th>turnover_mean</th>\n",
" <th>turnover_vol</th>\n",
" <th>flow_to_aum_mean</th>\n",
" <th>flow_to_aum_vol</th>\n",
" <th>avg_n_isin_held</th>\n",
" <th>max_n_isin_held</th>\n",
" <th>sub_share_mean</th>\n",
" <th>red_share_mean</th>\n",
" <th>delta_rate_mean</th>\n",
" <th>aum_drawdown_last</th>\n",
" <th>aum_drawdown_max</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>n_isin_total</th>\n",
" <th>rel_turnover_mean_avg</th>\n",
" <th>rel_turnover_vol_avg</th>\n",
" <th>rel_flow_to_aum_vol_avg</th>\n",
" <th>full_exit_count</th>\n",
" <th>entry_count</th>\n",
" <th>avg_holding_months_per_isin</th>\n",
" <th>max_holding_months_per_isin</th>\n",
" <th>corr_flow_fund_lag3</th>\n",
" <th>corr_flow_fund_lag6</th>\n",
" <th>corr_flow_bench_lag3</th>\n",
" <th>corr_flow_bench_lag6</th>\n",
" <th>corr_flow_rate_lag3</th>\n",
" <th>corr_flow_rate_lag6</th>\n",
" <th>flow_trend_12m</th>\n",
" <th>aum_trend_12m</th>\n",
" <th>drawdown_trend_12m</th>\n",
" <th>beta_rate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>23477.224308</td>\n",
" <td>14880.4715</td>\n",
" <td>88818.372</td>\n",
" <td>67570.855</td>\n",
" <td>-45677.1480</td>\n",
" <td>1.244126e+06</td>\n",
" <td>9570.200015</td>\n",
" <td>1926.0</td>\n",
" <td>9832.357264</td>\n",
" <td>6.382330e+10</td>\n",
" <td>5.151309e+11</td>\n",
" <td>-2.560792e+10</td>\n",
" <td>2.841988e+11</td>\n",
" <td>7.507692</td>\n",
" <td>26</td>\n",
" <td>0.429844</td>\n",
" <td>-0.576520</td>\n",
" <td>0.013723</td>\n",
" <td>2.392243e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>107</td>\n",
" <td>4.063407e+11</td>\n",
" <td>8.956214e+11</td>\n",
" <td>8.915940e+11</td>\n",
" <td>310</td>\n",
" <td>344</td>\n",
" <td>9.121495</td>\n",
" <td>36</td>\n",
" <td>0.007825</td>\n",
" <td>0.008326</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.007546</td>\n",
" <td>0.014510</td>\n",
" <td>-1.886348e-02</td>\n",
" <td>2920.070661</td>\n",
" <td>-0.024467</td>\n",
" <td>1.405196e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200000076</td>\n",
" <td>130</td>\n",
" <td>119</td>\n",
" <td>0.915385</td>\n",
" <td>15840.000331</td>\n",
" <td>9272.4710</td>\n",
" <td>50732.461</td>\n",
" <td>44837.203</td>\n",
" <td>54791.9840</td>\n",
" <td>2.314415e+05</td>\n",
" <td>1780.319492</td>\n",
" <td>518.0</td>\n",
" <td>2838.000232</td>\n",
" <td>1.457820e-01</td>\n",
" <td>2.457632e-01</td>\n",
" <td>-1.707090e-02</td>\n",
" <td>2.717209e-01</td>\n",
" <td>4.700000</td>\n",
" <td>9</td>\n",
" <td>0.508681</td>\n",
" <td>-0.415876</td>\n",
" <td>0.013723</td>\n",
" <td>1.162029e-01</td>\n",
" <td>0.949206</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>22</td>\n",
" <td>6.276897e+10</td>\n",
" <td>2.469731e+11</td>\n",
" <td>2.481822e+11</td>\n",
" <td>71</td>\n",
" <td>81</td>\n",
" <td>27.772727</td>\n",
" <td>85</td>\n",
" <td>0.015278</td>\n",
" <td>0.096449</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.025181</td>\n",
" <td>0.012844</td>\n",
" <td>1.789020e-03</td>\n",
" <td>548.538087</td>\n",
" <td>-0.003843</td>\n",
" <td>-1.283031e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200000082</td>\n",
" <td>71</td>\n",
" <td>71</td>\n",
" <td>1.000000</td>\n",
" <td>85194.200239</td>\n",
" <td>25820.0550</td>\n",
" <td>316149.358</td>\n",
" <td>131158.471</td>\n",
" <td>14575.5560</td>\n",
" <td>1.229616e+06</td>\n",
" <td>17318.539183</td>\n",
" <td>4807.0</td>\n",
" <td>13472.042652</td>\n",
" <td>4.056892e+11</td>\n",
" <td>2.421685e+12</td>\n",
" <td>-9.687862e+10</td>\n",
" <td>8.402113e+11</td>\n",
" <td>1.760563</td>\n",
" <td>4</td>\n",
" <td>0.438873</td>\n",
" <td>-0.588724</td>\n",
" <td>0.034282</td>\n",
" <td>5.851376e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>18</td>\n",
" <td>1.147803e+12</td>\n",
" <td>1.251086e+12</td>\n",
" <td>1.333111e+12</td>\n",
" <td>100</td>\n",
" <td>101</td>\n",
" <td>6.944444</td>\n",
" <td>19</td>\n",
" <td>-0.019860</td>\n",
" <td>-0.020797</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.022861</td>\n",
" <td>-0.135696</td>\n",
" <td>4.793703e+09</td>\n",
" <td>-10443.281371</td>\n",
" <td>0.033033</td>\n",
" <td>7.995257e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200000146</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>71298.603700</td>\n",
" <td>15953.6355</td>\n",
" <td>519508.539</td>\n",
" <td>519508.539</td>\n",
" <td>457533.3310</td>\n",
" <td>1.150546e+06</td>\n",
" <td>8850.350438</td>\n",
" <td>4774.0</td>\n",
" <td>10074.748210</td>\n",
" <td>4.770901e+00</td>\n",
" <td>2.930221e+01</td>\n",
" <td>3.780801e+00</td>\n",
" <td>2.870987e+01</td>\n",
" <td>6.684615</td>\n",
" <td>14</td>\n",
" <td>0.517815</td>\n",
" <td>-0.556667</td>\n",
" <td>0.013723</td>\n",
" <td>1.887379e-15</td>\n",
" <td>0.999302</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>33</td>\n",
" <td>2.123548e+11</td>\n",
" <td>3.670050e+11</td>\n",
" <td>3.882699e+11</td>\n",
" <td>237</td>\n",
" <td>256</td>\n",
" <td>26.333333</td>\n",
" <td>54</td>\n",
" <td>0.281071</td>\n",
" <td>-0.020188</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.018482</td>\n",
" <td>-0.018833</td>\n",
" <td>-9.860558e-02</td>\n",
" <td>24136.047846</td>\n",
" <td>-0.049820</td>\n",
" <td>-4.842472e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200000147</td>\n",
" <td>129</td>\n",
" <td>129</td>\n",
" <td>1.000000</td>\n",
" <td>35957.851907</td>\n",
" <td>18047.3390</td>\n",
" <td>174703.188</td>\n",
" <td>8478.402</td>\n",
" <td>677424.2191</td>\n",
" <td>1.210845e+06</td>\n",
" <td>9386.398474</td>\n",
" <td>7523.0</td>\n",
" <td>13914.783110</td>\n",
" <td>1.775257e+00</td>\n",
" <td>8.769726e+00</td>\n",
" <td>1.150007e+00</td>\n",
" <td>7.862819e+00</td>\n",
" <td>13.162791</td>\n",
" <td>27</td>\n",
" <td>0.599433</td>\n",
" <td>-0.448172</td>\n",
" <td>0.013837</td>\n",
" <td>9.514697e-01</td>\n",
" <td>0.996847</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>78</td>\n",
" <td>5.279255e+11</td>\n",
" <td>6.892142e+11</td>\n",
" <td>6.858178e+11</td>\n",
" <td>596</td>\n",
" <td>619</td>\n",
" <td>21.769231</td>\n",
" <td>49</td>\n",
" <td>-0.026933</td>\n",
" <td>-0.010493</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.204637</td>\n",
" <td>-0.109646</td>\n",
" <td>1.129487e+00</td>\n",
" <td>2098.385472</td>\n",
" <td>-0.012011</td>\n",
" <td>-2.472128e+00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
"0 18872 130 130 1.000000 23477.224308 \n",
"1 200000076 130 119 0.915385 15840.000331 \n",
"2 200000082 71 71 1.000000 85194.200239 \n",
"3 200000146 130 130 1.000000 71298.603700 \n",
"4 200000147 129 129 1.000000 35957.851907 \n",
"\n",
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
"0 14880.4715 88818.372 67570.855 -45677.1480 \n",
"1 9272.4710 50732.461 44837.203 54791.9840 \n",
"2 25820.0550 316149.358 131158.471 14575.5560 \n",
"3 15953.6355 519508.539 519508.539 457533.3310 \n",
"4 18047.3390 174703.188 8478.402 677424.2191 \n",
"\n",
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
"0 1.244126e+06 9570.200015 1926.0 9832.357264 \n",
"1 2.314415e+05 1780.319492 518.0 2838.000232 \n",
"2 1.229616e+06 17318.539183 4807.0 13472.042652 \n",
"3 1.150546e+06 8850.350438 4774.0 10074.748210 \n",
"4 1.210845e+06 9386.398474 7523.0 13914.783110 \n",
"\n",
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
"0 6.382330e+10 5.151309e+11 -2.560792e+10 2.841988e+11 \n",
"1 1.457820e-01 2.457632e-01 -1.707090e-02 2.717209e-01 \n",
"2 4.056892e+11 2.421685e+12 -9.687862e+10 8.402113e+11 \n",
"3 4.770901e+00 2.930221e+01 3.780801e+00 2.870987e+01 \n",
"4 1.775257e+00 8.769726e+00 1.150007e+00 7.862819e+00 \n",
"\n",
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
"0 7.507692 26 0.429844 -0.576520 \n",
"1 4.700000 9 0.508681 -0.415876 \n",
"2 1.760563 4 0.438873 -0.588724 \n",
"3 6.684615 14 0.517815 -0.556667 \n",
"4 13.162791 27 0.599433 -0.448172 \n",
"\n",
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
"0 0.013723 2.392243e-01 1.000000 Switzerland \n",
"1 0.013723 1.162029e-01 0.949206 Spain \n",
"2 0.034282 5.851376e-01 1.000000 Italy \n",
"3 0.013723 1.887379e-15 0.999302 Italy \n",
"4 0.013837 9.514697e-01 0.996847 Italy \n",
"\n",
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
"0 Switzerland 107 4.063407e+11 8.956214e+11 \n",
"1 Spain 22 6.276897e+10 2.469731e+11 \n",
"2 Italy 18 1.147803e+12 1.251086e+12 \n",
"3 Italy 33 2.123548e+11 3.670050e+11 \n",
"4 Italy 78 5.279255e+11 6.892142e+11 \n",
"\n",
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
"0 8.915940e+11 310 344 \n",
"1 2.481822e+11 71 81 \n",
"2 1.333111e+12 100 101 \n",
"3 3.882699e+11 237 256 \n",
"4 6.858178e+11 596 619 \n",
"\n",
" avg_holding_months_per_isin max_holding_months_per_isin \\\n",
"0 9.121495 36 \n",
"1 27.772727 85 \n",
"2 6.944444 19 \n",
"3 26.333333 54 \n",
"4 21.769231 49 \n",
"\n",
" corr_flow_fund_lag3 corr_flow_fund_lag6 corr_flow_bench_lag3 \\\n",
"0 0.007825 0.008326 NaN \n",
"1 0.015278 0.096449 NaN \n",
"2 -0.019860 -0.020797 NaN \n",
"3 0.281071 -0.020188 NaN \n",
"4 -0.026933 -0.010493 NaN \n",
"\n",
" corr_flow_bench_lag6 corr_flow_rate_lag3 corr_flow_rate_lag6 \\\n",
"0 NaN 0.007546 0.014510 \n",
"1 NaN -0.025181 0.012844 \n",
"2 NaN 0.022861 -0.135696 \n",
"3 NaN -0.018482 -0.018833 \n",
"4 NaN -0.204637 -0.109646 \n",
"\n",
" flow_trend_12m aum_trend_12m drawdown_trend_12m beta_rate \n",
"0 -1.886348e-02 2920.070661 -0.024467 1.405196e+10 \n",
"1 1.789020e-03 548.538087 -0.003843 -1.283031e-01 \n",
"2 4.793703e+09 -10443.281371 0.033033 7.995257e+10 \n",
"3 -9.860558e-02 24136.047846 -0.049820 -4.842472e+00 \n",
"4 1.129487e+00 2098.385472 -0.012011 -2.472128e+00 "
]
},
"execution_count": 306,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def compute_trend(y):\n",
"    \"\"\"Slope of a least-squares line fitted to y against its position 0..len(y)-1.\n",
"\n",
"    Non-finite entries are left out of the fit but keep their position.\n",
"    Returns NaN when y holds fewer than 4 values or fewer than 4 finite ones.\n",
"    \"\"\"\n",
"    values = np.asarray(y, dtype=float)\n",
"    n_obs = len(values)\n",
"    if n_obs < 4:\n",
"        return np.nan\n",
"\n",
"    finite = np.isfinite(values)\n",
"    if finite.sum() < 4:\n",
"        return np.nan\n",
"\n",
"    positions = np.arange(n_obs).reshape(-1, 1)\n",
"    model = LinearRegression()\n",
"    model.fit(positions[finite], values[finite])\n",
"    return model.coef_[0]\n",
"\n",
"def compute_beta(y, x):\n",
"    \"\"\"Slope of a least-squares regression of y on x (with intercept).\n",
"\n",
"    Only pairs where both values are finite are used.\n",
"    Returns NaN when fewer than 6 such pairs exist.\n",
"    \"\"\"\n",
"    response = np.asarray(y, dtype=float)\n",
"    regressor = np.asarray(x, dtype=float)\n",
"\n",
"    usable = np.isfinite(response) & np.isfinite(regressor)\n",
"    if usable.sum() < 6:\n",
"        return np.nan\n",
"\n",
"    model = LinearRegression()\n",
"    model.fit(regressor[usable].reshape(-1, 1), response[usable])\n",
"    return model.coef_[0]\n",
"\n",
"# Columns produced by this cell\n",
"trend_cols = [\"flow_trend_12m\", \"aum_trend_12m\", \"drawdown_trend_12m\", \"beta_rate\"]\n",
"\n",
"rows = []\n",
"\n",
"# One record per account: trends over its last 12 rows (ordered by month)\n",
"# and the sensitivity of the flow/AUM ratio to rate changes over its full history\n",
"for acc, g in df_month.groupby(ID_COL):\n",
"    g = g.sort_values(\"month\")\n",
"\n",
"    flow = g[\"flow_to_aum_m\"].values\n",
"    aum = g[\"aum_qty\"].values\n",
"    delta_rate = g[\"delta_rate_m\"].values\n",
"    drawdown = g[\"aum_drawdown\"].values\n",
"\n",
"    rows.append({\n",
"        ID_COL: acc,\n",
"        \"flow_trend_12m\": compute_trend(flow[-12:]),\n",
"        \"aum_trend_12m\": compute_trend(aum[-12:]),\n",
"        \"drawdown_trend_12m\": compute_trend(drawdown[-12:]),\n",
"        \"beta_rate\": compute_beta(flow, delta_rate)\n",
"    })\n",
"\n",
"df_beta = pd.DataFrame(rows)\n",
"\n",
"# df_client comes from an earlier cell. Drop any previous copy of these columns\n",
"# first so that re-running this cell does not create _x/_y duplicates.\n",
"df_client = (\n",
"    df_client\n",
"    .drop(columns=trend_cols, errors=\"ignore\")\n",
"    .merge(df_beta, on=ID_COL, how=\"left\")\n",
")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
]
},
{
"cell_type": "code",
"execution_count": 307,
"id": "4e4ea46f-5c3d-4a4a-b79c-ff5ae8973bad",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"seg_2D\n",
"Highly active (high int, high freq) 137\n",
"Dormant (low int, low freq) 134\n",
"Small rebalancers (low int, high freq) 80\n",
"Occasional large movers (high int, low freq) 80\n",
"Name: count, dtype: int64\n",
"thr_int: 5.739688017572092 thr_freq: 0.8\n"
]
}
],
"source": [
"# Turnover proxy: total gross flow relative to average AUM\n",
"df_client[\"rel_intensity_total\"] = df_client[\"gross_flow_qty_sum\"].div(df_client[\"aum_qty_mean\"])\n",
"# Share of active months\n",
"df_client[\"frequency\"] = df_client[\"flow_freq\"]\n",
"\n",
"# Thresholds: medians (simple + explainable)\n",
"thr_int = df_client[\"rel_intensity_total\"].median()\n",
"thr_freq = df_client[\"frequency\"].median()\n",
"\n",
"# (low intensity?, low frequency?) -> segment label\n",
"SEG_2D_LABELS = {\n",
"    (True, True): \"Dormant (low int, low freq)\",\n",
"    (True, False): \"Small rebalancers (low int, high freq)\",\n",
"    (False, True): \"Occasional large movers (high int, low freq)\",\n",
"    (False, False): \"Highly active (high int, high freq)\",\n",
"}\n",
"\n",
"def quadrant(row):\n",
"    \"\"\"Return the 2D segment label of one client row.\n",
"\n",
"    A dimension is 'low' only when its value is strictly below the median\n",
"    threshold (thr_int / thr_freq); ties and NaN therefore count as 'high'.\n",
"    \"\"\"\n",
"    low_int = bool(row[\"rel_intensity_total\"] < thr_int)\n",
"    low_frq = bool(row[\"frequency\"] < thr_freq)\n",
"    return SEG_2D_LABELS[(low_int, low_frq)]\n",
"\n",
"df_client[\"seg_2D\"] = df_client.apply(quadrant, axis=1)\n",
"\n",
"print(df_client[\"seg_2D\"].value_counts())\n",
"print(\"thr_int:\", thr_int, \"thr_freq:\", thr_freq)\n"
]
},
{
"cell_type": "code",
"execution_count": 308,
"id": "09943df7-8c78-4c51-b387-866c5cddd392",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApgAAAHHCAYAAAAbASh2AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA5ZhJREFUeJzs3XdYU9cbB/Bvwt6yEVkCLhQBBf1pqziLe89axd1W3NqqtXXWuuqqojjqqLUVt9bWSd3a1oG49wAVcSDgQEZyfn/QpITchKybAe/neXg0Jyf3nNwkN2/OFDDGGAghhBBCCNERoaErQAghhBBCyhYKMAkhhBBCiE5RgEkIIYQQQnSKAkxCCCGEEKJTFGASQgghhBCdogCTEEIIIYToFAWYhBBCCCFEpyjAJIQQQgghOkUBJiGEEEII0SmjDzCbNGmCWrVq6fSYAoEAw4cP1+kxVbF+/XoIBAI8ePBA72UDQP/+/REQEGCQsomsBw8eQCAQYP369Yauis7w9ZwCAgLQv39/nR7TmMvlQ5MmTdCkSRNDV0MnNm7ciOrVq8PCwgIVKlQwdHXKpLS0NFhbW+PUqVOGrgoxsP3798Pe3h7Pnz9X+7FqBZhnz57F8OHDUbNmTdjZ2cHPzw89evTArVu35PI2adIEAoEAAoEAQqEQjo6OqFatGvr27YtDhw6pXVFC9O3atWuYNm2aVj8IfvnlFyxevFhndSqrTp8+jWnTpiErK8vQVdGJ5cuXG/WPhydPnmDatGm4ePGioauilhs3bqB///4ICgrC6tWrsWrVKkNXqUyaMWMG6tevjw8++EBhnpYtWyptrPnxxx9Ro0YNWFtbo0qVKli6dClnvsePH6NHjx6oUKECHB0d0bFjR9y7d08nz6M0WVlZsLa2hkAgwPXr1/VSpiH88ccfmDZtmkaPbdWqFYKDgzF79mz1H8zU0LVrV+bl5cVGjBjBVq9ezWbOnMk8PT2ZnZ0du3z5skze6Oho5uPjwzZu3Mg2btzIEhIS2Pjx41lgYCADwHr06MHy8/NLLTM6OprVrFlTnWqWCgCLi4vT6TFVUVhYyHJzc5lYLNZ72YwxFhsby/z9/Q1StinaunUrA8COHDmi8THatm3Lec7FYjHLzc1lhYWFmlfQyNy/f58BYOvWrVP7sfPnz2cA2P379+Xue//+vUrXCl3TptyaNWuy6Oho3VZIC3l5eSwvL096++zZsxq/Voa0YsUKBoDdvn3b0FUps549e8YsLCzYL7/8ojDP9u3bmZ2dncLv0oSEBAaAde3ala1atYr17duXAWBz5syRyff69WtWpUoV5uHhwebOncsWLlzIfH19mY+PD3vx4oXOn1tJq1atYtbW1szLy4tNnjyZ9/IMJS4ujqkZ7slYvnw5s7W1ZTk5OWo9Tq0WzLFjx+Lhw4f44YcfMHjwYHz99dc4ceIECgsLMWfOHLn8Tk5O+OSTT/DJJ5/g008/xfz583Hr1i0MGzYMW7Zswddff61+RGzCzMzMpL+WdOHdu3c6OQ7RP4FAAGtra5iZmRm6KgqJxWK8f//e0NWAlZUVLCwsyk25fLC0tISlpaWhq6G1Z8+eAUCpXeOMMeTm5uqhRmXPzz//DHNzc7Rv357z/vfv32PcuHGYMGEC5/25ubmYPHky2rZti23btmHIkCH46aef0KdPH8ycOROvXr2S5l2+fDlu376NvXv34ssvv8SYMWNw8OBBpKenY8GCBUrrOW3aNK2HfP38889o06YNevfujV9++UWrY5VlXbt2RV5eHrZu3areAzUOaYupU6cOq1OnjkyaspbHwsJCFhISwmxtbVlWVpbSY0uOc+7cOdagQQNmbW3NAgIC2IoVK+Tyvn//nk2ZMoUFBQUxS0tL5uPjw7744gv2/v17mXz491fXzp07Wc2aNZmlpSULCQlh+/btk8n34MED9vnnn7OqVasya2tr5uLiwrp16ybTyiJpCVi/fr1cffbv38
8AsN9++40xxti6des4W2ni4+NZSEgIs7S0ZBUrVmTDhg1jr169UngeGjVqxGxsbNioUaMYY4zt2rWLtWnThlWsWJFZWlqywMBANmPGDLnWMVVbMM+ePcs++ugj5urqKj3fAwYMkMkjEonYokWLWEhICLOysmIeHh5s6NChLDMzUy7f1KlTWcWKFZmNjQ1r0qQJu3r1KvP392exsbHSfJJzc+LECTZixAjm5ubGnJyc2NChQ1leXh579eoV69u3L6tQoQKrUKEC++KLL+RaglWtk7+/P2vbti07ceIEi4qKYlZWVqxy5cpsw4YNcvUp+SdpzVTlnEdHR8s9XnL+FbX2JSUlsQ8//JDZ2toyJycn1qFDB3bt2jWZPFOnTpW24sTGxjInJyfm6OjI+vfvz96+fSuT9/nz5+z69ety6Vwkn4uff/6ZhYSEMHNzc7Zz507GGGOPHj1iAwYMYB4eHtLPy48//ijzeK7nlJKSwmJjY1nlypWZlZUV8/T0ZAMGDJBpnZA8n5J/ks9J8feKOp83VeutiKL36MmTJ9mYMWOYm5sbs7W1ZZ06dWLPnj2TeVzJ51K8NfPVq1ds1KhRzMfHh1laWrKgoCA2Z84cJhKJ5M7l/Pnz2cqVK1lgYCCztLRkkZGR7J9//pGpZ3p6Ouvfvz+rVKkSs7S0ZF5eXqxDhw4y15no6GhpHY4cOcJ5vtetW8emTJnCzM3NZZ6PxJAhQ5iTkxPLzc3lPF+SVugHDx7I3Tdx4kRmYWEh/SzeunWLdenShXl6ejIrKytWqVIl1rNnT6XfB1znderUqdL72rZty/bv38/q1q3LrKys2KJFi1Q+35J8sbGxzNHRkTk5ObF+/fqx5ORkufd08XNZHNf1VZfXpOL1HD16NPP392eWlpasUqVKrG/fvuz58+fs9evXzNbWlo0cOVLucWlpaUwoFLLvvvtO4TlmjLHGjRuzJk2aKLx/+vTpzM/Pj717946zBfP3339nANjvv/8uk3769GkGgG3cuFGaFhUVxaKiouTK+Oijj1hQUJDSek6dOlWrHrmHDx8ygUDAtmzZwv7++28GgJ06dYoz78aNG1lUVBSzsbFhFSpUYI0aNWIHDhyQyfPHH3+wxo0bM3t7e+bg4MAiIyPZpk2bZPJs2bKF1alTh1lbWzNXV1fWp08f9ujRI5k8qr6/VL1GxMbGcn7eJX799VdWp04dab1r1arFFi9eLFd+REQE69Chg8LzyUXrAFMsFrNKlSqxjz76SCa9tK7tmTNnMgBs7969So8fHR3NvL29mYeHBxs+fDj74Ycf2IcffsgAyHxRiEQi9tFHHzFbW1s2evRotnLlSjZ8+HBmbm7OOnbsKHNMACwsLIxVrFiRzZw5ky1evJgFBgYyW1tbmS++rVu3srCwMDZlyhS2atUq9tVXXzFnZ2fm7+8v84UdGBjI2rRpI1f3AQMGMGdnZ2k3G1eAKflybdGiBVu6dCkbPnw4MzMzY1FRUTLdc9HR0czLy4u5u7uzESNGsJUrV7Jdu3Yxxhjr1KkT69GjB5s/fz5bsWIF6969OwPAxo8fL1MfVQLMjIwM5uzszKpWrcrmz5/PVq9ezSZPnsxq1Kghk2/w4MHM3NycDRkyhCUkJLAJEyYwOzs7uXp/+eWXDABr3749W7ZsGRsyZAjz8fFhbm5unF/e4eHhrFWrViw+Pl7arfLll1+yDz/8kH388cds+fLlrF27dgyA3MVX1Tr5+/uzatWqMU9PT/bVV1+xZcuWsTp16jCBQMCuXLnCGGPs7t27bOTIkQwA++qrr6RDPZ4+faryOT948CALDw9nbm5u0sdLAjauYOzQoUPM3NycVa1alc2bN49Nnz6dubm5MWdnZ873TEREBOvSpQtbvnw5Gzx4sPRcFSfJq0o3PwBWo0YN5u7uzqZPn87i4+NZcnIye/r0KfPx8WG+vr5sxowZbMWKFaxDhw4MgPRLXNFz+v7771mjRo3YjBkz2KpVq9ioUa
OYjY0Nq1evnvQHQkpKCuvdu7f0eJJz9ebNG+nrVfy9ournTdV6K6IowIyIiGDNmjVjS5cuZePGjWNmZmasR48e0nw
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.style.use('default')\n",
"plt.figure()\n",
"for name, g in df_client.groupby(\"seg_2D\"):\n",
" plt.scatter(g[\"frequency\"], g[\"rel_intensity_total\"], s=10, label=name)\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_freq, linestyle=\"--\")\n",
"plt.axhline(thr_int, linestyle=\"--\")\n",
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency (400+ Accounts)\")\n",
"plt.legend(markerscale=2)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 309,
"id": "9eb5fbb8-1a7b-434c-ba36-3c2a560b4cb1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nb clients = 404\n",
"Nb features = 35\n",
"['log_aum_qty_mean', 'flow_freq', 'gross_flow_to_aum', 'flow_to_aum_vol', 'activity_intensity', 'n_tx_total', 'avg_n_isin_held', 'n_isin_total', 'avg_holding_months_per_isin', 'exit_rate_per_isin', 'flow_direction_balance', 'aum_drawdown_last', 'corr_flow_fund_lag3', 'corr_flow_fund_lag6', 'corr_flow_rate_lag3', 'country_grp_France', 'country_grp_Germany', 'country_grp_Italy', 'country_grp_Luxembourg', 'country_grp_Monaco', 'country_grp_Other', 'country_grp_Spain', 'country_grp_Sweden', 'country_grp_Switzerland', 'country_grp_United Kingdom', 'region_grp_France', 'region_grp_Germany', 'region_grp_International', 'region_grp_Italy', 'region_grp_Luxembourg', 'region_grp_Nordics', 'region_grp_Other', 'region_grp_Spain', 'region_grp_Switzerland', 'region_grp_United Kingdom']\n"
]
}
],
"source": [
"dfc = df_client.copy()\n",
"\n",
"dfc[\"gross_flow_to_aum\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"aum_qty_mean\"].abs() + EPS)\n",
"dfc[\"avg_ticket\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"n_tx_total\"] + EPS)\n",
"dfc[\"flow_direction_balance\"] = dfc[\"net_flow_qty_sum\"] / (dfc[\"gross_flow_qty_sum\"] + EPS)\n",
"dfc[\"redemption_bias\"] = dfc[\"red_share_mean\"] - dfc[\"sub_share_mean\"]\n",
"dfc[\"activity_intensity\"] = dfc[\"n_tx_total\"] / (dfc[\"n_months\"] + EPS)\n",
"dfc[\"exit_rate_per_isin\"] = dfc[\"full_exit_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"entry_rate_per_isin\"] = dfc[\"entry_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"aum_final_to_peak\"] = dfc[\"aum_qty_last\"] / (dfc[\"aum_qty_max\"] + EPS)\n",
"\n",
"for col in [\"aum_qty_mean\", \"gross_flow_qty_sum\", \"n_tx_total\", \"avg_ticket\", \"gross_flow_qty_mean\"]:\n",
" dfc[f\"log_{col}\"] = np.log1p(dfc[col].clip(lower=0))\n",
"\n",
"dfc = dfc[(dfc[\"n_months\"] >= 6) & (dfc[\"aum_qty_mean\"] > 0)].copy()\n",
"\n",
"top_countries = dfc[\"country\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"top_regions = dfc[\"region\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"\n",
"dfc[\"country_grp\"] = np.where(dfc[\"country\"].isin(top_countries), dfc[\"country\"], \"Other\")\n",
"dfc[\"region_grp\"] = np.where(dfc[\"region\"].isin(top_regions), dfc[\"region\"], \"Other\")\n",
"\n",
"base_features = [\n",
" \"log_aum_qty_mean\",\n",
" \"flow_freq\",\n",
" \"gross_flow_to_aum\",\n",
" #\"turnover_vol\",\n",
" \"flow_to_aum_vol\",\n",
" \"activity_intensity\",\n",
" \"n_tx_total\",\n",
" \"avg_n_isin_held\",\n",
" \"n_isin_total\",\n",
" \"avg_holding_months_per_isin\",\n",
" \"exit_rate_per_isin\",\n",
" \"flow_direction_balance\",\n",
" #\"redemption_bias\",\n",
" \"aum_drawdown_last\",\n",
" \"corr_flow_fund_lag3\",\n",
" \"corr_flow_fund_lag6\",\n",
" \"corr_flow_rate_lag3\",\n",
" #\"corr_flow_rate_lag6\",\n",
" #\"corr_flow_bench_lag3\",\n",
" #\"corr_flow_bench_lag6\"\n",
" \n",
"]\n",
"\n",
"base_features2 = [\n",
" \"log_aum_qty_mean\",\n",
" \"log_gross_flow_qty_mean\",\n",
" \"n_tx_total\",\n",
" \"flow_freq\",\n",
" \"gross_flow_to_aum\",\n",
" \"net_flow_vol\",\n",
" #\"avg_n_isin_held\",\n",
" #\"flow_direction_balance\",\n",
"]\n",
"\n",
"base_features = [c for c in base_features if c in dfc.columns]\n",
"\n",
"X_num = dfc[base_features].replace([np.inf, -np.inf], np.nan).fillna(dfc[base_features].median())\n",
"X_cat = pd.get_dummies(dfc[[\"country_grp\", \"region_grp\"]].fillna(\"Unknown\"), drop_first=True)\n",
"\n",
"X = pd.concat([X_num.reset_index(drop=True), X_cat.reset_index(drop=True)], axis=1)\n",
"\n",
"scaler = StandardScaler()\n",
"scaler2 = RobustScaler()\n",
"\n",
"X_scaled = scaler.fit_transform(X_num)\n",
"X_scaled2 = scaler2.fit_transform(X_num)\n",
"\n",
"print(\"Nb clients =\", X.shape[0])\n",
"print(\"Nb features =\", X.shape[1])\n",
"print(X.columns.tolist())"
]
},
{
"cell_type": "code",
"execution_count": 310,
"id": "5f006fc0-d0e7-47b2-94f0-7e3bbdf91097",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>k</th>\n",
" <th>inertia</th>\n",
" <th>silhouette</th>\n",
" <th>davies_bouldin</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>5178.843770</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>4741.629415</td>\n",
" <td>0.132598</td>\n",
" <td>2.166657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>4394.702026</td>\n",
" <td>0.124785</td>\n",
" <td>2.073192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>4115.441587</td>\n",
" <td>0.133249</td>\n",
" <td>1.787169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>3865.546167</td>\n",
" <td>0.127431</td>\n",
" <td>1.759628</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>7</td>\n",
" <td>3679.273300</td>\n",
" <td>0.135589</td>\n",
" <td>1.702516</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>8</td>\n",
" <td>3448.452307</td>\n",
" <td>0.139533</td>\n",
" <td>1.634761</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>9</td>\n",
" <td>3321.805201</td>\n",
" <td>0.121927</td>\n",
" <td>1.709083</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>10</td>\n",
" <td>3167.889248</td>\n",
" <td>0.128239</td>\n",
" <td>1.605403</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>11</td>\n",
" <td>3048.339811</td>\n",
" <td>0.134592</td>\n",
" <td>1.620711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>12</td>\n",
" <td>2931.256053</td>\n",
" <td>0.139258</td>\n",
" <td>1.550705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>13</td>\n",
" <td>2847.001592</td>\n",
" <td>0.144681</td>\n",
" <td>1.537896</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>14</td>\n",
" <td>2742.565943</td>\n",
" <td>0.152159</td>\n",
" <td>1.455955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>15</td>\n",
" <td>2647.758120</td>\n",
" <td>0.148589</td>\n",
" <td>1.469929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>16</td>\n",
" <td>2576.736131</td>\n",
" <td>0.132343</td>\n",
" <td>1.498820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>17</td>\n",
" <td>2520.993893</td>\n",
" <td>0.137837</td>\n",
" <td>1.491089</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>18</td>\n",
" <td>2443.613239</td>\n",
" <td>0.135204</td>\n",
" <td>1.450617</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>19</td>\n",
" <td>2368.363384</td>\n",
" <td>0.151232</td>\n",
" <td>1.428163</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>20</td>\n",
" <td>2348.022385</td>\n",
" <td>0.134130</td>\n",
" <td>1.432542</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" k inertia silhouette davies_bouldin\n",
"0 2 5178.843770 NaN NaN\n",
"1 3 4741.629415 0.132598 2.166657\n",
"2 4 4394.702026 0.124785 2.073192\n",
"3 5 4115.441587 0.133249 1.787169\n",
"4 6 3865.546167 0.127431 1.759628\n",
"5 7 3679.273300 0.135589 1.702516\n",
"6 8 3448.452307 0.139533 1.634761\n",
"7 9 3321.805201 0.121927 1.709083\n",
"8 10 3167.889248 0.128239 1.605403\n",
"9 11 3048.339811 0.134592 1.620711\n",
"10 12 2931.256053 0.139258 1.550705\n",
"11 13 2847.001592 0.144681 1.537896\n",
"12 14 2742.565943 0.152159 1.455955\n",
"13 15 2647.758120 0.148589 1.469929\n",
"14 16 2576.736131 0.132343 1.498820\n",
"15 17 2520.993893 0.137837 1.491089\n",
"16 18 2443.613239 0.135204 1.450617\n",
"17 19 2368.363384 0.151232 1.428163\n",
"18 20 2348.022385 0.134130 1.432542"
]
},
"execution_count": 310,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rows = []\n",
"\n",
"for k in range(2, 21):\n",
" km = KMeans(n_clusters=k, n_init=30, random_state=42)\n",
" labels = km.fit_predict(X_scaled)\n",
"\n",
" row = {\n",
" \"k\": k,\n",
" \"inertia\": km.inertia_\n",
" }\n",
"\n",
" if k >= 3:\n",
" row[\"silhouette\"] = silhouette_score(X_scaled, labels)\n",
" row[\"davies_bouldin\"] = davies_bouldin_score(X_scaled, labels)\n",
" else:\n",
" row[\"silhouette\"] = np.nan\n",
" row[\"davies_bouldin\"] = np.nan\n",
"\n",
" rows.append(row)\n",
"\n",
"df_kdiag = pd.DataFrame(rows)\n",
"df_kdiag"
]
},
{
"cell_type": "code",
"execution_count": 311,
"id": "0198c399-f532-44c5-91a7-d4e0a27887ec",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAAGGCAYAAAAzegNcAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA88RJREFUeJzs3XlcVHX3B/DPzAAzrCM7iAgIuCCK4EouuePappWluTylj6aW2WI8v9LMyjItW0hLK32k0uyxxTLN3FrEUBQVcUFkcWFH2WFg5v7+GGZ0ZIfZgM/79ZpXzZ3v3DmDyty5555zRIIgCCAiIiIiIiIiIiIiIjJzYlMHQERERERERERERERE1BhMahARERERERERERERUavApAYREREREREREREREbUKTGoQEREREREREREREVGrwKQGERERERERERERERG1CkxqEBERERERERERERFRq8CkBhERERERERERERERtQpMahARERERERERERERUavApAYREREREREREREREbUKTGoQEREREVGT+fr6Yvbs2dr7hw8fhkgkwuHDh7Xbhg8fjuDgYOMHR0RERK3Ga6+9BpFIZOowjGr27Nnw9fVtcF1qaipEIhG2bNmi3dYef15Ed2NSg6gVE4lEeO2117T3NR9subm5pguqHbj7505ERNTWnD17FlOnToWPjw9kMhm8vLwwZswYfPTRR6YOTe8SExPx2muvITU1tcZjn3zyic5JBCIiorZoy5YtEIlE2ptMJkPHjh0RERGBDz/8EEVFRaYOUe9mz56t854tLCzg7e2NadOmITEx0dThEVEDmNQgMjN3H0zcfTt27JipQ2yxjz76CHK5HJWVlXWuEYlEWLRokRGj0rVnzx4mLoiIqF06evQo+vXrh9OnT2Pu3Ln4+OOP8dRTT0EsFuODDz7Qrrt48SI2bdpkwkj1IzExEStXrmRSg4iI2r3XX38d27Ztw4YNG7B48WIAwJIlS9CrVy+cOXPGYK/7yiuvoKyszGD7r4tUKsW2bduwbds2bN68GbNnz8aBAwdwzz334MaNG0aPp7FM9fMiMicWpg6AiGr3+uuvw8/Pr8b2gIAAE0SjX7/88gvGjh0LS0tLU4dSpz179iAqKqrWxEZZWRksLPjrk4iI2qY333wTcrkcx48fR4cOHXQey87O1v6/VCo1cmRERERkSOPHj0e/fv209yMjI3Hw4EFMmjQJ9913H86fPw9ra2u9v66FhYVJvmNbWFhgxowZOtsGDRqESZMm4ZdffsHcuXONHlNjmOrnRWROWKlBZKbGjx+PGTNm1Li5uLiYOrQWKS0txZEjRzBx4kRTh1KrkpKSBtfIZDIeQBARUZuVnJyMnj171khoAICbm5v2/++eqVGfxMREjBgxAjY2NvDy8sKaNWtqrMnOzsaTTz4Jd3d3yGQyhISEYOvWrTprapvbAdTebxoALly4gKlTp8LJyQkymQz9+vXDTz/9pH18y5YtePjhhwEAI0aM0FbGHj58GL6+vjh37hyOHDmi3T58+HDtc2/duoUlS5bA29sbUqkUAQEBeOedd6BSqRr1MyEiImoNRo4ciVdffRVpaWmIjo4GAJw5cwazZ89Gly5dIJPJ4OHhgX/961/Iy8vTPu+7776DSCTCkSNHauzz008/hUgkQkJCAoC6Z0RER0ejb9++sLa2hpOTE6ZNm4arV6/qrElKSsKUKVPg4eEBmUyGTp06Ydq0aSgoKGjW+/Xw8ACAGt/5r1y5gocffhhOTk6wsbHBoEGD8Msvv+is0XTeuLv6s67jl7vdunULs2fPhlwuR4cOHTBr1izcunWrxrrafl6abhc//PADgoODIZVK0bNnT+zdu7dxb5yolWFSg6gNys3NxSOPPAIHBwc4Ozvj2WefRXl5uc6aqqoqrFq1Cv7+/pBKpfD19cV//vMfVFRUaNcsXboUzs7OEARBu23x4sUQiUT48MMPtduysrIgEomwYc
OGBmM7cOAAKioqMH78+Ca9J81BwLfffos333wTnTp1gkwmw6hRo3D58uUa6//55x+MGzcOcrkcNjY2uPfee/H333/rrNEcCCQmJuLxxx+Ho6MjhgwZgtmzZyMqKgoAdFp/adw9UyMtLQ1PP/00unXrBmtrazg7O+Phhx+utY0FERGRufPx8UFcXJz2RENL3bx5E+PGjUNISAjWrVuH7t27Y9myZfj111+1a8rKyjB8+HBs27YN06dPx7vvvgu5XI7Zs2frtLxqinPnzmHQoEE4f/48Xn75Zaxbtw62trZ44IEH8P333wMAhg0bhmeeeQYA8J///EfbgqJHjx5Yv349OnXqhO7du2u3/9///R8A9UUa9957L6KjozFz5kx8+OGHGDx4MCIjI7F06dIW/sSIiIjMyxNPPAEA+O233wAA+/fvx5UrVzBnzhx89NFHmDZtGrZv344JEyZozx9MnDgRdnZ2+Pbbb2vsb8eOHejZsyeCg4PrfM0333wTM2fORGBgIN577z0sWbIEBw4cwLBhw7Qn+hUKBSIiInDs2DEsXrwYUVFRmDdvHq5cuVJrMqA2ubm5yM3NRVZWFmJiYvDcc8/B2dkZkyZN0q7JysrCPffcg3379uHpp5/Gm2++ifLyctx3333aY4qWEgQB999/P7Zt24YZM2bgjTfewLVr1zBr1qxG7+Ovv/7C008/jWnTpmHNmjUoLy/HlClTdJJNRG2GQERm5csvvxQACL///ruQk5Ojc8vNzdVZC0BYsWKF9v6KFSsEAEKvXr2EyZMnCx9//LEwY8YMAYDwxBNP6Dx31qxZAgBh6tSpQlRUlDBz5kwBgPDAAw9o1+zatUsAIJw9e1a7LSQkRBCLxcLUqVO123bu3CkAEBISEhp8f/Pnzxf69evX4DoAwsKFC7X3Dx06JAAQQkNDhb59+wrvv/++8Nprrwk2NjbCgAEDdJ574MABwcrKSggPDxfWrVsnvP/++0Lv3r0FKysr4Z9//qnx8woKChLuv/9+4ZNPPhGioqKEo0ePCmPGjBEACNu2bdPe7oztzp/7zp07hZCQEGH58uXCZ599JvznP/8RHB0dBR8fH6GkpKTB90pERGROfvvtN0EikQgSiUQIDw8XXnrpJWHfvn2CQqHQWefj4yPMmjVLe1/zWX3o0CHttnvvvVcAIPz3v//VbquoqBA8PDyEKVOmaLetX79eACBER0drtykUCiE8PFyws7MTCgsL63wNQRCElJQUAYDw5ZdfareNGjVK6NWrl1BeXq7dplKphHvuuUcIDAzUbtMcx9y9T0EQhJ49ewr33ntvje2rVq0SbG1thUuXLulsf/nllwWJRCKkp6fXeA4REZG50pyHOH78eJ1r5HK5EBoaKgiCIJSWltZ4/JtvvhEACH/88Yd222OPPSa4ubkJVVVV2m0ZGRmCWCwWXn/9de02zXdzjdTUVEEikQhvvvmmzmucPXtWsLCw0G4/deqUAEDYuXNnE9/x7XMid9+8vLyEuLg4nbVLliwRAAh//vmndltRUZHg5+cn+Pr6CkqlUhCE2z/HlJQUnefXdvwya9YswcfHR3v/hx9+EAAIa9as0W6rqqoShg4dWuMY5+6flyCoz1NYWVkJly9f1m47ffq0AED46KOPmvrjITJ7rNQgMlOjR4+Gq6urzs3Ly6tRz/Xz88NPP/2EhQsXYtu2bXj66aexbds27WCv06dPY+vWrXjqqaewc+dOPP3009i6dSteeOEF/PDDDzh06BAAYMiQIQCAP//8EwBQUFCAs2fPYsqUKdptmsednJwQFBTUYGx79uxpUeup8vJyHD16FEuWLMGKFSuwevVqxMbGaq8mFQQB8+fPx4gRI/D3339j6dKlWLJkCY4dOwYvLy+88sorNfYZEhKCH374AQsWLMDTTz+N8PBwdO3aFQB0Wn/VZeLEiYiPj8fKlSsxd+5cvPnmm9izZw/S0tLwv//9r9nvlYiIyBTGjBmDmJgY3HfffT
h9+jTWrFmDiIgIeHl56bRuaiw7Ozudz1ErKysMGDAAV65c0W7bs2cPPDw88Nhjj2m3WVpa4plnnkFxcXGtrSvqk5+
"text/plain": [
"<Figure size 1600x400 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, axes = plt.subplots(1, 3, figsize=(16, 4))\n",
"\n",
"axes[0].plot(df_kdiag[\"k\"], df_kdiag[\"inertia\"], marker=\"o\")\n",
"axes[0].set_title(\"Elbow / Inertia\")\n",
"axes[0].set_xlabel(\"K\")\n",
"\n",
"axes[1].plot(df_kdiag[\"k\"], df_kdiag[\"silhouette\"], marker=\"o\")\n",
"axes[1].set_title(\"Silhouette\")\n",
"axes[1].set_xlabel(\"K\")\n",
"\n",
"axes[2].plot(df_kdiag[\"k\"], df_kdiag[\"davies_bouldin\"], marker=\"o\")\n",
"axes[2].set_title(\"Davies-Bouldin\")\n",
"axes[2].set_xlabel(\"K\")\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 312,
"id": "5ba1f3bf-7fd7-49aa-8b28-0ca0f2658bf0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"K=2 | silhouette=0.2357 | davies_bouldin=1.8611\n",
"K=4 | silhouette=0.1262 | davies_bouldin=2.0735\n",
"K=5 | silhouette=0.1332 | davies_bouldin=1.7872\n"
]
}
],
"source": [
"RESULTS = {}\n",
"\n",
"for k in [2, 4, 5]:\n",
" km = KMeans(n_clusters=k, n_init=50, random_state=42)\n",
" labels = km.fit_predict(X_scaled)\n",
" dfc[f\"cluster_k{k}\"] = labels\n",
"\n",
" RESULTS[k] = {\n",
" \"model\": km,\n",
" \"labels\": labels,\n",
" \"silhouette\": silhouette_score(X_scaled, labels),\n",
" \"davies_bouldin\": davies_bouldin_score(X_scaled, labels)\n",
" }\n",
"\n",
"for k in [2, 4, 5]:\n",
" print(f\"K={k} | silhouette={RESULTS[k]['silhouette']:.4f} | davies_bouldin={RESULTS[k]['davies_bouldin']:.4f}\")"
]
},
{
"cell_type": "code",
"execution_count": 313,
"id": "0052976f-e30f-4f84-b720-6fa4a9078aba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=2 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>327</td>\n",
" <td>21039.415608</td>\n",
" <td>5.516534</td>\n",
" <td>0.730769</td>\n",
" <td>469.0</td>\n",
" <td>2.347826</td>\n",
" <td>20.0</td>\n",
" <td>10.000000</td>\n",
" <td>1.416667</td>\n",
" <td>0.042861</td>\n",
" <td>-0.762943</td>\n",
" <td>0.818922</td>\n",
" <td>0.181078</td>\n",
" <td>0.002365</td>\n",
" <td>0.004122</td>\n",
" <td>0.000339</td>\n",
" <td>0.000122</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>77</td>\n",
" <td>133315.879515</td>\n",
" <td>10.375358</td>\n",
" <td>1.000000</td>\n",
" <td>8861.0</td>\n",
" <td>14.769231</td>\n",
" <td>56.0</td>\n",
" <td>34.765306</td>\n",
" <td>2.515152</td>\n",
" <td>0.026428</td>\n",
" <td>-1.057873</td>\n",
" <td>0.505165</td>\n",
" <td>0.494835</td>\n",
" <td>0.041515</td>\n",
" <td>0.023970</td>\n",
" <td>-0.045190</td>\n",
" <td>-0.046754</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k2 \n",
"0 327 21039.415608 5.516534 0.730769 \n",
"1 77 133315.879515 10.375358 1.000000 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k2 \n",
"0 469.0 2.347826 20.0 \n",
"1 8861.0 14.769231 56.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k2 \n",
"0 10.000000 1.416667 \n",
"1 34.765306 2.515152 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k2 \n",
"0 0.042861 -0.762943 \n",
"1 0.026428 -1.057873 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
"cluster_k2 \n",
"0 0.818922 0.181078 \n",
"1 0.505165 0.494835 \n",
"\n",
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
"cluster_k2 \n",
"0 0.002365 0.004122 \n",
"1 0.041515 0.023970 \n",
"\n",
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \n",
"cluster_k2 \n",
"0 0.000339 0.000122 \n",
"1 -0.045190 -0.046754 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=4 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k4</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>165</td>\n",
" <td>14436.315829</td>\n",
" <td>17.218849</td>\n",
" <td>0.949153</td>\n",
" <td>1252.0</td>\n",
" <td>2.527559</td>\n",
" <td>29.0</td>\n",
" <td>7.756757</td>\n",
" <td>3.093750</td>\n",
" <td>0.018779</td>\n",
" <td>-0.996486</td>\n",
" <td>0.916665</td>\n",
" <td>0.083335</td>\n",
" <td>0.000208</td>\n",
" <td>0.001381</td>\n",
" <td>-0.002783</td>\n",
" <td>-0.003603</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>131</td>\n",
" <td>28566.131306</td>\n",
" <td>0.894162</td>\n",
" <td>0.123077</td>\n",
" <td>14.0</td>\n",
" <td>1.650000</td>\n",
" <td>15.0</td>\n",
" <td>8.500000</td>\n",
" <td>0.619718</td>\n",
" <td>0.000000</td>\n",
" <td>-0.127257</td>\n",
" <td>0.792318</td>\n",
" <td>0.207682</td>\n",
" <td>-0.000988</td>\n",
" <td>0.000061</td>\n",
" <td>0.014765</td>\n",
" <td>0.015976</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>86</td>\n",
" <td>76209.164582</td>\n",
" <td>4.871121</td>\n",
" <td>1.000000</td>\n",
" <td>2339.0</td>\n",
" <td>11.116667</td>\n",
" <td>25.5</td>\n",
" <td>40.643704</td>\n",
" <td>0.750000</td>\n",
" <td>0.154712</td>\n",
" <td>-1.021555</td>\n",
" <td>0.216071</td>\n",
" <td>0.783929</td>\n",
" <td>0.030799</td>\n",
" <td>0.022152</td>\n",
" <td>-0.036992</td>\n",
" <td>-0.046754</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>22</td>\n",
" <td>335180.430008</td>\n",
" <td>15.652972</td>\n",
" <td>1.000000</td>\n",
" <td>20193.0</td>\n",
" <td>13.237500</td>\n",
" <td>81.0</td>\n",
" <td>19.072084</td>\n",
" <td>5.158364</td>\n",
" <td>0.028313</td>\n",
" <td>-1.100355</td>\n",
" <td>0.651444</td>\n",
" <td>0.348556</td>\n",
" <td>0.096447</td>\n",
" <td>0.077212</td>\n",
" <td>-0.029813</td>\n",
" <td>-0.031076</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k4 \n",
"3 165 14436.315829 17.218849 0.949153 \n",
"0 131 28566.131306 0.894162 0.123077 \n",
"2 86 76209.164582 4.871121 1.000000 \n",
"1 22 335180.430008 15.652972 1.000000 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k4 \n",
"3 1252.0 2.527559 29.0 \n",
"0 14.0 1.650000 15.0 \n",
"2 2339.0 11.116667 25.5 \n",
"1 20193.0 13.237500 81.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k4 \n",
"3 7.756757 3.093750 \n",
"0 8.500000 0.619718 \n",
"2 40.643704 0.750000 \n",
"1 19.072084 5.158364 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k4 \n",
"3 0.018779 -0.996486 \n",
"0 0.000000 -0.127257 \n",
"2 0.154712 -1.021555 \n",
"1 0.028313 -1.100355 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
"cluster_k4 \n",
"3 0.916665 0.083335 \n",
"0 0.792318 0.207682 \n",
"2 0.216071 0.783929 \n",
"1 0.651444 0.348556 \n",
"\n",
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
"cluster_k4 \n",
"3 0.000208 0.001381 \n",
"0 -0.000988 0.000061 \n",
"2 0.030799 0.022152 \n",
"1 0.096447 0.077212 \n",
"\n",
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \n",
"cluster_k4 \n",
"3 -0.002783 -0.003603 \n",
"0 0.014765 0.015976 \n",
"2 -0.036992 -0.046754 \n",
"1 -0.029813 -0.031076 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=5 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k5</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>168</td>\n",
" <td>12566.405685</td>\n",
" <td>16.253847</td>\n",
" <td>0.911577</td>\n",
" <td>1094.5</td>\n",
" <td>2.524180</td>\n",
" <td>28.5</td>\n",
" <td>8.166667</td>\n",
" <td>2.955196</td>\n",
" <td>0.036286</td>\n",
" <td>-0.970398</td>\n",
" <td>0.914496</td>\n",
" <td>0.085504</td>\n",
" <td>0.001715</td>\n",
" <td>0.003794</td>\n",
" <td>-0.000844</td>\n",
" <td>0.001543</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>111</td>\n",
" <td>37555.632000</td>\n",
" <td>0.661205</td>\n",
" <td>0.076923</td>\n",
" <td>6.0</td>\n",
" <td>1.511111</td>\n",
" <td>12.0</td>\n",
" <td>8.290323</td>\n",
" <td>0.586207</td>\n",
" <td>0.000000</td>\n",
" <td>-0.076923</td>\n",
" <td>0.818922</td>\n",
" <td>0.181078</td>\n",
" <td>-0.002355</td>\n",
" <td>-0.000290</td>\n",
" <td>0.011330</td>\n",
" <td>0.021365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>90</td>\n",
" <td>59767.645164</td>\n",
" <td>4.216773</td>\n",
" <td>0.995868</td>\n",
" <td>1725.5</td>\n",
" <td>10.811538</td>\n",
" <td>24.0</td>\n",
" <td>39.349432</td>\n",
" <td>0.708536</td>\n",
" <td>0.177072</td>\n",
" <td>-1.017685</td>\n",
" <td>0.181774</td>\n",
" <td>0.818226</td>\n",
" <td>0.029066</td>\n",
" <td>0.024737</td>\n",
" <td>-0.025887</td>\n",
" <td>-0.038057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>212211.195227</td>\n",
" <td>15.786289</td>\n",
" <td>1.000000</td>\n",
" <td>17459.5</td>\n",
" <td>9.768395</td>\n",
" <td>77.5</td>\n",
" <td>17.601779</td>\n",
" <td>5.770464</td>\n",
" <td>0.028313</td>\n",
" <td>-1.083467</td>\n",
" <td>0.691744</td>\n",
" <td>0.308256</td>\n",
" <td>0.062011</td>\n",
" <td>0.052249</td>\n",
" <td>-0.031718</td>\n",
" <td>-0.037537</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>114674.703704</td>\n",
" <td>7.607274</td>\n",
" <td>0.944444</td>\n",
" <td>1221.0</td>\n",
" <td>0.861111</td>\n",
" <td>12.0</td>\n",
" <td>3.800000</td>\n",
" <td>2.600000</td>\n",
" <td>-0.049426</td>\n",
" <td>-1.237358</td>\n",
" <td>0.999914</td>\n",
" <td>0.000086</td>\n",
" <td>-0.035506</td>\n",
" <td>-0.027489</td>\n",
" <td>-0.000900</td>\n",
" <td>-0.036517</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k5 \n",
"1 168 12566.405685 16.253847 0.911577 \n",
"3 111 37555.632000 0.661205 0.076923 \n",
"4 90 59767.645164 4.216773 0.995868 \n",
"0 30 212211.195227 15.786289 1.000000 \n",
"2 5 114674.703704 7.607274 0.944444 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k5 \n",
"1 1094.5 2.524180 28.5 \n",
"3 6.0 1.511111 12.0 \n",
"4 1725.5 10.811538 24.0 \n",
"0 17459.5 9.768395 77.5 \n",
"2 1221.0 0.861111 12.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k5 \n",
"1 8.166667 2.955196 \n",
"3 8.290323 0.586207 \n",
"4 39.349432 0.708536 \n",
"0 17.601779 5.770464 \n",
"2 3.800000 2.600000 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k5 \n",
"1 0.036286 -0.970398 \n",
"3 0.000000 -0.076923 \n",
"4 0.177072 -1.017685 \n",
"0 0.028313 -1.083467 \n",
"2 -0.049426 -1.237358 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
"cluster_k5 \n",
"1 0.914496 0.085504 \n",
"3 0.818922 0.181078 \n",
"4 0.181774 0.818226 \n",
"0 0.691744 0.308256 \n",
"2 0.999914 0.000086 \n",
"\n",
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
"cluster_k5 \n",
"1 0.001715 0.003794 \n",
"3 -0.002355 -0.000290 \n",
"4 0.029066 0.024737 \n",
"0 0.062011 0.052249 \n",
"2 -0.035506 -0.027489 \n",
"\n",
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \n",
"cluster_k5 \n",
"1 -0.000844 0.001543 \n",
"3 0.011330 0.021365 \n",
"4 -0.025887 -0.038057 \n",
"0 -0.031718 -0.037537 \n",
"2 -0.000900 -0.036517 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Variables summarised by their per-cluster median in the profile tables below.\n",
"# The two benchmark-correlation variables at the end of the list are deliberately disabled.\n",
"profile_vars = [\n",
" \"aum_qty_mean\",\n",
" \"gross_flow_to_aum\",\n",
" \"flow_freq\",\n",
" \"n_tx_total\",\n",
" \"avg_n_isin_held\",\n",
" \"n_isin_total\",\n",
" \"avg_holding_months_per_isin\",\n",
" \"exit_rate_per_isin\",\n",
" \"flow_direction_balance\",\n",
" \"redemption_bias\",\n",
" \"aum_drawdown_last\",\n",
" \"aum_final_to_peak\",\n",
" \"corr_flow_fund_lag3\",\n",
" \"corr_flow_fund_lag6\",\n",
" \"corr_flow_rate_lag3\",\n",
" \"corr_flow_rate_lag6\",\n",
" #\"corr_flow_bench_lag3\",\n",
" #\"corr_flow_bench_lag6\"\n",
"]\n",
"\n",
"# NOTE(review): profile_vars2 is not referenced anywhere else in this cell;\n",
"# presumably it is consumed by another cell - confirm, or remove it.\n",
"profile_vars2 = [\n",
" \"aum_qty_mean\",\n",
" \"gross_flow_to_aum\",\n",
" \"flow_freq\",\n",
" \"n_tx_total\",\n",
" \"log_gross_flow_qty_mean\",\n",
" \"net_flow_vol\",\n",
"]\n",
"\n",
"# Keep only the variables that actually exist as columns of dfc.\n",
"profile_vars = [c for c in profile_vars if c in dfc.columns]\n",
"\n",
"# For each clustering (K = 2, 4, 5): number of non-null client IDs per cluster and the\n",
"# per-cluster median of every profile variable (columns suffixed \"_med\"), largest cluster first.\n",
"for k in [2, 4, 5]:\n",
" print(f\"\\n===== K={k} =====\")\n",
" prof = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" **{f\"{c}_med\": (c, \"median\") for c in profile_vars}\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
" display(prof)"
]
},
{
"cell_type": "code",
"execution_count": 237,
"id": "ff8bdf91-859c-419e-a2ea-eb4a5f44f0df",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAkyVJREFUeJzs3XdYk9fbB/BvCKAgiiwVERdKQIaA4ECEihvqwlUt8MOtVat2KVoHaou2jrpaceGs1SpuwL2LuFAc4FYEFwKKCgok5/2DN4+EBEwgISTcn+vykpycPLlzeJLcPGfxGGMMhBBCCCFVmI66AyCEEEIIUTdKiAghhBBS5VFCRAghhJAqjxIiQgghhFR5lBARQgghpMqjhIgQQgghVR4lRIQQQgip8ighIoQQQkiVRwkRIYQQQqo8SogqmaCgIHz55ZdKPaZAIMCcOXOUekx5REVFQSAQIDU1tcKfGwCmTp0KX19ftTw3kZSamgqBQICoqCh1h6I0qnpNvr6+mDp1qlKPWZmfVxWCgoIQFBSk7jCUYs+ePejevTscHBzg7u6u7nC0mq66A6jMEhMTsWfPHsTHxyMtLQ21a9dGy5YtMWnSJDRp0kSiblBQEC5cuAAA4PF4MDQ0hIWFBZydndGnTx+0b99eHS+BELndu3cPMTEx6Nu3Lxo0aFCmY+zfvx8ZGRkICQlRbnBa5sqVKzh37hz+97//oVatWuoOp9y2bt0KAwMDBAQEqDsUmV68eIEdO3agc+fOsLe3V3c4crt//z5CQ0PRoUMHjBo1CtWrV1d3SFqNEqJSrF27FleuXEH37t0hEAiQnp6OrVu3IiAgANu3b4etra1E/Xr16uG7774DAOTm5uLx48c4cuQI9u3bhx49euD333+Hnp6eOl6KWvTu3Rv+/v7Q19dXdyhEDvfu3cOKFSvQunXrMidEBw4cwN27d6USIisrKyQmJkJXlz5yACAhIQErVqxA3759pRKi2NhY8Hi8Co+pPM+7bds2mJiYVJqEaN26dRK3X758iRUrVsDKykqjEqILFy5AJBJh+vTpaNSokbrD0Xr06VSKkJAQLFy4UOIL3c/PDz179sTq1auxcOFCifo1a9ZE7969Jcp++OEHzJs3D3///TesrKzw448/VkjslQGfzwefz1fa8XJzc2FgYKC045GKw+PxUK1aNXWHUSqRSIT8/Hy1x6muPyC06Q8XbXktGRkZAAq/W0rDGMPHjx/pClI50RiiUri5uUm9sRo3bozmzZvjwYMHch2Dz+fj559/RrNmzbB161a8fftWrsfduHEDX331FZydneHr64tt27ZJ1cnLy8OyZcvQpUsXODo6wsfHB7/99hvy8vJkHvPo0aP48ssv4ejoCH9/f5w+fVri/rS0NMyePRvdunWDs7Mz2rRpg2+//VZiDND169chEAiwe/duqeOfOXMGAoEAJ06cAFDyGKKtW7fC398fjo6O8PLyQlhYGLKzsyXqiMdS3bhxA19//TVatmyJxYsXc69j1KhR8PLygqOjIzp37oyVK1dCKBTK0bLSrl+/juHDh6NNmzZce4eGhkrUEYlE2LBhA/z9/eHk5ARPT0/MnDkTb968kaq3fPlyeHl5oWXLlggKCsK9e/ekxmeI2+bSpUuYN28e2rZtC3d3d8ycORN5eXnIzs7GTz/9BA8PD3h4eOC3334DY6xMMfn6+mL06NG4dOkS+vfvDycnJ3Tq1Al79uyRiGfixIkAgODgYAgEAggEAsTHx8vd5kFBQTh58iTS0tK4x4vHcJU03iYuLg5DhgyBi4sL3N3dMXbsWNy/f1+izvLlyyEQCPD48WNMnToV7u7uaNWqFUJDQ5GbmytRNzMzE/fv35cql0U8tm7fvn1cG545cwZAYRdLaGgoPD09uffLzp07P3vM5ORkTJ06FZ06dYKTkxPat2+P0NBQZGVlSbye3377DQDQqVMnrq3E75Oi54oi77fyxF38eYFP5+
jly5cRHh6Otm3bwsXFBePGjUNmZqbE4+7evYsLFy5wr6Xo+J3s7Gz88ssv8PHxgaOjI7p06YLVq1dDJBJxdcTnx7p167B9+3Z07twZjo6O6NevHxITEyXiTE9PR2hoKLy9vbnPkLFjx0p8zhQdQxQfH4/+/fsDAEJDQ7kYo6KisGzZMjg4OEi8HrEZM2bA3d0dHz9+lNle69atg0AgQFpamtR9ixYtgqOjI/defPToESZMmID27dvDyckJ3t7emDx5cqnfB76+vli+fDkAoF27dhAIBNxt8Xv6zJkzCAgIgLOzM/755x+521tcb+rUqWjVqhXc3d0xZcoUJCUlSb1PSxqPJWuMpjI/k4rG+euvv8LX1xeOjo7w9vbGTz/9hMzMTLx//x4uLi6YN2+e1OOeP38Oe3t7RERElNjGxdEVIgUxxvDq1Ss0b95c7sfw+Xz4+/tj6dKluHz5Mr744otS67958wajRo1Cjx494O/vj5iYGMyePRt6enrcG1skEmHs2LG4fPkyBg4cCBsbG9y5cwcbN27Eo0eP8Oeff0oc8/Llyzh8+DCGDBmCGjVqYPPmzfj2229x4sQJmJiYACj88E1ISIC/vz/q1auHtLQ0bNu2DcHBwTh48CAMDAzg5OQEa2trbqxJUdHR0TA2NoaXl1eJr2358uVYsWIFPD09MXjwYDx8+BDbtm3D9evXsW3bNokuxdevX2PkyJHw9/dHr169YGZmBgDYvXs3DA0NMXToUBgaGuL8+fNYtmwZ3r17hylTpsj9ewEK/wIbPnw4TExMMGrUKNSqVQupqak4cuSIRL2ZM2di9+7dCAgIQFBQEFJTU7F161bcunVLIu5FixZh7dq16NixIzp06IDk5GQMHz68xA/VefPmwdzcHBMmTMC1a9ewfft21KxZEwkJCbC0tMTkyZNx+vRprFu3Dra2tujTp4/CMQHA48ePMXHiRPTv3x99+/bFrl27MHXqVDg4OKB58+bw8PBAUFAQNm/ejDFjxqBp06YAABsbG7nbfMyYMXj79i2eP3/OJZQ1atQose3/++8/jBw5Eg0aNMD48ePx4cMHbNmyBYMHD0ZUVJRUt92kSZPQoEEDfPfdd7h16xb+/fdfmJqaSlx13bp1K1asWIFNmzahTZs2pf7uAeD8+fOIiYnB119/DRMTE1hZWeHVq1cYOHAgeDwevv76a5iamuL06dOYPn063r17V+r4qP/++w9PnjxBQEAALCwscPfuXezYsQP37t3Djh07wOPx0KVLFzx69AgHDhxAaGgo9/4zNTWVOp4i77fyxF2aefPmoVatWhg/fjzS0tKwceNGzJkzB3/88QcAYNq0aZg7dy4MDQ0xZswYAIC5uTmAwqu6gYGBePHiBb766itYWloiISEBixcvRnp6OqZPny7xXAcOHMD79+8xaNAg8Hg8rF27FhMmTMDRo0e583nChAm4d+8eAgMDYWVlhczMTJw7dw7Pnj2T2dVrY2ODb7/9FsuWLcOgQYPQqlUrAIV/8LZq1QorV65EdHQ0AgMDucfk5eXh0KFD6Nq1a4lXDMXDIGJiYjBixAiJ+2JiYtC+fXsYGxsjLy8Pw4cPR15eHgIDA2Fubo4XL17g5MmTyM7OLvHqz7Rp07Bnzx4cOXIEs2fPhqGhIQQCAXf/w4cP8f3332PQoEEYOHAgmjRpInd7M8bwzTff4PLly/jqq69gY2ODI0eOKPz5WZwyP5MA4P379/j6669x//599OvXDy1atEBWVhaOHz+OFy9ewN7eHp07d0ZMTAxCQ0MleiQOHDgAxhh69uwp/wtgRCF79uxhtra27N9//5UoDwwMZP7+/iU+7siRI8zW1pZt3Lix1OMHBgYyW1tbtn79eq7s48ePrHfv3qxdu3YsLy+Pi8POzo5dvHhR4vHbtm1jtra27PLly1yZra0tc3BwYI8fP+bKkpKSmK2tLdu8eTNXlpubKxVPQkICs7W1Zbt37+bKFi1axBwcHNjr16
8lYnR3d2ehoaFc2a5du5itrS178uQJY4yxjIwM5uDgwIYNG8aEQiFXb8uWLczW1pbt3LlTqh22bdsmFZOsOGfMmMF
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Graphical analysis of the clusters: features2\n",
"sns.set_style(\"whitegrid\")\n",
"# Global medians over all of dfc, drawn below as dashed reference lines.\n",
"thr_int = dfc[\"gross_flow_to_aum\"].median()\n",
"thr_freq = dfc[\"flow_freq\"].median()\n",
"\n",
"plt.figure()\n",
"# One scatter series per K=5 cluster; rows labelled 2, 4 or 6 are left out of this figure.\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([2, 4, 6.0])].groupby(\"cluster_k5\"):\n",
" plt.scatter(g[\"flow_freq\"], g[\"gross_flow_to_aum\"], s=10, label=f\"Cluster {int(name)}\")\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_freq, linestyle=\"--\")\n",
"plt.axhline(thr_int, linestyle=\"--\")\n",
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"plt.legend(markerscale=2)\n",
"# The y-axis is restricted to [0.1, 1000]; points outside that range are not visible.\n",
"plt.ylim(0.1,1000)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 238,
"id": "0bb325e2-a490-465c-9c8f-2121694f9b92",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAl4VJREFUeJzs3XdcU9f7B/BPCCDgYKOo4GbIEBAHiKC4cQKOaoW6R9VWO5y1raOF9ldttdo6655VcBW07omIihtwoDJUVEAcoEByf3/wTUpIArnhZpHn/Xr5kpzc3Pvk3tybJ+ecew6PYRgGhBBCCCF6zEDTARBCCCGEaBolRIQQQgjRe5QQEUIIIUTvUUJECCGEEL1HCREhhBBC9B4lRIQQQgjRe5QQEUIIIUTvUUJECCGEEL1HCREhhBBC9B4lRFomIiIC/fr143Sdzs7OWLhwIafrVERMTAycnZ2RlZWl9m0DwOzZsxEcHKyRbRNJWVlZcHZ2RkxMjKZD4Yyq3lNwcDBmz57N6Tq1ebuqEBERgYiICE2HwYl9+/ahd+/ecHNzg6+vr6bDqdEMNR2ANrtx4wb27duHxMREZGdnw8LCAm3atMH06dPRrFkziWUjIiJw6dIlAACPx4OZmRlsbW3h6emJQYMGoVOnTpp4C4Qo7P79+4iPj0doaCgaN26s1DoOHjyI3NxcjBo1itvgapirV6/i/Pnz+OSTT1CvXj1Nh1Nt27Ztg6mpKcLCwjQdikw5OTnYvXs3unfvDldXV02Ho7AHDx5gzpw56Ny5MyZMmAATExNNh1SjUUJUiXXr1uHq1avo3bs3nJ2d8eLFC2zbtg1hYWHYtWsXnJycJJZv0KABvvjiCwBAUVERHj9+jKNHj+LAgQPo06cP/u///g9GRkaaeCsaMXDgQPTt2xfGxsaaDoUo4P79+1ixYgXat2+vdEJ06NAh3Lt3TyohatSoEW7cuAFDQ7rkAEBycjJWrFiB0NBQqYTo8OHD4PF4ao+pOtvdsWMHLC0ttSYhWr9+vcTj58+fY8WKFWjUqJFOJUSXLl2CUCjEvHnz0KRJE02HU+PR1akSo0aNwi+//CLxhR4SEoL+/ftjzZo1+OWXXySWr1u3LgYOHChR9tVXX2Hx4sXYvn07GjVqhK+//lotsWsDPp8PPp/P2fqKiopgamrK2fqI+vB4PNSqVUvTYVRKKBSipKRE43Fq6gdETfrhUlPeS25uLoCy75bKMAyDDx8+UA1SNVEfokr4+PhInVhNmzZFq1atkJ6ertA6+Hw+vvnmG7Rs2RLbtm3DmzdvFHrdrVu38NFHH8HT0xPBwcHYsWOH1DLFxcVYvnw5evToAXd3dwQFBeHnn39GcXGxzHUeO3YM/fr1g7u7O/r27YszZ85IPJ+dnY3vv/8evXr1gqenJzp06IDPPvtMog/QzZs34ezsjNjYWKn1nz17Fs7Ozjh58iQA+X2Itm3bhr59+8Ld3R0BAQFYsGABXr9+LbGMqC/VrVu38PHHH6NNmzZYunSp+H1MmDABAQEBcHd3R/fu3bFy5UoIBAIF9qy0mzdvYuzYsejQoYN4f8+ZM0diGaFQiI0bN6Jv377w8PCAv78/vv32WxQUFEgt9/vvvyMgIABt2rRBREQE7t+/L9U/Q7RvLl++jMWLF6Njx47w9fXFt99+i+LiYrx+/RozZ85Eu3bt0K5dO/z8889gGEapmIKDgzFx4kRcvnwZgwcPhoeHB7p164Z9+/ZJxPP5558DACIjI+Hs7AxnZ2ckJiYqvM8jIiJw6tQpZGdni18v6sMlr79NQkICRowYAS8vL/j6+mLy5Ml48OCBxDK///47nJ2d8fjxY8yePRu+vr5o27Yt5syZg6KiIoll8/Ly8ODBA6lyWUR96w4cOCDeh2fPngVQ1sQyZ84c+Pv7i8+XPXv2VLnO1NRUzJ49G926dYOHhwc6deqEOXPmID8/X+L9/PzzzwCAbt26ifeV6Dwp/1
lhc75VJ+6K2wX++4xeuXIFUVFR6NixI7y8vDBlyhTk5eVJvO7evXu4dOmS+L2U77/z+vVr/PDDDwgKCoK7uzt69OiBNWvWQCgUipcRfT7Wr1+PXbt2oXv37nB3d0d4eDhu3LghEeeLFy8wZ84cBAYGiq8hkydPlrjOlO9DlJiYiMGDBwMA5syZI44xJiYGy5cvh5ubm8T7EZk/fz58fX3x4cMHmftr/fr1cHZ2RnZ2ttRzS5Ysgbu7u/hcfPToEaZNm4ZOnTrBw8MDgYGBmDFjRqXfB8HBwfj9998BAH5+fnB2dhY/Fp3TZ8+eRVhYGDw9PbFz506F97doudmzZ6Nt27bw9fXFrFmzkJKSInWeyuuPJauPJpfXpPJx/vjjjwgODoa7uzsCAwMxc+ZM5OXl4d27d/Dy8sLixYulXvfs2TO4urpi9erVcvdxRVRDxBLDMHj58iVatWql8Gv4fD769u2LZcuW4cqVK+jSpUulyxcUFGDChAno06cP+vbti/j4eHz//fcwMjISn9hCoRCTJ0/GlStXMHToULRo0QJ3797Fpk2b8OjRI/zxxx8S67xy5Qr+/fdfjBgxArVr18aWLVvw2Wef4eTJk7C0tARQdvFNTk5G37590aBBA2RnZ2PHjh2IjIzEP//8A1NTU3h4eMDBwUHc16S8uLg4mJubIyAgQO57+/3337FixQr4+/tj+PDhePjwIXbs2IGbN29ix44dEk2Kr169wvjx49G3b18MGDAA1tbWAIDY2FiYmZlh9OjRMDMzw8WLF7F8+XK8ffsWs2bNUvi4AGW/wMaOHQtLS0tMmDAB9erVQ1ZWFo4ePSqx3LfffovY2FiEhYUhIiICWVlZ2LZtG+7cuSMR95IlS7Bu3Tp07doVnTt3RmpqKsaOHSv3orp48WLY2Nhg2rRpuH79Onbt2oW6desiOTkZ9vb2mDFjBs6cOYP169fDyckJgwYNYh0TADx+/Biff/45Bg8ejNDQUOzduxezZ8+Gm5sbWrVqhXbt2iEiIgJbtmzBpEmT0Lx5cwBAixYtFN7nkyZNwps3b/Ds2TNxQlm7dm25+/7ChQsYP348GjdujKlTp+L9+/fYunUrhg8fjpiYGKlmu+nTp6Nx48b44osvcOfOHfz999+wsrKSqHXdtm0bVqxYgc2bN6NDhw6VHnsAuHjxIuLj4/Hxxx/D0tISjRo1wsuXLzF06FDweDx8/PHHsLKywpkzZzBv3jy8ffu20v5RFy5cQGZmJsLCwmBra4t79+5h9+7duH//Pnbv3g0ej4cePXrg0aNHOHToEObMmSM+/6ysrKTWx+Z8q07clVm8eDHq1auHqVOnIjs7G5s2bcLChQvx22+/AQDmzp2LRYsWwczMDJMmTQIA2NjYACir1R05ciRycnLw0Ucfwd7eHsnJyVi6dClevHiBefPmSWzr0KFDePfuHYYNGwYej4d169Zh2rRpOHbsmPjzPG3aNNy/fx8jR45Eo0aNkJeXh/Pnz+Pp06cym3pbtGiBzz77DMuXL8ewYcPQtm1bAGU/eNu2bYuVK1ciLi4OI0eOFL+muLgYR44cQc+ePeXWGIq6QcTHx2PcuHESz8XHx6NTp04wNzdHcXExxo4di+LiYowcORI2NjbIycnBqVOn8Pr1a7m1P3PnzsW+fftw9OhRfP/99zAzM4Ozs7P4+YcPH+LLL7/EsGHDMHToUDRr1kzh/c0wDD799FNcuXIFH330EVq0aIGjR4+yvn5WxOU1CQDevXuHjz/+GA8ePEB4eDhat26N/Px8nDhxAjk5OXB1dUX37t0RHx+POXPmSLRIHDp0CAzDoH///oq/AYawsm/fPsbJyYn5+++/JcpHjhzJ9O3bV+7rjh49yjg5OTGbNm2qdP0jR45knJycmL/++ktc9uHDB2bgwIGMn58fU1xcLI7DxcWFSUpKknj9jh07GCcnJ+bKlSviMicnJ8bNzY15/PixuCwlJYVxcnJitmzZIi4rKiqSii
c5OZlxcnJiYmNjxWVLlixh3NzcmFevXknE6Ovry8yZM0dctnfvXsbJyYnJzMxkGIZhcnNzGTc3N2bMmDGMQCAQL7d
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Graphical analysis of the clusters: features2\n",
"\n",
"# Global medians over all of dfc, drawn below as dashed reference lines.\n",
"thr_int = dfc[\"gross_flow_to_aum\"].median()\n",
"thr_freq = dfc[\"flow_freq\"].median()\n",
"\n",
"plt.figure()\n",
"# Rows labelled 0, 1, 3, 4 or 6 are left out, so among the K=5 labels 0-4 only cluster 2 is drawn.\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([1,3,0, 4, 6.0])].groupby(\"cluster_k5\"):\n",
" plt.scatter(g[\"flow_freq\"], g[\"gross_flow_to_aum\"], s=10, label=f\"Cluster {int(name)}\")\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_freq, linestyle=\"--\")\n",
"plt.axhline(thr_int, linestyle=\"--\")\n",
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"plt.legend(markerscale=2)\n",
"# The y-axis is restricted to [0.1, 1000]; points outside that range are not visible.\n",
"plt.ylim(0.1,1000)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 314,
"id": "4d04e670-51ae-482d-a5c5-93fe8263cfeb",
"metadata": {},
"outputs": [],
"source": [
"# Analysis: features\n",
"\n",
"# Legend labels for the K=5 clusters, keyed by cluster id. Cluster 2 has no entry.\n",
"# NOTE(review): the counts in parentheses are hard-coded - presumably the cluster sizes\n",
"# from the K=5 profile table; they must be updated by hand if the clustering changes.\n",
"labels_map = {\n",
" 0: \"Cluster 0 (30): Large and highly active movers\",\n",
" 1: \"Cluster 1 (168): Occasional large movers\",\n",
" 3: \"Cluster 3 (111): Dormant profiles\",\n",
" 4: \"Cluster 4 (90): Loyal clients\"\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 315,
"id": "6ebe0025-0532-4e51-acb2-81aa786a164b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAHqCAYAAADVi/1VAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdYU9cbB/BvWGEPBQeCogxFVFBEVLRoHbhw1lV/DFet4l6oVQFHFbdVnG1ddddZrQNxL5w4UXHgxK0ooIzk/P5IcyVkkEBCAryf5+GB3Jx777knN5ebN+e8h8cYYyCEEEIIIYQQQgghpAjpabsChBBCCCGEEEIIIaT0oaAUIYQQQgghhBBCCClyFJQihBBCCCGEEEIIIUWOglKEEEIIIYQQQgghpMhRUIoQQgghhBBCCCGEFDkKShFCCCGEEEIIIYSQIkdBKUIIIYQQQgghhBBS5CgoRQghhBBCCCGEEEKKHAWlCCGEEEIIIYQQQkiRo6BUMdWsWTPUqlVLrdvk8XgYOnSoWrepjLVr14LH4yE5ObnI9w0AoaGhcHJy0sq+iaTk5GTweDysXbtW21VRG00dk5OTE0JDQ9W6TV3eryY0a9YMzZo103Y11GLDhg2oUaMGDA0NYW1tre3qEFKq0D2Z+tA9me4oifdkuioyMhI8Hq9A66rzXsbJyQkdOnRQy7aKu8K8JupWku5X5aGglBpcvHgRQ4cOhYeHB8zMzFC5cmX06NED9+7dkyrbrFkz8Hg88Hg86OnpwdLSEtWrV0dQUBBiY2O1UHtCVHP79m1ERkYW6oZ106ZNWLRokdrqVFKdPXsWkZGR+Pjxo7arohbLli3T6ZvbFy9eIDIyEgkJCdquikru3LmD0NBQODs7Y/Xq1Vi1apW2q0SI1tA9GSlN6J6scNTRfsrKyMhAZGQkjh8/rvF9kZKtuN6vKmKg7QqUBNHR0Thz5gy6d++OOnXq4OXLl1i6dCnq1auH8+fPS3175uDggFmzZgEA0tPTcf/+fezcuRN//fUXevTogb/++guGhobaOBStCAoKQq9evcDn87VdFaKE27dvIyoqCs2aNSvwt5mbNm3CzZs3MXLkSInlVapUwZcvX0rV+a/I2bNnERUVhdDQUKneL3fv3oWeXtF/r1CY/S5btgy2trY609Pq8OHDEo9fvHiBqKgoODk5wcvLSzuVKoDjx49DKBRi8eLFcHFx0XZ1CNEquicrHLonK17onqxw1NF+ysrIyEBUVBQASPV6mTx5MiZMmKDR/ZOSo7jerypCQSk1GD16NDZt2gQjIyNuWc+ePVG7dm3Mnj0bf/31l0R5Kysr/O9//5NYNnv2bAwfPhzLli2Dk5MToqOji6TuukBfXx/6+vpq215GRgZMTU3Vtj1SdHg8HoyNjbVdDYWEQiGysrK0Xk9tfWAoSR9Ucl+zi7PXr18DQL7D9hhj+Pr1K0xMTIqgVoRoB92TFQ7dkxGx4nBPVlIYGBjAwKB0fCynexEiCw3fU4PGjRtLfbhxdXWFh4cHEhMTldqGvr4+fvvtN9SsWRNLly5FamqqUutdvnwZjRs3homJCapWrYoVK1ZIlcnMzERERARcXFzA5/Ph6OiI8ePHIzMzU+Y2d+/ejVq1aoHP58PDwwMHDx6UeP7x48cYMmQIqlevDhMTE5QtWxbdu3eX6Pp66dIl8Hg8rFu3Tmr7hw4dAo/Hw759+wDIz1+wbNkyeHh4gM/nw97eHmFhYVLDmMR5HC5fvozvvvsOpqammDRpEgBgz549aN++Pezt7cHn8+Hs7Izp06dDIBDk16wyXbp0CQEBAbC1teXau1+/fhJlhEIhFi1aBA8PDxgbG6N8+fIYNGgQPnz4IFUuMjIS9vb2MDU1RfPmzXH79m2pfD3itjl9+jSGDx8OOzs7WFtbY9CgQcjKysLHjx8RHBwMGxsb2NjYYPz48WCMFahO4nHkp0+fRoMGDWBsbIxq1aph/fr1Ev
Xp3r07AKB58+bcsAdxV2Rl2rxZs2bYv38/Hj9+zK0v/nZKXv6Co0ePomnTpjAzM4O1tTU6deok9d4Sj/2+f/8+17PIysoKffv2RUZGhkTZt2/f4s6dO1LLZRHn9di4cSN3PorfE8+fP0e/fv1Qvnx57v3y559/5rvN69evIzQ0FNWqVYOxsTEqVKiAfv364d27dxLHM27cOABA1apVubYSv09ynyuqvN8KU++8+wW+naNnzpzB6NGjYWdnBzMzM3Tp0gVv3ryRWO/WrVs4ceIEdyy5vyn8+PEjRo4cCUdHR/D5fLi4uCA6OhpCoZArIz4/5s2bh1WrVsHZ2Rl8Ph8+Pj64ePGiRD1fvnyJvn37wsHBAXw+HxUrVkSnTp0krjO5x+gfP34cPj4+AIC+fftydVy7di0iIiJgaGgocTxiP/30E6ytrfH161eZ7TVv3jzweDw8fvxY6rmJEyfCyMiIey8mJSWhW7duqFChAoyNjeHg4IBevXop/H/g5OSEiIgIAICdnR14PB4iIyO55zp06IBDhw6hfv36MDExwcqVK5Vub3G50NBQWFlZwdraGiEhIUhISJB6n8rLdyArP4w6r0m56zlq1Cg4OTmBz+fDwcEBwcHBePv2LdLS0mBmZoYRI0ZIrffs2TPo6+tzPWVI8Uf3ZHRPJkb3ZCX7nqx69eowNjaGt7c3Tp48KVX26tWraNu2LSwtLWFubo4WLVrg/PnzSrcfABw4cIA7VgsLC7Rv3x63bt2S2E9oaCjMzc3x/PlzdO7cGebm5rCzs8PYsWO5tk5OToadnR0AICoqituX+P+1rPxFa9aswffff49y5cqBz+ejZs2aWL58eb5tpMhff/2FBg0awNTUFDY2Nvjuu++keo0DyPf/rrx8S7KuH/LuRY4fPw4ej4dt27Zh5syZcHBwgLGxMVq0aIH79+8rPI6///4bPB4PJ06ckHpu5cqV4PF4uHnzJgDl7geVlZOTg+nTp3P3n05OTpg0aZLE9TskJAS2trbIzs6WWr9169aoXr0697ggr7Gm7le1jhGNEAqFrFKlSqx169YSy/39/ZmHh4fc9aZPn84AsH379incvr+/P7O3t2flypVjQ4cOZb/99htr0qQJA8D++OMPrpxAIGCtW7dmpqambOTIkWzlypVs6NChzMDAgHXq1ElimwCYp6cnq1ixIps+fTpbtGgRq1atGjM1NWVv377lym3fvp15enqyqVOnslWrVrFJkyYxGxsbVqVKFZaens6Vq1atGmvXrp1U3fv27ctsbGxYVlYWY4yxNWvWMADs0aNHXJmIiAgGgLVs2ZItWbKEDR06lOnr6zMfHx9uPXE7VKhQgdnZ2bFhw4axlStXst27dzPGGOvcuTPr0aMHmzt3Llu+fDnr3r07A8DGjh0rUZ+QkBBWpUoVhe396tUrZmNjw9zc3NjcuXPZ6tWr2S+//MLc3d0lyg0YMIAZGBiwgQMHshUrVrDw8HBmZmYmVe/x48czACwwMJAtXbqUDRw4kDk4ODBbW1sWEhLClRO3jZeXF2vTpg2LiYlhQUFBDAAbP348a9KkCfvxxx/ZsmXLWIcOHRgAtm7dugLVqUqVKqx69eqsfPnybNKkSWzp0qWsXr16jMfjsZs3bzLGGHvw4AEbPnw4A8AmTZrENmzYwDZs2MBevnypdJsfPnyYeXl5MVtbW279Xbt2McYYe/ToEQPA1qxZw5WPjY1lBgYGzM3Njc2ZM4dFRUUxW1tbZmNjI/OcqVu3LuvatStbtmwZGzBgANdWuYnLHjt2TOHrzpjofeHu7s7s7OxYVFQUi4mJYVevXmUvX75kDg4OzNHRkU2bNo0tX76cdezYkQFgCxcu5NaXdUzz5s1jTZs2ZdOmTWOrVq1iI0aMYCYmJqxBgwZMKBQyxhi7du0a6927N7c9cVulpaVxr1fuc0XZ95uy9ZYn737F52jdunXZ999/z5YsWcLGjBnD9PX1WY8ePbhyu3btYg4ODq
xGjRrcsRw+fJgxxlh6ejqrU6cOK1u2LJs0aRJbsWIFCw4OZjwej40YMUKqLevWrctcXFxYdHQ0mzNnDrO1tWUODg4
"text/plain": [
"<Figure size 1200x500 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, axes = plt.subplots(1, 2, figsize=(12,5), sharey=False)\n",
"\n",
"# Reference thresholds (global medians over dfc). thr_int is recomputed here so the\n",
"# dashed line cannot be a stale value left in the kernel by an earlier cell.\n",
"thr_int = dfc[\"gross_flow_to_aum\"].median()\n",
"thr_log_tx = dfc[\"log_n_tx_total\"].median()\n",
"\n",
"# --- Panel 1: transaction activity vs relative flow intensity (clusters 2 and 4 hidden) ---\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([2,4])].groupby(\"cluster_k5\"):\n",
"    axes[0].scatter(\n",
"        g[\"log_n_tx_total\"], g[\"gross_flow_to_aum\"],\n",
"        s=10,\n",
"        # Fall back to a generic name for clusters without an entry in labels_map.\n",
"        label=labels_map.get(int(name), \"Cluster {}\".format(int(name)))\n",
"    )\n",
"\n",
"axes[0].set_yscale(\"log\")\n",
"axes[0].axvline(thr_log_tx, linestyle=\"--\")\n",
"axes[0].axhline(thr_int, linestyle=\"--\")\n",
"axes[0].set_xlabel(\"Activity frequency (log_n_tx_total)\")\n",
"axes[0].set_ylabel(\"Gross flow / mean AUM\")\n",
"axes[0].set_title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"axes[0].set_ylim(0.1,1000)\n",
"axes[0].legend(markerscale=2)\n",
"\n",
"# --- Panel 2: holding period vs last AUM drawdown (clusters 0, 2 and 3 hidden) ---\n",
"thr_churn = dfc[\"aum_drawdown_last\"].median()\n",
"thr_hold = dfc[\"avg_holding_months_per_isin\"].median()\n",
"\n",
"# Fixed colours for the two clusters shown; any other cluster would be drawn in gray.\n",
"color_map = {\n",
"    1: \"#ff7f0e\",\n",
"    4: \"red\"\n",
"}\n",
"\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([0,2,3])].groupby(\"cluster_k5\"):\n",
"    axes[1].scatter(\n",
"        g[\"avg_holding_months_per_isin\"], g[\"aum_drawdown_last\"],\n",
"        s=10,\n",
"        color=color_map.get(int(name), \"gray\"),\n",
"        label=labels_map.get(int(name), \"Cluster {}\".format(int(name)))\n",
"    )\n",
"\n",
"axes[1].set_yscale(\"log\")\n",
"axes[1].axvline(thr_hold, linestyle=\"--\")\n",
"axes[1].axhline(thr_churn, linestyle=\"--\")\n",
"axes[1].set_xlabel(\"avg_holding_months_per_isin\")\n",
"axes[1].set_ylabel(\"aum_drawdown_last\")\n",
"axes[1].set_title(\"2D behavioral segmentation: potential churn vs loyalty\")\n",
"axes[1].legend(markerscale=2)\n",
"axes[1].set_ylim(0.001,1.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 316,
"id": "5b3c5228-c176-4f1c-8edb-5b5d093df8a9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjtFJREFUeJzt3XdYU2cbB+BfAoQlSxAQ2bjAAShqceFAKVWs2s9VqzjbKm5r1Q5nrataq+Ksq9aBWrV2OHErrahAXeBCcaGiDAVlJO/3B80pIQnkQEIS8tzX5SU5OTnnOS8h58k7BYwxBkIIIYQQAybUdgCEEEIIIdpGCREhhBBCDB4lRIQQQggxeJQQEUIIIcTgUUJECCGEEINHCREhhBBCDB4lRIQQQggxeJQQEUIIIcTgUUJECCGEEINHCZEO6tChAxo3bqzWYwoEAowZM0atx1TF5s2bIRAIcO/evSo/NwAMGTIEnp6eWjk3kXXv3j0IBAJs3rxZ26GojaauydPTE0OGDFHrMXX5vJrQoUMHdOjQQdthqMXWrVvRsGFDmJiYwNbWVtvhVFuUEJUjPj4eY8aMQaNGjWBpaQl3d3f07dsXN2/elNu3Q4cOEAgEEAgEEAqFsLa2RoMGDTBo0CAcPXpUC9ETws/169cxa9asSiWw27dvx7Jly9QWU3V1/vx5zJo1C1lZWdoORS1WrVql08nu48ePMWvWLCQmJmo7FF6Sk5MxZMgQ+Pj4YP369Vi3bp22Q6q2jLUdgK5buHAhzp07hz59+qBp06ZIT0/HypUr0axZM/z1119yNTmurq6YP38+ACA3Nxe3b9/G3r178fPPP6Nv3774+eefYWJioo1L0YpBgwahf//+MDU11XYoRAXXr1/H7Nmz0aFDhwrXrG3fvh1Xr17FhAkTZLZ7eHjgzZs3BvX+L8v58+cxe/ZsDBkyRO5bf0pKCoTCqv++Wpnzrlq1Cg4ODjpTw3TkyBGZx48fP8bs2bPh6emJgIAA7QRVASdPnoREIsEPP/yAunXrajucao0SonJMmjQJ27dvh0gk4rb169cPTZo0wYIFC/Dzzz/L7G9jY4OPPvpIZtuCBQswbtw4rFq1Cp6enli4cGGVxK4LjIyMYGRkpLbj5eXlwcLCQm3HI1VHIBDAzMxM22GUSSKRoKCgQOtxausLRHX64lLyM1ufPXv2DADKbSpjjOHt27cwNzevgqiqKUYqpFmzZqxZs2Yy20JCQlijRo0U7l9UVMT8/PyYhYUFy8rKKvPY0uNcvHiRBQcHMzMzM+bp6clWr14tt+/bt2/ZjBkzmI+PDxOJRMzV1ZVNmTKFvX37VmY/ACwqKort27ePNWrUiIlEIubn58cOHjwos9+9e/fYqFGjWP369ZmZmRmrWbMm+9///sdSU1O5feLj4xkAtnnzZrl4Dh06xACw3377jTHG2KZNmxgAmdczxlh0dDTz8/NjIpGI1a5dm40ePZplZmYqLYd27doxc3NzNn78eMYYY/v372fvvfceq127NhOJRMzb25vNmTOHFRUVyRwjMjKSeXh4lFHa/11T165dmb29PVfeQ4cOldlHLBaz77//nvn5+TFTU1Pm6OjIPv74Y/by5Uu5/WbOnMlq167NzM3NWYcOHdi1a9eYh4cHi4yM5PaTls2ZM2fY2LFjmYODA7OxsWEff/wxy8/PZ5mZmWzQoEHM1taW2drasilTpjCJRFKhmDw8PFi3bt3YmTNnWIsWLZipqSnz8vJiW7ZskYun9L8TJ06oXOYhISFyr5eWf2pqKgPANm3aJBNbbGwsa9u2LbOwsGA2NjasR48e7Pr16zL7zJw5kwFgt27dYpGRkczGxoZZW1uzIUOGsNzcXJl9nz9/zm7cuCG3XRHp38XPP//M/Pz8mLGxMdu3bx9jjLGHDx+yoUOHMkdHR+7vZcOGDTKvV3RNSUlJLDIyknl5eTFTU1Pm5OTEhg4dyjIyMuSup/Q/6d9Jyf
cKn783VeNWRtl79OzZs2zixInMwcGBWVhYsJ49e7Jnz57JvK70tYSEhHDPZ2ZmsvHjxzNXV1cmEomYj48PW7BgAROLxXJluXjxYrZ27Vrm7e3NRCIRCwoKYhcuXJCJ88mTJ2zIkCGsTp06TCQSMWdnZ9ajRw+Zz5mQkBAuhhMnTigs702bNrEZM2YwY2NjmeuRGjlyJLOxsWFv3rxRWF6LFy9mANi9e/fknps2bRozMTHh/hZv3rzJevfuzZycnJipqSmrU6cO69evX5n3A0XlOnPmTO65bt26sUOHDrHmzZszU1NT9v3336tc3tL9IiMjmbW1NbOxsWGDBw9mCQkJcu/pkmVZkqLPV3V+JpWMc8KECczDw4OJRCJWp04dNmjQIPb8+XP26tUrZmFhwcaNGyf3ugcPHjChUMi+/fZbpWVcEiVEFSCRSFidOnVY165dZbaXlRAxxtjcuXMZAPb777+XefyQkBDm4uLCHB0d2ZgxY9jy5ctZ27ZtGQCZDzaxWMy6du3KLCws2IQJE9jatWvZmDFjmLGxMXv//fdljgmA+fv7s9q1a7O5c+eyZcuWMW9vb2ZhYSHzQb17927m7+/PZsyYwdatW8e++OILZmdnxzw8PGRuMN7e3uy9996Ti33o0KHMzs6OFRQUMMYUJ0TSm0FoaChbsWIFGzNmDDMyMmItWrTgXictB2dnZ1arVi02duxYtnbtWrZ//37GGGM9e/Zkffv2ZYsXL2arV69mffr0YQDYZ599JhOPKgnR06dPmZ2dHatfvz5bvHgxW79+Pfvyyy+Zr6+vzH4jRoxgxsbGbOTIkWzNmjVs6tSpzNLSUi7uzz//nAFgERERbOXKlWzkyJHM1dWVOTg4KLzZBAQEsHfffZdFR0ezQYMGMQDs888/Z23btmUffvghW7VqFevevTsDIPdhoWpMHh4erEGDBszJyYl98cUXbOXKlaxZs2ZMIBCwq1evMsYYu3PnDhs3bhwDwL744gu2detWtnXrVpaenq5ymR85coQFBAQwBwcH7vXSBENR8nD06FFmbGzM6tevzxYtWsRmz57NHBwcmJ2dncL3TGBgIOvduzdbtWoVGzFiBFdWJUn3lSZyZQHAfH19Wa1atdjs2bNZdHQ0S0hIYOnp6czV1ZW5ubmxOXPmsNWrV7MePXowANxNR9k1fffdd6xdu3Zszpw5bN26dWz8+PHM3NyctWzZkktok5KS2IABA7jjScvq9evX3O+r5HtF1b83VeNWRllCFBgYyDp16sRWrFjBJk+ezIyMjFjfvn25/fbt28dcXV1Zw4YNuWs5cuQIY4yx3Nxc1rRpU2Zvb8+++OILtmbNGjZ48GAmEAi4LzglyzIwMJDVrVuXLVy4kC1atIg5ODgwV1dXmfdz69atmY2NDfvqq6/Yjz/+yL799lvWsWNHdurUKW6fkjfx9PR0NmfOHAaAffzxx1yMd+7cYbdu3WIA2IoVK2TKIj8/n9nZ2bFhw4YpLa/79+8zgUDAFi1aJPect7c369atG3csLy8v5uLiwr755hv2448/stmzZ7MWLVooTKZKlmuvXr0YALZ69Wq2detWlpSUxP2u6taty+zs7Ni0adPYmjVr2IkTJ1Qub4lEwtq3b8+EQiEbPXo0W7FiBevUqRNr2rRppRIidX4mMcbYq1evWOPGjZmRkREbOXIkW716NZs7dy5r0aIFS0hIYIwxNnDgQObk5CT3hXjRokVMIBCw+/fvKy3jkighqoCtW7fKJSeMlZ8Q7du3jwFgP/zwQ5nHl37LXrJkCbctPz+fBQQEMEdHR+5NtXXrViYUCtmZM2dkXr9mzRoGgJ07d47bBoCJRCJ2+/ZtbltSUpLcB0FeXp5cPHFxcQwA++mnn7ht06dPl/n2I43R1tZW5gOkdEL07NkzJhKJWNeuXWW+raxcuZIBYBs3bpQrhzVr1sjFpCjOTz75hFlYWMjUjqmSEEl/L/Hx8Ur3OXPmDAPAtm3bJrNd+g
1duj09PZ0ZGxuznj17yuw3a9YsBkDhzSYsLEym5ic4OJgJBAL26aefctuKioqYq6urzIeSqjEx9t83zdOnT3Pbnj1
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Loyal profile (avg_holding_months_per_isin) vs churn (aum_drawdown_last)\n",
"\n",
"# Global medians over dfc, drawn below as dashed reference lines.\n",
"thr_churn = dfc[\"aum_drawdown_last\"].median()\n",
"thr_hold = dfc[\"avg_holding_months_per_isin\"].median()\n",
"\n",
"plt.figure()\n",
"# One scatter series per K=5 cluster; clusters 0, 2 and 3 are left out of this figure.\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([0,2,3])].groupby(\"cluster_k5\"):\n",
"    plt.scatter(g[\"avg_holding_months_per_isin\"], g[\"aum_drawdown_last\"], s=10, label=name)\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_hold, linestyle=\"--\")\n",
"plt.axhline(thr_churn, linestyle=\"--\")\n",
"# Labels name the variables actually plotted (the previous ones were copied from the\n",
"# frequency/intensity figure and described different quantities).\n",
"plt.xlabel(\"avg_holding_months_per_isin\")\n",
"plt.ylabel(\"aum_drawdown_last [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: potential churn vs loyalty\")\n",
"plt.legend(markerscale=2)\n",
"# The y-axis is restricted to [0.001, 1.1]; points outside that range are not visible.\n",
"plt.ylim(0.001,1.1)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 317,
"id": "5071c36c-0176-460c-aeb7-ed7c4fb35ce5",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMoAAAGGCAYAAACKUW2JAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA4BBJREFUeJzs3Xd4TOnbB/D7aAkiiShBiN6DIGqQEJIgrGCVLFZdvQuil7V2dVbvffXeW3S7elm9rtWDFIIg833/yDvnN5MidjfJZE6+n+uai5xzZuY+z5x6n6coACBEREREREREREQpXCpTB0BERERERERERJQcMFFGREREREREREQkTJQRERERERERERGJCBNlREREREREREREIsJEGRERERERERERkYgwUUZERERERERERCQiTJQRERERERERERGJCBNlREREREREREREIsJEGRERERERERERkYgwUUZERGRW8uXLJ23btjV1GElq6dKloiiKPHjwwNShEBEREZHGMVFGRESUDNy9e1c6d+4sBQoUEEtLS7G2thZXV1eZPn26vH//PkliePfunYwaNUoOHz6cJN9nrlavXi3Tpk0zdRjJjqIo0qNHjxjTf/rpJ1EURdq3by86ne5ff/6DBw9EUZRYX2vWrPkvoRMRERGp0pg6ACIiopRu586d8u2334qFhYW0adNGnJyc5OPHj3L8+HHx9/eXq1evyvz58xM9jnfv3sno0aNFRMTd3T3Rv+9rtW7dWlq0aCEWFhamDkVEohJlf/75p/Tp08fUoSR7P//8swwdOlS+//57WbhwoaRK9d+f0bZs2VLq1atnNK1KlSr/+XOJiIiIRJgoIyIiMqn79+9LixYtJG/evHLo0CHJmTOnOq979+5y584d2blzpwkj/O/Cw8MlY8aM//r9qVOnltSpUydgRMnTu3fvJEOGDKYOI8FMnDhRAgICpE2bNrJ48eIESZKJiJQrV05atWqVIJ9FREREFB2bXhIREZnQhAkT5O3bt7Jo0SKjJJleoUKFpHfv3nG+f9SoUaIoSozpsfXrdfbsWfHy8pKsWbNK+vTpJX/+/NK+fXsRiWrWli1bNhERGT16tNqkbdSoUer7b9y4IU2bNhU7OzuxtLQUFxcX2bZtW6zfe+TIEenWrZtkz55dcufO/cUy+PXXX6VkyZKSIUMGyZw5s7i4uMjq1au/uC46nU5GjRoluXLlkgwZMkjNmjXl2rVrMfpw07/3xIkT0q9fP8mWLZtkzJhRfH19JSgoyCiOrVu3Sv369SVXrlxiYWEhBQsWlLFjx0pkZKS6jLu7u+zcuVP++usvtYzy5csXZ5wiIocPHxZFUYyatLq7u4uTk5OcO3dOatSoIRkyZJAhQ4aIiEhERISMHDlSChUqJBYWFpInTx4ZOHCgREREGH3u/v37pVq1amJraytWVlZStGhR9TNMbcqUKTJw4EBp1aqVLFmyJMGSZHrh4eHy8ePHBP1MIiIiIhHWKCMiIjKp7du3S4ECBaRq1aqJ+j0vXrwQT09PyZYtmwwePFhsbW3lwYMHsmnTJhERyZYtm8yZM0e6du0qvr6+0rhxYxERKV26tIiIXL16VVxdXcXBwUEGDx4sGTNmlHXr1kmjRo1k48aN4uvra/R93bp1k2zZssmIESMkPDw8zrgWLFggvXr1kqZNm0rv3r3lw4cPcvnyZfnjjz/Ez88vzvcFBATIhAkTpEGDBuLl5SWXLl0SLy8v+fDhQ6zL9+zZUzJnziwjR46UBw8eyLRp06RHjx6ydu1adZmlS5eKlZWV9OvXT6ysrOTQoUMyYsQICQsLk4kTJ4qIyNChQyU0NFQePXokU6dOFRERKyur+Io/Vq9evZK6detKixYtpFWrVmJvby86nU4aNmwox48flx9++EGKFy8uV65ckalTp8qtW7dky5YtIhL1e/j4+Ejp0qVlzJgxYmFhIXfu3JETJ078q1gS0vTp06V///7i5+
cnS5cujTVJ9vLly6/6rEyZMsVocjt69Gjx9/cXRVGkfPnyMm7cOPH09EyQ2ImIiIiYKCMiIjKRsLAwefz4sXzzzTeJ/l0nT56U4OBg2bdvn7i4uKjTf/zxRxERyZgxozRt2lS6du0qpUuXjtG0rXfv3uLo6ChnzpxRExfdunWTatWqyaBBg2Ikyuzs7OTgwYPxNpncuXOnlCxZUtavX//V6/L8+XOZMmWKNGrUSDZv3qxOHz16tFENOENZsmSRffv2qbXvdDqdzJgxQ0JDQ8XGxkZEovoeS58+vfqeLl26SJcuXWT27Nny448/ioWFhdSpU0ccHBwkODj4Pzf/e/bsmcydO1c6d+6sTlu5cqUcOHBAjhw5ItWqVVOnOzk5SZcuXeTkyZNStWpV2b9/v3z8+FF2794tWbNm/U9xJKQdO3bIX3/9JS1btpTly5fH+fvray/GZ8mSJWoNwVSpUomnp6f4+vqKg4OD3Lt3T6ZMmSJ169aVbdu2Sf369RNqNYiIiCgFY6KMiIjIRMLCwkQkqtZMYrO1tRWRqERGmTJlJG3atF/93tevX8uhQ4dkzJgx8ubNG3nz5o06z8vLS0aOHCmPHz8WBwcHdXqnTp2+ql8xW1tbefTokZw5c0YqVKjwVfEcPHhQPn/+LN26dTOa3rNnzzgTZT/88INRE9Xq1avL1KlT5a+//lJrzRkmyd68eSMRERFSvXp1mTdvnty4cUPKlCnzVfF9LQsLC2nXrp3RtPXr10vx4sWlWLFiRrWuatWqJSIigYGBUrVqVfX33Lp1q7Rr1y7Bmzb+W8+fPxcRkfz583/x99+/f/9XfV7JkiXV/zs6OsrevXuN5rdu3VpKlCgh/fv3Z6KMiIiIEgQTZURERCZibW0tImKUeEosbm5u0qRJExk9erRMnTpV3N3dpVGjRuLn5xfvaJJ37twRADJ8+HAZPnx4rMu8ePHCKFGWP3/+r4pr0KBBcuDAAalYsaIUKlRIPD09xc/PT1xdXeN8z19//SUiUf23GbKzs5PMmTPH+h5HR0ejv/XLBQcHq9OuXr0qw4YNk0OHDqlJTL3Q0NCvWp9/wsHBQdKlS2c07fbt23L9+vU4a1y9ePFCRESaN28uCxculI4dO8rgwYPFw8NDGjduLE2bNv1i0uz169f/um8vOzu7GPFG9/3338uTJ0/kp59+kqxZs0rfvn1jXa527dr/KobYYmrXrp38/PPP8ujRo3j7wyMiIiKKDxNlREREJmJtbS25cuWSP//8819/Rmwd+YuIUQf0+uU2bNggv//+u2zfvl327t0r7du3l8mTJ8vvv//+xX62dDqdiIgMGDBAvLy8Yl0metLKsHbWlxQvXlxu3rwpO3bskD179sjGjRtl9uzZMmLECBk9evRXfcbXiKt2EwAREQkJCRE3NzextraWMWPGSMGCBcXS0lLOnz8vgwYNUsvgS772t9CLrYx0Op2UKlVKpkyZEut78uTJo7736NGjEhgYKDt37pQ9e/bI2rVrpVatWrJv374417dx48Zy5MiReNclNoGBgeLu7v7FZdKkSSPr1q0Tb29v6d+/v9ja2saoNScS1ez0a9jY2MS7LenL5PXr10yUERER0X/GRBkREZEJ+fj4yPz58+XUqVNSpUqVf/x+fc2okJAQtTmeyP9qXUVXuXJlqVy5sowbN05Wr14t3333naxZs0Y6duwYZ6KnQIECIiKSNm3aBKsJZChjxozSvHlzad68uXz8+FEaN24s48aNk4CAALG0tIyxfN68eUUkqqabYc21V69eGdUQ+ycOHz4sr169kk2bNkmNGjXU6ffv34+xbFzlZPhbGIrrt4hNwYIF5dKlS+Lh4RHn9+ilSpVKPDw8xMPDQ6ZMmSI//fSTDB06VAIDA+P8nSZPnvyvy+hrm55aWlrKtm3bpGbNmtKpUyextbWN0YddbCO8xsawj7K43Lt3T0S+vt8zIiIioi9hooyIiMiEBg4cKKtWrZKOHTvKoU
OHxN7e3mj+3bt3ZceOHdK7d+9Y31+wYEERETl69Kg0bNhQRETCw8Nl2bJlRssFBweLra2tUfLF2dlZREQiIiJERCR
"text/plain": [
"<Figure size 1400x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#heatmap\n",
"def robust_zscore_col(s):\n",
"    \"\"\"Return the robust z-score of `s`: (s - median) / (1.4826 * MAD).\n",
"\n",
"    The median and the MAD (median absolute deviation) ignore NaN values.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    s : np.ndarray or pd.Series\n",
"        Numeric values to standardise.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Same kind of object as `s` holding the robust z-scores, or an array of\n",
"    zeros of length len(s) when the MAD is 0 or NaN (no usable spread).\n",
"    \"\"\"\n",
"    med = np.nanmedian(s)\n",
"    mad = np.nanmedian(np.abs(s - med))\n",
"    # A zero or undefined MAD would divide by zero / propagate NaN: return a flat profile instead.\n",
"    if mad == 0 or np.isnan(mad):\n",
"        return np.zeros(len(s))\n",
"    # 1.4826 is the usual factor that makes the MAD comparable to a standard deviation for normal data.\n",
"    return (s - med) / (1.4826 * mad)\n",
"\n",
"# Heatmap of cluster signatures: per-cluster medians, standardised column by column.\n",
"for k in [5]:\n",
"    prof = dfc.groupby(f\"cluster_k{k}\")[profile_vars].median()\n",
"    prof_z = prof.copy()\n",
"\n",
"    for c in prof.columns:\n",
"        # Classical z-score across clusters (robust_zscore_col is the MAD-based alternative).\n",
"        # The 1e-12 term avoids a division by zero when all clusters share the same median.\n",
"        prof_z[c] = (prof[c] - prof[c].mean()) / (prof[c].std() + 1e-12)\n",
"        # Undefined scores (e.g. NaN medians) are shown as neutral.\n",
"        prof_z[c] = prof_z[c].fillna(0)\n",
"\n",
"    plt.figure(figsize=(14, 4))\n",
"    sns.heatmap(prof_z, cmap=\"RdBu_r\", center=0)\n",
"    plt.xticks(rotation=45, ha='right')  # tilt the variable names by 45 degrees, right-aligned\n",
"    plt.title(f\"Cluster signatures — K={k}\")\n",
"    plt.ylabel(\"Clusters\")\n",
"    plt.tight_layout()\n",
"    plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 318,
"id": "72393182-7c5b-4484-b0e0-770bff771d4c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuEAAAKyCAYAAAB7WgDLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs/Xm4bVV1Joy/c87V7X6f/txzey7tpReQiIhih9hENKkYq2ywfSqlyWOMiVCJIpqIVqIVk7K0Kn6RfPrlF5OoFSomqEGQJIgNSt/f/t7Tn7PP7lc75++POdac+wgqGhSI632eC/vsvZrZr7nGeMc7mFJKoUCBAgUKFChQoECBAj8z8Ce6AAUKFChQoECBAgUK/Lyh2IQXKFCgQIECBQoUKPAzRrEJL1CgQIECBQoUKFDgZ4xiE16gQIECBQoUKFCgwM8YxSa8QIECBQoUKFCgQIGfMYpNeIECBQoUKFCgQIECP2MUm/ACBQoUKFCgQIECBX7GKDbhBQoUKFCgQIECBQr8jFFswgsUKFCgQIECBQoU+Bmj2IQXKPBzjPe9731gjD3RxSjwQ3D55Zdj165dT3QxChQoUKDA44xiE16gwL8TXHvttWCMmX9BEGBubg6XXHIJ/uRP/gTdbvdxuc/8/Dze97734fbbb39crvfzgsFggPe973246aabnuii/FTAGMPb3/72R3z/wQ9+EIwxvPGNb4SU8ie+/sGDBzeN79F/f/VXf/VvKXqBAgUKPCFwnugCFChQ4PHF+9//fuzevRtJkmBxcRE33XQT3vGOd+CjH/0orrvuOpxxxhnm2N/7vd/DFVdc8WNdf35+HldffTV27dqFs84663Eu/b9fDAYDXH311QCA5zznOY/5vD/7sz/7N21en0h86EMfwu/+7u/i9a9/PT71qU+B83+73efVr341XvziF2/67hnPeMa/+boFChQo8LNGsQkvUODfGS699FKce+655u8rr7wSX/va1/DSl74Uv/iLv4j77rsPpVIJAOA4DhynWAaejOj3+6hUKnBd94kuyk+EP/zDP8SVV16J173udfjzP//zx2UDDgBPe9rT8JrXvOZxuVaBAgUKPJEo6CgFCvwc4LnPfS7e85734NChQ/jsZz9rvn80TvhXv/pVXHjhhWg2m6hWqzjppJPwX//rfwUA3HTTTTjvvPMAAG94wxsMHeDaa68FAPzzP/8z/sN/+A/YsWMHfN/H9u3b8Zu/+ZsYDoeb7nH55ZejWq3i2LFjuOyyy1CtVjE1NYV3vetdyLJs07FSSnzsYx/D6aefjiAIMDU1hRe96EX4zne+s+m4z372szjnnHNQKpUwPj6OX/3VX8WRI0d+ZNvkbfDggw/iNa95DRqNBqampvCe97wHSikcOXIEL3/5y1Gv1zE7O4uPfOQjm86P4xjvfe97cc4556DRaKBSqeBZz3oWbrzxRnPMwYMHMTU1BQC4+uqrTbu9733v29Qe+/btw4tf/GLUajX8p//0n8xvo5zwq666Cpxz3HDDDZvK8da3vhWe5+GOO+74kXX+aeOjH/0ofud3fgevec1r8OlPf/px24Dn6Pf7iOP4cb1mgQIFCvysUWzCCxT4OcFrX/taAMBXvvKVH3jMPffcg5e+9KWIogjvf//78ZGPfAS/+Iu/iH/9138FAJxyyil4//vfD0Bv+j7zmc/gM5/5DC666CIAwN/8zd9gMBjg137t1/Cnf/qnuOSSS/Cnf/qneN3rXveIe2VZhksuuQQTExP4oz/6Izz72c/GRz7yEfzv//2/Nx33pje9Ce94xzuwfft2fPjDH8YVV1yBIAhw6623mmP+4A/+AK973etwwgkn4KMf/Sje8Y534IYbbsBFF12EjY2Nx9Q+r3rVqyClxIc+9CGcf/75+P3f/3388R//MV7wghdg69at+PCHP4zjjz8e73rXu3DzzTeb8zqdDj71qU/hOc95Dj784Q/jfe97H1ZWVnDJJZ
cY3vzU1BQ+8YlPAABe8YpXmHZ75Stfaa6TpikuueQSTE9P44/+6I/wS7/0S49azt/7vd/DWWedhTe96U2G5//lL38Zf/Znf4b3vve9OPPMMx9TfX9a+NjHPobf+q3fwn/8j/8R11577aNuwFdXVx/TvyiKHnHu1VdfjWq1iiAIcN555/3Q8VygQIECT2qoAgUK/LvApz/9aQVAffvb3/6BxzQaDXX22Webv6+66io1ugz89//+3xUAtbKy8gOv8e1vf1sBUJ/+9Kcf8dtgMHjEd9dcc41ijKlDhw6Z717/+tcrAOr973//pmPPPvtsdc4555i/v/a1rykA6jd+4zcecV0ppVJKqYMHDyohhPqDP/iDTb/fddddynGcR3z//cjb4K1vfav5Lk1TtW3bNsUYUx/60IfM961WS5VKJfX6179+07FRFG26ZqvVUjMzM+qNb3yj+W5lZUUBUFddddUjypC3xxVXXPGov+3cufMRdfM8T735zW9WrVZLbd26VZ177rkqSZIfWtefJgConTt3KgDq1a9+tUrT9Ice+1j+jY6xQ4cOqRe+8IXqE5/4hLruuuvUH//xH6sdO3Yozrn6+7//+59BDQsUKFDg8UVBBi1Q4OcI1Wr1h6qkNJtNAMDf/d3f4Q1veMOPTSPIueaApgwMh0NccMEFUErhe9/7Hnbs2LHp+P/8n//zpr+f9axn4TOf+Yz5+/Of/zwYY7jqqqseca+cRvOFL3wBUkr8yq/8ClZXV83vs7OzOOGEE3DjjTcaOs0Pw5vf/GbzWQiBc889F0ePHsWb3vQm832z2cRJJ52E/fv3bzpWCAFAU2c2NjYgpcS5556L7373uz/yvqP4tV/7tcd03GmnnYarr74aV155Je68806srq7iK1/5yhPO719aWgIA7N6927TJo+GrX/3qY7reqaeeaj7v2LEDX/7ylzf9/trXvhZ79+7Fb/3Wb+ElL3nJT1DiAgUKFHjiUGzCCxT4OUKv18P09PQP/P1Vr3oVPvWpT+HNb34zrrjiCjzvec/DK1/5SvzyL//yY9qQHz58GO9973tx3XXXodVqbfqt3W5v+jvnd49ibGxs03n79u3D3NwcxsfHf+A9H3roISilcMIJJzzq7481sPH7XxAajQaCIMDk5OQjvl9bW9v03V/8xV/gIx/5CO6//34kSWK+371792O6N6CDZLdt2/aYj//t3/5t/NVf/RW+9a1v4YMf/CD27t37I89ZX1//ibnU4+Pj8Dzvhx7z+te/HvPz8/jgBz+IyclJ/OZv/uajHvf85z//JyrDo5XpDW94Az70oQ/h6NGjP1b7FShQoMATjWITXqDAzwmOHj2KdruN448//gceUyqVcPPNN+PGG2/El770JVx//fX43Oc+h+c+97n4yle+8kOtm1mW4QUveAHW19fx7ne/GyeffDIqlQqOHTuGyy+//BEyez/sWj8OpJRgjOEf//EfH/Wa1Wr1MV3n0c79QWVUSpnPn/3sZ3H55Zfjsssuw2//9m9jenoaQghcc8012Ldv32OsBeD7/o/ledi/fz8eeughAMBdd931mM555Stfia9//euP+R6juPHGG3+ktKLjOPjrv/5rvOhFL8Jv/dZvodls4g1veMMjjltcXHxM92w0Gpu8K4+G7du3A9AvGMUmvECBAk8lFJvwAgV+TpDTPC655JIfehznHM973vPwvOc9Dx/96EfxwQ9+EL/7u7+LG2+8Ec9//vN/YIbNu+66Cw8++CD+4i/+YlMg5mOlHjwa9uzZgy9/+ctYX1//gdbwPXv2QCmF3bt348QTT/yJ7/WT4m//9m9x3HHH4Qtf+MKmtvl+Cs3jmZlUSonLL78c9Xod73jHO/DBD34Qv/zLv7wp0PPR8JGPfOQRHorHisca8BkEAa677jpcfPHFeMtb3oJms4lXvOIVm47ZsmXLY7rWpz/9aVx++eU/9JicGvT9XpUCBQoUeLKj2IQXKPBzgK997Wv4wAc+gN
27dxvpu0fDo21284Q8uVJFpVIBgEeojuRW41ErsVIKH/vYx37icv/SL/0SPv7xj+Pqq69+xHWUUmCM4ZWvfCWuvPJ
"text/plain": [
"<Figure size 800x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.metrics import pairwise_distances\n",
"\n",
"def plot_distance_matrix_sorted(X_scaled, labels, max_points=400, title=\"Distance matrix\"):\n",
"    \"\"\"\n",
"    Plot the pairwise-distance matrix with rows and columns grouped by cluster.\n",
"\n",
"    Red lines mark the boundaries between consecutive clusters.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X_scaled : np.ndarray or pd.DataFrame\n",
"        Numeric feature matrix (n_samples x n_features); presumably already\n",
"        standardised by the caller.\n",
"    labels : array-like\n",
"        Cluster label of each row of X_scaled.\n",
"    max_points : int, optional\n",
"        Maximum number of rows displayed; larger inputs are randomly subsampled\n",
"        (fixed seed 42) to keep the matrix readable.\n",
"    title : str, optional\n",
"        Figure title.\n",
"    \"\"\"\n",
"    # Row selection below is positional: on a DataFrame, X_scaled[idx] would select\n",
"    # columns instead of rows, so work on the underlying array.\n",
"    X_arr = np.asarray(X_scaled)\n",
"    n = X_arr.shape[0]\n",
"    idx = np.arange(n)\n",
"\n",
"    # Subsample if necessary\n",
"    if n > max_points:\n",
"        rng = np.random.default_rng(42)\n",
"        idx = rng.choice(idx, size=max_points, replace=False)\n",
"\n",
"    X_sub = X_arr[idx]\n",
"    labels_sub = np.asarray(labels)[idx]\n",
"\n",
"    # Sort by cluster label; the secondary key (position) keeps ties in their current order\n",
"    order = np.lexsort((np.arange(len(labels_sub)), labels_sub))\n",
"    X_sub = X_sub[order]\n",
"    labels_sub = labels_sub[order]\n",
"\n",
"    # Distance matrix (default metric of pairwise_distances)\n",
"    D = pairwise_distances(X_sub)\n",
"\n",
"    # Figure\n",
"    plt.figure(figsize=(8, 7))\n",
"    sns.heatmap(D, cmap=\"viridis\")\n",
"\n",
"    # Separator lines between clusters: cumulative cluster sizes give the boundary positions\n",
"    _, counts = np.unique(labels_sub, return_counts=True)\n",
"    boundaries = np.cumsum(counts)\n",
"    for b in boundaries[:-1]:  # the last boundary is the edge of the matrix\n",
"        plt.axhline(b, color='red', linewidth=2)\n",
"        plt.axvline(b, color='red', linewidth=2)\n",
"\n",
"    plt.title(title)\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"for k in [5]:\n",
" plot_distance_matrix_sorted(\n",
" X_scaled,\n",
" dfc[f\"cluster_k{k}\"].values,\n",
" title=f\"Distance matrix — K={k}\"\n",
" )\n",
"\n",
"# Cluster 0 très distant des autres"
]
},
{
"cell_type": "code",
"execution_count": 319,
"id": "a5f006c5-55a8-475f-b58d-fc26886c0aba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"churn_hard 0.361386\n",
"churn_soft 0.603960\n",
"churn_warning 0.344059\n",
"dtype: float64\n",
"\n",
"===== CHURN PAR CLUSTER K=2 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>churn_hard_rate</th>\n",
" <th>churn_soft_rate</th>\n",
" <th>churn_warning_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>327</td>\n",
" <td>0.409786</td>\n",
" <td>0.642202</td>\n",
" <td>0.336391</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>77</td>\n",
" <td>0.155844</td>\n",
" <td>0.441558</td>\n",
" <td>0.376623</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
"cluster_k2 \n",
"0 327 0.409786 0.642202 0.336391\n",
"1 77 0.155844 0.441558 0.376623"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== CHURN PAR CLUSTER K=5 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>churn_hard_rate</th>\n",
" <th>churn_soft_rate</th>\n",
" <th>churn_warning_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k5</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>168</td>\n",
" <td>0.541667</td>\n",
" <td>0.797619</td>\n",
" <td>0.416667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>111</td>\n",
" <td>0.396396</td>\n",
" <td>0.648649</td>\n",
" <td>0.306306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>90</td>\n",
" <td>0.000000</td>\n",
" <td>0.166667</td>\n",
" <td>0.211111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.233333</td>\n",
" <td>0.600000</td>\n",
" <td>0.433333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>0.800000</td>\n",
" <td>1.000000</td>\n",
" <td>0.600000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
"cluster_k5 \n",
"1 168 0.541667 0.797619 0.416667\n",
"3 111 0.396396 0.648649 0.306306\n",
"4 90 0.000000 0.166667 0.211111\n",
"0 30 0.233333 0.600000 0.433333\n",
"2 5 0.800000 1.000000 0.600000"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Analyse churn\n",
"\n",
"dfc[\"churn_hard\"] = (dfc[\"aum_final_to_peak\"] < 0.10).astype(int)\n",
"\n",
"dfc[\"churn_soft\"] = (\n",
" (dfc[\"aum_final_to_peak\"] < 0.40) &\n",
" (dfc[\"aum_drawdown_last\"] > 0.40)\n",
").astype(int)\n",
"\n",
"dfc[\"churn_warning\"] = (\n",
" (dfc[\"flow_direction_balance\"] < 0) &\n",
" (dfc[\"aum_drawdown_last\"] > 0.20)\n",
").astype(int)\n",
"\n",
"print(dfc[[\"churn_hard\", \"churn_soft\", \"churn_warning\"]].mean())\n",
"\n",
"for k in [2, 5]:\n",
" out = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" churn_hard_rate=(\"churn_hard\", \"mean\"),\n",
" churn_soft_rate=(\"churn_soft\", \"mean\"),\n",
" churn_warning_rate=(\"churn_warning\", \"mean\")\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
" print(f\"\\n===== CHURN PAR CLUSTER K={k} =====\")\n",
" display(out)"
]
},
{
"cell_type": "code",
"execution_count": 320,
"id": "b8b4940e-4ab5-4123-a59a-e99d5f1fc5b6",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAP9FJREFUeJzt3XlcVdX+//H3AeUgKKipoITihCOCQRpaDle6Vt7KssJ+lUhmg1ImmkrmWIpamhNKpqY53GywUcUBta6J1ynKnMoBsQHETHBIINi/P/x6bidAwQ0cldfz8TiPB2fttfb+bCI8b9Zee1sMwzAEAAAAACY4OboAAAAAANc/ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAJSQxWJRVFSUo8solr59+6pq1aqOLkOS1KVLF3Xp0sXRZQAAygjBAgD+z+HDh/XMM8+oUaNGcnV1lYeHhzp27KgZM2bojz/+cHR5uIKJEyfqk08+cXQZpqWkpMhiseiNN96wazcMQ88884wsFovGjh1r6hg7duxQVFSUWrVqJXd3d9WvX1+PPPKIfvjhB1P7BVCxVXJ0AQBwLVi1apUefvhhWa1W9enTR61bt1ZOTo62bNmil156SXv37tW8efMcXSYuY+LEiXrooYfUs2dPR5dS6gzD0IABAzRv3jyNGjXKdLCYPHmyvv76az388MNq06aN0tLSNHv2bN1yyy3atm2bWrduXTqFA6hQCBYAKryjR4+qd+/eatCggTZu3Ki6devatg0cOFCHDh3SqlWryrWm/Px85eTkyNXVtVyPC3sXLlyQi4uLnJwcO8H//PPPKz4+XiNHjtT48eNN7y86OlrLly+Xi4uLrS08PFwBAQGaNGmSli5davoYACoeLoUCUOFNmTJFZ8+e1YIFC+xCxSVNmjTRoEGDCrR/8sknat26taxWq1q1aqWEhAS77X379pWfn1+BcWPHjpXFYrFru7RuY9myZWrVqpWsVqsSEhK0aNEiWSwWff3114qOjlbt2rXl7u6uBx54QBkZGcU+xyNHjqh79+5yd3dXvXr1NH78eBmGIeniX8P9/Px0//33Fxh34cIFeXp66plnnrniMZYuXap27drJzc1NNWrUUKdOnbRu3boi+186t5SUFLv2zZs3y2KxaPPmzba2H3/8Ub169ZK3t7dcXV118803q3fv3srMzJR08ft37tw5LV68WBaLRRaLRX379rWN//nnn/Xkk0/Ky8vL9t9r4cKFhR73vffe0yuvvCIfHx+5ubkpKyvriudelgYNGqS4uDjFxMTotddeK5V9dujQwS5USFLTpk3VqlUr7d+/v1SOAaDiYcYCQIX3+eefq1GjRurQoUOxx2zZskUrV67UgAEDVK1aNc2cOVO9evVSamqqbrrppquqY+PGjXr//fcVFRWlWrVqyc/PT8nJyZIu/sW6Ro0aGjNmjFJSUjR9+nRFRUVpxYoVV9xvXl6e7rrrLt12222aMmWKEhISNGbMGP35558aP368LBaLHn/8cU2ZMkWnTp1SzZo1bWM///xzZWVl6fHHH7/sMcaNG6exY8eqQ4cOGj9+vFxcXPTf//5XGzdu1D//+c+r+n5ckpOTo+7duys7O1vPP/+8vL299fPPP+uLL77Q6dOn5enpqSVLluipp55Su3bt9PTTT0uSGjduLElKT0/XbbfdZgtvtWvX1po1a9SvXz9lZWXpxRdftDveq6++KhcXFw0dOlTZ2dkFPoCXp8GDB2vmzJkaPny4Jk6cWGB7fn6+Tp06Vax9eXp6qnLlykVuNwxD6enpatWq1VXXC6CCMwCgAsvMzDQkGffff3+xx0gyXFxcjEOHDtnavv32W0OSMWvWLFtbRESE0aBBgwLjx4wZY/z9168kw8nJydi7d69d+zvvvGNIMsLCwoz8/Hxb++DBgw1nZ2fj9OnTl6
01IiLCkGQ8//zztrb8/HyjR48ehouLi5GRkWEYhmEcPHjQkGTMnTvXbvx9991n+Pn52R3773788UfDycnJeOCBB4y8vDy7bX8d17lzZ6Nz584Fzu3o0aN2YzZt2mRIMjZt2mQYhmF88803hiTjgw8+uOy5uru7GxEREQXa+/XrZ9StW9c4efKkXXvv3r0NT09P4/z583bHbdSoka3NEY4ePWpIMho0aGBIMl566aUr9i3O69L3syhLliwxJBkLFiwo5TMCUFEwYwGgQrt0mUu1atVKNC4sLMz2F3FJatOmjTw8PHTkyJGrrqVz585q2bJloduefvppu8un7rjjDr355ps6duyY2rRpc8V9//X2uJf+cr9q1Spt2LBBvXv3lr+/v9q3b69ly5bp2WeflSSdOnVKa9as0bBhwwpcuvVXn3zyifLz8zV69OgCaxEuN664PD09JUlr167VPffcIzc3t2KPNQxDH330kR555BEZhqGTJ0/atnXv3l3vvfeedu/erY4dO9raIyIiVKVKFdN1m5Weni5J8vf3L7KPt7e31q9fX6z9BQYGFrntwIEDGjhwoEJDQxUREVGyQgHg/xAsAFRoHh4ekqQzZ86UaFz9+vULtNWoUUO///77VdfSsGHDYh+vRo0aklSs4zk5OalRo0Z2bZc+rP51fUOfPn0UFRWlY8eOqUGDBvrggw+Um5urJ5544rL7P3z4sJycnIoMRWY1bNhQ0dHRmjZtmpYtW6Y77rhD9913nx5//HFb6ChKRkaGTp8+rXnz5hV5V68TJ04UOF5xZGRkKC8vr3gn8Te1a9eWs7PzZfsMHz5cq1ev1jPPPKPq1avroYceKtDH1dVVYWFhV1XDJWlpaerRo4c8PT314YcfXrEuACgKwQJAhebh4aF69erp+++/L9G4oj58Gf+3IFoq+q/1RX0YvdxfyYtzPLN69+6twYMHa9myZXr55Ze1dOlShYSEqFmzZqV2jL8qyfdn6tSp6tu3rz799FOtW7dOL7zwgmJjY7Vt2zbdfPPNRR4jPz9fkvT4448X+Zf4v8/4FHe24tZbb9WxY8eK1ffvjh49WujC/r+qWrWq1qxZo06dOumxxx6Th4dHgfUqeXl5xV7EX7NmzQLrRTIzM3X33Xfr9OnT+s9//qN69eqV6DwA4K8IFgAqvH/961+aN2+ekpKSFBoaWmr7rVGjhk6fPl2g/Wo/jF6t/Px8HTlyxO6SmksPQvvrh9uaNWuqR48eWrZsmR577DF9/fXXmj59+hX337hxY+Xn52vfvn0KCgoqdl2XZl3+/j0q6vsTEBCggIAAvfLKK9q6das6duyo+Ph4252SCgsqtWvXVrVq1ZSXl2f6L/t/t2zZsqt+cKK3t3ex+t10001at26dOnbsqAcffFDr16+3+xk9fvx4sWdYNm3aZPfk8wsXLujee+/VDz/8oA0bNpTZjBOAioNgAaDCGzZsmJYtW6annnpKGzdulJeXl932w4cP64svvij0lrOX07hxY2VmZuq7776z/VX8119/1ccff1xqtRfX7NmzNXPmTEkXZzlmz56typUrq1u3bnb9nnjiCT344IN66aWX5OzsrN69e19x3z179tTw4cM1fvx4ffjhh3brLAzDKHJm4tIala+++soWSPLy8gpcspSVlSU3NzdVqvS/f7ICAgLk5OSk7OxsW5u7u3uBkOLs7KxevXpp+fLl+v777ws8+C0jI0O1a9e+4jkW5q/rMsqSj4+P1q9fr9tvv109evTQl19+qYCAAElXv8YiLy9P4eHhSkpK0qefflqqgRpAxUWwAFDhNW7cWMuXL1d4eLhatGhh9+TtrVu36oMPPrB7JkJx9e7dW8OHD9cDDzygF154QefPn9fcuXPl7++v3bt3l/6JFMHV1VUJCQmKiIhQ+/bttWbNGq1atUovv/xygQ/VPXr00E033aQPPvhAd999t+rUqXPF/Tdp0kQjR47Uq6++qjvuuEMPPvigrFardu
zYoXr16ik2NrbQca1atdJtt92mmJgY221u33vvPf355592/TZu3KioqCg9/PDD8vf3159//qklS5bYQsMlwcHB2rB
"text/plain": [
"<Figure size 800x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAQNBJREFUeJzt3Xt8zvX/x/HntdmuHdgQNjTN2TAji5xCrVS+Skn0DSOUWGGEJccwKSKnJUTFVyfp4GxMXyGnlJzKYeZbbSZf5rjN9vn94ef6drWD8dm1a7PH/Xa7brdd78/n8/68PpdP3+/1vD7v9+djMQzDEAAAAACY4OLsAgAAAAAUfQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwC4SRaLRREREc4uI0969uypkiVLOrsMSVKbNm3Upk0bZ5cBAHAQggUA/L+jR4/qhRdeULVq1eTh4SEfHx+1aNFCM2bM0OXLl51dHm5g0qRJWrFihbPLMC0+Pl4Wi0VvvfWWXbthGHrhhRdksVg0duxYU/uIi4uTxWLJ9rV9+3ZTfQMovko4uwAAKAxWrlypzp07y2q1qkePHqpfv77S0tK0ZcsWvfLKK9q/f7/mzZvn7DKRi0mTJumpp55Sx44dnV1KvjMMQ/3799e8efM0atQo08Hiupdffln33HOPXVuNGjXypW8AxQ/BAkCxd/z4cXXt2lV33XWXNm7cqIoVK9qWDRgwQEeOHNHKlSsLtKbMzEylpaXJw8OjQPcLe1euXJG7u7tcXJx7gf+ll15STEyMRo4cqfHjx+dbv61atdJTTz2Vb/0BKN4YCgWg2JsyZYouXLigBQsW2IWK62rUqKGBAwdmaV+xYoXq168vq9WqevXqac2aNXbLe/bsqcDAwCzbjR07VhaLxa7t+ryNJUuWqF69erJarVqzZo0WLVoki8Wi7777TpGRkSpfvry8vb31xBNPKDk5Oc/HeOzYMbVr107e3t6qVKmSxo8fL8MwJF37NTwwMFCPP/54lu2uXLkiX19fvfDCCzfcx0cffaQmTZrIy8tLZcqU0X333ad169bluP71Y4uPj7drvz5MJy4uztb266+/qlOnTvL395eHh4fuvPNOde3aVefOnZN07fO7ePGiFi9ebBvS07NnT9v2v/32m5577jn5+fnZ/r0WLlyY7X6XLVum1157TZUrV5aXl5dSUlJueOyONHDgQM2ePVtRUVGaMGFCvvd//vx5Xb16Nd/7BVD8cMUCQLH39ddfq1q1amrevHmet9myZYuWL1+u/v37q1SpUnrnnXfUqVMnJSQk6I477rilOjZu3KhPPvlEERERKleunAIDA7V3715J136xLlOmjMaMGaP4+HhNnz5dERER+vjjj2/Yb0ZGhh5++GHde++9mjJlitasWaMxY8bo6tWrGj9+vCwWi7p166YpU6bozJkzKlu2rG3br7/+WikpKerWrVuu+xg3bpzGjh2r5s2ba/z48XJ3d9f333+vjRs36qGHHrqlz+O6tLQ0tWvXTqmpqXrppZfk7++v3377Td98843Onj0rX19fffjhh+rTp4+aNGmi559/XpJUvXp1SVJSUpLuvfdeW3grX768Vq9erd69eyslJUWDBg2y29/rr78ud3d3DR06VKmpqXJ3dzdVvxmDBw/WO++8o+HDh2vSpElZlmdmZurMmTN56svX11dubm52bb169dKFCxfk6uqqVq1a6c0331RoaGi+1A6gGDIAoBg7d+6cIcl4/PHH87yNJMPd3d04cuSIre3HH380JBkzZ860tYWHhxt33XVXlu3HjBlj/P1/fiUZLi4uxv79++3a33//fUOSERYWZmRmZtraBw8ebLi6uhpnz57Ntdbw8HBDkvHSSy/Z2jIzM4327dsb7u7uRnJysmEYhnH48GFDkjF37ly77R977DEjMDDQbt9/9+
uvvxouLi7GE088YWRkZNgt++t2rVu3Nlq3bp3l2I4fP263zaZNmwxJxqZNmwzDMIwffvjBkGR8+umnuR6rt7e3ER4enqW9d+/eRsWKFY3Tp0/btXft2tXw9fU1Ll26ZLffatWq2dqc4fjx44Yk46677jIkGa+88soN183L6/rnaRiG8d133xmdOnUyFixYYHz55ZdGdHS0cccddxgeHh7Gnj17CuAoAdyOuGIBoFi7PsylVKlSN7VdWFiY7RdxSWrQoIF8fHx07NixW66ldevWqlu3brbLnn/+ebvhU61atdLbb7+tEydOqEGDBjfs+6+3x73+y/3KlSu1YcMGde3aVbVq1VLTpk21ZMkS9evXT5J05swZrV69WsOGDcsydOuvVqxYoczMTI0ePTrLXITctssrX19fSdLatWv16KOPysvLK8/bGoahzz//XE8//bQMw9Dp06dty9q1a6dly5Zpz549atGiha09PDxcnp6epus2KykpSZJUq1atHNfx9/fX+vXr89RfSEiI7e/mzZvbXaF77LHH9NRTT6lBgwaKiorKMqwPAPKCYAGgWPPx8ZF0bZz5zahSpUqWtjJlyui///3vLddStWrVPO+vTJkykpSn/bm4uKhatWp2bde/rP51fkOPHj0UERGhEydO6K677tKnn36q9PR0de/ePdf+jx49KhcXlxxDkVlVq1ZVZGSkpk2bpiVLlqhVq1Z67LHH1K1bN1voyElycrLOnj2refPm5XhXr1OnTmXZX14kJycrIyMjbwfxN+XLl5erq2uu6wwfPlyrVq3SCy+8oNKlS2c7ydrDw0NhYWG3VMPf1ahRQ48//riWL1+ujIyMG9YHAH9HsABQrPn4+KhSpUr6+eefb2q7nL50Gf8/IVrK+df6nL6M5vYreV72Z1bXrl01ePBgLVmyRK+++qo++ugjhYaGqnbt2vm2j7+6mc9n6tSp6tmzp7788kutW7dOL7/8sqKjo7V9+3bdeeedOe4jMzNTktStWzeFh4dnu87fr/jk9WrFPffcoxMnTuRp3b87fvx4thP7/6pkyZJavXq17rvvPj377LPy8fHJMl8lIyMjz5P4y5Yte8P5IgEBAUpLS9PFixdtoRsA8opgAaDY+8c//qF58+Zp27ZtatasWb71W6ZMGZ09ezZL+61+Gb1VmZmZOnbsmN2Qml9++UWS7L7cli1bVu3bt9eSJUv07LPP6rvvvtP06dNv2H/16tWVmZmpAwcOqGHDhnmu6/pVl79/Rjl9PsHBwQoODtZrr72mrVu3qkWLFoqJibHdKSm7oFK+fHmVKlVKGRkZ+fbL/nVLliy55Qcn+vv752m9O+64Q+vWrVOLFi305JNPav369Xbn6MmTJ/N8hWXTpk03fPL5sWPH5OHhUWie1g6gaCFYACj2hg0bpiVLlqhPnz7auHGj/Pz87JYfPXpU33zzTba3nM1N9erVde7cOf3000+2X8X/+OMPffHFF/lWe17NmjVL77zzjqRrVzlmzZolNzc3PfDAA3brde/eXU8++aReeeUVubq6qmvXrjfsu2PHjho+fLjGjx+vzz77zG6ehWEYOV6ZuD5H5dtvv7UFkoyMjCxDllJSUuTl5aUSJf73f1nBwcFycXFRamqqrc3b2ztLSHF1dVWnTp20dOlS/fzzz6pfv77d8uTkZJUvX/6Gx5idv87LcKTKlStr/fr1atmypdq3b6/NmzcrODhY0q3PscjuuH/88Ud99dVXeuSRR5z+3A4ARRPBAkCxV716dS1dulRdunRRUFCQ3ZO3t27dqk8//dTumQh51bVrVw0fPlxPPPGEXn75ZV26dElz585VrVq1tGfPnvw/kBx4eHhozZo1Cg8PV9OmTbV69WqtXLlSr776apYvl+3bt9cdd9yhTz/9VI888ogqVKhww/5r1KihkSNH6vXXX1erVq305JNPymq1aufOnapUqZKio6Oz3a5evXq69957FRUVZbvN7bJly7I8U2Hjxo
2KiIhQ586dVatWLV29elUffvihLTRc17hxY23YsEHTpk1TpUqVVLVqVTVt2lSTJ0/Wpk2b1LRpU/Xt21d169bVmTN
"text/plain": [
"<Figure size 800x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"for k in [2, 5]:\n",
" tmp = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" churn_hard=(\"churn_hard\", \"mean\"),\n",
" churn_soft=(\"churn_soft\", \"mean\"),\n",
" churn_warning=(\"churn_warning\", \"mean\")\n",
" )\n",
" )\n",
"\n",
" tmp.plot(kind=\"bar\", figsize=(8, 4))\n",
" plt.title(f\"Churn by cluster — K={k}\")\n",
" plt.ylabel(\"Rate\")\n",
" plt.xlabel(\"Clusters\")\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0370454-561e-48c5-ad3b-28a356a2abac",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}