Project_Carmignac/.ipynb_checkpoints/clustering_repaired-checkpoint.ipynb

4583 lines
1.3 MiB
Plaintext
Raw Normal View History

2026-04-07 12:31:16 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 290,
"id": "2fee3a54-847b-432f-bda5-3d6a9aa9020c",
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"from sklearn.preprocessing import StandardScaler, RobustScaler\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.mixture import GaussianMixture\n",
"from sklearn.metrics import silhouette_score, davies_bouldin_score, pairwise_distances\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.neighbors import kneighbors_graph\n",
"from sklearn.manifold import MDS\n",
"\n",
"# Shared constants: EPS is added to ratio denominators to avoid division by zero;\n",
"# RANDOM_STATE is a fixed value, presumably used as the seed of the stochastic estimators.\n",
"RANDOM_STATE = 42\n",
"EPS = 1e-9\n",
"\n",
"# Display settings: seaborn white-grid theme, up to 200 rows / columns per DataFrame\n",
"pd.set_option(\"display.max_rows\", 200)\n",
"pd.set_option(\"display.max_columns\", 200)\n",
"sns.set_style(\"whitegrid\")"
]
},
{
"cell_type": "code",
"execution_count": 291,
"id": "1f95b6b6-03b8-4f23-b236-5c71beedea04",
"metadata": {},
"outputs": [],
"source": [
"# Load the AUM_repaired.csv extract (comma-separated) from S3\n",
"PATH_aum = \"s3://projet-bdc-carmignac-g3/paco/AUM_repaired.csv\"\n",
"df_aum_repaired = pd.read_csv(filepath_or_buffer=PATH_aum, delimiter=\",\")"
]
},
{
"cell_type": "code",
"execution_count": 292,
"id": "cab4432f-d7e5-4c18-ab86-19fe6759eed6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fichiers Flows : ['projet-bdc-data/carmignac/Flows ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv']\n",
"Fichiers AUM : ['projet-bdc-data/carmignac/AUM ENSAE V1 -20251027.csv', 'projet-bdc-data/carmignac/AUM ENSAE V2 -20251105.csv']\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - AUM</th>\n",
" <th>Value - AUM CCY</th>\n",
" <th>Value - AUM €</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>35.368</td>\n",
" <td>24648.6666</td>\n",
" <td>24648.6666</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22413.0553</td>\n",
" <td>22413.0553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-12-31</td>\n",
" <td>35.368</td>\n",
" <td>22051.2406</td>\n",
" <td>22051.2406</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-03-31</td>\n",
" <td>35.368</td>\n",
" <td>21626.1173</td>\n",
" <td>21626.1173</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200000647</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22489.4502</td>\n",
" <td>22489.4502</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200000647 France \n",
"1 200000647 France \n",
"2 200000647 France \n",
"3 200000647 France \n",
"4 200000647 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Diversified Patrimoine \n",
"1 France Diversified Patrimoine \n",
"2 France Diversified Patrimoine \n",
"3 France Diversified Patrimoine \n",
"4 France Diversified Patrimoine \n",
"\n",
" Product - Legal Status Product - Is Dedie ? Product - Fund \\\n",
"0 FCP NO Carmignac Patrimoine \n",
"1 FCP NO Carmignac Patrimoine \n",
"2 FCP NO Carmignac Patrimoine \n",
"3 FCP NO Carmignac Patrimoine \n",
"4 FCP NO Carmignac Patrimoine \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency Product - Isin \\\n",
"0 A EUR FR0010135103 \n",
"1 A EUR FR0010135103 \n",
"2 A EUR FR0010135103 \n",
"3 A EUR FR0010135103 \n",
"4 A EUR FR0010135103 \n",
"\n",
" Centralisation Date Quantity - AUM Value - AUM CCY Value - AUM € \n",
"0 2015-03-31 35.368 24648.6666 24648.6666 \n",
"1 2015-11-30 35.368 22413.0553 22413.0553 \n",
"2 2015-12-31 35.368 22051.2406 22051.2406 \n",
"3 2016-03-31 35.368 21626.1173 21626.1173 \n",
"4 2016-11-30 35.368 22489.4502 22489.4502 "
]
},
"execution_count": 292,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load raw data: read every Flows / AUM CSV export found in the shared S3 folder\n",
"\n",
"import os\n",
"import s3fs\n",
"import pandas as pd\n",
"\n",
"s3_ENDPOINT_URL = \"https://\" + os.environ[\"AWS_S3_ENDPOINT\"]\n",
"\n",
"fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': s3_ENDPOINT_URL})\n",
"\n",
"BUCKET = \"projet-bdc-data\"\n",
"carmignac_path = \"projet-bdc-data/carmignac\"\n",
"\n",
"\n",
"def _read_csv_exports(paths):\n",
"    \"\"\"Read each ';'-separated CSV in `paths` through the S3 filesystem `fs`.\n",
"\n",
"    Returns a dict mapping each file's base name to its DataFrame.\n",
"    \"\"\"\n",
"    frames = {}\n",
"    for file_path in paths:\n",
"        with fs.open(file_path, 'r') as handle:\n",
"            frames[os.path.basename(file_path)] = pd.read_csv(handle, sep=';', low_memory=False)\n",
"    return frames\n",
"\n",
"\n",
"# List the folder once, then split the CSV files by family (name contains Flows / AUM)\n",
"all_files = fs.ls(carmignac_path)\n",
"flows_files = [f for f in all_files if \"Flows\" in f and f.endswith(\".csv\")]\n",
"aum_files = [f for f in all_files if \"AUM\" in f and f.endswith(\".csv\")]\n",
"\n",
"print(\"Fichiers Flows :\", flows_files)\n",
"flows_data = _read_csv_exports(flows_files)\n",
"\n",
"print(\"Fichiers AUM :\", aum_files)\n",
"aum_data = _read_csv_exports(aum_files)\n",
"\n",
"# Working tables: the V2 exports (df = AUM, dg = flows)\n",
"df = aum_data['AUM ENSAE V2 -20251105.csv']\n",
"dg = flows_data['Flows ENSAE V2 -20251105.csv']\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 293,
"id": "232e399b-64dc-4943-9c15-793a268ee896",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - Subscription</th>\n",
" <th>Quantity - Redemption</th>\n",
" <th>Quantity - NetFlows</th>\n",
" <th>Value Ccy - Subscription</th>\n",
" <th>Value Ccy - Redemption</th>\n",
" <th>Value Ccy - NetFlows</th>\n",
" <th>Value € - Subscription</th>\n",
" <th>Value € - Redemption</th>\n",
" <th>Value € - NetFlows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200127202</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Portfolio Investissement</td>\n",
" <td>F</td>\n",
" <td>EUR</td>\n",
" <td>LU0992625839</td>\n",
" <td>2020-11-05</td>\n",
" <td>1636.00</td>\n",
" <td>0.000</td>\n",
" <td>1636.000</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Diversified</td>\n",
" <td>Patrimoine</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Patrimoine</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-09</td>\n",
" <td>144.69</td>\n",
" <td>0.000</td>\n",
" <td>144.690</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2016-10-26</td>\n",
" <td>0.00</td>\n",
" <td>-8.321</td>\n",
" <td>-8.321</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2018-10-18</td>\n",
" <td>0.00</td>\n",
" <td>-22.083</td>\n",
" <td>-22.083</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>France</td>\n",
" <td>France</td>\n",
" <td>Equity</td>\n",
" <td>Investissement</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>Carmignac Investissement</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2019-04-08</td>\n",
" <td>0.00</td>\n",
" <td>-465.992</td>\n",
" <td>-465.992</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
"0 003 166 166 \n",
"1 003 166 166 \n",
"2 003 166 166 \n",
"3 003 166 166 \n",
"4 003 166 166 \n",
"\n",
" Registrar Account - ID Registrar Account - Region \\\n",
"0 200127202 France \n",
"1 406533 France \n",
"2 406533 France \n",
"3 406533 France \n",
"4 406533 France \n",
"\n",
" RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
"0 France Equity Investissement \n",
"1 France Diversified Patrimoine \n",
"2 France Equity Investissement \n",
"3 France Equity Investissement \n",
"4 France Equity Investissement \n",
"\n",
" Product - Legal Status Product - Is Dedie ? \\\n",
"0 SICAV NO \n",
"1 FCP NO \n",
"2 FCP NO \n",
"3 FCP NO \n",
"4 FCP NO \n",
"\n",
" Product - Fund Product - Shareclass Type \\\n",
"0 Carmignac Portfolio Investissement F \n",
"1 Carmignac Patrimoine A \n",
"2 Carmignac Investissement A \n",
"3 Carmignac Investissement A \n",
"4 Carmignac Investissement A \n",
"\n",
" Product - Shareclass Currency Product - Isin Centralisation Date \\\n",
"0 EUR LU0992625839 2020-11-05 \n",
"1 EUR FR0010135103 2015-03-09 \n",
"2 EUR FR0010148981 2016-10-26 \n",
"3 EUR FR0010148981 2018-10-18 \n",
"4 EUR FR0010148981 2019-04-08 \n",
"\n",
" Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
"0 1636.00 0.000 1636.000 \n",
"1 144.69 0.000 144.690 \n",
"2 0.00 -8.321 -8.321 \n",
"3 0.00 -22.083 -22.083 \n",
"4 0.00 -465.992 -465.992 \n",
"\n",
" Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 \n",
"\n",
" Value € - Subscription Value € - Redemption Value € - NetFlows \n",
"0 280983.00 0.00 280983.00 \n",
"1 99985.13 0.00 99985.13 \n",
"2 0.00 -9384.76 -9384.76 \n",
"3 0.00 -25227.40 -25227.40 \n",
"4 0.00 -563775.76 -563775.76 "
]
},
"execution_count": 293,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the flows table\n",
"dg.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 294,
"id": "e19e970c-d1dc-4608-9f6f-73dd3e282ba6",
"metadata": {},
"outputs": [],
"source": [
"# Merge: use the repaired AUM rows for every account they cover,\n",
"# and fall back to the raw export for all other accounts.\n",
"# NOTE(review): `isin` only matches values of the same type -- confirm the account ID\n",
"# column has the same dtype in both frames, otherwise no raw row is excluded here.\n",
"\n",
"# 1) Account IDs present in df_aum_repaired\n",
"ids_repaired = df_aum_repaired[\"Registrar Account - ID\"].unique()\n",
"\n",
"# 2) Rows of df whose account ID is absent from df_aum_repaired\n",
"covered_by_repaired = df[\"Registrar Account - ID\"].isin(ids_repaired)\n",
"df_only = df.loc[~covered_by_repaired]\n",
"\n",
"# 3) Stack the two parts under a fresh 0..n-1 index\n",
"df_merged = pd.concat([df_aum_repaired, df_only], ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 295,
"id": "79c732d4-8d4d-4f7d-9a46-2e89cf2b213d",
"metadata": {},
"outputs": [],
"source": [
"# Filter out the technical accounts\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Labels stored in the account ID column that identify technical accounts\n",
"TECHNICAL_ACCOUNTS = ['Off Distribution', 'Private Clients', 'Private Client']\n",
"\n",
"# Parse the date column of the three source tables\n",
"df['Centralisation Date'] = pd.to_datetime(df['Centralisation Date'])\n",
"df_aum_repaired['Centralisation Date'] = pd.to_datetime(df_aum_repaired['Centralisation Date'])\n",
"dg['Centralisation Date'] = pd.to_datetime(dg['Centralisation Date'])\n",
"\n",
"# Keep only the rows that do not belong to a technical account\n",
"df = df.loc[~df['Registrar Account - ID'].isin(TECHNICAL_ACCOUNTS)]\n",
"dg = dg.loc[~dg['Registrar Account - ID'].isin(TECHNICAL_ACCOUNTS)]"
]
},
{
"cell_type": "code",
"execution_count": 296,
"id": "f7f7242c-051e-4d7d-9a76-b46523089e49",
"metadata": {},
"outputs": [],
"source": [
"# Reference date and selection of the 400+ main accounts\n",
"\n",
"ref_date = pd.Timestamp('2025-10-31')\n",
"\n",
"# AUM rows of df dated exactly at the reference date\n",
"df_ref = df.loc[df['Centralisation Date'].eq(ref_date)]\n",
"\n",
"# Total 'Value - AUM €' per account, largest first\n",
"aum_account = (\n",
"    df_ref\n",
"    .groupby('Registrar Account - ID')['Value - AUM €']\n",
"    .sum()\n",
"    .reset_index()\n",
"    .sort_values(by='Value - AUM €', ascending=False)\n",
")\n",
"\n",
"# Keep the accounts whose total exceeds 5,000,000 at the reference date\n",
"aum_account = aum_account.loc[aum_account['Value - AUM €'].gt(5_000_000)]\n",
"selected_accounts = aum_account['Registrar Account - ID']\n",
"\n",
"# Restrict the merged AUM table and the flows table to the selected accounts\n",
"aum_in_scope = df_merged['Registrar Account - ID'].isin(selected_accounts)\n",
"flows_in_scope = dg['Registrar Account - ID'].isin(selected_accounts)\n",
"df_aum = df_merged.loc[aum_in_scope].copy()\n",
"df_flows = dg.loc[flows_in_scope].copy()"
]
},
{
"cell_type": "code",
"execution_count": 297,
"id": "91ea0342-607a-420e-af0d-178d063da761",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(31709, 6)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>n_tx</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>2015-01-31</td>\n",
" <td>11819.680</td>\n",
" <td>-1524.010</td>\n",
" <td>15230.010</td>\n",
" <td>32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>2015-02-28</td>\n",
" <td>5705.000</td>\n",
" <td>7247.100</td>\n",
" <td>18571.880</td>\n",
" <td>38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>2015-03-31</td>\n",
" <td>70038.905</td>\n",
" <td>3655.380</td>\n",
" <td>9754.040</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>2015-04-30</td>\n",
" <td>70324.489</td>\n",
" <td>-218.394</td>\n",
" <td>12840.950</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>2015-05-31</td>\n",
" <td>75567.276</td>\n",
" <td>-4782.849</td>\n",
" <td>6332.849</td>\n",
" <td>24</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty net_flow_qty gross_flow_qty \\\n",
"0 18872 2015-01-31 11819.680 -1524.010 15230.010 \n",
"1 18872 2015-02-28 5705.000 7247.100 18571.880 \n",
"2 18872 2015-03-31 70038.905 3655.380 9754.040 \n",
"3 18872 2015-04-30 70324.489 -218.394 12840.950 \n",
"4 18872 2015-05-31 75567.276 -4782.849 6332.849 \n",
"\n",
" n_tx \n",
"0 32 \n",
"1 38 \n",
"2 47 \n",
"3 39 \n",
"4 24 "
]
},
"execution_count": 297,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Monthly variables per account\n",
"# Flows are daily while AUM is monthly, so both tables are brought to a common month key.\n",
"\n",
"ID_COL = \"Registrar Account - ID\"\n",
"FLOW_COL = \"Quantity - NetFlows\"\n",
"AUM_COL = \"Quantity - AUM\"\n",
"\n",
"# Parse dates (values that cannot be parsed become NaT)\n",
"df_flows[\"Centralisation Date\"] = pd.to_datetime(df_flows[\"Centralisation Date\"], errors=\"coerce\")\n",
"df_aum[\"Centralisation Date\"] = pd.to_datetime(df_aum[\"Centralisation Date\"], errors=\"coerce\")\n",
"\n",
"# Month key derived from the centralisation date\n",
"df_flows[\"month\"] = df_flows[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"df_aum[\"month\"] = df_aum[\"Centralisation Date\"].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"\n",
"# 1) Monthly aggregation of the flows, per account\n",
"flows_valid = df_flows.dropna(subset=[ID_COL, \"month\", FLOW_COL])\n",
"flows_valid = flows_valid.assign(gross_flow_qty=flows_valid[FLOW_COL].abs())  # absolute quantity moved\n",
"df_flows_m = flows_valid.groupby([ID_COL, \"month\"], as_index=False).agg(\n",
"    net_flow_qty=(FLOW_COL, \"sum\"),  # net quantity change over the month\n",
"    gross_flow_qty=(\"gross_flow_qty\", \"sum\"),  # total traded quantity (activity intensity)\n",
"    n_tx=(FLOW_COL, \"size\"),  # number of flow rows in the month\n",
")\n",
"\n",
"# 2) Monthly aggregation of the AUM, per account (quantities summed over all rows of the month)\n",
"df_aum_m = (\n",
"    df_aum\n",
"    .dropna(subset=[ID_COL, \"month\", AUM_COL])\n",
"    .groupby([ID_COL, \"month\"], as_index=False)\n",
"    .agg(aum_qty=(AUM_COL, \"sum\"))\n",
")\n",
"\n",
"# 3) Attach the flows to every account-month that has AUM;\n",
"#    months without any flow row get zero flows and a transaction count of 0\n",
"df_month0 = (\n",
"    df_aum_m\n",
"    .merge(df_flows_m, on=[ID_COL, \"month\"], how=\"left\")\n",
"    .fillna({\"net_flow_qty\": 0.0, \"gross_flow_qty\": 0.0, \"n_tx\": 0})\n",
"    .astype({\"n_tx\": int})\n",
")\n",
"\n",
"print(df_month0.shape)\n",
"df_month0.head()"
]
},
{
"cell_type": "code",
"execution_count": 298,
"id": "8caa4710-c7d5-4397-9d90-82f756499016",
"metadata": {},
"outputs": [],
"source": [
"# Additional variables: external data sets\n",
"\n",
"# Product valuation / performance table, tracking fund performance over time\n",
"# (projet-bdc-data / carmignac / Data Modélisation / Nav)\n",
"#   Price (TF PartPrice): price of one fund share\n",
"#   AUM Eur (Assets Under Management): fund size in euros\n",
"PATH_NAV = \"s3://projet-bdc-data/carmignac/Data Modélisation/Nav/NAV_Bench_data.csv\"\n",
"df_nav = pd.read_csv(PATH_NAV, delimiter=\";\")\n",
"\n",
"# Time series of a bond yield to maturity (YTM)\n",
"PATH_RATES = \"s3://projet-bdc-data/carmignac/Data Modélisation/market data/esterRates.csv\"\n",
"df_rates = pd.read_csv(PATH_RATES, delimiter=\";\")\n",
"\n",
"# Optional competitor data\n",
"#   Estimated Fund-level Net Flow (Daily): estimated net flows of the fund\n",
"PATH_COMP_FLOWS = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/daily_estimated_flows.csv\"\n",
"df_comp_flows = pd.read_csv(PATH_COMP_FLOWS, delimiter=\";\")\n",
"\n",
"#   perfPeriod: performance horizon\n",
"#   return: fund performance over the given period\n",
"#   percentile: rank of the fund relative to its peers\n",
"#     0   -> top performer\n",
"#     100 -> poor performer\n",
"PATH_COMP_PERF = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/weekly_perf_full.csv\"\n",
"df_comp_perf = pd.read_csv(PATH_COMP_PERF, delimiter=\";\")\n",
"\n",
"# Peer-group reference table ('|'-separated)\n",
"#   Global Broad Category Group: broad asset class\n",
"#   Global Category: finer category, alongside the Morningstar Category\n",
"#   Index Fund: index (passive) fund\n",
"#   Enhanced Index: quasi-passive (aims at a slight outperformance)\n",
"#   Inception Date: creation date of the share class\n",
"#   Inception Date of Fund's Oldest Share Class: actual age of the fund\n",
"#   Domicile: country where the fund is domiciled\n",
"PATH_PEERS = \"s3://projet-bdc-carmignac-g3/peers/CAD_peers.csv\"\n",
"df_peers = pd.read_csv(PATH_PEERS, delimiter=\"|\")"
]
},
{
"cell_type": "code",
"execution_count": 299,
"id": "fe081e43-092b-4429-813a-67417e39fd07",
"metadata": {},
"outputs": [],
"source": [
"# Column names used by the feature-engineering cells\n",
"\n",
"# Keys shared by the flows and AUM tables\n",
"ID_COL = \"Registrar Account - ID\"\n",
"ISIN_COL = \"Product - Isin\"\n",
"REGION_COL = \"Registrar Account - Region\"\n",
"COUNTRY_COL = \"RegistrarAccount - Country\"\n",
"\n",
"# Both tables carry their date in a column of the same name\n",
"FLOW_DATE_COL = AUM_DATE_COL = \"Centralisation Date\"\n",
"\n",
"# Flows table: quantities\n",
"FLOW_QTY_COL = \"Quantity - NetFlows\"\n",
"FLOW_SUB_COL = \"Quantity - Subscription\"\n",
"FLOW_RED_COL = \"Quantity - Redemption\"\n",
"\n",
"# AUM table: quantity and euro value\n",
"AUM_QTY_COL = \"Quantity - AUM\"\n",
"AUM_VAL_COL = \"Value - AUM €\"\n",
"\n",
"# NAV table\n",
"NAV_DATE_COL = \"Dat\"\n",
"NAV_ISIN_COL = \"Isin\"\n",
"NAV_PRICE_COL = \"Price (TF PartPrice)\"\n",
"NAV_BENCH_COL = \"PriceBench\"\n",
"\n",
"# Rates table\n",
"RATE_DATE_COL = \"Date\"\n",
"RATE_VAL_COL = \"Yld to Maturity\""
]
},
{
"cell_type": "code",
"execution_count": 300,
"id": "b2a1cdce-1b1c-45d9-9c74-93f826bd65fd",
"metadata": {},
"outputs": [],
"source": [
"# Harmonise the dtypes of the working tables: dates, numeric columns and ISIN strings.\n",
"# The loop variable is called `frame`, not `df`: `df` is the AUM table used by the\n",
"# earlier cells, and rebinding it here would make those cells operate on a different\n",
"# table when they are re-run.\n",
"for frame, date_col in [\n",
"    (df_flows, FLOW_DATE_COL),\n",
"    (df_aum, AUM_DATE_COL),\n",
"    (df_nav, NAV_DATE_COL),\n",
"    (df_rates, RATE_DATE_COL),\n",
"]:\n",
"    # Values that cannot be parsed become NaT instead of raising\n",
"    frame[date_col] = pd.to_datetime(frame[date_col], errors=\"coerce\")\n",
"\n",
"# Month key shared by all tables\n",
"df_flows[\"month\"] = df_flows[FLOW_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"df_aum[\"month\"] = df_aum[AUM_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"df_nav[\"month\"] = df_nav[NAV_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"df_rates[\"month\"] = df_rates[RATE_DATE_COL].dt.to_period(\"M\").dt.to_timestamp(\"M\")\n",
"\n",
"# Numeric columns: values that cannot be converted become NaN\n",
"for col in [FLOW_QTY_COL, FLOW_SUB_COL, FLOW_RED_COL]:\n",
"    df_flows[col] = pd.to_numeric(df_flows[col], errors=\"coerce\")\n",
"\n",
"for col in [AUM_QTY_COL, AUM_VAL_COL]:\n",
"    df_aum[col] = pd.to_numeric(df_aum[col], errors=\"coerce\")\n",
"\n",
"for col in [NAV_PRICE_COL, NAV_BENCH_COL]:\n",
"    df_nav[col] = pd.to_numeric(df_nav[col], errors=\"coerce\")\n",
"\n",
"df_rates[RATE_VAL_COL] = pd.to_numeric(df_rates[RATE_VAL_COL], errors=\"coerce\")\n",
"\n",
"# ISIN codes as stripped strings, so the tables can be joined on them\n",
"for frame, col in [(df_flows, ISIN_COL), (df_aum, ISIN_COL)]:\n",
"    frame[col] = frame[col].astype(str).str.strip()\n",
"\n",
"df_nav[NAV_ISIN_COL] = df_nav[NAV_ISIN_COL].astype(str).str.strip()"
]
},
{
"cell_type": "code",
"execution_count": 301,
"id": "e10eb2ef-04cd-4186-b188-72d760b4d778",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(492920, 18)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>region_flow</th>\n",
" <th>country_flow</th>\n",
" <th>active_rel_month</th>\n",
" <th>holding_rel_month</th>\n",
" <th>flow_to_aum_rel</th>\n",
" <th>turnover_rel</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>673.990</td>\n",
" <td>956.01</td>\n",
" <td>859.990</td>\n",
" <td>-186.000</td>\n",
" <td>9.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6.739900e+11</td>\n",
" <td>9.560100e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-02-28</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>988.000</td>\n",
" <td>1712.00</td>\n",
" <td>1350.000</td>\n",
" <td>-362.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.880000e+11</td>\n",
" <td>1.712000e+12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>9.710</td>\n",
" <td>1447.71</td>\n",
" <td>785.710</td>\n",
" <td>-776.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.710000e+09</td>\n",
" <td>1.447710e+12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-04-30</td>\n",
" <td>50219.393</td>\n",
" <td>3.452433e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>-123.234</td>\n",
" <td>1708.19</td>\n",
" <td>853.478</td>\n",
" <td>-976.712</td>\n",
" <td>11.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-2.453913e-03</td>\n",
" <td>3.401455e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-05-31</td>\n",
" <td>53685.393</td>\n",
" <td>3.699729e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>121.000</td>\n",
" <td>529.00</td>\n",
" <td>325.000</td>\n",
" <td>-204.000</td>\n",
" <td>6.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2.253872e-03</td>\n",
" <td>9.853705e-03</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
"0 18872 FR0010135103 2015-01-31 0.000 0.000000e+00 \n",
"1 18872 FR0010135103 2015-02-28 0.000 0.000000e+00 \n",
"2 18872 FR0010135103 2015-03-31 0.000 0.000000e+00 \n",
"3 18872 FR0010135103 2015-04-30 50219.393 3.452433e+07 \n",
"4 18872 FR0010135103 2015-05-31 53685.393 3.699729e+07 \n",
"\n",
" region country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
"0 Switzerland Switzerland 673.990 956.01 859.990 -186.000 \n",
"1 Switzerland Switzerland 988.000 1712.00 1350.000 -362.000 \n",
"2 Switzerland Switzerland 9.710 1447.71 785.710 -776.000 \n",
"3 Switzerland Switzerland -123.234 1708.19 853.478 -976.712 \n",
"4 Switzerland Switzerland 121.000 529.00 325.000 -204.000 \n",
"\n",
" n_tx region_flow country_flow active_rel_month holding_rel_month \\\n",
"0 9.0 Switzerland Switzerland 1 0 \n",
"1 12.0 Switzerland Switzerland 1 0 \n",
"2 12.0 Switzerland Switzerland 1 0 \n",
"3 11.0 Switzerland Switzerland 1 1 \n",
"4 6.0 Switzerland Switzerland 1 1 \n",
"\n",
" flow_to_aum_rel turnover_rel \n",
"0 6.739900e+11 9.560100e+11 \n",
"1 9.880000e+11 1.712000e+12 \n",
"2 9.710000e+09 1.447710e+12 \n",
"3 -2.453913e-03 3.401455e-02 \n",
"4 2.253872e-03 9.853705e-03 "
]
},
"execution_count": 301,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Monthly features at relationship level (account x ISIN x month)\n",
"REL_KEYS = [ID_COL, ISIN_COL, \"month\"]\n",
"\n",
"# Flows: one row per relationship-month\n",
"flows_rel = df_flows.dropna(subset=REL_KEYS)\n",
"flows_rel = flows_rel.assign(\n",
"    gross_flow_qty=flows_rel[FLOW_QTY_COL].abs(),\n",
"    sub_qty=flows_rel[FLOW_SUB_COL].fillna(0),\n",
"    red_qty=flows_rel[FLOW_RED_COL].fillna(0),\n",
")\n",
"df_flows_rel_m = flows_rel.groupby(REL_KEYS, as_index=False).agg(\n",
"    net_flow_qty=(FLOW_QTY_COL, \"sum\"),\n",
"    gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
"    sub_qty=(\"sub_qty\", \"sum\"),\n",
"    red_qty=(\"red_qty\", \"sum\"),\n",
"    n_tx=(FLOW_QTY_COL, \"size\"),\n",
"    region=(REGION_COL, \"last\"),\n",
"    country=(COUNTRY_COL, \"last\"),\n",
")\n",
"\n",
"# AUM: one row per relationship-month\n",
"df_aum_rel_m = (\n",
"    df_aum\n",
"    .dropna(subset=REL_KEYS)\n",
"    .groupby(REL_KEYS, as_index=False)\n",
"    .agg(\n",
"        aum_qty=(AUM_QTY_COL, \"sum\"),\n",
"        aum_val=(AUM_VAL_COL, \"sum\"),\n",
"        region=(REGION_COL, \"last\"),\n",
"        country=(COUNTRY_COL, \"last\"),\n",
"    )\n",
")\n",
"\n",
"# Every relationship-month seen in either table\n",
"keys = pd.concat([df_flows_rel_m[REL_KEYS], df_aum_rel_m[REL_KEYS]]).drop_duplicates()\n",
"\n",
"# AUM columns keep their names; clashing flow columns get the \"_flow\" suffix\n",
"df_rel_m = keys.merge(df_aum_rel_m, on=REL_KEYS, how=\"left\", suffixes=(\"\", \"_aum\"))\n",
"df_rel_m = df_rel_m.merge(df_flows_rel_m, on=REL_KEYS, how=\"left\", suffixes=(\"\", \"_flow\"))\n",
"\n",
"# A relationship-month missing from one table counts as zero there\n",
"zero_fill_cols = [\"aum_qty\", \"aum_val\", \"net_flow_qty\", \"gross_flow_qty\", \"sub_qty\", \"red_qty\", \"n_tx\"]\n",
"df_rel_m = df_rel_m.fillna(dict.fromkeys(zero_fill_cols, 0))\n",
"\n",
"# Geography: AUM value first, flow value as fallback\n",
"df_rel_m[\"region\"] = df_rel_m[\"region\"].fillna(df_rel_m.get(\"region_flow\"))\n",
"df_rel_m[\"country\"] = df_rel_m[\"country\"].fillna(df_rel_m.get(\"country_flow\"))\n",
"\n",
"# Activity / holding flags and flow ratios relative to the held quantity\n",
"# NOTE(review): when aum_qty is 0 the denominator is EPS alone, so both ratios reach\n",
"# ~1e11 or more (see the first rows displayed below) -- confirm this is handled downstream.\n",
"aum_denominator = df_rel_m[\"aum_qty\"].abs() + EPS\n",
"df_rel_m[\"active_rel_month\"] = df_rel_m[\"gross_flow_qty\"].gt(0).astype(int)\n",
"df_rel_m[\"holding_rel_month\"] = df_rel_m[\"aum_qty\"].gt(0).astype(int)\n",
"df_rel_m[\"flow_to_aum_rel\"] = df_rel_m[\"net_flow_qty\"] / aum_denominator\n",
"df_rel_m[\"turnover_rel\"] = df_rel_m[\"gross_flow_qty\"] / aum_denominator\n",
"\n",
"print(df_rel_m.shape)\n",
"df_rel_m.head()"
]
},
{
"cell_type": "code",
"execution_count": 302,
"id": "321b09ab-90f0-4add-a670-0d8c74046e03",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>region_flow</th>\n",
" <th>country_flow</th>\n",
" <th>active_rel_month</th>\n",
" <th>holding_rel_month</th>\n",
" <th>flow_to_aum_rel</th>\n",
" <th>turnover_rel</th>\n",
" <th>ret_fund_m</th>\n",
" <th>ret_bench_m</th>\n",
" <th>active_return_m</th>\n",
" <th>delta_rate_m</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-01-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>673.990</td>\n",
" <td>956.01</td>\n",
" <td>859.990</td>\n",
" <td>-186.000</td>\n",
" <td>9.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6.739900e+11</td>\n",
" <td>9.560100e+11</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.058</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-02-28</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>988.000</td>\n",
" <td>1712.00</td>\n",
" <td>1350.000</td>\n",
" <td>-362.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.880000e+11</td>\n",
" <td>1.712000e+12</td>\n",
" <td>0.121368</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>0.000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>9.710</td>\n",
" <td>1447.71</td>\n",
" <td>785.710</td>\n",
" <td>-776.000</td>\n",
" <td>12.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>9.710000e+09</td>\n",
" <td>1.447710e+12</td>\n",
" <td>0.068598</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-04-30</td>\n",
" <td>50219.393</td>\n",
" <td>3.452433e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>-123.234</td>\n",
" <td>1708.19</td>\n",
" <td>853.478</td>\n",
" <td>-976.712</td>\n",
" <td>11.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-2.453913e-03</td>\n",
" <td>3.401455e-02</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.077</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-05-31</td>\n",
" <td>53685.393</td>\n",
" <td>3.699729e+07</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>121.000</td>\n",
" <td>529.00</td>\n",
" <td>325.000</td>\n",
" <td>-204.000</td>\n",
" <td>6.0</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2.253872e-03</td>\n",
" <td>9.853705e-03</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.053</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
"0 18872 FR0010135103 2015-01-31 0.000 0.000000e+00 \n",
"1 18872 FR0010135103 2015-02-28 0.000 0.000000e+00 \n",
"2 18872 FR0010135103 2015-03-31 0.000 0.000000e+00 \n",
"3 18872 FR0010135103 2015-04-30 50219.393 3.452433e+07 \n",
"4 18872 FR0010135103 2015-05-31 53685.393 3.699729e+07 \n",
"\n",
" region country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
"0 Switzerland Switzerland 673.990 956.01 859.990 -186.000 \n",
"1 Switzerland Switzerland 988.000 1712.00 1350.000 -362.000 \n",
"2 Switzerland Switzerland 9.710 1447.71 785.710 -776.000 \n",
"3 Switzerland Switzerland -123.234 1708.19 853.478 -976.712 \n",
"4 Switzerland Switzerland 121.000 529.00 325.000 -204.000 \n",
"\n",
" n_tx region_flow country_flow active_rel_month holding_rel_month \\\n",
"0 9.0 Switzerland Switzerland 1 0 \n",
"1 12.0 Switzerland Switzerland 1 0 \n",
"2 12.0 Switzerland Switzerland 1 0 \n",
"3 11.0 Switzerland Switzerland 1 1 \n",
"4 6.0 Switzerland Switzerland 1 1 \n",
"\n",
" flow_to_aum_rel turnover_rel ret_fund_m ret_bench_m active_return_m \\\n",
"0 6.739900e+11 9.560100e+11 0.000000 0.0 0.0 \n",
"1 9.880000e+11 1.712000e+12 0.121368 0.0 0.0 \n",
"2 9.710000e+09 1.447710e+12 0.068598 0.0 0.0 \n",
"3 -2.453913e-03 3.401455e-02 0.000000 0.0 0.0 \n",
"4 2.253872e-03 9.853705e-03 0.000000 0.0 0.0 \n",
"\n",
" delta_rate_m \n",
"0 -0.058 \n",
"1 -0.022 \n",
"2 -0.014 \n",
"3 -0.077 \n",
"4 -0.053 "
]
},
"execution_count": 302,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Add monthly market context to df_rel_m: fund / benchmark returns and rate changes.\n",
"\n",
"market_cols = [\"ret_fund_m\", \"ret_bench_m\", \"active_return_m\", \"delta_rate_m\"]\n",
"\n",
"# Month-end NAV: keep the last row of every (ISIN, month) group.\n",
"df_nav_m = (\n",
"    df_nav\n",
"    .dropna(subset=[NAV_ISIN_COL, \"month\", NAV_PRICE_COL])\n",
"    .sort_values([NAV_ISIN_COL, \"month\"])\n",
"    .groupby([NAV_ISIN_COL, \"month\"], as_index=False)\n",
"    .tail(1)\n",
"    .copy()\n",
")\n",
"\n",
"# Month-over-month returns per ISIN; the active return is fund minus benchmark.\n",
"df_nav_m[\"ret_fund_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].pct_change()\n",
"df_nav_m[\"ret_bench_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_BENCH_COL].pct_change()\n",
"df_nav_m[\"active_return_m\"] = df_nav_m[\"ret_fund_m\"] - df_nav_m[\"ret_bench_m\"]\n",
"\n",
"df_nav_m = df_nav_m.rename(columns={NAV_ISIN_COL: ISIN_COL})\n",
"df_nav_m = df_nav_m[[ISIN_COL, \"month\", \"ret_fund_m\", \"ret_bench_m\", \"active_return_m\"]]\n",
"\n",
"# Month-end rate: last observation of each month by date, then the monthly change.\n",
"df_rates_m = (\n",
"    df_rates\n",
"    .dropna(subset=[\"month\", RATE_VAL_COL])\n",
"    .sort_values(RATE_DATE_COL)\n",
"    .groupby(\"month\", as_index=False)\n",
"    .tail(1)\n",
"    .copy()\n",
")\n",
"\n",
"df_rates_m[\"delta_rate_m\"] = df_rates_m[RATE_VAL_COL].diff()\n",
"df_rates_m = df_rates_m[[\"month\", RATE_VAL_COL, \"delta_rate_m\"]]\n",
"\n",
"# Make the cell safe to re-run: df_rel_m is reassigned below, so without this drop a\n",
"# second execution would merge the same columns again, produce *_x / *_y duplicates\n",
"# and fail in the fillna loop with a KeyError.\n",
"df_rel_m = df_rel_m.drop(columns=market_cols, errors=\"ignore\")\n",
"\n",
"# Both right-hand tables hold one row per key (one row kept per group above);\n",
"# validate makes the merge raise instead of silently duplicating df_rel_m rows.\n",
"df_rel_m = df_rel_m.merge(\n",
"    df_nav_m,\n",
"    on=[ISIN_COL, \"month\"],\n",
"    how=\"left\",\n",
"    validate=\"many_to_one\"\n",
")\n",
"\n",
"df_rel_m = df_rel_m.merge(\n",
"    df_rates_m[[\"month\", \"delta_rate_m\"]],\n",
"    on=\"month\",\n",
"    how=\"left\",\n",
"    validate=\"many_to_one\"\n",
")\n",
"\n",
"# Rows without a NAV or rate match get 0, i.e. missing data is treated as 'no change'.\n",
"for c in market_cols:\n",
"    df_rel_m[c] = df_rel_m[c].fillna(0)\n",
"\n",
"df_rel_m.head()"
]
},
{
"cell_type": "code",
"execution_count": 303,
"id": "614bf72b-7afa-4633-ba09-22540a441459",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(31709, 23)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>n_isin_held</th>\n",
" <th>n_isin_active</th>\n",
" <th>delta_rate_m</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>ret_fund_m</th>\n",
" <th>ret_bench_m</th>\n",
" <th>active_month</th>\n",
" <th>flow_to_aum_m</th>\n",
" <th>turnover_m</th>\n",
" <th>sub_share_m</th>\n",
" <th>red_share_m</th>\n",
" <th>aum_peak_to_date</th>\n",
" <th>aum_drawdown</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>2015-01-31</td>\n",
" <td>11819.680</td>\n",
" <td>1.694553e+06</td>\n",
" <td>-1524.010</td>\n",
" <td>15230.010</td>\n",
" <td>6897.990</td>\n",
" <td>-8422.000</td>\n",
" <td>32.0</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" <td>-0.058</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.013100</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>-0.128938</td>\n",
" <td>1.288530</td>\n",
" <td>0.452921</td>\n",
" <td>-0.552987</td>\n",
" <td>11819.680</td>\n",
" <td>8.459899e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>2015-02-28</td>\n",
" <td>5705.000</td>\n",
" <td>7.008600e+05</td>\n",
" <td>7247.100</td>\n",
" <td>18571.880</td>\n",
" <td>13219.490</td>\n",
" <td>-5972.390</td>\n",
" <td>38.0</td>\n",
" <td>3</td>\n",
" <td>13</td>\n",
" <td>-0.022</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.079848</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1.270307</td>\n",
" <td>3.255369</td>\n",
" <td>0.711801</td>\n",
" <td>-0.321582</td>\n",
" <td>11819.680</td>\n",
" <td>5.173304e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>2015-03-31</td>\n",
" <td>70038.905</td>\n",
" <td>1.503549e+07</td>\n",
" <td>3655.380</td>\n",
" <td>9754.040</td>\n",
" <td>6767.710</td>\n",
" <td>-3112.330</td>\n",
" <td>47.0</td>\n",
" <td>4</td>\n",
" <td>14</td>\n",
" <td>-0.014</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.005051</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0.052191</td>\n",
" <td>0.139266</td>\n",
" <td>0.693837</td>\n",
" <td>-0.319081</td>\n",
" <td>70038.905</td>\n",
" <td>1.432188e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>2015-04-30</td>\n",
" <td>70324.489</td>\n",
" <td>3.928292e+07</td>\n",
" <td>-218.394</td>\n",
" <td>12840.950</td>\n",
" <td>6384.278</td>\n",
" <td>-6602.672</td>\n",
" <td>39.0</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" <td>-0.077</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>-0.003106</td>\n",
" <td>0.182596</td>\n",
" <td>0.497181</td>\n",
" <td>-0.514189</td>\n",
" <td>70324.489</td>\n",
" <td>1.432188e-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>2015-05-31</td>\n",
" <td>75567.276</td>\n",
" <td>3.987712e+07</td>\n",
" <td>-4782.849</td>\n",
" <td>6332.849</td>\n",
" <td>775.000</td>\n",
" <td>-5557.849</td>\n",
" <td>24.0</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>-0.053</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>-0.063293</td>\n",
" <td>0.083804</td>\n",
" <td>0.122378</td>\n",
" <td>-0.877622</td>\n",
" <td>75567.276</td>\n",
" <td>1.332268e-14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty aum_val net_flow_qty \\\n",
"0 18872 2015-01-31 11819.680 1.694553e+06 -1524.010 \n",
"1 18872 2015-02-28 5705.000 7.008600e+05 7247.100 \n",
"2 18872 2015-03-31 70038.905 1.503549e+07 3655.380 \n",
"3 18872 2015-04-30 70324.489 3.928292e+07 -218.394 \n",
"4 18872 2015-05-31 75567.276 3.987712e+07 -4782.849 \n",
"\n",
" gross_flow_qty sub_qty red_qty n_tx n_isin_held n_isin_active \\\n",
"0 15230.010 6897.990 -8422.000 32.0 4 13 \n",
"1 18571.880 13219.490 -5972.390 38.0 3 13 \n",
"2 9754.040 6767.710 -3112.330 47.0 4 14 \n",
"3 12840.950 6384.278 -6602.672 39.0 4 13 \n",
"4 6332.849 775.000 -5557.849 24.0 7 9 \n",
"\n",
" delta_rate_m region country ret_fund_m ret_bench_m \\\n",
"0 -0.058 Switzerland Switzerland 0.013100 0.0 \n",
"1 -0.022 Switzerland Switzerland 0.079848 0.0 \n",
"2 -0.014 Switzerland Switzerland 0.005051 0.0 \n",
"3 -0.077 Switzerland Switzerland 0.000000 0.0 \n",
"4 -0.053 Switzerland Switzerland 0.000000 0.0 \n",
"\n",
" active_month flow_to_aum_m turnover_m sub_share_m red_share_m \\\n",
"0 1 -0.128938 1.288530 0.452921 -0.552987 \n",
"1 1 1.270307 3.255369 0.711801 -0.321582 \n",
"2 1 0.052191 0.139266 0.693837 -0.319081 \n",
"3 1 -0.003106 0.182596 0.497181 -0.514189 \n",
"4 1 -0.063293 0.083804 0.122378 -0.877622 \n",
"\n",
" aum_peak_to_date aum_drawdown \n",
"0 11819.680 8.459899e-14 \n",
"1 11819.680 5.173304e-01 \n",
"2 70038.905 1.432188e-14 \n",
"3 70324.489 1.432188e-14 \n",
"4 75567.276 1.332268e-14 "
]
},
"execution_count": 303,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Account-level monthly panel: collapse the (account, ISIN, month) rows across ISINs.\n",
"\n",
"# =========================\n",
"# ULTRA LIGHT VERSION\n",
"# =========================\n",
"\n",
"account_month = [ID_COL, \"month\"]\n",
"\n",
"# Per-row flags and AUM weights; w is the ISIN's share of the account's AUM that month.\n",
"tmp = df_rel_m.assign(\n",
"    isin_held_flag=lambda d: d[\"aum_qty\"].gt(0).astype(int),\n",
"    isin_active_flag=lambda d: d[\"gross_flow_qty\"].gt(0).astype(int),\n",
"    aum_total=lambda d: d.groupby(account_month)[\"aum_qty\"].transform(\"sum\"),\n",
"    w=lambda d: d[\"aum_qty\"] / (d[\"aum_total\"] + 1e-12),\n",
"    ret_fund_w=lambda d: d[\"w\"] * d[\"ret_fund_m\"],\n",
"    ret_bench_w=lambda d: d[\"w\"] * d[\"ret_bench_m\"],\n",
")\n",
"\n",
"# Output column -> (input column, reducer). Summing the weighted returns yields the\n",
"# AUM-weighted return of the account for the month.\n",
"month_agg_spec = {\n",
"    \"aum_qty\": (\"aum_qty\", \"sum\"),\n",
"    \"aum_val\": (\"aum_val\", \"sum\"),\n",
"    \"net_flow_qty\": (\"net_flow_qty\", \"sum\"),\n",
"    \"gross_flow_qty\": (\"gross_flow_qty\", \"sum\"),\n",
"    \"sub_qty\": (\"sub_qty\", \"sum\"),\n",
"    \"red_qty\": (\"red_qty\", \"sum\"),\n",
"    \"n_tx\": (\"n_tx\", \"sum\"),\n",
"    \"n_isin_held\": (\"isin_held_flag\", \"sum\"),\n",
"    \"n_isin_active\": (\"isin_active_flag\", \"sum\"),\n",
"    \"delta_rate_m\": (\"delta_rate_m\", \"first\"),\n",
"    \"region\": (\"region\", \"first\"),\n",
"    \"country\": (\"country\", \"first\"),\n",
"    \"ret_fund_m\": (\"ret_fund_w\", \"sum\"),\n",
"    \"ret_bench_m\": (\"ret_bench_w\", \"sum\"),\n",
"}\n",
"\n",
"df_month = (\n",
"    tmp.groupby(account_month, as_index=False)\n",
"    .agg(**month_agg_spec)\n",
"    .sort_values(account_month)\n",
"    .reset_index(drop=True)\n",
")\n",
"\n",
"# Ratios use EPS-guarded denominators to avoid division by zero.\n",
"aum_base = df_month[\"aum_qty\"].abs() + EPS\n",
"gross_base = df_month[\"gross_flow_qty\"] + EPS\n",
"\n",
"df_month[\"active_month\"] = df_month[\"gross_flow_qty\"].gt(0).astype(int)\n",
"df_month[\"flow_to_aum_m\"] = df_month[\"net_flow_qty\"] / aum_base\n",
"df_month[\"turnover_m\"] = df_month[\"gross_flow_qty\"] / aum_base\n",
"df_month[\"sub_share_m\"] = df_month[\"sub_qty\"] / gross_base\n",
"df_month[\"red_share_m\"] = df_month[\"red_qty\"] / gross_base\n",
"\n",
"# Running AUM peak per account and the drawdown relative to that peak.\n",
"df_month[\"aum_peak_to_date\"] = df_month.groupby(ID_COL)[\"aum_qty\"].cummax()\n",
"df_month[\"aum_drawdown\"] = 1 - df_month[\"aum_qty\"] / (df_month[\"aum_peak_to_date\"] + EPS)\n",
"\n",
"# Keep months up to the cutoff, applied after the running peak has been computed.\n",
"df_month = df_month.loc[df_month[\"month\"] <= '2025-10-31']\n",
"\n",
"# Restrict to the (account, month) keys present in df_month0, which is defined\n",
"# outside this cell.\n",
"key_cols = [\"Registrar Account - ID\", \"month\"]\n",
"month_keys_kept = df_month0[key_cols].drop_duplicates()\n",
"df_month = df_month.merge(month_keys_kept, on=key_cols, how=\"inner\")\n",
"\n",
"print(df_month.shape)\n",
"df_month.head()"
]
},
{
"cell_type": "code",
"execution_count": 304,
"id": "2e01fa4f-ba89-4c8a-8cbb-528d89bc811c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>rel_n_months</th>\n",
" <th>rel_active_months</th>\n",
" <th>rel_holding_months</th>\n",
" <th>rel_aum_mean</th>\n",
" <th>rel_turnover_mean</th>\n",
" <th>rel_turnover_vol</th>\n",
" <th>rel_flow_to_aum_vol</th>\n",
" <th>rel_n_tx</th>\n",
" <th>rel_full_exit_count</th>\n",
" <th>rel_entry_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>FR0010135103</td>\n",
" <td>98</td>\n",
" <td>91</td>\n",
" <td>26</td>\n",
" <td>2519.829520</td>\n",
" <td>5.898325e+11</td>\n",
" <td>9.652436e+11</td>\n",
" <td>9.242856e+11</td>\n",
" <td>382.0</td>\n",
" <td>12</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18872</td>\n",
" <td>FR0010147603</td>\n",
" <td>17</td>\n",
" <td>8</td>\n",
" <td>10</td>\n",
" <td>695.058824</td>\n",
" <td>1.685294e+11</td>\n",
" <td>3.805578e+11</td>\n",
" <td>3.805578e+11</td>\n",
" <td>9.0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18872</td>\n",
" <td>FR0010148981</td>\n",
" <td>81</td>\n",
" <td>66</td>\n",
" <td>28</td>\n",
" <td>831.906963</td>\n",
" <td>6.628200e+10</td>\n",
" <td>1.140022e+11</td>\n",
" <td>1.212644e+11</td>\n",
" <td>149.0</td>\n",
" <td>16</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18872</td>\n",
" <td>FR0010149112</td>\n",
" <td>19</td>\n",
" <td>12</td>\n",
" <td>5</td>\n",
" <td>885.208737</td>\n",
" <td>5.886253e+11</td>\n",
" <td>1.251992e+12</td>\n",
" <td>1.273644e+12</td>\n",
" <td>13.0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>18872</td>\n",
" <td>FR0010149120</td>\n",
" <td>99</td>\n",
" <td>79</td>\n",
" <td>36</td>\n",
" <td>425.655010</td>\n",
" <td>1.673836e+11</td>\n",
" <td>6.287132e+11</td>\n",
" <td>6.316077e+11</td>\n",
" <td>152.0</td>\n",
" <td>14</td>\n",
" <td>15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin rel_n_months rel_active_months \\\n",
"0 18872 FR0010135103 98 91 \n",
"1 18872 FR0010147603 17 8 \n",
"2 18872 FR0010148981 81 66 \n",
"3 18872 FR0010149112 19 12 \n",
"4 18872 FR0010149120 99 79 \n",
"\n",
" rel_holding_months rel_aum_mean rel_turnover_mean rel_turnover_vol \\\n",
"0 26 2519.829520 5.898325e+11 9.652436e+11 \n",
"1 10 695.058824 1.685294e+11 3.805578e+11 \n",
"2 28 831.906963 6.628200e+10 1.140022e+11 \n",
"3 5 885.208737 5.886253e+11 1.251992e+12 \n",
"4 36 425.655010 1.673836e+11 6.287132e+11 \n",
"\n",
" rel_flow_to_aum_vol rel_n_tx rel_full_exit_count rel_entry_count \n",
"0 9.242856e+11 382.0 12 13 \n",
"1 3.805578e+11 9.0 4 4 \n",
"2 1.212644e+11 149.0 16 17 \n",
"3 1.273644e+12 13.0 3 3 \n",
"4 6.316077e+11 152.0 14 15 "
]
},
"execution_count": 304,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-(account, ISIN) features, aggregated over the months of each relationship.\n",
"pair_cols = [ID_COL, ISIN_COL]\n",
"tmp = df_rel_m.sort_values(pair_cols + [\"month\"]).copy()\n",
"\n",
"# Holding on the previous observed row of the same (account, ISIN) pair\n",
"# (NaN on the first row of each pair).\n",
"tmp[\"prev_aum\"] = tmp.groupby(pair_cols)[\"aum_qty\"].shift(1)\n",
"\n",
"held_before = tmp[\"prev_aum\"].gt(0)\n",
"not_held_before = tmp[\"prev_aum\"].fillna(0).le(0)\n",
"held_now = tmp[\"aum_qty\"].gt(0)\n",
"not_held_now = tmp[\"aum_qty\"].le(0)\n",
"\n",
"# Exit: positive holding before, none now. Entry: none before (or first row), positive now.\n",
"tmp[\"full_exit_event\"] = (held_before & not_held_now).astype(int)\n",
"tmp[\"entry_event\"] = (not_held_before & held_now).astype(int)\n",
"\n",
"# Output column -> (input column, reducer).\n",
"rel_feat_spec = {\n",
"    \"rel_n_months\": (\"month\", \"nunique\"),\n",
"    \"rel_active_months\": (\"active_rel_month\", \"sum\"),\n",
"    \"rel_holding_months\": (\"holding_rel_month\", \"sum\"),\n",
"    \"rel_aum_mean\": (\"aum_qty\", \"mean\"),\n",
"    \"rel_turnover_mean\": (\"turnover_rel\", \"mean\"),\n",
"    \"rel_turnover_vol\": (\"turnover_rel\", \"std\"),\n",
"    \"rel_flow_to_aum_vol\": (\"flow_to_aum_rel\", \"std\"),\n",
"    \"rel_n_tx\": (\"n_tx\", \"sum\"),\n",
"    \"rel_full_exit_count\": (\"full_exit_event\", \"sum\"),\n",
"    \"rel_entry_count\": (\"entry_event\", \"sum\"),\n",
"}\n",
"df_rel_feat = tmp.groupby(pair_cols, as_index=False).agg(**rel_feat_spec)\n",
"\n",
"df_rel_feat.head()"
]
},
{
"cell_type": "code",
"execution_count": 305,
"id": "2d81b4fd-f82d-42f1-ba03-8460706fea0d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(431, 40)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>aum_qty_max</th>\n",
" <th>aum_qty_last</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>n_tx_total</th>\n",
" <th>net_flow_vol</th>\n",
" <th>turnover_mean</th>\n",
" <th>turnover_vol</th>\n",
" <th>flow_to_aum_mean</th>\n",
" <th>flow_to_aum_vol</th>\n",
" <th>avg_n_isin_held</th>\n",
" <th>max_n_isin_held</th>\n",
" <th>sub_share_mean</th>\n",
" <th>red_share_mean</th>\n",
" <th>delta_rate_mean</th>\n",
" <th>aum_drawdown_last</th>\n",
" <th>aum_drawdown_max</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>n_isin_total</th>\n",
" <th>rel_turnover_mean_avg</th>\n",
" <th>rel_turnover_vol_avg</th>\n",
" <th>rel_flow_to_aum_vol_avg</th>\n",
" <th>full_exit_count</th>\n",
" <th>entry_count</th>\n",
" <th>avg_holding_months_per_isin</th>\n",
" <th>max_holding_months_per_isin</th>\n",
" <th>corr_flow_fund_lag3</th>\n",
" <th>corr_flow_fund_lag6</th>\n",
" <th>corr_flow_bench_lag3</th>\n",
" <th>corr_flow_bench_lag6</th>\n",
" <th>corr_flow_rate_lag3</th>\n",
" <th>corr_flow_rate_lag6</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>23477.224308</td>\n",
" <td>14880.4715</td>\n",
" <td>88818.372</td>\n",
" <td>67570.855</td>\n",
" <td>-45677.1480</td>\n",
" <td>1.244126e+06</td>\n",
" <td>9570.200015</td>\n",
" <td>1926.0</td>\n",
" <td>9832.357264</td>\n",
" <td>6.382330e+10</td>\n",
" <td>5.151309e+11</td>\n",
" <td>-2.560792e+10</td>\n",
" <td>2.841988e+11</td>\n",
" <td>7.507692</td>\n",
" <td>26</td>\n",
" <td>0.429844</td>\n",
" <td>-0.576520</td>\n",
" <td>0.013723</td>\n",
" <td>2.392243e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>107</td>\n",
" <td>4.063407e+11</td>\n",
" <td>8.956214e+11</td>\n",
" <td>8.915940e+11</td>\n",
" <td>310</td>\n",
" <td>344</td>\n",
" <td>9.121495</td>\n",
" <td>36</td>\n",
" <td>0.007825</td>\n",
" <td>0.008326</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.007546</td>\n",
" <td>0.014510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200000076</td>\n",
" <td>130</td>\n",
" <td>119</td>\n",
" <td>0.915385</td>\n",
" <td>15840.000331</td>\n",
" <td>9272.4710</td>\n",
" <td>50732.461</td>\n",
" <td>44837.203</td>\n",
" <td>54791.9840</td>\n",
" <td>2.314415e+05</td>\n",
" <td>1780.319492</td>\n",
" <td>518.0</td>\n",
" <td>2838.000232</td>\n",
" <td>1.457820e-01</td>\n",
" <td>2.457632e-01</td>\n",
" <td>-1.707090e-02</td>\n",
" <td>2.717209e-01</td>\n",
" <td>4.700000</td>\n",
" <td>9</td>\n",
" <td>0.508681</td>\n",
" <td>-0.415876</td>\n",
" <td>0.013723</td>\n",
" <td>1.162029e-01</td>\n",
" <td>0.949206</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>22</td>\n",
" <td>6.276897e+10</td>\n",
" <td>2.469731e+11</td>\n",
" <td>2.481822e+11</td>\n",
" <td>71</td>\n",
" <td>81</td>\n",
" <td>27.772727</td>\n",
" <td>85</td>\n",
" <td>0.015278</td>\n",
" <td>0.096449</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.025181</td>\n",
" <td>0.012844</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200000082</td>\n",
" <td>71</td>\n",
" <td>71</td>\n",
" <td>1.000000</td>\n",
" <td>85194.200239</td>\n",
" <td>25820.0550</td>\n",
" <td>316149.358</td>\n",
" <td>131158.471</td>\n",
" <td>14575.5560</td>\n",
" <td>1.229616e+06</td>\n",
" <td>17318.539183</td>\n",
" <td>4807.0</td>\n",
" <td>13472.042652</td>\n",
" <td>4.056892e+11</td>\n",
" <td>2.421685e+12</td>\n",
" <td>-9.687862e+10</td>\n",
" <td>8.402113e+11</td>\n",
" <td>1.760563</td>\n",
" <td>4</td>\n",
" <td>0.438873</td>\n",
" <td>-0.588724</td>\n",
" <td>0.034282</td>\n",
" <td>5.851376e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>18</td>\n",
" <td>1.147803e+12</td>\n",
" <td>1.251086e+12</td>\n",
" <td>1.333111e+12</td>\n",
" <td>100</td>\n",
" <td>101</td>\n",
" <td>6.944444</td>\n",
" <td>19</td>\n",
" <td>-0.019860</td>\n",
" <td>-0.020797</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.022861</td>\n",
" <td>-0.135696</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200000146</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>71298.603700</td>\n",
" <td>15953.6355</td>\n",
" <td>519508.539</td>\n",
" <td>519508.539</td>\n",
" <td>457533.3310</td>\n",
" <td>1.150546e+06</td>\n",
" <td>8850.350438</td>\n",
" <td>4774.0</td>\n",
" <td>10074.748210</td>\n",
" <td>4.770901e+00</td>\n",
" <td>2.930221e+01</td>\n",
" <td>3.780801e+00</td>\n",
" <td>2.870987e+01</td>\n",
" <td>6.684615</td>\n",
" <td>14</td>\n",
" <td>0.517815</td>\n",
" <td>-0.556667</td>\n",
" <td>0.013723</td>\n",
" <td>1.887379e-15</td>\n",
" <td>0.999302</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>33</td>\n",
" <td>2.123548e+11</td>\n",
" <td>3.670050e+11</td>\n",
" <td>3.882699e+11</td>\n",
" <td>237</td>\n",
" <td>256</td>\n",
" <td>26.333333</td>\n",
" <td>54</td>\n",
" <td>0.281071</td>\n",
" <td>-0.020188</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.018482</td>\n",
" <td>-0.018833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200000147</td>\n",
" <td>129</td>\n",
" <td>129</td>\n",
" <td>1.000000</td>\n",
" <td>35957.851907</td>\n",
" <td>18047.3390</td>\n",
" <td>174703.188</td>\n",
" <td>8478.402</td>\n",
" <td>677424.2191</td>\n",
" <td>1.210845e+06</td>\n",
" <td>9386.398474</td>\n",
" <td>7523.0</td>\n",
" <td>13914.783110</td>\n",
" <td>1.775257e+00</td>\n",
" <td>8.769726e+00</td>\n",
" <td>1.150007e+00</td>\n",
" <td>7.862819e+00</td>\n",
" <td>13.162791</td>\n",
" <td>27</td>\n",
" <td>0.599433</td>\n",
" <td>-0.448172</td>\n",
" <td>0.013837</td>\n",
" <td>9.514697e-01</td>\n",
" <td>0.996847</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>78</td>\n",
" <td>5.279255e+11</td>\n",
" <td>6.892142e+11</td>\n",
" <td>6.858178e+11</td>\n",
" <td>596</td>\n",
" <td>619</td>\n",
" <td>21.769231</td>\n",
" <td>49</td>\n",
" <td>-0.026933</td>\n",
" <td>-0.010493</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.204637</td>\n",
" <td>-0.109646</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
"0 18872 130 130 1.000000 23477.224308 \n",
"1 200000076 130 119 0.915385 15840.000331 \n",
"2 200000082 71 71 1.000000 85194.200239 \n",
"3 200000146 130 130 1.000000 71298.603700 \n",
"4 200000147 129 129 1.000000 35957.851907 \n",
"\n",
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
"0 14880.4715 88818.372 67570.855 -45677.1480 \n",
"1 9272.4710 50732.461 44837.203 54791.9840 \n",
"2 25820.0550 316149.358 131158.471 14575.5560 \n",
"3 15953.6355 519508.539 519508.539 457533.3310 \n",
"4 18047.3390 174703.188 8478.402 677424.2191 \n",
"\n",
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
"0 1.244126e+06 9570.200015 1926.0 9832.357264 \n",
"1 2.314415e+05 1780.319492 518.0 2838.000232 \n",
"2 1.229616e+06 17318.539183 4807.0 13472.042652 \n",
"3 1.150546e+06 8850.350438 4774.0 10074.748210 \n",
"4 1.210845e+06 9386.398474 7523.0 13914.783110 \n",
"\n",
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
"0 6.382330e+10 5.151309e+11 -2.560792e+10 2.841988e+11 \n",
"1 1.457820e-01 2.457632e-01 -1.707090e-02 2.717209e-01 \n",
"2 4.056892e+11 2.421685e+12 -9.687862e+10 8.402113e+11 \n",
"3 4.770901e+00 2.930221e+01 3.780801e+00 2.870987e+01 \n",
"4 1.775257e+00 8.769726e+00 1.150007e+00 7.862819e+00 \n",
"\n",
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
"0 7.507692 26 0.429844 -0.576520 \n",
"1 4.700000 9 0.508681 -0.415876 \n",
"2 1.760563 4 0.438873 -0.588724 \n",
"3 6.684615 14 0.517815 -0.556667 \n",
"4 13.162791 27 0.599433 -0.448172 \n",
"\n",
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
"0 0.013723 2.392243e-01 1.000000 Switzerland \n",
"1 0.013723 1.162029e-01 0.949206 Spain \n",
"2 0.034282 5.851376e-01 1.000000 Italy \n",
"3 0.013723 1.887379e-15 0.999302 Italy \n",
"4 0.013837 9.514697e-01 0.996847 Italy \n",
"\n",
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
"0 Switzerland 107 4.063407e+11 8.956214e+11 \n",
"1 Spain 22 6.276897e+10 2.469731e+11 \n",
"2 Italy 18 1.147803e+12 1.251086e+12 \n",
"3 Italy 33 2.123548e+11 3.670050e+11 \n",
"4 Italy 78 5.279255e+11 6.892142e+11 \n",
"\n",
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
"0 8.915940e+11 310 344 \n",
"1 2.481822e+11 71 81 \n",
"2 1.333111e+12 100 101 \n",
"3 3.882699e+11 237 256 \n",
"4 6.858178e+11 596 619 \n",
"\n",
" avg_holding_months_per_isin max_holding_months_per_isin \\\n",
"0 9.121495 36 \n",
"1 27.772727 85 \n",
"2 6.944444 19 \n",
"3 26.333333 54 \n",
"4 21.769231 49 \n",
"\n",
" corr_flow_fund_lag3 corr_flow_fund_lag6 corr_flow_bench_lag3 \\\n",
"0 0.007825 0.008326 NaN \n",
"1 0.015278 0.096449 NaN \n",
"2 -0.019860 -0.020797 NaN \n",
"3 0.281071 -0.020188 NaN \n",
"4 -0.026933 -0.010493 NaN \n",
"\n",
" corr_flow_bench_lag6 corr_flow_rate_lag3 corr_flow_rate_lag6 \n",
"0 NaN 0.007546 0.014510 \n",
"1 NaN -0.025181 0.012844 \n",
"2 NaN 0.022861 -0.135696 \n",
"3 NaN -0.018482 -0.018833 \n",
"4 NaN -0.204637 -0.109646 "
]
},
"execution_count": 305,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Client-level features: one row per account, aggregated over both ISINs and months.\n",
"\n",
"# Roll the per-(account, ISIN) features up to the account.\n",
"rel_client_spec = {\n",
"    \"n_isin_total\": (ISIN_COL, \"nunique\"),\n",
"    \"rel_turnover_mean_avg\": (\"rel_turnover_mean\", \"mean\"),\n",
"    \"rel_turnover_vol_avg\": (\"rel_turnover_vol\", \"mean\"),\n",
"    \"rel_flow_to_aum_vol_avg\": (\"rel_flow_to_aum_vol\", \"mean\"),\n",
"    \"full_exit_count\": (\"rel_full_exit_count\", \"sum\"),\n",
"    \"entry_count\": (\"rel_entry_count\", \"sum\"),\n",
"    \"avg_holding_months_per_isin\": (\"rel_holding_months\", \"mean\"),\n",
"    \"max_holding_months_per_isin\": (\"rel_holding_months\", \"max\"),\n",
"}\n",
"df_rel_client = df_rel_feat.groupby(ID_COL, as_index=False).agg(**rel_client_spec)\n",
"\n",
"# Summarise the account-month panel per account.\n",
"client_spec = {\n",
"    # Activity\n",
"    \"n_months\": (\"month\", \"nunique\"),\n",
"    \"n_active_months\": (\"active_month\", \"sum\"),\n",
"    \"flow_freq\": (\"active_month\", \"mean\"),\n",
"    # Holdings\n",
"    \"aum_qty_mean\": (\"aum_qty\", \"mean\"),\n",
"    \"aum_qty_median\": (\"aum_qty\", \"median\"),\n",
"    \"aum_qty_max\": (\"aum_qty\", \"max\"),\n",
"    \"aum_qty_last\": (\"aum_qty\", \"last\"),\n",
"    # Flow volumes\n",
"    \"net_flow_qty_sum\": (\"net_flow_qty\", \"sum\"),\n",
"    \"gross_flow_qty_sum\": (\"gross_flow_qty\", \"sum\"),\n",
"    \"gross_flow_qty_mean\": (\"gross_flow_qty\", \"mean\"),\n",
"    \"n_tx_total\": (\"n_tx\", \"sum\"),\n",
"    # Flow intensity and variability\n",
"    \"net_flow_vol\": (\"net_flow_qty\", \"std\"),\n",
"    \"turnover_mean\": (\"turnover_m\", \"mean\"),\n",
"    \"turnover_vol\": (\"turnover_m\", \"std\"),\n",
"    \"flow_to_aum_mean\": (\"flow_to_aum_m\", \"mean\"),\n",
"    \"flow_to_aum_vol\": (\"flow_to_aum_m\", \"std\"),\n",
"    # Breadth of holdings\n",
"    \"avg_n_isin_held\": (\"n_isin_held\", \"mean\"),\n",
"    \"max_n_isin_held\": (\"n_isin_held\", \"max\"),\n",
"    # Subscription / redemption mix\n",
"    \"sub_share_mean\": (\"sub_share_m\", \"mean\"),\n",
"    \"red_share_mean\": (\"red_share_m\", \"mean\"),\n",
"    # Rates and drawdown\n",
"    \"delta_rate_mean\": (\"delta_rate_m\", \"mean\"),\n",
"    \"aum_drawdown_last\": (\"aum_drawdown\", \"last\"),\n",
"    \"aum_drawdown_max\": (\"aum_drawdown\", \"max\"),\n",
"    # Geography\n",
"    \"region\": (\"region\", \"last\"),\n",
"    \"country\": (\"country\", \"last\"),\n",
"}\n",
"df_client = df_month.groupby(ID_COL, as_index=False).agg(**client_spec)\n",
"\n",
"# Attach the relationship-level roll-up; accounts without one keep NaN.\n",
"df_client = df_client.merge(df_rel_client, on=ID_COL, how=\"left\")\n",
"\n",
"#Variables de corrélations entre performance et flux\n",
"def corr_lag(x, y, lag):\n",
" x = np.asarray(x, dtype=float)\n",
" y = np.asarray(y, dtype=float)\n",
" \n",
" mask = np.isfinite(x) & np.isfinite(y)\n",
" x, y = x[mask], y[mask]\n",
" \n",
" if len(x) <= lag + 3:\n",
" return np.nan\n",
" \n",
" return pd.Series(x[lag:]).corr(pd.Series(y[:-lag]))\n",
"\n",
"rows = []\n",
"\n",
"for acc, g in df_month.groupby(ID_COL):\n",
" g = g.sort_values(\"month\")\n",
" \n",
" flow = g[\"flow_to_aum_m\"].values\n",
" ret_fund = g[\"ret_fund_m\"].values\n",
" ret_bench = g[\"ret_bench_m\"].values\n",
" rate = g[\"delta_rate_m\"].values\n",
" \n",
" rows.append({\n",
" ID_COL: acc,\n",
" \n",
" # 👇 Corrélations perf vs flux\n",
" \"corr_flow_fund_lag3\": corr_lag(flow, ret_fund, 3),\n",
" \"corr_flow_fund_lag6\": corr_lag(flow, ret_fund, 6),\n",
" \n",
" \"corr_flow_bench_lag3\": corr_lag(flow, ret_bench, 3),\n",
" \"corr_flow_bench_lag6\": corr_lag(flow, ret_bench, 6),\n",
" \n",
" # 👇 Corrélation taux vs flux\n",
" \"corr_flow_rate_lag3\": corr_lag(flow, rate, 3),\n",
" \"corr_flow_rate_lag6\": corr_lag(flow, rate, 6),\n",
" })\n",
"\n",
"df_corr = pd.DataFrame(rows)\n",
"\n",
"df_client = df_client.merge(df_corr, on=ID_COL, how=\"left\")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
]
},
{
"cell_type": "code",
"execution_count": 306,
"id": "8c1a0491-a0bb-4165-b073-41f81637466b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(431, 44)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>aum_qty_max</th>\n",
" <th>aum_qty_last</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>n_tx_total</th>\n",
" <th>net_flow_vol</th>\n",
" <th>turnover_mean</th>\n",
" <th>turnover_vol</th>\n",
" <th>flow_to_aum_mean</th>\n",
" <th>flow_to_aum_vol</th>\n",
" <th>avg_n_isin_held</th>\n",
" <th>max_n_isin_held</th>\n",
" <th>sub_share_mean</th>\n",
" <th>red_share_mean</th>\n",
" <th>delta_rate_mean</th>\n",
" <th>aum_drawdown_last</th>\n",
" <th>aum_drawdown_max</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>n_isin_total</th>\n",
" <th>rel_turnover_mean_avg</th>\n",
" <th>rel_turnover_vol_avg</th>\n",
" <th>rel_flow_to_aum_vol_avg</th>\n",
" <th>full_exit_count</th>\n",
" <th>entry_count</th>\n",
" <th>avg_holding_months_per_isin</th>\n",
" <th>max_holding_months_per_isin</th>\n",
" <th>corr_flow_fund_lag3</th>\n",
" <th>corr_flow_fund_lag6</th>\n",
" <th>corr_flow_bench_lag3</th>\n",
" <th>corr_flow_bench_lag6</th>\n",
" <th>corr_flow_rate_lag3</th>\n",
" <th>corr_flow_rate_lag6</th>\n",
" <th>flow_trend_12m</th>\n",
" <th>aum_trend_12m</th>\n",
" <th>drawdown_trend_12m</th>\n",
" <th>beta_rate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18872</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>23477.224308</td>\n",
" <td>14880.4715</td>\n",
" <td>88818.372</td>\n",
" <td>67570.855</td>\n",
" <td>-45677.1480</td>\n",
" <td>1.244126e+06</td>\n",
" <td>9570.200015</td>\n",
" <td>1926.0</td>\n",
" <td>9832.357264</td>\n",
" <td>6.382330e+10</td>\n",
" <td>5.151309e+11</td>\n",
" <td>-2.560792e+10</td>\n",
" <td>2.841988e+11</td>\n",
" <td>7.507692</td>\n",
" <td>26</td>\n",
" <td>0.429844</td>\n",
" <td>-0.576520</td>\n",
" <td>0.013723</td>\n",
" <td>2.392243e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Switzerland</td>\n",
" <td>Switzerland</td>\n",
" <td>107</td>\n",
" <td>4.063407e+11</td>\n",
" <td>8.956214e+11</td>\n",
" <td>8.915940e+11</td>\n",
" <td>310</td>\n",
" <td>344</td>\n",
" <td>9.121495</td>\n",
" <td>36</td>\n",
" <td>0.007825</td>\n",
" <td>0.008326</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.007546</td>\n",
" <td>0.014510</td>\n",
" <td>-1.886348e-02</td>\n",
" <td>2920.070661</td>\n",
" <td>-0.024467</td>\n",
" <td>1.405196e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>200000076</td>\n",
" <td>130</td>\n",
" <td>119</td>\n",
" <td>0.915385</td>\n",
" <td>15840.000331</td>\n",
" <td>9272.4710</td>\n",
" <td>50732.461</td>\n",
" <td>44837.203</td>\n",
" <td>54791.9840</td>\n",
" <td>2.314415e+05</td>\n",
" <td>1780.319492</td>\n",
" <td>518.0</td>\n",
" <td>2838.000232</td>\n",
" <td>1.457820e-01</td>\n",
" <td>2.457632e-01</td>\n",
" <td>-1.707090e-02</td>\n",
" <td>2.717209e-01</td>\n",
" <td>4.700000</td>\n",
" <td>9</td>\n",
" <td>0.508681</td>\n",
" <td>-0.415876</td>\n",
" <td>0.013723</td>\n",
" <td>1.162029e-01</td>\n",
" <td>0.949206</td>\n",
" <td>Spain</td>\n",
" <td>Spain</td>\n",
" <td>22</td>\n",
" <td>6.276897e+10</td>\n",
" <td>2.469731e+11</td>\n",
" <td>2.481822e+11</td>\n",
" <td>71</td>\n",
" <td>81</td>\n",
" <td>27.772727</td>\n",
" <td>85</td>\n",
" <td>0.015278</td>\n",
" <td>0.096449</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.025181</td>\n",
" <td>0.012844</td>\n",
" <td>1.789020e-03</td>\n",
" <td>548.538087</td>\n",
" <td>-0.003843</td>\n",
" <td>-1.283031e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>200000082</td>\n",
" <td>71</td>\n",
" <td>71</td>\n",
" <td>1.000000</td>\n",
" <td>85194.200239</td>\n",
" <td>25820.0550</td>\n",
" <td>316149.358</td>\n",
" <td>131158.471</td>\n",
" <td>14575.5560</td>\n",
" <td>1.229616e+06</td>\n",
" <td>17318.539183</td>\n",
" <td>4807.0</td>\n",
" <td>13472.042652</td>\n",
" <td>4.056892e+11</td>\n",
" <td>2.421685e+12</td>\n",
" <td>-9.687862e+10</td>\n",
" <td>8.402113e+11</td>\n",
" <td>1.760563</td>\n",
" <td>4</td>\n",
" <td>0.438873</td>\n",
" <td>-0.588724</td>\n",
" <td>0.034282</td>\n",
" <td>5.851376e-01</td>\n",
" <td>1.000000</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>18</td>\n",
" <td>1.147803e+12</td>\n",
" <td>1.251086e+12</td>\n",
" <td>1.333111e+12</td>\n",
" <td>100</td>\n",
" <td>101</td>\n",
" <td>6.944444</td>\n",
" <td>19</td>\n",
" <td>-0.019860</td>\n",
" <td>-0.020797</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.022861</td>\n",
" <td>-0.135696</td>\n",
" <td>4.793703e+09</td>\n",
" <td>-10443.281371</td>\n",
" <td>0.033033</td>\n",
" <td>7.995257e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>200000146</td>\n",
" <td>130</td>\n",
" <td>130</td>\n",
" <td>1.000000</td>\n",
" <td>71298.603700</td>\n",
" <td>15953.6355</td>\n",
" <td>519508.539</td>\n",
" <td>519508.539</td>\n",
" <td>457533.3310</td>\n",
" <td>1.150546e+06</td>\n",
" <td>8850.350438</td>\n",
" <td>4774.0</td>\n",
" <td>10074.748210</td>\n",
" <td>4.770901e+00</td>\n",
" <td>2.930221e+01</td>\n",
" <td>3.780801e+00</td>\n",
" <td>2.870987e+01</td>\n",
" <td>6.684615</td>\n",
" <td>14</td>\n",
" <td>0.517815</td>\n",
" <td>-0.556667</td>\n",
" <td>0.013723</td>\n",
" <td>1.887379e-15</td>\n",
" <td>0.999302</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>33</td>\n",
" <td>2.123548e+11</td>\n",
" <td>3.670050e+11</td>\n",
" <td>3.882699e+11</td>\n",
" <td>237</td>\n",
" <td>256</td>\n",
" <td>26.333333</td>\n",
" <td>54</td>\n",
" <td>0.281071</td>\n",
" <td>-0.020188</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.018482</td>\n",
" <td>-0.018833</td>\n",
" <td>-9.860558e-02</td>\n",
" <td>24136.047846</td>\n",
" <td>-0.049820</td>\n",
" <td>-4.842472e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>200000147</td>\n",
" <td>129</td>\n",
" <td>129</td>\n",
" <td>1.000000</td>\n",
" <td>35957.851907</td>\n",
" <td>18047.3390</td>\n",
" <td>174703.188</td>\n",
" <td>8478.402</td>\n",
" <td>677424.2191</td>\n",
" <td>1.210845e+06</td>\n",
" <td>9386.398474</td>\n",
" <td>7523.0</td>\n",
" <td>13914.783110</td>\n",
" <td>1.775257e+00</td>\n",
" <td>8.769726e+00</td>\n",
" <td>1.150007e+00</td>\n",
" <td>7.862819e+00</td>\n",
" <td>13.162791</td>\n",
" <td>27</td>\n",
" <td>0.599433</td>\n",
" <td>-0.448172</td>\n",
" <td>0.013837</td>\n",
" <td>9.514697e-01</td>\n",
" <td>0.996847</td>\n",
" <td>Italy</td>\n",
" <td>Italy</td>\n",
" <td>78</td>\n",
" <td>5.279255e+11</td>\n",
" <td>6.892142e+11</td>\n",
" <td>6.858178e+11</td>\n",
" <td>596</td>\n",
" <td>619</td>\n",
" <td>21.769231</td>\n",
" <td>49</td>\n",
" <td>-0.026933</td>\n",
" <td>-0.010493</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-0.204637</td>\n",
" <td>-0.109646</td>\n",
" <td>1.129487e+00</td>\n",
" <td>2098.385472</td>\n",
" <td>-0.012011</td>\n",
" <td>-2.472128e+00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
"0 18872 130 130 1.000000 23477.224308 \n",
"1 200000076 130 119 0.915385 15840.000331 \n",
"2 200000082 71 71 1.000000 85194.200239 \n",
"3 200000146 130 130 1.000000 71298.603700 \n",
"4 200000147 129 129 1.000000 35957.851907 \n",
"\n",
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
"0 14880.4715 88818.372 67570.855 -45677.1480 \n",
"1 9272.4710 50732.461 44837.203 54791.9840 \n",
"2 25820.0550 316149.358 131158.471 14575.5560 \n",
"3 15953.6355 519508.539 519508.539 457533.3310 \n",
"4 18047.3390 174703.188 8478.402 677424.2191 \n",
"\n",
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
"0 1.244126e+06 9570.200015 1926.0 9832.357264 \n",
"1 2.314415e+05 1780.319492 518.0 2838.000232 \n",
"2 1.229616e+06 17318.539183 4807.0 13472.042652 \n",
"3 1.150546e+06 8850.350438 4774.0 10074.748210 \n",
"4 1.210845e+06 9386.398474 7523.0 13914.783110 \n",
"\n",
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
"0 6.382330e+10 5.151309e+11 -2.560792e+10 2.841988e+11 \n",
"1 1.457820e-01 2.457632e-01 -1.707090e-02 2.717209e-01 \n",
"2 4.056892e+11 2.421685e+12 -9.687862e+10 8.402113e+11 \n",
"3 4.770901e+00 2.930221e+01 3.780801e+00 2.870987e+01 \n",
"4 1.775257e+00 8.769726e+00 1.150007e+00 7.862819e+00 \n",
"\n",
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
"0 7.507692 26 0.429844 -0.576520 \n",
"1 4.700000 9 0.508681 -0.415876 \n",
"2 1.760563 4 0.438873 -0.588724 \n",
"3 6.684615 14 0.517815 -0.556667 \n",
"4 13.162791 27 0.599433 -0.448172 \n",
"\n",
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
"0 0.013723 2.392243e-01 1.000000 Switzerland \n",
"1 0.013723 1.162029e-01 0.949206 Spain \n",
"2 0.034282 5.851376e-01 1.000000 Italy \n",
"3 0.013723 1.887379e-15 0.999302 Italy \n",
"4 0.013837 9.514697e-01 0.996847 Italy \n",
"\n",
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
"0 Switzerland 107 4.063407e+11 8.956214e+11 \n",
"1 Spain 22 6.276897e+10 2.469731e+11 \n",
"2 Italy 18 1.147803e+12 1.251086e+12 \n",
"3 Italy 33 2.123548e+11 3.670050e+11 \n",
"4 Italy 78 5.279255e+11 6.892142e+11 \n",
"\n",
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
"0 8.915940e+11 310 344 \n",
"1 2.481822e+11 71 81 \n",
"2 1.333111e+12 100 101 \n",
"3 3.882699e+11 237 256 \n",
"4 6.858178e+11 596 619 \n",
"\n",
" avg_holding_months_per_isin max_holding_months_per_isin \\\n",
"0 9.121495 36 \n",
"1 27.772727 85 \n",
"2 6.944444 19 \n",
"3 26.333333 54 \n",
"4 21.769231 49 \n",
"\n",
" corr_flow_fund_lag3 corr_flow_fund_lag6 corr_flow_bench_lag3 \\\n",
"0 0.007825 0.008326 NaN \n",
"1 0.015278 0.096449 NaN \n",
"2 -0.019860 -0.020797 NaN \n",
"3 0.281071 -0.020188 NaN \n",
"4 -0.026933 -0.010493 NaN \n",
"\n",
" corr_flow_bench_lag6 corr_flow_rate_lag3 corr_flow_rate_lag6 \\\n",
"0 NaN 0.007546 0.014510 \n",
"1 NaN -0.025181 0.012844 \n",
"2 NaN 0.022861 -0.135696 \n",
"3 NaN -0.018482 -0.018833 \n",
"4 NaN -0.204637 -0.109646 \n",
"\n",
" flow_trend_12m aum_trend_12m drawdown_trend_12m beta_rate \n",
"0 -1.886348e-02 2920.070661 -0.024467 1.405196e+10 \n",
"1 1.789020e-03 548.538087 -0.003843 -1.283031e-01 \n",
"2 4.793703e+09 -10443.281371 0.033033 7.995257e+10 \n",
"3 -9.860558e-02 24136.047846 -0.049820 -4.842472e+00 \n",
"4 1.129487e+00 2098.385472 -0.012011 -2.472128e+00 "
]
},
"execution_count": 306,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def compute_trend(y):\n",
" y = np.asarray(y, dtype=float)\n",
" if len(y) < 4:\n",
" return np.nan\n",
" x = np.arange(len(y)).reshape(-1, 1)\n",
" mask = np.isfinite(y)\n",
" if mask.sum() < 4:\n",
" return np.nan\n",
" reg = LinearRegression().fit(x[mask], y[mask])\n",
" return reg.coef_[0]\n",
"\n",
"def compute_beta(y, x):\n",
" y = np.asarray(y, dtype=float)\n",
" x = np.asarray(x, dtype=float)\n",
" mask = np.isfinite(y) & np.isfinite(x)\n",
" if mask.sum() < 6:\n",
" return np.nan\n",
" reg = LinearRegression().fit(x[mask].reshape(-1, 1), y[mask])\n",
" return reg.coef_[0]\n",
"\n",
"rows = []\n",
"\n",
"for acc, g in df_month.groupby(ID_COL):\n",
" g = g.sort_values(\"month\")\n",
"\n",
" flow = g[\"flow_to_aum_m\"].values\n",
" aum = g[\"aum_qty\"].values\n",
" delta_rate = g[\"delta_rate_m\"].values\n",
" drawdown = g[\"aum_drawdown\"].values\n",
"\n",
" rows.append({\n",
" ID_COL: acc,\n",
" \"flow_trend_12m\": compute_trend(flow[-12:]),\n",
" \"aum_trend_12m\": compute_trend(aum[-12:]),\n",
" \"drawdown_trend_12m\": compute_trend(drawdown[-12:]),\n",
" \"beta_rate\": compute_beta(flow, delta_rate)\n",
" })\n",
"\n",
"df_beta = pd.DataFrame(rows)\n",
"\n",
"df_client = df_client.merge(df_beta, on=ID_COL, how=\"left\")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
]
},
{
"cell_type": "code",
"execution_count": 307,
"id": "4e4ea46f-5c3d-4a4a-b79c-ff5ae8973bad",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"seg_2D\n",
"Highly active (high int, high freq) 137\n",
"Dormant (low int, low freq) 134\n",
"Small rebalancers (low int, high freq) 80\n",
"Occasional large movers (high int, low freq) 80\n",
"Name: count, dtype: int64\n",
"thr_int: 5.739688017572092 thr_freq: 0.8\n"
]
}
],
"source": [
"df_client[\"rel_intensity_total\"] = df_client[\"gross_flow_qty_sum\"]/df_client[\"aum_qty_mean\"] # turnover proxy\n",
"df_client[\"frequency\"] = df_client[\"flow_freq\"] # share of active months\n",
"\n",
"# Thresholds: medians (simple + explainable)\n",
"thr_int = df_client[\"rel_intensity_total\"].median()\n",
"thr_freq = df_client[\"frequency\"].median()\n",
"\n",
"def quadrant(row):\n",
" low_int = row[\"rel_intensity_total\"] < thr_int\n",
" low_frq = row[\"frequency\"] < thr_freq\n",
"\n",
" if low_int and low_frq:\n",
" return \"Dormant (low int, low freq)\"\n",
" if low_int and (not low_frq):\n",
" return \"Small rebalancers (low int, high freq)\"\n",
" if (not low_int) and low_frq:\n",
" return \"Occasional large movers (high int, low freq)\"\n",
" return \"Highly active (high int, high freq)\"\n",
"\n",
"df_client[\"seg_2D\"] = df_client.apply(quadrant, axis=1)\n",
"\n",
"print(df_client[\"seg_2D\"].value_counts())\n",
"print(\"thr_int:\", thr_int, \"thr_freq:\", thr_freq)\n"
]
},
{
"cell_type": "code",
"execution_count": 308,
"id": "09943df7-8c78-4c51-b387-866c5cddd392",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAApgAAAHHCAYAAAAbASh2AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA5ZhJREFUeJzs3XdYU9cbB/Bvwt6yEVkCLhQBBf1pqziLe89axd1W3NqqtXXWuuqqojjqqLUVt9bWSd3a1oG49wAVcSDgQEZyfn/QpITchKybAe/neXg0Jyf3nNwkN2/OFDDGGAghhBBCCNERoaErQAghhBBCyhYKMAkhhBBCiE5RgEkIIYQQQnSKAkxCCCGEEKJTFGASQgghhBCdogCTEEIIIYToFAWYhBBCCCFEpyjAJIQQQgghOkUBJiGEEEII0SmjDzCbNGmCWrVq6fSYAoEAw4cP1+kxVbF+/XoIBAI8ePBA72UDQP/+/REQEGCQsomsBw8eQCAQYP369Yauis7w9ZwCAgLQv39/nR7TmMvlQ5MmTdCkSRNDV0MnNm7ciOrVq8PCwgIVKlQwdHXKpLS0NFhbW+PUqVOGrgoxsP3798Pe3h7Pnz9X+7FqBZhnz57F8OHDUbNmTdjZ2cHPzw89evTArVu35PI2adIEAoEAAoEAQqEQjo6OqFatGvr27YtDhw6pXVFC9O3atWuYNm2aVj8IfvnlFyxevFhndSqrTp8+jWnTpiErK8vQVdGJ5cuXG/WPhydPnmDatGm4ePGioauilhs3bqB///4ICgrC6tWrsWrVKkNXqUyaMWMG6tevjw8++EBhnpYtWyptrPnxxx9Ro0YNWFtbo0qVKli6dClnvsePH6NHjx6oUKECHB0d0bFjR9y7d08nz6M0WVlZsLa2hkAgwPXr1/VSpiH88ccfmDZtmkaPbdWqFYKDgzF79mz1H8zU0LVrV+bl5cVGjBjBVq9ezWbOnMk8PT2ZnZ0du3z5skze6Oho5uPjwzZu3Mg2btzIEhIS2Pjx41lgYCADwHr06MHy8/NLLTM6OprVrFlTnWqWCgCLi4vT6TFVUVhYyHJzc5lYLNZ72YwxFhsby/z9/Q1StinaunUrA8COHDmi8THatm3Lec7FYjHLzc1lhYWFmlfQyNy/f58BYOvWrVP7sfPnz2cA2P379+Xue//+vUrXCl3TptyaNWuy6Oho3VZIC3l5eSwvL096++zZsxq/Voa0YsUKBoDdvn3b0FUps549e8YsLCzYL7/8ojDP9u3bmZ2dncLv0oSEBAaAde3ala1atYr17duXAWBz5syRyff69WtWpUoV5uHhwebOncsWLlzIfH19mY+PD3vx4oXOn1tJq1atYtbW1szLy4tNnjyZ9/IMJS4ujqkZ7slYvnw5s7W1ZTk5OWo9Tq0WzLFjx+Lhw4f44YcfMHjwYHz99dc4ceIECgsLMWfOHLn8Tk5O+OSTT/DJJ5/g008/xfz583Hr1i0MGzYMW7Zswddff61+RGzCzMzMpL+WdOHdu3c6OQ7RP4FAAGtra5iZmRm6KgqJxWK8f//e0NWAlZUVLCwsyk25fLC0tISlpaWhq6G1Z8+eAUCpXeOMMeTm5uqhRmXPzz//DHNzc7Rv357z/vfv32PcuHGYMGEC5/25ubmYPHky2rZti23btmHIkCH46aef0KdPH8ycOROvXr2S5l2+fDlu376NvXv34ssvv8SYMWNw8OBBpKenY8GCBUrrOW3aNK2HfP38889o06YNevfujV9++UWrY5VlXbt2RV5eHrZu3areAzUOaYupU6cOq1OnjkyaspbHwsJCFhISwmxtbVlWVpbSY0uOc+7cOdagQQNmbW3NAgIC2IoVK+Tyvn//nk2ZMoUFBQUxS0tL5uPjw7744gv2/v17mXz491fXzp07Wc2aNZmlpSULCQlh+/btk8n34MED9vnnn7OqVasya2tr5uLiwrp16ybTyiJpCVi/fr1cffbv38
8AsN9++40xxti6des4W2ni4+NZSEgIs7S0ZBUrVmTDhg1jr169UngeGjVqxGxsbNioUaMYY4zt2rWLtWnThlWsWJFZWlqywMBANmPGDLnWMVVbMM+ePcs++ugj5urqKj3fAwYMkMkjEonYokWLWEhICLOysmIeHh5s6NChLDMzUy7f1KlTWcWKFZmNjQ1r0qQJu3r1KvP392exsbHSfJJzc+LECTZixAjm5ubGnJyc2NChQ1leXh579eoV69u3L6tQoQKrUKEC++KLL+RaglWtk7+/P2vbti07ceIEi4qKYlZWVqxy5cpsw4YNcvUp+SdpzVTlnEdHR8s9XnL+FbX2JSUlsQ8//JDZ2toyJycn1qFDB3bt2jWZPFOnTpW24sTGxjInJyfm6OjI+vfvz96+fSuT9/nz5+z69ety6Vwkn4uff/6ZhYSEMHNzc7Zz507GGGOPHj1iAwYMYB4eHtLPy48//ijzeK7nlJKSwmJjY1nlypWZlZUV8/T0ZAMGDJBpnZA8n5J/ks9J8feKOp83VeutiKL36MmTJ9mYMWOYm5sbs7W1ZZ06dWLPnj2TeVzJ51K8NfPVq1ds1KhRzMfHh1laWrKgoCA2Z84cJhKJ5M7l/Pnz2cqVK1lgYCCztLRkkZGR7J9//pGpZ3p6Ouvfvz+rVKkSs7S0ZF5eXqxDhw4y15no6GhpHY4cOcJ5vtetW8emTJnCzM3NZZ6PxJAhQ5iTkxPLzc3lPF+SVugHDx7I3Tdx4kRmYWEh/SzeunWLdenShXl6ejIrKytWqVIl1rNnT6XfB1znderUqdL72rZty/bv38/q1q3LrKys2KJFi1Q+35J8sbGxzNHRkTk5ObF+/fqx5ORkufd08XNZHNf1VZfXpOL1HD16NPP392eWlpasUqVKrG/fvuz58+fs9evXzNbWlo0cOVLucWlpaUwoFLLvvvtO4TlmjLHGjRuzJk2aKLx/+vTpzM/Pj717946zBfP3339nANjvv/8uk3769GkGgG3cuFGaFhUVxaKiouTK+Oijj1hQUJDSek6dOlWrHrmHDx8ygUDAtmzZwv7++28GgJ06dYoz78aNG1lUVBSzsbFhFSpUYI0aNWIHDhyQyfPHH3+wxo0bM3t7e+bg4MAiIyPZpk2bZPJs2bKF1alTh1lbWzNXV1fWp08f9ujRI5k8qr6/VL1GxMbGcn7eJX799VdWp04dab1r1arFFi9eLFd+REQE69Chg8LzyUXrAFMsFrNKlSqxjz76SCa9tK7tmTNnMgBs7969So8fHR3NvL29mYeHBxs+fDj74Ycf2IcffsgAyHxRiEQi9tFHHzFbW1s2evRotnLlSjZ8+HBmbm7OOnbsKHNMACwsLIxVrFiRzZw5ky1evJgFBgYyW1tbmS++rVu3srCwMDZlyhS2atUq9tVXXzFnZ2fm7+8v84UdGBjI2rRpI1f3AQMGMGdnZ2k3G1eAKflybdGiBVu6dCkbPnw4MzMzY1FRUTLdc9HR0czLy4u5u7uzESNGsJUrV7Jdu3Yxxhjr1KkT69GjB5s/fz5bsWIF6969OwPAxo8fL1MfVQLMjIwM5uzszKpWrcrmz5/PVq9ezSZPnsxq1Kghk2/w4MHM3NycDRkyhCUkJLAJEyYwOzs7uXp/+eWXDABr3749W7ZsGRsyZAjz8fFhbm5unF/e4eHhrFWrViw+Pl7arfLll1+yDz/8kH388cds+fLlrF27dgyA3MVX1Tr5+/uzatWqMU9PT/bVV1+xZcuWsTp16jCBQMCuXLnCGGPs7t27bOTIkQwA++qrr6RDPZ4+faryOT948CALDw9nbm5u0sdLAjauYOzQoUPM3NycVa1alc2bN49Nnz6dubm5MWdnZ873TEREBOvSpQtbvnw5Gzx4sPRcFSfJq0o3PwBWo0YN5u7uzqZPn87i4+NZcnIye/r0KfPx8WG+vr5sxowZbMWKFaxDhw4MgPRLXNFz+v7771mjRo3YjBkz2KpVq9ioUa
OYjY0Nq1evnvQHQkpKCuvdu7f0eJJz9ebNG+nrVfy9ournTdV6K6IowIyIiGDNmjVjS5cuZePGjWNmZmasR48e0nw
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.style.use('default')\n",
"plt.figure()\n",
"for name, g in df_client.groupby(\"seg_2D\"):\n",
" plt.scatter(g[\"frequency\"], g[\"rel_intensity_total\"], s=10, label=name)\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_freq, linestyle=\"--\")\n",
"plt.axhline(thr_int, linestyle=\"--\")\n",
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency (400+ Accounts)\")\n",
"plt.legend(markerscale=2)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 309,
"id": "9eb5fbb8-1a7b-434c-ba36-3c2a560b4cb1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nb clients = 404\n",
"Nb features = 35\n",
"['log_aum_qty_mean', 'flow_freq', 'gross_flow_to_aum', 'flow_to_aum_vol', 'activity_intensity', 'n_tx_total', 'avg_n_isin_held', 'n_isin_total', 'avg_holding_months_per_isin', 'exit_rate_per_isin', 'flow_direction_balance', 'aum_drawdown_last', 'corr_flow_fund_lag3', 'corr_flow_fund_lag6', 'corr_flow_rate_lag3', 'country_grp_France', 'country_grp_Germany', 'country_grp_Italy', 'country_grp_Luxembourg', 'country_grp_Monaco', 'country_grp_Other', 'country_grp_Spain', 'country_grp_Sweden', 'country_grp_Switzerland', 'country_grp_United Kingdom', 'region_grp_France', 'region_grp_Germany', 'region_grp_International', 'region_grp_Italy', 'region_grp_Luxembourg', 'region_grp_Nordics', 'region_grp_Other', 'region_grp_Spain', 'region_grp_Switzerland', 'region_grp_United Kingdom']\n"
]
}
],
"source": [
"dfc = df_client.copy()\n",
"\n",
"dfc[\"gross_flow_to_aum\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"aum_qty_mean\"].abs() + EPS)\n",
"dfc[\"avg_ticket\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"n_tx_total\"] + EPS)\n",
"dfc[\"flow_direction_balance\"] = dfc[\"net_flow_qty_sum\"] / (dfc[\"gross_flow_qty_sum\"] + EPS)\n",
"dfc[\"redemption_bias\"] = dfc[\"red_share_mean\"] - dfc[\"sub_share_mean\"]\n",
"dfc[\"activity_intensity\"] = dfc[\"n_tx_total\"] / (dfc[\"n_months\"] + EPS)\n",
"dfc[\"exit_rate_per_isin\"] = dfc[\"full_exit_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"entry_rate_per_isin\"] = dfc[\"entry_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"aum_final_to_peak\"] = dfc[\"aum_qty_last\"] / (dfc[\"aum_qty_max\"] + EPS)\n",
"\n",
"for col in [\"aum_qty_mean\", \"gross_flow_qty_sum\", \"n_tx_total\", \"avg_ticket\", \"gross_flow_qty_mean\"]:\n",
" dfc[f\"log_{col}\"] = np.log1p(dfc[col].clip(lower=0))\n",
"\n",
"dfc = dfc[(dfc[\"n_months\"] >= 6) & (dfc[\"aum_qty_mean\"] > 0)].copy()\n",
"\n",
"top_countries = dfc[\"country\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"top_regions = dfc[\"region\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"\n",
"dfc[\"country_grp\"] = np.where(dfc[\"country\"].isin(top_countries), dfc[\"country\"], \"Other\")\n",
"dfc[\"region_grp\"] = np.where(dfc[\"region\"].isin(top_regions), dfc[\"region\"], \"Other\")\n",
"\n",
"base_features = [\n",
" \"log_aum_qty_mean\",\n",
" \"flow_freq\",\n",
" \"gross_flow_to_aum\",\n",
" #\"turnover_vol\",\n",
" \"flow_to_aum_vol\",\n",
" \"activity_intensity\",\n",
" \"n_tx_total\",\n",
" \"avg_n_isin_held\",\n",
" \"n_isin_total\",\n",
" \"avg_holding_months_per_isin\",\n",
" \"exit_rate_per_isin\",\n",
" \"flow_direction_balance\",\n",
" #\"redemption_bias\",\n",
" \"aum_drawdown_last\",\n",
" \"corr_flow_fund_lag3\",\n",
" \"corr_flow_fund_lag6\",\n",
" \"corr_flow_rate_lag3\",\n",
" #\"corr_flow_rate_lag6\",\n",
" #\"corr_flow_bench_lag3\",\n",
" #\"corr_flow_bench_lag6\"\n",
" \n",
"]\n",
"\n",
"base_features2 = [\n",
" \"log_aum_qty_mean\",\n",
" \"log_gross_flow_qty_mean\",\n",
" \"n_tx_total\",\n",
" \"flow_freq\",\n",
" \"gross_flow_to_aum\",\n",
" \"net_flow_vol\",\n",
" #\"avg_n_isin_held\",\n",
" #\"flow_direction_balance\",\n",
"]\n",
"\n",
"base_features = [c for c in base_features if c in dfc.columns]\n",
"\n",
"X_num = dfc[base_features].replace([np.inf, -np.inf], np.nan).fillna(dfc[base_features].median())\n",
"X_cat = pd.get_dummies(dfc[[\"country_grp\", \"region_grp\"]].fillna(\"Unknown\"), drop_first=True)\n",
"\n",
"X = pd.concat([X_num.reset_index(drop=True), X_cat.reset_index(drop=True)], axis=1)\n",
"\n",
"scaler = StandardScaler()\n",
"scaler2 = RobustScaler()\n",
"\n",
"X_scaled = scaler.fit_transform(X_num)\n",
"X_scaled2 = scaler2.fit_transform(X_num)\n",
"\n",
"print(\"Nb clients =\", X.shape[0])\n",
"print(\"Nb features =\", X.shape[1])\n",
"print(X.columns.tolist())"
]
},
{
"cell_type": "code",
"execution_count": 310,
"id": "5f006fc0-d0e7-47b2-94f0-7e3bbdf91097",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>k</th>\n",
" <th>inertia</th>\n",
" <th>silhouette</th>\n",
" <th>davies_bouldin</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>5178.843770</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>4741.629415</td>\n",
" <td>0.132598</td>\n",
" <td>2.166657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>4394.702026</td>\n",
" <td>0.124785</td>\n",
" <td>2.073192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>4115.441587</td>\n",
" <td>0.133249</td>\n",
" <td>1.787169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>3865.546167</td>\n",
" <td>0.127431</td>\n",
" <td>1.759628</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>7</td>\n",
" <td>3679.273300</td>\n",
" <td>0.135589</td>\n",
" <td>1.702516</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>8</td>\n",
" <td>3448.452307</td>\n",
" <td>0.139533</td>\n",
" <td>1.634761</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>9</td>\n",
" <td>3321.805201</td>\n",
" <td>0.121927</td>\n",
" <td>1.709083</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>10</td>\n",
" <td>3167.889248</td>\n",
" <td>0.128239</td>\n",
" <td>1.605403</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>11</td>\n",
" <td>3048.339811</td>\n",
" <td>0.134592</td>\n",
" <td>1.620711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>12</td>\n",
" <td>2931.256053</td>\n",
" <td>0.139258</td>\n",
" <td>1.550705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>13</td>\n",
" <td>2847.001592</td>\n",
" <td>0.144681</td>\n",
" <td>1.537896</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>14</td>\n",
" <td>2742.565943</td>\n",
" <td>0.152159</td>\n",
" <td>1.455955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>15</td>\n",
" <td>2647.758120</td>\n",
" <td>0.148589</td>\n",
" <td>1.469929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>16</td>\n",
" <td>2576.736131</td>\n",
" <td>0.132343</td>\n",
" <td>1.498820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>17</td>\n",
" <td>2520.993893</td>\n",
" <td>0.137837</td>\n",
" <td>1.491089</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>18</td>\n",
" <td>2443.613239</td>\n",
" <td>0.135204</td>\n",
" <td>1.450617</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>19</td>\n",
" <td>2368.363384</td>\n",
" <td>0.151232</td>\n",
" <td>1.428163</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>20</td>\n",
" <td>2348.022385</td>\n",
" <td>0.134130</td>\n",
" <td>1.432542</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" k inertia silhouette davies_bouldin\n",
"0 2 5178.843770 NaN NaN\n",
"1 3 4741.629415 0.132598 2.166657\n",
"2 4 4394.702026 0.124785 2.073192\n",
"3 5 4115.441587 0.133249 1.787169\n",
"4 6 3865.546167 0.127431 1.759628\n",
"5 7 3679.273300 0.135589 1.702516\n",
"6 8 3448.452307 0.139533 1.634761\n",
"7 9 3321.805201 0.121927 1.709083\n",
"8 10 3167.889248 0.128239 1.605403\n",
"9 11 3048.339811 0.134592 1.620711\n",
"10 12 2931.256053 0.139258 1.550705\n",
"11 13 2847.001592 0.144681 1.537896\n",
"12 14 2742.565943 0.152159 1.455955\n",
"13 15 2647.758120 0.148589 1.469929\n",
"14 16 2576.736131 0.132343 1.498820\n",
"15 17 2520.993893 0.137837 1.491089\n",
"16 18 2443.613239 0.135204 1.450617\n",
"17 19 2368.363384 0.151232 1.428163\n",
"18 20 2348.022385 0.134130 1.432542"
]
},
"execution_count": 310,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rows = []\n",
"\n",
"for k in range(2, 21):\n",
" km = KMeans(n_clusters=k, n_init=30, random_state=42)\n",
" labels = km.fit_predict(X_scaled)\n",
"\n",
" row = {\n",
" \"k\": k,\n",
" \"inertia\": km.inertia_\n",
" }\n",
"\n",
" if k >= 3:\n",
" row[\"silhouette\"] = silhouette_score(X_scaled, labels)\n",
" row[\"davies_bouldin\"] = davies_bouldin_score(X_scaled, labels)\n",
" else:\n",
" row[\"silhouette\"] = np.nan\n",
" row[\"davies_bouldin\"] = np.nan\n",
"\n",
" rows.append(row)\n",
"\n",
"df_kdiag = pd.DataFrame(rows)\n",
"df_kdiag"
]
},
{
"cell_type": "code",
"execution_count": 311,
"id": "0198c399-f532-44c5-91a7-d4e0a27887ec",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAAGGCAYAAAAzegNcAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA88RJREFUeJzs3XlcVHX3B/DPzAAzrCM7iAgIuCCK4EouuePappWluTylj6aW2WI8v9LMyjItW0hLK32k0uyxxTLN3FrEUBQVcUFkcWFH2WFg5v7+GGZ0ZIfZgM/79ZpXzZ3v3DmDyty5555zRIIgCCAiIiIiIiIiIiIiIjJzYlMHQERERERERERERERE1BhMahARERERERERERERUavApAYREREREREREREREbUKTGoQEREREREREREREVGrwKQGERERERERERERERG1CkxqEBERERERERERERFRq8CkBhERERERERERERERtQpMahARERERERERERERUavApAYREREREREREREREbUKTGoQEREREVGT+fr6Yvbs2dr7hw8fhkgkwuHDh7Xbhg8fjuDgYOMHR0RERK3Ga6+9BpFIZOowjGr27Nnw9fVtcF1qaipEIhG2bNmi3dYef15Ed2NSg6gVE4lEeO2117T3NR9subm5pguqHbj7505ERNTWnD17FlOnToWPjw9kMhm8vLwwZswYfPTRR6YOTe8SExPx2muvITU1tcZjn3zyic5JBCIiorZoy5YtEIlE2ptMJkPHjh0RERGBDz/8EEVFRaYOUe9mz56t854tLCzg7e2NadOmITEx0dThEVEDmNQgMjN3H0zcfTt27JipQ2yxjz76CHK5HJWVlXWuEYlEWLRokRGj0rVnzx4mLoiIqF06evQo+vXrh9OnT2Pu3Ln4+OOP8dRTT0EsFuODDz7Qrrt48SI2bdpkwkj1IzExEStXrmRSg4iI2r3XX38d27Ztw4YNG7B48WIAwJIlS9CrVy+cOXPGYK/7yiuvoKyszGD7r4tUKsW2bduwbds2bN68GbNnz8aBAwdwzz334MaNG0aPp7FM9fMiMicWpg6AiGr3+uuvw8/Pr8b2gIAAE0SjX7/88gvGjh0LS0tLU4dSpz179iAqKqrWxEZZWRksLPjrk4iI2qY333wTcrkcx48fR4cOHXQey87O1v6/VCo1cmRERERkSOPHj0e/fv209yMjI3Hw4EFMmjQJ9913H86fPw9ra2u9v66FhYVJvmNbWFhgxowZOtsGDRqESZMm4ZdffsHcuXONHlNjmOrnRWROWKlBZKbGjx+PGTNm1Li5uLiYOrQWKS0txZEjRzBx4kRTh1KrkpKSBtfIZDIeQBARUZuVnJyMnj171khoAICbm5v2/++eqVGfxMREjBgxAjY2NvDy8sKaNWtqrMnOzsaTTz4Jd3d3yGQyhISEYOvWrTprapvbAdTebxoALly4gKlTp8LJyQkymQz9+vXDTz/9pH18y5YtePjhhwEAI0aM0FbGHj58GL6+vjh37hyOHDmi3T58+HDtc2/duoUlS5bA29sbUqkUAQEBeOedd6BSqRr1MyEiImoNRo4ciVdffRVpaWmIjo4GAJw5cwazZ89Gly5dIJPJ4OHhgX/961/Iy8vTPu+7776DSCTCkSNHauzz008/hUgkQkJCAoC6Z0RER0ejb9++sLa2hpOTE6ZNm4arV6/qrElKSsKUKVPg4eEBmUyGTp06Ydq0aSgoKGjW+/Xw8ACAGt/5r1y5gocffhhOTk6wsbHBoEGD8Msvv+is0XTeuLv6s67jl7vdunULs2fPhlwuR4cOHTBr1izcunWrxrrafl6abhc//PADgoODIZVK0bNnT+zdu7dxb5yolWFSg6gNys3NxSOPPAIHBwc4Ozvj2WefRXl5uc6aqqoqrFq1Cv7+/pBKpfD19cV//vMfVFRUaNcsXboUzs7OEARBu23x4sUQiUT48MMPtduysrIgEomwYc
OGBmM7cOAAKioqMH78+Ca9J81BwLfffos333wTnTp1gkwmw6hRo3D58uUa6//55x+MGzcOcrkcNjY2uPfee/H333/rrNEcCCQmJuLxxx+Ho6MjhgwZgtmzZyMqKgoAdFp/adw9UyMtLQ1PP/00unXrBmtrazg7O+Phhx+utY0FERGRufPx8UFcXJz2RENL3bx5E+PGjUNISAjWrVuH7t27Y9myZfj111+1a8rKyjB8+HBs27YN06dPx7vvvgu5XI7Zs2frtLxqinPnzmHQoEE4f/48Xn75Zaxbtw62trZ44IEH8P333wMAhg0bhmeeeQYA8J///EfbgqJHjx5Yv349OnXqhO7du2u3/9///R8A9UUa9957L6KjozFz5kx8+OGHGDx4MCIjI7F06dIW/sSIiIjMyxNPPAEA+O233wAA+/fvx5UrVzBnzhx89NFHmDZtGrZv344JEyZozx9MnDgRdnZ2+Pbbb2vsb8eOHejZsyeCg4PrfM0333wTM2fORGBgIN577z0sWbIEBw4cwLBhw7Qn+hUKBSIiInDs2DEsXrwYUVFRmDdvHq5cuVJrMqA2ubm5yM3NRVZWFmJiYvDcc8/B2dkZkyZN0q7JysrCPffcg3379uHpp5/Gm2++ifLyctx3333aY4qWEgQB999/P7Zt24YZM2bgjTfewLVr1zBr1qxG7+Ovv/7C008/jWnTpmHNmjUoLy/HlClTdJJNRG2GQERm5csvvxQACL///ruQk5Ojc8vNzdVZC0BYsWKF9v6KFSsEAEKvXr2EyZMnCx9//LEwY8YMAYDwxBNP6Dx31qxZAgBh6tSpQlRUlDBz5kwBgPDAAw9o1+zatUsAIJw9e1a7LSQkRBCLxcLUqVO123bu3CkAEBISEhp8f/Pnzxf69evX4DoAwsKFC7X3Dx06JAAQQkNDhb59+wrvv/++8Nprrwk2NjbCgAEDdJ574MABwcrKSggPDxfWrVsnvP/++0Lv3r0FKysr4Z9//qnx8woKChLuv/9+4ZNPPhGioqKEo0ePCmPGjBEACNu2bdPe7oztzp/7zp07hZCQEGH58uXCZ599JvznP/8RHB0dBR8fH6GkpKTB90pERGROfvvtN0EikQgSiUQIDw8XXnrpJWHfvn2CQqHQWefj4yPMmjVLe1/zWX3o0CHttnvvvVcAIPz3v//VbquoqBA8PDyEKVOmaLetX79eACBER0drtykUCiE8PFyws7MTCgsL63wNQRCElJQUAYDw5ZdfareNGjVK6NWrl1BeXq7dplKphHvuuUcIDAzUbtMcx9y9T0EQhJ49ewr33ntvje2rVq0SbG1thUuXLulsf/nllwWJRCKkp6fXeA4REZG50pyHOH78eJ1r5HK5EBoaKgiCIJSWltZ4/JtvvhEACH/88Yd222OPPSa4ubkJVVVV2m0ZGRmCWCwWXn/9de02zXdzjdTUVEEikQhvvvmmzmucPXtWsLCw0G4/deqUAEDYuXNnE9/x7XMid9+8vLyEuLg4nbVLliwRAAh//vmndltRUZHg5+cn+Pr6CkqlUhCE2z/HlJQUnefXdvwya9YswcfHR3v/hx9+EAAIa9as0W6rqqoShg4dWuMY5+6flyCoz1NYWVkJly9f1m47ffq0AED46KOPmvrjITJ7rNQgMlOjR4+Gq6urzs3Ly6tRz/Xz88NPP/2EhQsXYtu2bXj66aexbds27WCv06dPY+vWrXjqqaewc+dOPP3009i6dSteeOEF/PDDDzh06BAAYMiQIQCAP//8EwBQUFCAs2fPYsqUKdptmsednJwQFBTUYGx79uxpUeup8vJyHD16FEuWLMGKFSuwevVqxMbGaq8mFQQB8+fPx4gRI/D3339j6dKlWLJkCY4dOwYvLy+88sorNfYZEhKCH374AQsWLMDTTz+N8PBwdO3aFQB0Wn/VZeLEiYiPj8fKlSsxd+5cvPnmm9izZw/S0tLwv//9r9nvlYiIyBTGjBmDmJgY3HfffT
h9+jTWrFmDiIgIeHl56bRuaiw7Ozudz1ErKysMGDAAV65c0W7bs2cPPDw88Nhjj2m3WVpa4plnnkFxcXGtrSvqk5+
"text/plain": [
"<Figure size 1600x400 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# K-selection diagnostics: one panel per metric column of df_kdiag, plotted against k.\n",
"fig, axes = plt.subplots(1, 3, figsize=(16, 4))\n",
"\n",
"# (df_kdiag column, panel title), listed in left-to-right panel order.\n",
"diagnostic_panels = [\n",
"    (\"inertia\", \"Elbow / Inertia\"),\n",
"    (\"silhouette\", \"Silhouette\"),\n",
"    (\"davies_bouldin\", \"Davies-Bouldin\"),\n",
"]\n",
"\n",
"for panel_ax, (metric_col, panel_title) in zip(axes, diagnostic_panels):\n",
"    panel_ax.plot(df_kdiag[\"k\"], df_kdiag[metric_col], marker=\"o\")\n",
"    panel_ax.set_title(panel_title)\n",
"    panel_ax.set_xlabel(\"K\")\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 312,
"id": "5ba1f3bf-7fd7-49aa-8b28-0ca0f2658bf0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"K=2 | silhouette=0.2357 | davies_bouldin=1.8611\n",
"K=4 | silhouette=0.1262 | davies_bouldin=2.0735\n",
"K=5 | silhouette=0.1332 | davies_bouldin=1.7872\n"
]
}
],
"source": [
"# Fit the final K-Means models for the shortlisted values of K.\n",
"# For each K, RESULTS[K] keeps the fitted model, the label vector and two\n",
"# internal validity scores computed on X_scaled.\n",
"RESULTS = {}\n",
"\n",
"for k in [2, 4, 5]:\n",
"    # The seed comes from the notebook-level RANDOM_STATE constant (config cell)\n",
"    # instead of a second hard-coded 42, so one change reseeds every step.\n",
"    km = KMeans(n_clusters=k, n_init=50, random_state=RANDOM_STATE)\n",
"    labels = km.fit_predict(X_scaled)\n",
"    # Adds (or overwrites) the assignment column cluster_k<K> on dfc.\n",
"    dfc[f\"cluster_k{k}\"] = labels\n",
"\n",
"    RESULTS[k] = {\n",
"        \"model\": km,\n",
"        \"labels\": labels,\n",
"        \"silhouette\": silhouette_score(X_scaled, labels),\n",
"        \"davies_bouldin\": davies_bouldin_score(X_scaled, labels),\n",
"    }\n",
"\n",
"# Dicts preserve insertion order, so this reports K in the order fitted above\n",
"# without repeating the candidate list.\n",
"for k in RESULTS:\n",
"    print(f\"K={k} | silhouette={RESULTS[k]['silhouette']:.4f} | davies_bouldin={RESULTS[k]['davies_bouldin']:.4f}\")"
]
},
{
"cell_type": "code",
"execution_count": 313,
"id": "0052976f-e30f-4f84-b720-6fa4a9078aba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=2 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>327</td>\n",
" <td>21039.415608</td>\n",
" <td>5.516534</td>\n",
" <td>0.730769</td>\n",
" <td>469.0</td>\n",
" <td>2.347826</td>\n",
" <td>20.0</td>\n",
" <td>10.000000</td>\n",
" <td>1.416667</td>\n",
" <td>0.042861</td>\n",
" <td>-0.762943</td>\n",
" <td>0.818922</td>\n",
" <td>0.181078</td>\n",
" <td>0.002365</td>\n",
" <td>0.004122</td>\n",
" <td>0.000339</td>\n",
" <td>0.000122</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>77</td>\n",
" <td>133315.879515</td>\n",
" <td>10.375358</td>\n",
" <td>1.000000</td>\n",
" <td>8861.0</td>\n",
" <td>14.769231</td>\n",
" <td>56.0</td>\n",
" <td>34.765306</td>\n",
" <td>2.515152</td>\n",
" <td>0.026428</td>\n",
" <td>-1.057873</td>\n",
" <td>0.505165</td>\n",
" <td>0.494835</td>\n",
" <td>0.041515</td>\n",
" <td>0.023970</td>\n",
" <td>-0.045190</td>\n",
" <td>-0.046754</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k2 \n",
"0 327 21039.415608 5.516534 0.730769 \n",
"1 77 133315.879515 10.375358 1.000000 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k2 \n",
"0 469.0 2.347826 20.0 \n",
"1 8861.0 14.769231 56.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k2 \n",
"0 10.000000 1.416667 \n",
"1 34.765306 2.515152 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k2 \n",
"0 0.042861 -0.762943 \n",
"1 0.026428 -1.057873 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
"cluster_k2 \n",
"0 0.818922 0.181078 \n",
"1 0.505165 0.494835 \n",
"\n",
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
"cluster_k2 \n",
"0 0.002365 0.004122 \n",
"1 0.041515 0.023970 \n",
"\n",
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \n",
"cluster_k2 \n",
"0 0.000339 0.000122 \n",
"1 -0.045190 -0.046754 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=4 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k4</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>165</td>\n",
" <td>14436.315829</td>\n",
" <td>17.218849</td>\n",
" <td>0.949153</td>\n",
" <td>1252.0</td>\n",
" <td>2.527559</td>\n",
" <td>29.0</td>\n",
" <td>7.756757</td>\n",
" <td>3.093750</td>\n",
" <td>0.018779</td>\n",
" <td>-0.996486</td>\n",
" <td>0.916665</td>\n",
" <td>0.083335</td>\n",
" <td>0.000208</td>\n",
" <td>0.001381</td>\n",
" <td>-0.002783</td>\n",
" <td>-0.003603</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>131</td>\n",
" <td>28566.131306</td>\n",
" <td>0.894162</td>\n",
" <td>0.123077</td>\n",
" <td>14.0</td>\n",
" <td>1.650000</td>\n",
" <td>15.0</td>\n",
" <td>8.500000</td>\n",
" <td>0.619718</td>\n",
" <td>0.000000</td>\n",
" <td>-0.127257</td>\n",
" <td>0.792318</td>\n",
" <td>0.207682</td>\n",
" <td>-0.000988</td>\n",
" <td>0.000061</td>\n",
" <td>0.014765</td>\n",
" <td>0.015976</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>86</td>\n",
" <td>76209.164582</td>\n",
" <td>4.871121</td>\n",
" <td>1.000000</td>\n",
" <td>2339.0</td>\n",
" <td>11.116667</td>\n",
" <td>25.5</td>\n",
" <td>40.643704</td>\n",
" <td>0.750000</td>\n",
" <td>0.154712</td>\n",
" <td>-1.021555</td>\n",
" <td>0.216071</td>\n",
" <td>0.783929</td>\n",
" <td>0.030799</td>\n",
" <td>0.022152</td>\n",
" <td>-0.036992</td>\n",
" <td>-0.046754</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>22</td>\n",
" <td>335180.430008</td>\n",
" <td>15.652972</td>\n",
" <td>1.000000</td>\n",
" <td>20193.0</td>\n",
" <td>13.237500</td>\n",
" <td>81.0</td>\n",
" <td>19.072084</td>\n",
" <td>5.158364</td>\n",
" <td>0.028313</td>\n",
" <td>-1.100355</td>\n",
" <td>0.651444</td>\n",
" <td>0.348556</td>\n",
" <td>0.096447</td>\n",
" <td>0.077212</td>\n",
" <td>-0.029813</td>\n",
" <td>-0.031076</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k4 \n",
"3 165 14436.315829 17.218849 0.949153 \n",
"0 131 28566.131306 0.894162 0.123077 \n",
"2 86 76209.164582 4.871121 1.000000 \n",
"1 22 335180.430008 15.652972 1.000000 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k4 \n",
"3 1252.0 2.527559 29.0 \n",
"0 14.0 1.650000 15.0 \n",
"2 2339.0 11.116667 25.5 \n",
"1 20193.0 13.237500 81.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k4 \n",
"3 7.756757 3.093750 \n",
"0 8.500000 0.619718 \n",
"2 40.643704 0.750000 \n",
"1 19.072084 5.158364 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k4 \n",
"3 0.018779 -0.996486 \n",
"0 0.000000 -0.127257 \n",
"2 0.154712 -1.021555 \n",
"1 0.028313 -1.100355 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
"cluster_k4 \n",
"3 0.916665 0.083335 \n",
"0 0.792318 0.207682 \n",
"2 0.216071 0.783929 \n",
"1 0.651444 0.348556 \n",
"\n",
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
"cluster_k4 \n",
"3 0.000208 0.001381 \n",
"0 -0.000988 0.000061 \n",
"2 0.030799 0.022152 \n",
"1 0.096447 0.077212 \n",
"\n",
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \n",
"cluster_k4 \n",
"3 -0.002783 -0.003603 \n",
"0 0.014765 0.015976 \n",
"2 -0.036992 -0.046754 \n",
"1 -0.029813 -0.031076 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=5 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" <th>corr_flow_fund_lag3_med</th>\n",
" <th>corr_flow_fund_lag6_med</th>\n",
" <th>corr_flow_rate_lag3_med</th>\n",
" <th>corr_flow_rate_lag6_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k5</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>168</td>\n",
" <td>12566.405685</td>\n",
" <td>16.253847</td>\n",
" <td>0.911577</td>\n",
" <td>1094.5</td>\n",
" <td>2.524180</td>\n",
" <td>28.5</td>\n",
" <td>8.166667</td>\n",
" <td>2.955196</td>\n",
" <td>0.036286</td>\n",
" <td>-0.970398</td>\n",
" <td>0.914496</td>\n",
" <td>0.085504</td>\n",
" <td>0.001715</td>\n",
" <td>0.003794</td>\n",
" <td>-0.000844</td>\n",
" <td>0.001543</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>111</td>\n",
" <td>37555.632000</td>\n",
" <td>0.661205</td>\n",
" <td>0.076923</td>\n",
" <td>6.0</td>\n",
" <td>1.511111</td>\n",
" <td>12.0</td>\n",
" <td>8.290323</td>\n",
" <td>0.586207</td>\n",
" <td>0.000000</td>\n",
" <td>-0.076923</td>\n",
" <td>0.818922</td>\n",
" <td>0.181078</td>\n",
" <td>-0.002355</td>\n",
" <td>-0.000290</td>\n",
" <td>0.011330</td>\n",
" <td>0.021365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>90</td>\n",
" <td>59767.645164</td>\n",
" <td>4.216773</td>\n",
" <td>0.995868</td>\n",
" <td>1725.5</td>\n",
" <td>10.811538</td>\n",
" <td>24.0</td>\n",
" <td>39.349432</td>\n",
" <td>0.708536</td>\n",
" <td>0.177072</td>\n",
" <td>-1.017685</td>\n",
" <td>0.181774</td>\n",
" <td>0.818226</td>\n",
" <td>0.029066</td>\n",
" <td>0.024737</td>\n",
" <td>-0.025887</td>\n",
" <td>-0.038057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>212211.195227</td>\n",
" <td>15.786289</td>\n",
" <td>1.000000</td>\n",
" <td>17459.5</td>\n",
" <td>9.768395</td>\n",
" <td>77.5</td>\n",
" <td>17.601779</td>\n",
" <td>5.770464</td>\n",
" <td>0.028313</td>\n",
" <td>-1.083467</td>\n",
" <td>0.691744</td>\n",
" <td>0.308256</td>\n",
" <td>0.062011</td>\n",
" <td>0.052249</td>\n",
" <td>-0.031718</td>\n",
" <td>-0.037537</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>114674.703704</td>\n",
" <td>7.607274</td>\n",
" <td>0.944444</td>\n",
" <td>1221.0</td>\n",
" <td>0.861111</td>\n",
" <td>12.0</td>\n",
" <td>3.800000</td>\n",
" <td>2.600000</td>\n",
" <td>-0.049426</td>\n",
" <td>-1.237358</td>\n",
" <td>0.999914</td>\n",
" <td>0.000086</td>\n",
" <td>-0.035506</td>\n",
" <td>-0.027489</td>\n",
" <td>-0.000900</td>\n",
" <td>-0.036517</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k5 \n",
"1 168 12566.405685 16.253847 0.911577 \n",
"3 111 37555.632000 0.661205 0.076923 \n",
"4 90 59767.645164 4.216773 0.995868 \n",
"0 30 212211.195227 15.786289 1.000000 \n",
"2 5 114674.703704 7.607274 0.944444 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k5 \n",
"1 1094.5 2.524180 28.5 \n",
"3 6.0 1.511111 12.0 \n",
"4 1725.5 10.811538 24.0 \n",
"0 17459.5 9.768395 77.5 \n",
"2 1221.0 0.861111 12.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k5 \n",
"1 8.166667 2.955196 \n",
"3 8.290323 0.586207 \n",
"4 39.349432 0.708536 \n",
"0 17.601779 5.770464 \n",
"2 3.800000 2.600000 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k5 \n",
"1 0.036286 -0.970398 \n",
"3 0.000000 -0.076923 \n",
"4 0.177072 -1.017685 \n",
"0 0.028313 -1.083467 \n",
"2 -0.049426 -1.237358 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \\\n",
"cluster_k5 \n",
"1 0.914496 0.085504 \n",
"3 0.818922 0.181078 \n",
"4 0.181774 0.818226 \n",
"0 0.691744 0.308256 \n",
"2 0.999914 0.000086 \n",
"\n",
" corr_flow_fund_lag3_med corr_flow_fund_lag6_med \\\n",
"cluster_k5 \n",
"1 0.001715 0.003794 \n",
"3 -0.002355 -0.000290 \n",
"4 0.029066 0.024737 \n",
"0 0.062011 0.052249 \n",
"2 -0.035506 -0.027489 \n",
"\n",
" corr_flow_rate_lag3_med corr_flow_rate_lag6_med \n",
"cluster_k5 \n",
"1 -0.000844 0.001543 \n",
"3 0.011330 0.021365 \n",
"4 -0.025887 -0.038057 \n",
"0 -0.031718 -0.037537 \n",
"2 -0.000900 -0.036517 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Variables summarised (by median) in the per-cluster profile tables.\n",
"profile_vars = [\n",
"    \"aum_qty_mean\",\n",
"    \"gross_flow_to_aum\",\n",
"    \"flow_freq\",\n",
"    \"n_tx_total\",\n",
"    \"avg_n_isin_held\",\n",
"    \"n_isin_total\",\n",
"    \"avg_holding_months_per_isin\",\n",
"    \"exit_rate_per_isin\",\n",
"    \"flow_direction_balance\",\n",
"    \"redemption_bias\",\n",
"    \"aum_drawdown_last\",\n",
"    \"aum_final_to_peak\",\n",
"    \"corr_flow_fund_lag3\",\n",
"    \"corr_flow_fund_lag6\",\n",
"    \"corr_flow_rate_lag3\",\n",
"    \"corr_flow_rate_lag6\",\n",
"    # Benchmark correlations are currently disabled in the profile:\n",
"    #\"corr_flow_bench_lag3\",\n",
"    #\"corr_flow_bench_lag6\"\n",
"]\n",
"\n",
"# Alternative variable list; it is not used by the profiling loop in this cell.\n",
"profile_vars2 = [\n",
"    \"aum_qty_mean\",\n",
"    \"gross_flow_to_aum\",\n",
"    \"flow_freq\",\n",
"    \"n_tx_total\",\n",
"    \"log_gross_flow_qty_mean\",\n",
"    \"net_flow_vol\",\n",
"]\n",
"\n",
"# Keep only the variables that are actual columns of dfc.\n",
"profile_vars = [col for col in profile_vars if col in dfc.columns]\n",
"\n",
"# Named-aggregation spec shared by every K: <var>_med = median of <var>.\n",
"median_spec = {f\"{col}_med\": (col, \"median\") for col in profile_vars}\n",
"\n",
"for k in [2, 4, 5]:\n",
"    print(f\"\\n===== K={k} =====\")\n",
"    by_cluster = dfc.groupby(f\"cluster_k{k}\")\n",
"    prof = by_cluster.agg(n_clients=(ID_COL, \"count\"), **median_spec)\n",
"    # Largest cluster first.\n",
"    prof = prof.sort_values(\"n_clients\", ascending=False)\n",
"    display(prof)"
]
},
{
"cell_type": "code",
"execution_count": 237,
"id": "ff8bdf91-859c-419e-a2ea-eb4a5f44f0df",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAkyVJREFUeJzs3XdYk9fbB/BvCKAgiiwVERdKQIaA4ECEihvqwlUt8MOtVat2KVoHaou2jrpaceGs1SpuwL2LuFAc4FYEFwKKCgok5/2DN4+EBEwgISTcn+vykpycPLlzeJLcPGfxGGMMhBBCCCFVmI66AyCEEEIIUTdKiAghhBBS5VFCRAghhJAqjxIiQgghhFR5lBARQgghpMqjhIgQQgghVR4lRIQQQgip8ighIoQQQkiVRwkRIYQQQqo8SogqmaCgIHz55ZdKPaZAIMCcOXOUekx5REVFQSAQIDU1tcKfGwCmTp0KX19ftTw3kZSamgqBQICoqCh1h6I0qnpNvr6+mDp1qlKPWZmfVxWCgoIQFBSk7jCUYs+ePejevTscHBzg7u6u7nC0mq66A6jMEhMTsWfPHsTHxyMtLQ21a9dGy5YtMWnSJDRp0kSiblBQEC5cuAAA4PF4MDQ0hIWFBZydndGnTx+0b99eHS+BELndu3cPMTEx6Nu3Lxo0aFCmY+zfvx8ZGRkICQlRbnBa5sqVKzh37hz+97//oVatWuoOp9y2bt0KAwMDBAQEqDsUmV68eIEdO3agc+fOsLe3V3c4crt//z5CQ0PRoUMHjBo1CtWrV1d3SFqNEqJSrF27FleuXEH37t0hEAiQnp6OrVu3IiAgANu3b4etra1E/Xr16uG7774DAOTm5uLx48c4cuQI9u3bhx49euD333+Hnp6eOl6KWvTu3Rv+/v7Q19dXdyhEDvfu3cOKFSvQunXrMidEBw4cwN27d6USIisrKyQmJkJXlz5yACAhIQErVqxA3759pRKi2NhY8Hi8Co+pPM+7bds2mJiYVJqEaN26dRK3X758iRUrVsDKykqjEqILFy5AJBJh+vTpaNSokbrD0Xr06VSKkJAQLFy4UOIL3c/PDz179sTq1auxcOFCifo1a9ZE7969Jcp++OEHzJs3D3///TesrKzw448/VkjslQGfzwefz1fa8XJzc2FgYKC045GKw+PxUK1aNXWHUSqRSIT8/Hy1x6muPyC06Q8XbXktGRkZAAq/W0rDGMPHjx/pClI50RiiUri5uUm9sRo3bozmzZvjwYMHch2Dz+fj559/RrNmzbB161a8fftWrsfduHEDX331FZydneHr64tt27ZJ1cnLy8OyZcvQpUsXODo6wsfHB7/99hvy8vJkHvPo0aP48ssv4ejoCH9/f5w+fVri/rS0NMyePRvdunWDs7Mz2rRpg2+//VZiDND169chEAiwe/duqeOfOXMGAoEAJ06cAFDyGKKtW7fC398fjo6O8PLyQlhYGLKzsyXqiMdS3bhxA19//TVatmyJxYsXc69j1KhR8PLygqOjIzp37oyVK1dCKBTK0bLSrl+/juHDh6NNmzZce4eGhkrUEYlE2LBhA/z9/eHk5ARPT0/MnDkTb968kaq3fPlyeHl5oWXLlggKCsK9e/ekxmeI2+bSpUuYN28e2rZtC3d3d8ycORN5eXnIzs7GTz/9BA8PD3h4eOC3334DY6xMMfn6+mL06NG4dOkS+vfvDycnJ3Tq1Al79uyRiGfixIkAgODgYAgEAggEAsTHx8vd5kFBQTh58iTS0tK4x4vHcJU03iYuLg5DhgyBi4sL3N3dMXbsWNy/f1+izvLlyyEQCPD48WNMnToV7u7uaNWqFUJDQ5GbmytRNzMzE/fv35cql0U8tm7fvn1cG545cwZAYRdLaGgoPD09uffLzp07P3vM5ORkTJ06FZ06dYKTkxPat2+P0NBQZGVlSbye3377DQDQqVMnrq3E75Oi54oi77fyxF38eYFP5+
jly5cRHh6Otm3bwsXFBePGjUNmZqbE4+7evYsLFy5wr6Xo+J3s7Gz88ssv8PHxgaOjI7p06YLVq1dDJBJxdcTnx7p167B9+3Z07twZjo6O6NevHxITEyXiTE9PR2hoKLy9vbnPkLFjx0p8zhQdQxQfH4/+/fsDAEJDQ7kYo6KisGzZMjg4OEi8HrEZM2bA3d0dHz9+lNle69atg0AgQFpamtR9ixYtgqOjI/defPToESZMmID27dvDyckJ3t7emDx5cqnfB76+vli+fDkAoF27dhAIBNxt8Xv6zJkzCAgIgLOzM/755x+521tcb+rUqWjVqhXc3d0xZcoUJCUlSb1PSxqPJWuMpjI/k4rG+euvv8LX1xeOjo7w9vbGTz/9hMzMTLx//x4uLi6YN2+e1OOeP38Oe3t7RERElNjGxdEVIgUxxvDq1Ss0b95c7sfw+Xz4+/tj6dKluHz5Mr744otS67958wajRo1Cjx494O/vj5iYGMyePRt6enrcG1skEmHs2LG4fPkyBg4cCBsbG9y5cwcbN27Eo0eP8Oeff0oc8/Llyzh8+DCGDBmCGjVqYPPmzfj2229x4sQJmJiYACj88E1ISIC/vz/q1auHtLQ0bNu2DcHBwTh48CAMDAzg5OQEa2trbqxJUdHR0TA2NoaXl1eJr2358uVYsWIFPD09MXjwYDx8+BDbtm3D9evXsW3bNokuxdevX2PkyJHw9/dHr169YGZmBgDYvXs3DA0NMXToUBgaGuL8+fNYtmwZ3r17hylTpsj9ewEK/wIbPnw4TExMMGrUKNSqVQupqak4cuSIRL2ZM2di9+7dCAgIQFBQEFJTU7F161bcunVLIu5FixZh7dq16NixIzp06IDk5GQMHz68xA/VefPmwdzcHBMmTMC1a9ewfft21KxZEwkJCbC0tMTkyZNx+vRprFu3Dra2tujTp4/CMQHA48ePMXHiRPTv3x99+/bFrl27MHXqVDg4OKB58+bw8PBAUFAQNm/ejDFjxqBp06YAABsbG7nbfMyYMXj79i2eP3/OJZQ1atQose3/++8/jBw5Eg0aNMD48ePx4cMHbNmyBYMHD0ZUVJRUt92kSZPQoEEDfPfdd7h16xb+/fdfmJqaSlx13bp1K1asWIFNmzahTZs2pf7uAeD8+fOIiYnB119/DRMTE1hZWeHVq1cYOHAgeDwevv76a5iamuL06dOYPn063r17V+r4qP/++w9PnjxBQEAALCwscPfuXezYsQP37t3Djh07wOPx0KVLFzx69AgHDhxAaGgo9/4zNTWVOp4i77fyxF2aefPmoVatWhg/fjzS0tKwceNGzJkzB3/88QcAYNq0aZg7dy4MDQ0xZswYAIC5uTmAwqu6gYGBePHiBb766itYWloiISEBixcvRnp6OqZPny7xXAcOHMD79+8xaNAg8Hg8rF27FhMmTMDRo0e583nChAm4d+8eAgMDYWVlhczMTJw7dw7Pnj2T2dVrY2ODb7/9FsuWLcOgQYPQqlUrAIV/8LZq1QorV65EdHQ0AgMDucfk5eXh0KFD6Nq1a4lXDMXDIGJiYjBixAiJ+2JiYtC+fXsYGxsjLy8Pw4cPR15eHgIDA2Fubo4XL17g5MmTyM7OLvHqz7Rp07Bnzx4cOXIEs2fPhqGhIQQCAXf/w4cP8f3332PQoEEYOHAgmjRpInd7M8bwzTff4PLly/jqq69gY2ODI0eOKPz5WZwyP5MA4P379/j6669x//599OvXDy1atEBWVhaOHz+OFy9ewN7eHp07d0ZMTAxCQ0MleiQOHDgAxhh69uwp/wtgRCF79uxhtra27N9//5UoDwwMZP7+/iU+7siRI8zW1pZt3Lix1OMHBgYyW1tbtn79eq7s48ePrHfv3qxdu3YsLy+Pi8POzo5dvHhR4vHbtm1jtra27PLly1yZra0tc3BwYI8fP+bKkpKSmK2tLdu8eTNXlpubKxVPQkICs7W1Zbt37+bKFi1axBwcHNjr16
8lYnR3d2ehoaFc2a5du5itrS178uQJY4yxjIwM5uDgwIYNG8aEQiFXb8uWLczW1pbt3LlTqh22bdsmFZOsOGfMmMF
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Cluster scatter plot (features2): activity frequency vs relative flow intensity.\n",
"sns.set_style(\"whitegrid\")\n",
"thr_int = dfc[\"gross_flow_to_aum\"].median()\n",
"thr_freq = dfc[\"flow_freq\"].median()\n",
"\n",
"# Cluster ids left out of this view.\n",
"hidden_clusters = [2, 4, 6.0]\n",
"shown = dfc[~dfc['cluster_k5'].isin(hidden_clusters)]\n",
"\n",
"fig, ax = plt.subplots()\n",
"for cluster_id, members in shown.groupby(\"cluster_k5\"):\n",
"    ax.scatter(members[\"flow_freq\"], members[\"gross_flow_to_aum\"], s=10, label=f\"Cluster {int(cluster_id)}\")\n",
"\n",
"ax.set_yscale(\"log\")\n",
"# Dashed guides at the medians computed over all of dfc.\n",
"ax.axvline(thr_freq, linestyle=\"--\")\n",
"ax.axhline(thr_int, linestyle=\"--\")\n",
"ax.set_xlabel(\"Activity frequency (share of active months)\")\n",
"ax.set_ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"ax.set_title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"ax.legend(markerscale=2)\n",
"ax.set_ylim(0.1, 1000)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 238,
"id": "0bb325e2-a490-465c-9c8f-2121694f9b92",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAl4VJREFUeJzs3XdcU9f7B/BPCCDgYKOo4GbIEBAHiKC4cQKOaoW6R9VWO5y1raOF9ldttdo6655VcBW07omIihtwoDJUVEAcoEByf3/wTUpIArnhZpHn/Xr5kpzc3Pvk3tybJ+ecew6PYRgGhBBCCCF6zEDTARBCCCGEaBolRIQQQgjRe5QQEUIIIUTvUUJECCGEEL1HCREhhBBC9B4lRIQQQgjRe5QQEUIIIUTvUUJECCGEEL1HCREhhBBC9B4lRFomIiIC/fr143Sdzs7OWLhwIafrVERMTAycnZ2RlZWl9m0DwOzZsxEcHKyRbRNJWVlZcHZ2RkxMjKZD4Yyq3lNwcDBmz57N6Tq1ebuqEBERgYiICE2HwYl9+/ahd+/ecHNzg6+vr6bDqdEMNR2ANrtx4wb27duHxMREZGdnw8LCAm3atMH06dPRrFkziWUjIiJw6dIlAACPx4OZmRlsbW3h6emJQYMGoVOnTpp4C4Qo7P79+4iPj0doaCgaN26s1DoOHjyI3NxcjBo1itvgapirV6/i/Pnz+OSTT1CvXj1Nh1Nt27Ztg6mpKcLCwjQdikw5OTnYvXs3unfvDldXV02Ho7AHDx5gzpw56Ny5MyZMmAATExNNh1SjUUJUiXXr1uHq1avo3bs3nJ2d8eLFC2zbtg1hYWHYtWsXnJycJJZv0KABvvjiCwBAUVERHj9+jKNHj+LAgQPo06cP/u///g9GRkaaeCsaMXDgQPTt2xfGxsaaDoUo4P79+1ixYgXat2+vdEJ06NAh3Lt3TyohatSoEW7cuAFDQ7rkAEBycjJWrFiB0NBQqYTo8OHD4PF4ao+pOtvdsWMHLC0ttSYhWr9+vcTj58+fY8WKFWjUqJFOJUSXLl2CUCjEvHnz0KRJE02HU+PR1akSo0aNwi+//CLxhR4SEoL+/ftjzZo1+OWXXySWr1u3LgYOHChR9tVXX2Hx4sXYvn07GjVqhK+//lotsWsDPp8PPp/P2fqKiopgamrK2fqI+vB4PNSqVUvTYVRKKBSipKRE43Fq6gdETfrhUlPeS25uLoCy75bKMAyDDx8+UA1SNVEfokr4+PhInVhNmzZFq1atkJ6ertA6+Hw+vvnmG7Rs2RLbtm3DmzdvFHrdrVu38NFHH8HT0xPBwcHYsWOH1DLFxcVYvnw5evToAXd3dwQFBeHnn39GcXGxzHUeO3YM/fr1g7u7O/r27YszZ85IPJ+dnY3vv/8evXr1gqenJzp06IDPPvtMog/QzZs34ezsjNjYWKn1nz17Fs7Ozjh58iQA+X2Itm3bhr59+8Ld3R0BAQFYsGABXr9+LbGMqC/VrVu38PHHH6NNmzZYunSp+H1MmDABAQEBcHd3R/fu3bFy5UoIBAIF9qy0mzdvYuzYsejQoYN4f8+ZM0diGaFQiI0bN6Jv377w8PCAv78/vv32WxQUFEgt9/vvvyMgIABt2rRBREQE7t+/L9U/Q7RvLl++jMWLF6Njx47w9fXFt99+i+LiYrx+/RozZ85Eu3bt0K5dO/z8889gGEapmIKDgzFx4kRcvnwZgwcPhoeHB7p164Z9+/ZJxPP5558DACIjI+Hs7AxnZ2ckJiYqvM8jIiJw6tQpZGdni18v6sMlr79NQkICRowYAS8vL/j6+mLy5Ml48OCBxDK///47nJ2d8fjxY8yePRu+vr5o27Yt5syZg6KiIoll8/Ly8ODBA6lyWUR96w4cOCDeh2fPngVQ1sQyZ84c+Pv7i8+XPXv2VLnO1NRUzJ49G926dYOHhwc6deqEOXPmID8/X+L9/PzzzwCAbt26ifeV6Dwp/1
lhc75VJ+6K2wX++4xeuXIFUVFR6NixI7y8vDBlyhTk5eVJvO7evXu4dOmS+L2U77/z+vVr/PDDDwgKCoK7uzt69OiBNWvWQCgUipcRfT7Wr1+PXbt2oXv37nB3d0d4eDhu3LghEeeLFy8wZ84cBAYGiq8hkydPlrjOlO9DlJiYiMGDBwMA5syZI44xJiYGy5cvh5ubm8T7EZk/fz58fX3x4cMHmftr/fr1cHZ2RnZ2ttRzS5Ysgbu7u/hcfPToEaZNm4ZOnTrBw8MDgYGBmDFjRqXfB8HBwfj9998BAH5+fnB2dhY/Fp3TZ8+eRVhYGDw9PbFz506F97doudmzZ6Nt27bw9fXFrFmzkJKSInWeyuuPJauPJpfXpPJx/vjjjwgODoa7uzsCAwMxc+ZM5OXl4d27d/Dy8sLixYulXvfs2TO4urpi9erVcvdxRVRDxBLDMHj58iVatWql8Gv4fD769u2LZcuW4cqVK+jSpUulyxcUFGDChAno06cP+vbti/j4eHz//fcwMjISn9hCoRCTJ0/GlStXMHToULRo0QJ3797Fpk2b8OjRI/zxxx8S67xy5Qr+/fdfjBgxArVr18aWLVvw2Wef4eTJk7C0tARQdvFNTk5G37590aBBA2RnZ2PHjh2IjIzEP//8A1NTU3h4eMDBwUHc16S8uLg4mJubIyAgQO57+/3337FixQr4+/tj+PDhePjwIXbs2IGbN29ix44dEk2Kr169wvjx49G3b18MGDAA1tbWAIDY2FiYmZlh9OjRMDMzw8WLF7F8+XK8ffsWs2bNUvi4AGW/wMaOHQtLS0tMmDAB9erVQ1ZWFo4ePSqx3LfffovY2FiEhYUhIiICWVlZ2LZtG+7cuSMR95IlS7Bu3Tp07doVnTt3RmpqKsaOHSv3orp48WLY2Nhg2rRpuH79Onbt2oW6desiOTkZ9vb2mDFjBs6cOYP169fDyckJgwYNYh0TADx+/Biff/45Bg8ejNDQUOzduxezZ8+Gm5sbWrVqhXbt2iEiIgJbtmzBpEmT0Lx5cwBAixYtFN7nkyZNwps3b/Ds2TNxQlm7dm25+/7ChQsYP348GjdujKlTp+L9+/fYunUrhg8fjpiYGKlmu+nTp6Nx48b44osvcOfOHfz999+wsrKSqHXdtm0bVqxYgc2bN6NDhw6VHnsAuHjxIuLj4/Hxxx/D0tISjRo1wsuXLzF06FDweDx8/PHHsLKywpkzZzBv3jy8ffu20v5RFy5cQGZmJsLCwmBra4t79+5h9+7duH//Pnbv3g0ej4cePXrg0aNHOHToEObMmSM+/6ysrKTWx+Z8q07clVm8eDHq1auHqVOnIjs7G5s2bcLChQvx22+/AQDmzp2LRYsWwczMDJMmTQIA2NjYACir1R05ciRycnLw0Ucfwd7eHsnJyVi6dClevHiBefPmSWzr0KFDePfuHYYNGwYej4d169Zh2rRpOHbsmPjzPG3aNNy/fx8jR45Eo0aNkJeXh/Pnz+Pp06cym3pbtGiBzz77DMuXL8ewYcPQtm1bAGU/eNu2bYuVK1ciLi4OI0eOFL+muLgYR44cQc+ePeXWGIq6QcTHx2PcuHESz8XHx6NTp04wNzdHcXExxo4di+LiYowcORI2NjbIycnBqVOn8Pr1a7m1P3PnzsW+fftw9OhRfP/99zAzM4Ozs7P4+YcPH+LLL7/EsGHDMHToUDRr1kzh/c0wDD799FNcuXIFH330EVq0aIGjR4+yvn5WxOU1CQDevXuHjz/+GA8ePEB4eDhat26N/Px8nDhxAjk5OXB1dUX37t0RHx+POXPmSLRIHDp0CAzDoH///oq/AYawsm/fPsbJyYn5+++/JcpHjhzJ9O3bV+7rjh49yjg5OTGbNm2qdP0jR45knJycmL/++ktc9uHDB2bgwIGMn58fU1xcLI7DxcWFSUpKknj9jh07GCcnJ+bKlSviMicnJ8bNzY15/PixuCwlJYVxcnJitmzZIi4rKiqSii
c5OZlxcnJiYmNjxWVLlixh3NzcmFevXknE6Ovry8yZM0dctnfvXsbJyYnJzMxkGIZhcnNzGTc3N2bMmDGMQCAQL7d
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Cluster scatter plot (features2): same view as above, restricted by the exclusion list below.\n",
"\n",
"thr_int = dfc[\"gross_flow_to_aum\"].median()\n",
"thr_freq = dfc[\"flow_freq\"].median()\n",
"\n",
"# Cluster ids left out of this view.\n",
"hidden_clusters = [1, 3, 0, 4, 6.0]\n",
"shown = dfc[~dfc['cluster_k5'].isin(hidden_clusters)]\n",
"\n",
"fig, ax = plt.subplots()\n",
"for cluster_id, members in shown.groupby(\"cluster_k5\"):\n",
"    ax.scatter(members[\"flow_freq\"], members[\"gross_flow_to_aum\"], s=10, label=f\"Cluster {int(cluster_id)}\")\n",
"\n",
"ax.set_yscale(\"log\")\n",
"# Dashed guides at the medians computed over all of dfc.\n",
"ax.axvline(thr_freq, linestyle=\"--\")\n",
"ax.axhline(thr_int, linestyle=\"--\")\n",
"ax.set_xlabel(\"Activity frequency (share of active months)\")\n",
"ax.set_ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"ax.set_title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"ax.legend(markerscale=2)\n",
"ax.set_ylim(0.1, 1000)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 314,
"id": "4d04e670-51ae-482d-a5c5-93fe8263cfeb",
"metadata": {},
"outputs": [],
"source": [
"# Analysis: features -- legend labels for the K=5 clusters.\n",
"# NOTE(review): K-Means label ids are arbitrary. The names below are tied to the\n",
"# current cluster_k5 numbering and must be re-checked against the K=5 profile\n",
"# table whenever the clustering is re-fit on different data or with another seed.\n",
"cluster_names = {\n",
"    0: \"Large and highly active movers\",\n",
"    1: \"Occasional large movers\",\n",
"    3: \"Dormant profiles\",\n",
"    4: \"Loyal clients\",\n",
"}\n",
"\n",
"# Cluster sizes are read from the data instead of being typed into the label\n",
"# strings, so the legend cannot drift from the actual assignment.\n",
"cluster_sizes = dfc[\"cluster_k5\"].value_counts()\n",
"\n",
"labels_map = {\n",
"    cid: f\"Cluster {cid} ({int(cluster_sizes.get(cid, 0))}): {cname}\"\n",
"    for cid, cname in cluster_names.items()\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 315,
"id": "6ebe0025-0532-4e51-acb2-81aa786a164b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAHqCAYAAADVi/1VAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdYU9cbB/BvWGEPBQeCogxFVFBEVLRoHbhw1lV/DFet4l6oVQFHFbdVnG1ddddZrQNxL5w4UXHgxK0ooIzk/P5IcyVkkEBCAryf5+GB3Jx777knN5ebN+e8h8cYYyCEEEIIIYQQQgghpAjpabsChBBCCCGEEEIIIaT0oaAUIYQQQgghhBBCCClyFJQihBBCCCGEEEIIIUWOglKEEEIIIYQQQgghpMhRUIoQQgghhBBCCCGEFDkKShFCCCGEEEIIIYSQIkdBKUIIIYQQQgghhBBS5CgoRQghhBBCCCGEEEKKHAWlCCGEEEIIIYQQQkiRo6BUMdWsWTPUqlVLrdvk8XgYOnSoWrepjLVr14LH4yE5ObnI9w0AoaGhcHJy0sq+iaTk5GTweDysXbtW21VRG00dk5OTE0JDQ9W6TV3eryY0a9YMzZo103Y11GLDhg2oUaMGDA0NYW1tre3qEFKq0D2Z+tA9me4oifdkuioyMhI8Hq9A66rzXsbJyQkdOnRQy7aKu8K8JupWku5X5aGglBpcvHgRQ4cOhYeHB8zMzFC5cmX06NED9+7dkyrbrFkz8Hg88Hg86OnpwdLSEtWrV0dQUBBiY2O1UHtCVHP79m1ERkYW6oZ106ZNWLRokdrqVFKdPXsWkZGR+Pjxo7arohbLli3T6ZvbFy9eIDIyEgkJCdquikru3LmD0NBQODs7Y/Xq1Vi1apW2q0SI1tA9GSlN6J6scNTRfsrKyMhAZGQkjh8/rvF9kZKtuN6vKmKg7QqUBNHR0Thz5gy6d++OOnXq4OXLl1i6dCnq1auH8+fPS3175uDggFmzZgEA0tPTcf/+fezcuRN//fUXevTogb/++guGhobaOBStCAoKQq9evcDn87VdFaKE27dvIyoqCs2aNSvwt5mbNm3CzZs3MXLkSInlVapUwZcvX0rV+a/I2bNnERUVhdDQUKneL3fv3oWeXtF/r1CY/S5btgy2trY609Pq8OHDEo9fvHiBqKgoODk5wcvLSzuVKoDjx49DKBRi8eLFcHFx0XZ1CNEquicrHLonK17onqxw1NF+ysrIyEBUVBQASPV6mTx5MiZMmKDR/ZOSo7jerypCQSk1GD16NDZt2gQjIyNuWc+ePVG7dm3Mnj0bf/31l0R5Kysr/O9//5NYNnv2bAwfPhzLli2Dk5MToqOji6TuukBfXx/6+vpq215GRgZMTU3Vtj1SdHg8HoyNjbVdDYWEQiGysrK0Xk9tfWAoSR9Ucl+zi7PXr18DQL7D9hhj+Pr1K0xMTIqgVoRoB92TFQ7dkxGx4nBPVlIYGBjAwKB0fCynexEiCw3fU4PGjRtLfbhxdXWFh4cHEhMTldqGvr4+fvvtN9SsWRNLly5FamqqUutdvnwZjRs3homJCapWrYoVK1ZIlcnMzERERARcXFzA5/Ph6OiI8ePHIzMzU+Y2d+/ejVq1aoHP58PDwwMHDx6UeP7x48cYMmQIqlevDhMTE5QtWxbdu3eX6Pp66dIl8Hg8rFu3Tmr7hw4dAo/Hw759+wDIz1+wbNkyeHh4gM/nw97eHmFhYVLDmMR5HC5fvozvvvsOpqammDRpEgBgz549aN++Pezt7cHn8+Hs7Izp06dDIBDk16wyXbp0CQEBAbC1teXau1+/fhJlhEIhFi1aBA8PDxgbG6N8+fIYNGgQPnz4IFUuMjIS9vb2MDU1RfPmzXH79m2pfD3itjl9+jSGDx8OOzs7WFtbY9CgQcjKysLHjx8RHBwMGxsb2NjYYPz48WCMFahO4nHkp0+fRoMGDWBsbIxq1aph/fr1Ev
Xp3r07AKB58+bcsAdxV2Rl2rxZs2bYv38/Hj9+zK0v/nZKXv6Co0ePomnTpjAzM4O1tTU6deok9d4Sj/2+f/8+17PIysoKffv2RUZGhkTZt2/f4s6dO1LLZRHn9di4cSN3PorfE8+fP0e/fv1Qvnx57v3y559/5rvN69evIzQ0FNWqVYOxsTEqVKiAfv364d27dxLHM27cOABA1apVubYSv09ynyuqvN8KU++8+wW+naNnzpzB6NGjYWdnBzMzM3Tp0gVv3ryRWO/WrVs4ceIEdyy5vyn8+PEjRo4cCUdHR/D5fLi4uCA6OhpCoZArIz4/5s2bh1WrVsHZ2Rl8Ph8+Pj64ePGiRD1fvnyJvn37wsHBAXw+HxUrVkSnTp0krjO5x+gfP34cPj4+AIC+fftydVy7di0iIiJgaGgocTxiP/30E6ytrfH161eZ7TVv3jzweDw8fvxY6rmJEyfCyMiIey8mJSWhW7duqFChAoyNjeHg4IBevXop/H/g5OSEiIgIAICdnR14PB4iIyO55zp06IBDhw6hfv36MDExwcqVK5Vub3G50NBQWFlZwdraGiEhIUhISJB6n8rLdyArP4w6r0m56zlq1Cg4OTmBz+fDwcEBwcHBePv2LdLS0mBmZoYRI0ZIrffs2TPo6+tzPWVI8Uf3ZHRPJkb3ZCX7nqx69eowNjaGt7c3Tp48KVX26tWraNu2LSwtLWFubo4WLVrg/PnzSrcfABw4cIA7VgsLC7Rv3x63bt2S2E9oaCjMzc3x/PlzdO7cGebm5rCzs8PYsWO5tk5OToadnR0AICoqituX+P+1rPxFa9aswffff49y5cqBz+ejZs2aWL58eb5tpMhff/2FBg0awNTUFDY2Nvjuu++keo0DyPf/rrx8S7KuH/LuRY4fPw4ej4dt27Zh5syZcHBwgLGxMVq0aIH79+8rPI6///4bPB4PJ06ckHpu5cqV4PF4uHnzJgDl7geVlZOTg+nTp3P3n05OTpg0aZLE9TskJAS2trbIzs6WWr9169aoXr0697ggr7Gm7le1jhGNEAqFrFKlSqx169YSy/39/ZmHh4fc9aZPn84AsH379incvr+/P7O3t2flypVjQ4cOZb/99htr0qQJA8D++OMPrpxAIGCtW7dmpqambOTIkWzlypVs6NChzMDAgHXq1ElimwCYp6cnq1ixIps+fTpbtGgRq1atGjM1NWVv377lym3fvp15enqyqVOnslWrVrFJkyYxGxsbVqVKFZaens6Vq1atGmvXrp1U3fv27ctsbGxYVlYWY4yxNWvWMADs0aNHXJmIiAgGgLVs2ZItWbKEDR06lOnr6zMfHx9uPXE7VKhQgdnZ2bFhw4axlStXst27dzPGGOvcuTPr0aMHmzt3Llu+fDnr3r07A8DGjh0rUZ+QkBBWpUoVhe396tUrZmNjw9zc3NjcuXPZ6tWr2S+//MLc3d0lyg0YMIAZGBiwgQMHshUrVrDw8HBmZmYmVe/x48czACwwMJAtXbqUDRw4kDk4ODBbW1sWEhLClRO3jZeXF2vTpg2LiYlhQUFBDAAbP348a9KkCfvxxx/ZsmXLWIcOHRgAtm7dugLVqUqVKqx69eqsfPnybNKkSWzp0qWsXr16jMfjsZs3bzLGGHvw4AEbPnw4A8AmTZrENmzYwDZs2MBevnypdJsfPnyYeXl5MVtbW279Xbt2McYYe/ToEQPA1qxZw5WPjY1lBgYGzM3Njc2ZM4dFRUUxW1tbZmNjI/OcqVu3LuvatStbtmwZGzBgANdWuYnLHjt2TOHrzpjofeHu7s7s7OxYVFQUi4mJYVevXmUvX75kDg4OzNHRkU2bNo0tX76cdezYkQFgCxcu5NaXdUzz5s1jTZs2ZdOmTWOrVq1iI0aMYCYmJqxBgwZMKBQyxhi7du0a6927N7c9cVulpaVxr1fuc0XZ95uy9ZYn737F52jdunXZ999/z5YsWcLGjBnD9PX1WY8ePbhyu3btYg4ODq
xGjRrcsRw+fJgxxlh6ejqrU6cOK1u2LJs0aRJbsWIFCw4OZjwej40YMUKqLevWrctcXFxYdHQ0mzNnDrO1tWUODg4
"text/plain": [
"<Figure size 1200x500 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Two side-by-side views of the K=5 segmentation, labelled through labels_map.\n",
"fig, axes = plt.subplots(1, 2, figsize=(12,5), sharey=False)\n",
"\n",
"# Median guides for plot 1. thr_int is computed here rather than inherited from\n",
"# an earlier cell, so this cell does not depend on leftover kernel state.\n",
"thr_log_tx = dfc[\"log_n_tx_total\"].median()\n",
"thr_int = dfc[\"gross_flow_to_aum\"].median()\n",
"\n",
"# --- Plot 1: relative flow intensity vs log transaction count (clusters 2 and 4 hidden) ---\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([2,4])].groupby(\"cluster_k5\"):\n",
"    axes[0].scatter(\n",
"        g[\"log_n_tx_total\"], g[\"gross_flow_to_aum\"],\n",
"        s=10,\n",
"        # Fall back to a generic label for ids that labels_map does not name.\n",
"        label=labels_map.get(int(name), \"Cluster {}\".format(int(name)))\n",
"    )\n",
"\n",
"axes[0].set_yscale(\"log\")\n",
"axes[0].axvline(thr_log_tx, linestyle=\"--\")\n",
"axes[0].axhline(thr_int, linestyle=\"--\")\n",
"axes[0].set_xlabel(\"Activity frequency (log_n_tx_total)\")\n",
"axes[0].set_ylabel(\"Gross flow / mean AUM\")\n",
"axes[0].set_title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"axes[0].set_ylim(0.1,1000)\n",
"axes[0].legend(markerscale=2)\n",
"\n",
"# --- Plot 2: last AUM drawdown vs average holding period (clusters 0, 2 and 3 hidden) ---\n",
"thr_churn = dfc[\"aum_drawdown_last\"].median()\n",
"thr_hold = dfc[\"avg_holding_months_per_isin\"].median()\n",
"\n",
"# Fixed colours for the two clusters shown; any other id falls back to gray.\n",
"color_map = {\n",
"    1: \"#ff7f0e\",\n",
"    4: \"red\"\n",
"}\n",
"\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([0,2,3])].groupby(\"cluster_k5\"):\n",
"    axes[1].scatter(\n",
"        g[\"avg_holding_months_per_isin\"], g[\"aum_drawdown_last\"],\n",
"        s=10,\n",
"        color= color_map.get(int(name), \"gray\"),\n",
"        label=labels_map.get(int(name), \"Cluster {}\".format(int(name)))\n",
"    )\n",
"\n",
"axes[1].set_yscale(\"log\")\n",
"axes[1].axvline(thr_hold, linestyle=\"--\")\n",
"axes[1].axhline(thr_churn, linestyle=\"--\")\n",
"axes[1].set_xlabel(\"avg_holding_months_per_isin\")\n",
"axes[1].set_ylabel(\"aum_drawdown_last\")\n",
"axes[1].set_title(\"2D behavioral segmentation: potential churn vs loyalty\")\n",
"axes[1].legend(markerscale=2)\n",
"axes[1].set_ylim(0.001,1.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 316,
"id": "5b3c5228-c176-4f1c-8edb-5b5d093df8a9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjtFJREFUeJzt3XdYU2cbB+BfAoQlSxAQ2bjAAShqceFAKVWs2s9VqzjbKm5r1Q5nrataq+Ksq9aBWrV2OHErrahAXeBCcaGiDAVlJO/3B80pIQnkQEIS8tzX5SU5OTnnOS8h58k7BYwxBkIIIYQQAybUdgCEEEIIIdpGCREhhBBCDB4lRIQQQggxeJQQEUIIIcTgUUJECCGEEINHCREhhBBCDB4lRIQQQggxeJQQEUIIIcTgUUJECCGEEINHCZEO6tChAxo3bqzWYwoEAowZM0atx1TF5s2bIRAIcO/evSo/NwAMGTIEnp6eWjk3kXXv3j0IBAJs3rxZ26GojaauydPTE0OGDFHrMXX5vJrQoUMHdOjQQdthqMXWrVvRsGFDmJiYwNbWVtvhVFuUEJUjPj4eY8aMQaNGjWBpaQl3d3f07dsXN2/elNu3Q4cOEAgEEAgEEAqFsLa2RoMGDTBo0CAcPXpUC9ETws/169cxa9asSiWw27dvx7Jly9QWU3V1/vx5zJo1C1lZWdoORS1WrVql08nu48ePMWvWLCQmJmo7FF6Sk5MxZMgQ+Pj4YP369Vi3bp22Q6q2jLUdgK5buHAhzp07hz59+qBp06ZIT0/HypUr0axZM/z1119yNTmurq6YP38+ACA3Nxe3b9/G3r178fPPP6Nv3774+eefYWJioo1L0YpBgwahf//+MDU11XYoRAXXr1/H7Nmz0aFDhwrXrG3fvh1Xr17FhAkTZLZ7eHjgzZs3BvX+L8v58+cxe/ZsDBkyRO5bf0pKCoTCqv++Wpnzrlq1Cg4ODjpTw3TkyBGZx48fP8bs2bPh6emJgIAA7QRVASdPnoREIsEPP/yAunXrajucao0SonJMmjQJ27dvh0gk4rb169cPTZo0wYIFC/Dzzz/L7G9jY4OPPvpIZtuCBQswbtw4rFq1Cp6enli4cGGVxK4LjIyMYGRkpLbj5eXlwcLCQm3HI1VHIBDAzMxM22GUSSKRoKCgQOtxausLRHX64lLyM1ufPXv2DADKbSpjjOHt27cwNzevgqiqKUYqpFmzZqxZs2Yy20JCQlijRo0U7l9UVMT8/PyYhYUFy8rKKvPY0uNcvHiRBQcHMzMzM+bp6clWr14tt+/bt2/ZjBkzmI+PDxOJRMzV1ZVNmTKFvX37VmY/ACwqKort27ePNWrUiIlEIubn58cOHjwos9+9e/fYqFGjWP369ZmZmRmrWbMm+9///sdSU1O5feLj4xkAtnnzZrl4Dh06xACw3377jTHG2KZNmxgAmdczxlh0dDTz8/NjIpGI1a5dm40ePZplZmYqLYd27doxc3NzNn78eMYYY/v372fvvfceq127NhOJRMzb25vNmTOHFRUVyRwjMjKSeXh4lFHa/11T165dmb29PVfeQ4cOldlHLBaz77//nvn5+TFTU1Pm6OjIPv74Y/by5Uu5/WbOnMlq167NzM3NWYcOHdi1a9eYh4cHi4yM5PaTls2ZM2fY2LFjmYODA7OxsWEff/wxy8/PZ5mZmWzQoEHM1taW2drasilTpjCJRFKhmDw8PFi3bt3YmTNnWIsWLZipqSnz8vJiW7ZskYun9L8TJ06oXOYhISFyr5eWf2pqKgPANm3aJBNbbGwsa9u2LbOwsGA2NjasR48e7Pr16zL7zJw5kwFgt27dYpGRkczGxoZZW1uzIUOGsNzcXJl9nz9/zm7cuCG3XRHp38XPP//M/Pz8mLGxMdu3bx9jjLGHDx+yoUOHMkdHR+7vZcOGDTKvV3RNSUlJLDIyknl5eTFTU1Pm5OTEhg4dyjIyMuSup/Q/6d9Jyf
cKn783VeNWRtl79OzZs2zixInMwcGBWVhYsJ49e7Jnz57JvK70tYSEhHDPZ2ZmsvHjxzNXV1cmEomYj48PW7BgAROLxXJluXjxYrZ27Vrm7e3NRCIRCwoKYhcuXJCJ88mTJ2zIkCGsTp06TCQSMWdnZ9ajRw+Zz5mQkBAuhhMnTigs702bNrEZM2YwY2NjmeuRGjlyJLOxsWFv3rxRWF6LFy9mANi9e/fknps2bRozMTHh/hZv3rzJevfuzZycnJipqSmrU6cO69evX5n3A0XlOnPmTO65bt26sUOHDrHmzZszU1NT9v3336tc3tL9IiMjmbW1NbOxsWGDBw9mCQkJcu/pkmVZkqLPV3V+JpWMc8KECczDw4OJRCJWp04dNmjQIPb8+XP26tUrZmFhwcaNGyf3ugcPHjChUMi+/fZbpWVcEiVEFSCRSFidOnVY165dZbaXlRAxxtjcuXMZAPb777+XefyQkBDm4uLCHB0d2ZgxY9jy5ctZ27ZtGQCZDzaxWMy6du3KLCws2IQJE9jatWvZmDFjmLGxMXv//fdljgmA+fv7s9q1a7O5c+eyZcuWMW9vb2ZhYSHzQb17927m7+/PZsyYwdatW8e++OILZmdnxzw8PGRuMN7e3uy9996Ti33o0KHMzs6OFRQUMMYUJ0TSm0FoaChbsWIFGzNmDDMyMmItWrTgXictB2dnZ1arVi02duxYtnbtWrZ//37GGGM9e/Zkffv2ZYsXL2arV69mffr0YQDYZ599JhOPKgnR06dPmZ2dHatfvz5bvHgxW79+Pfvyyy+Zr6+vzH4jRoxgxsbGbOTIkWzNmjVs6tSpzNLSUi7uzz//nAFgERERbOXKlWzkyJHM1dWVOTg4KLzZBAQEsHfffZdFR0ezQYMGMQDs888/Z23btmUffvghW7VqFevevTsDIPdhoWpMHh4erEGDBszJyYl98cUXbOXKlaxZs2ZMIBCwq1evMsYYu3PnDhs3bhwDwL744gu2detWtnXrVpaenq5ymR85coQFBAQwBwcH7vXSBENR8nD06FFmbGzM6tevzxYtWsRmz57NHBwcmJ2dncL3TGBgIOvduzdbtWoVGzFiBFdWJUn3lSZyZQHAfH19Wa1atdjs2bNZdHQ0S0hIYOnp6czV1ZW5ubmxOXPmsNWrV7MePXowANxNR9k1fffdd6xdu3Zszpw5bN26dWz8+PHM3NyctWzZkktok5KS2IABA7jjScvq9evX3O+r5HtF1b83VeNWRllCFBgYyDp16sRWrFjBJk+ezIyMjFjfvn25/fbt28dcXV1Zw4YNuWs5cuQIY4yx3Nxc1rRpU2Zvb8+++OILtmbNGjZ48GAmEAi4LzglyzIwMJDVrVuXLVy4kC1atIg5ODgwV1dXmfdz69atmY2NDfvqq6/Yjz/+yL799lvWsWNHdurUKW6fkjfx9PR0NmfOHAaAffzxx1yMd+7cYbdu3WIA2IoVK2TKIj8/n9nZ2bFhw4YpLa/79+8zgUDAFi1aJPect7c369atG3csLy8v5uLiwr755hv2448/stmzZ7MWLVooTKZKlmuvXr0YALZ69Wq2detWlpSUxP2u6taty+zs7Ni0adPYmjVr2IkTJ1Qub4lEwtq3b8+EQiEbPXo0W7FiBevUqRNr2rRppRIidX4mMcbYq1evWOPGjZmRkREbOXIkW716NZs7dy5r0aIFS0hIYIwxNnDgQObk5CT3hXjRokVMIBCw+/fvKy3jkighqoCtW7fKJSeMlZ8Q7du3jwFgP/zwQ5nHl37LXrJkCbctPz+fBQQEMEdHR+5NtXXrViYUCtmZM2dkXr9mzRoGgJ07d47bBoCJRCJ2+/ZtbltSUpLcB0FeXp5cPHFxcQwA++mnn7ht06dPl/n2I43R1tZW5gOkdEL07NkzJhKJWNeuXWW+raxcuZIBYBs3bpQrhzVr1sjFpCjOTz75hFlYWMjUjqmSEEl/L/Hx8Ur3OXPmDAPAtm3bJrNd+g
1duj09PZ0ZGxuznj17yuw3a9YsBkDhzSYsLEym5ic4OJgJBAL26aefctuKioqYq6urzIeSqjEx9t83zdOnT3Pbnj1
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Profil fidele (avg_holding_months_per_isin_med) vs churn (aum_drawdown_last_med)\n",
"\n",
"thr_churn = dfc[\"aum_drawdown_last\"].median()\n",
"thr_hold = dfc[\"avg_holding_months_per_isin\"].median()\n",
"\n",
"plt.figure()\n",
"for name, g in dfc[~dfc['cluster_k5'].isin([0,2,3])].groupby(\"cluster_k5\"):\n",
" plt.scatter(g[\"avg_holding_months_per_isin\"], g[\"aum_drawdown_last\"], s=10, label=name)\n",
"\n",
"plt.yscale(\"log\")\n",
"plt.axvline(thr_hold, linestyle=\"--\")\n",
"plt.axhline(thr_churn, linestyle=\"--\")\n",
"plt.xlabel(\"Activity frequency (share of active months)\")\n",
"plt.ylabel(\"Gross flow / mean AUM (quantity) [log scale]\")\n",
"plt.title(\"2D behavioral segmentation: relative intensity vs frequency\")\n",
"plt.legend(markerscale=2)\n",
"plt.ylim(0.001,1.1)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 317,
"id": "5071c36c-0176-460c-aeb7-ed7c4fb35ce5",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMoAAAGGCAYAAACKUW2JAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA4BBJREFUeJzs3Xd4TOnbB/D7aAkiiShBiN6DIGqQEJIgrGCVLFZdvQuil7V2dVbvffXeW3S7elm9rtWDFIIg833/yDvnN5MidjfJZE6+n+uai5xzZuY+z5x6n6coACBEREREREREREQpXCpTB0BERERERERERJQcMFFGREREREREREQkTJQRERERERERERGJCBNlREREREREREREIsJEGRERERERERERkYgwUUZERERERERERCQiTJQRERERERERERGJCBNlREREREREREREIsJEGRERERERERERkYgwUUZERGRW8uXLJ23btjV1GElq6dKloiiKPHjwwNShEBEREZHGMVFGRESUDNy9e1c6d+4sBQoUEEtLS7G2thZXV1eZPn26vH//PkliePfunYwaNUoOHz6cJN9nrlavXi3Tpk0zdRjJjqIo0qNHjxjTf/rpJ1EURdq3by86ne5ff/6DBw9EUZRYX2vWrPkvoRMRERGp0pg6ACIiopRu586d8u2334qFhYW0adNGnJyc5OPHj3L8+HHx9/eXq1evyvz58xM9jnfv3sno0aNFRMTd3T3Rv+9rtW7dWlq0aCEWFhamDkVEohJlf/75p/Tp08fUoSR7P//8swwdOlS+//57WbhwoaRK9d+f0bZs2VLq1atnNK1KlSr/+XOJiIiIRJgoIyIiMqn79+9LixYtJG/evHLo0CHJmTOnOq979+5y584d2blzpwkj/O/Cw8MlY8aM//r9qVOnltSpUydgRMnTu3fvJEOGDKYOI8FMnDhRAgICpE2bNrJ48eIESZKJiJQrV05atWqVIJ9FREREFB2bXhIREZnQhAkT5O3bt7Jo0SKjJJleoUKFpHfv3nG+f9SoUaIoSozpsfXrdfbsWfHy8pKsWbNK+vTpJX/+/NK+fXsRiWrWli1bNhERGT16tNqkbdSoUer7b9y4IU2bNhU7OzuxtLQUFxcX2bZtW6zfe+TIEenWrZtkz55dcufO/cUy+PXXX6VkyZKSIUMGyZw5s7i4uMjq1au/uC46nU5GjRoluXLlkgwZMkjNmjXl2rVrMfpw07/3xIkT0q9fP8mWLZtkzJhRfH19JSgoyCiOrVu3Sv369SVXrlxiYWEhBQsWlLFjx0pkZKS6jLu7u+zcuVP++usvtYzy5csXZ5wiIocPHxZFUYyatLq7u4uTk5OcO3dOatSoIRkyZJAhQ4aIiEhERISMHDlSChUqJBYWFpInTx4ZOHCgREREGH3u/v37pVq1amJraytWVlZStGhR9TNMbcqUKTJw4EBp1aqVLFmyJMGSZHrh4eHy8ePHBP1MIiIiIhHWKCMiIjKp7du3S4ECBaRq1aqJ+j0vXrwQT09PyZYtmwwePFhsbW3lwYMHsmnTJhERyZYtm8yZM0e6du0qvr6+0rhxYxERKV26tIiIXL16VVxdXcXBwUEGDx4sGTNmlHXr1kmjRo1k48aN4uvra/R93bp1k2zZssmIESMkPDw8zrgWLFggvXr1kqZNm0rv3r3lw4cPcvnyZfnjjz/Ez88vzvcFBATIhAkTpEGDBuLl5SWXLl0SLy8v+fDhQ6zL9+zZUzJnziwjR46UBw8eyLRp06RHjx6ydu1adZmlS5eKlZWV9OvXT6ysrOTQoUMyYsQICQsLk4kTJ4qIyNChQyU0NFQePXokU6dOFRERKyur+Io/Vq9evZK6detKixYtpFWrVmJvby86nU4aNmwox48flx9++EGKFy8uV65ckalTp8qtW7dky5YtIhL1e/j4+Ejp0qVlzJgxYmFhIXfu3JETJ078q1gS0vTp06V///7i5+
cnS5cujTVJ9vLly6/6rEyZMsVocjt69Gjx9/cXRVGkfPnyMm7cOPH09EyQ2ImIiIiYKCMiIjKRsLAwefz4sXzzzTeJ/l0nT56U4OBg2bdvn7i4uKjTf/zxRxERyZgxozRt2lS6du0qpUuXjtG0rXfv3uLo6ChnzpxRExfdunWTatWqyaBBg2Ikyuzs7OTgwYPxNpncuXOnlCxZUtavX//V6/L8+XOZMmWKNGrUSDZv3qxOHz16tFENOENZsmSRffv2qbXvdDqdzJgxQ0JDQ8XGxkZEovoeS58+vfqeLl26SJcuXWT27Nny448/ioWFhdSpU0ccHBwkODj4Pzf/e/bsmcydO1c6d+6sTlu5cqUcOHBAjhw5ItWqVVOnOzk5SZcuXeTkyZNStWpV2b9/v3z8+FF2794tWbNm/U9xJKQdO3bIX3/9JS1btpTly5fH+fvray/GZ8mSJWoNwVSpUomnp6f4+vqKg4OD3Lt3T6ZMmSJ169aVbdu2Sf369RNqNYiIiCgFY6KMiIjIRMLCwkQkqtZMYrO1tRWRqERGmTJlJG3atF/93tevX8uhQ4dkzJgx8ubNG3nz5o06z8vLS0aOHCmPHz8WBwcHdXqnTp2+ql8xW1tbefTokZw5c0YqVKjwVfEcPHhQPn/+LN26dTOa3rNnzzgTZT/88INRE9Xq1avL1KlT5a+//lJrzRkmyd68eSMRERFSvXp1mTdvnty4cUPKlCnzVfF9LQsLC2nXrp3RtPXr10vx4sWlWLFiRrWuatWqJSIigYGBUrVqVfX33Lp1q7Rr1y7Bmzb+W8+fPxcRkfz583/x99+/f/9XfV7JkiXV/zs6OsrevXuN5rdu3VpKlCgh/fv3Z6KMiIiIEgQTZURERCZibW0tImKUeEosbm5u0qRJExk9erRMnTpV3N3dpVGjRuLn5xfvaJJ37twRADJ8+HAZPnx4rMu8ePHCKFGWP3/+r4pr0KBBcuDAAalYsaIUKlRIPD09xc/PT1xdXeN8z19//SUiUf23GbKzs5PMmTPH+h5HR0ejv/XLBQcHq9OuXr0qw4YNk0OHDqlJTL3Q0NCvWp9/wsHBQdKlS2c07fbt23L9+vU4a1y9ePFCRESaN28uCxculI4dO8rgwYPFw8NDGjduLE2bNv1i0uz169f/um8vOzu7GPFG9/3338uTJ0/kp59+kqxZs0rfvn1jXa527dr/KobYYmrXrp38/PPP8ujRo3j7wyMiIiKKDxNlREREJmJtbS25cuWSP//8819/Rmwd+YuIUQf0+uU2bNggv//+u2zfvl327t0r7du3l8mTJ8vvv//+xX62dDqdiIgMGDBAvLy8Yl0metLKsHbWlxQvXlxu3rwpO3bskD179sjGjRtl9uzZMmLECBk9evRXfcbXiKt2EwAREQkJCRE3NzextraWMWPGSMGCBcXS0lLOnz8vgwYNUsvgS772t9CLrYx0Op2UKlVKpkyZEut78uTJo7736NGjEhgYKDt37pQ9e/bI2rVrpVatWrJv374417dx48Zy5MiReNclNoGBgeLu7v7FZdKkSSPr1q0Tb29v6d+/v9ja2saoNScS1ez0a9jY2MS7LenL5PXr10yUERER0X/GRBkREZEJ+fj4yPz58+XUqVNSpUqVf/x+fc2okJAQtTmeyP9qXUVXuXJlqVy5sowbN05Wr14t3333naxZs0Y6duwYZ6KnQIECIiKSNm3aBKsJZChjxozSvHlzad68uXz8+FEaN24s48aNk4CAALG0tIyxfN68eUUkqqabYc21V69eGdUQ+ycOHz4sr169kk2bNkmNGjXU6ffv34+xbFzlZPhbGIrrt4hNwYIF5dKlS+Lh4RHn9+ilSpVKPDw8xMPDQ6ZMmSI//fSTDB06VAIDA+P8nSZPnvyvy+hrm55aWlrKtm3bpGbNmtKpUyextbWN0YddbCO8xsawj7K43Lt3T0S+vt8zIiIioi9hooyIiMiEBg4cKKtWrZKOHTvKoU
OHxN7e3mj+3bt3ZceOHdK7d+9Y31+wYEERETl69Kg0bNhQRETCw8Nl2bJlRssFBweLra2tUfLF2dlZREQiIiJERCR
"text/plain": [
"<Figure size 1400x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#heatmap\n",
"def robust_zscore_col(s):\n",
" med = np.nanmedian(s)\n",
" mad = np.nanmedian(np.abs(s - med))\n",
" if mad == 0 or np.isnan(mad):\n",
" return np.zeros(len(s))\n",
" return (s - med) / (1.4826 * mad)\n",
"\n",
"for k in [5]:\n",
" prof = dfc.groupby(f\"cluster_k{k}\")[profile_vars].median()\n",
" prof_z = prof.copy()\n",
"\n",
" for c in prof.columns:\n",
" # prof_z[c] = robust_zscore_col(prof[c].values)\n",
" prof_z[c] = (prof[c] - prof[c].mean()) / (prof[c].std() + 1e-12)\n",
" prof_z[c] = prof_z[c].fillna(0)\n",
"\n",
" plt.figure(figsize=(14, 4))\n",
" sns.heatmap(prof_z, cmap=\"RdBu_r\", center=0)\n",
" plt.xticks(rotation=45, ha='right') # incline les noms à 45°, alignés à droite\n",
" plt.title(f\"Cluster signatures — K={k}\")\n",
" plt.ylabel(\"Clusters\")\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 318,
"id": "72393182-7c5b-4484-b0e0-770bff771d4c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuEAAAKyCAYAAAB7WgDLAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs/Xm4bVV1Joy/c87V7X6f/txzey7tpReQiIhih9hENKkYq2ywfSqlyWOMiVCJIpqIVqIVk7K0Kn6RfPrlF5OoFSomqEGQJIgNSt/f/t7Tn7PP7lc75++POdac+wgqGhSI632eC/vsvZrZr7nGeMc7mFJKoUCBAgUKFChQoECBAj8z8Ce6AAUKFChQoECBAgUK/Lyh2IQXKFCgQIECBQoUKPAzRrEJL1CgQIECBQoUKFDgZ4xiE16gQIECBQoUKFCgwM8YxSa8QIECBQoUKFCgQIGfMYpNeIECBQoUKFCgQIECP2MUm/ACBQoUKFCgQIECBX7GKDbhBQoUKFCgQIECBQr8jFFswgsUKFCgQIECBQoU+Bmj2IQXKPBzjPe9731gjD3RxSjwQ3D55Zdj165dT3QxChQoUKDA44xiE16gwL8TXHvttWCMmX9BEGBubg6XXHIJ/uRP/gTdbvdxuc/8/Dze97734fbbb39crvfzgsFggPe973246aabnuii/FTAGMPb3/72R3z/wQ9+EIwxvPGNb4SU8ie+/sGDBzeN79F/f/VXf/VvKXqBAgUKPCFwnugCFChQ4PHF+9//fuzevRtJkmBxcRE33XQT3vGOd+CjH/0orrvuOpxxxhnm2N/7vd/DFVdc8WNdf35+HldffTV27dqFs84663Eu/b9fDAYDXH311QCA5zznOY/5vD/7sz/7N21en0h86EMfwu/+7u/i9a9/PT71qU+B83+73efVr341XvziF2/67hnPeMa/+boFChQo8LNGsQkvUODfGS699FKce+655u8rr7wSX/va1/DSl74Uv/iLv4j77rsPpVIJAOA4DhynWAaejOj3+6hUKnBd94kuyk+EP/zDP8SVV16J173udfjzP//zx2UDDgBPe9rT8JrXvOZxuVaBAgUKPJEo6CgFCvwc4LnPfS7e85734NChQ/jsZz9rvn80TvhXv/pVXHjhhWg2m6hWqzjppJPwX//rfwUA3HTTTTjvvPMAAG94wxsMHeDaa68FAPzzP/8z/sN/+A/YsWMHfN/H9u3b8Zu/+ZsYDoeb7nH55ZejWq3i2LFjuOyyy1CtVjE1NYV3vetdyLJs07FSSnzsYx/D6aefjiAIMDU1hRe96EX4zne+s+m4z372szjnnHNQKpUwPj6OX/3VX8WRI0d+ZNvkbfDggw/iNa95DRqNBqampvCe97wHSikcOXIEL3/5y1Gv1zE7O4uPfOQjm86P4xjvfe97cc4556DRaKBSqeBZz3oWbrzxRnPMwYMHMTU1BQC4+uqrTbu9733v29Qe+/btw4tf/GLUajX8p//0n8xvo5zwq666Cpxz3HDDDZvK8da3vhWe5+GOO+74kXX+aeOjH/0ofud3fgevec1r8OlPf/px24Dn6Pf7iOP4cb1mgQIFCvysUWzCCxT4OcFrX/taAMBXvvKVH3jMPffcg5e+9KWIogjvf//78ZGPfAS/+Iu/iH/9138FAJxyyil4//vfD0Bv+j7zmc/gM5/5DC666CIAwN/8zd9gMBjg137t1/Cnf/qnuOSSS/Cnf/qneN3rXveIe2VZhksuuQQTExP4oz/6Izz72c/GRz7yEfzv//2/Nx33pje9Ce94xzuwfft2fPjDH8YVV1yBIAhw6623mmP+4A/+AK973etwwgkn4KMf/Sje8Y534IYbbsBFF12EjY2Nx9Q+r3rVqyClxIc+9CGcf/75+P3f/3388R//MV7wghdg69at+PCHP4zjjz8e73rXu3DzzTeb8zqdDj71qU/hOc95Dj784Q/jfe97H1ZWVnDJJZ
cY3vzU1BQ+8YlPAABe8YpXmHZ75Stfaa6TpikuueQSTE9P44/+6I/wS7/0S49azt/7vd/DWWedhTe96U2G5//lL38Zf/Znf4b3vve9OPPMMx9TfX9a+NjHPobf+q3fwn/8j/8R11577aNuwFdXVx/TvyiKHnHu1VdfjWq1iiAIcN555/3Q8VygQIECT2qoAgUK/LvApz/9aQVAffvb3/6BxzQaDXX22Webv6+66io1ugz89//+3xUAtbKy8gOv8e1vf1sBUJ/+9Kcf8dtgMHjEd9dcc41ijKlDhw6Z717/+tcrAOr973//pmPPPvtsdc4555i/v/a1rykA6jd+4zcecV0ppVJKqYMHDyohhPqDP/iDTb/fddddynGcR3z//cjb4K1vfav5Lk1TtW3bNsUYUx/60IfM961WS5VKJfX6179+07FRFG26ZqvVUjMzM+qNb3yj+W5lZUUBUFddddUjypC3xxVXXPGov+3cufMRdfM8T735zW9WrVZLbd26VZ177rkqSZIfWtefJgConTt3KgDq1a9+tUrT9Ice+1j+jY6xQ4cOqRe+8IXqE5/4hLruuuvUH//xH6sdO3Yozrn6+7//+59BDQsUKFDg8UVBBi1Q4OcI1Wr1h6qkNJtNAMDf/d3f4Q1veMOPTSPIueaApgwMh0NccMEFUErhe9/7Hnbs2LHp+P/8n//zpr+f9axn4TOf+Yz5+/Of/zwYY7jqqqseca+cRvOFL3wBUkr8yq/8ClZXV83vs7OzOOGEE3DjjTcaOs0Pw5vf/GbzWQiBc889F0ePHsWb3vQm832z2cRJJ52E/fv3bzpWCAFAU2c2NjYgpcS5556L7373uz/yvqP4tV/7tcd03GmnnYarr74aV155Je68806srq7iK1/5yhPO719aWgIA7N6927TJo+GrX/3qY7reqaeeaj7v2LEDX/7ylzf9/trXvhZ79+7Fb/3Wb+ElL3nJT1DiAgUKFHjiUGzCCxT4OUKv18P09PQP/P1Vr3oVPvWpT+HNb34zrrjiCjzvec/DK1/5SvzyL//yY9qQHz58GO9973tx3XXXodVqbfqt3W5v+jvnd49ibGxs03n79u3D3NwcxsfHf+A9H3roISilcMIJJzzq7481sPH7XxAajQaCIMDk5OQjvl9bW9v03V/8xV/gIx/5CO6//34kSWK+371792O6N6CDZLdt2/aYj//t3/5t/NVf/RW+9a1v4YMf/CD27t37I89ZX1//ibnU4+Pj8Dzvhx7z+te/HvPz8/jgBz+IyclJ/OZv/uajHvf85z//JyrDo5XpDW94Az70oQ/h6NGjP1b7FShQoMATjWITXqDAzwmOHj2KdruN448//gceUyqVcPPNN+PGG2/El770JVx//fX43Oc+h+c+97n4yle+8kOtm1mW4QUveAHW19fx7ne/GyeffDIqlQqOHTuGyy+//BEyez/sWj8OpJRgjOEf//EfH/Wa1Wr1MV3n0c79QWVUSpnPn/3sZ3H55Zfjsssuw2//9m9jenoaQghcc8012Ldv32OsBeD7/o/ledi/fz8eeughAMBdd931mM555Stfia9//euP+R6juPHGG3+ktKLjOPjrv/5rvOhFL8Jv/dZvodls4g1veMMjjltcXHxM92w0Gpu8K4+G7du3A9AvGMUmvECBAk8lFJvwAgV+TpDTPC655JIfehznHM973vPwvOc9Dx/96EfxwQ9+EL/7u7+LG2+8Ec9//vN/YIbNu+66Cw8++CD+4i/+YlMg5mOlHjwa9uzZgy9/+ctYX1//gdbwPXv2QCmF3bt348QTT/yJ7/WT4m//9m9x3HHH4Qtf+MKmtvl+Cs3jmZlUSonLL78c9Xod73jHO/DBD34Qv/zLv7wp0PPR8JGPfOQRHorHisca8BkEAa677jpcfPHFeMtb3oJms4lXvOIVm47ZsmXLY7rWpz/9aVx++eU/9JicGvT9XpUCBQoUeLKj2IQXKPBzgK997Wv4wAc+gN
27dxvpu0fDo21284Q8uVJFpVIBgEeojuRW41ErsVIKH/vYx37icv/SL/0SPv7xj+Pqq69+xHWUUmCM4ZWvfCWuvPJ
"text/plain": [
"<Figure size 800x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.metrics import pairwise_distances\n",
"\n",
"def plot_distance_matrix_sorted(X_scaled, labels, max_points=400, title=\"Distance matrix\"):\n",
" \"\"\"\n",
" Trace la matrice de distance triée par cluster avec des lignes séparatrices.\n",
" \n",
" Parameters\n",
" ----------\n",
" X_scaled : np.array ou pd.DataFrame\n",
" Les données numériques standardisées (n_samples x n_features)\n",
" labels : array-like\n",
" Les labels de cluster pour chaque point\n",
" max_points : int, optional\n",
" Nombre maximum de points à afficher pour éviter des matrices trop grandes\n",
" title : str, optional\n",
" Titre de la figure\n",
" \"\"\"\n",
" n = X_scaled.shape[0]\n",
" idx = np.arange(n)\n",
"\n",
" # Sous-échantillonnage si nécessaire\n",
" if n > max_points:\n",
" rng = np.random.default_rng(42)\n",
" idx = rng.choice(idx, size=max_points, replace=False)\n",
"\n",
" X_sub = X_scaled[idx]\n",
" labels_sub = np.asarray(labels)[idx]\n",
"\n",
" # Tri par cluster\n",
" order = np.lexsort((np.arange(len(labels_sub)), labels_sub))\n",
" X_sub = X_sub[order]\n",
" labels_sub = labels_sub[order]\n",
"\n",
" # Matrice de distances\n",
" D = pairwise_distances(X_sub)\n",
"\n",
" # Figure\n",
" plt.figure(figsize=(8, 7))\n",
" sns.heatmap(D, cmap=\"viridis\")\n",
" \n",
" # Lignes séparatrices entre clusters\n",
" unique_labels, counts = np.unique(labels_sub, return_counts=True)\n",
" boundaries = np.cumsum(counts)\n",
" for b in boundaries[:-1]: # on ignore la dernière limite\n",
" plt.axhline(b, color='red', linewidth=2)\n",
" plt.axvline(b, color='red', linewidth=2)\n",
"\n",
" plt.title(title)\n",
" plt.tight_layout()\n",
" plt.show()\n",
"\n",
"for k in [5]:\n",
" plot_distance_matrix_sorted(\n",
" X_scaled,\n",
" dfc[f\"cluster_k{k}\"].values,\n",
" title=f\"Distance matrix — K={k}\"\n",
" )\n",
"\n",
"# Cluster 0 très distant des autres"
]
},
{
"cell_type": "code",
"execution_count": 319,
"id": "a5f006c5-55a8-475f-b58d-fc26886c0aba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"churn_hard 0.361386\n",
"churn_soft 0.603960\n",
"churn_warning 0.344059\n",
"dtype: float64\n",
"\n",
"===== CHURN PAR CLUSTER K=2 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>churn_hard_rate</th>\n",
" <th>churn_soft_rate</th>\n",
" <th>churn_warning_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>327</td>\n",
" <td>0.409786</td>\n",
" <td>0.642202</td>\n",
" <td>0.336391</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>77</td>\n",
" <td>0.155844</td>\n",
" <td>0.441558</td>\n",
" <td>0.376623</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
"cluster_k2 \n",
"0 327 0.409786 0.642202 0.336391\n",
"1 77 0.155844 0.441558 0.376623"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== CHURN PAR CLUSTER K=5 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>churn_hard_rate</th>\n",
" <th>churn_soft_rate</th>\n",
" <th>churn_warning_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k5</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>168</td>\n",
" <td>0.541667</td>\n",
" <td>0.797619</td>\n",
" <td>0.416667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>111</td>\n",
" <td>0.396396</td>\n",
" <td>0.648649</td>\n",
" <td>0.306306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>90</td>\n",
" <td>0.000000</td>\n",
" <td>0.166667</td>\n",
" <td>0.211111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.233333</td>\n",
" <td>0.600000</td>\n",
" <td>0.433333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>0.800000</td>\n",
" <td>1.000000</td>\n",
" <td>0.600000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
"cluster_k5 \n",
"1 168 0.541667 0.797619 0.416667\n",
"3 111 0.396396 0.648649 0.306306\n",
"4 90 0.000000 0.166667 0.211111\n",
"0 30 0.233333 0.600000 0.433333\n",
"2 5 0.800000 1.000000 0.600000"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Analyse churn\n",
"\n",
"dfc[\"churn_hard\"] = (dfc[\"aum_final_to_peak\"] < 0.10).astype(int)\n",
"\n",
"dfc[\"churn_soft\"] = (\n",
" (dfc[\"aum_final_to_peak\"] < 0.40) &\n",
" (dfc[\"aum_drawdown_last\"] > 0.40)\n",
").astype(int)\n",
"\n",
"dfc[\"churn_warning\"] = (\n",
" (dfc[\"flow_direction_balance\"] < 0) &\n",
" (dfc[\"aum_drawdown_last\"] > 0.20)\n",
").astype(int)\n",
"\n",
"print(dfc[[\"churn_hard\", \"churn_soft\", \"churn_warning\"]].mean())\n",
"\n",
"for k in [2, 5]:\n",
" out = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" churn_hard_rate=(\"churn_hard\", \"mean\"),\n",
" churn_soft_rate=(\"churn_soft\", \"mean\"),\n",
" churn_warning_rate=(\"churn_warning\", \"mean\")\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
" print(f\"\\n===== CHURN PAR CLUSTER K={k} =====\")\n",
" display(out)"
]
},
{
"cell_type": "code",
"execution_count": 320,
"id": "b8b4940e-4ab5-4123-a59a-e99d5f1fc5b6",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAP9FJREFUeJzt3XlcVdX+//H3AeUgKKipoITihCOCQRpaDle6Vt7KssJ+lUhmg1ImmkrmWIpamhNKpqY53GywUcUBta6J1ynKnMoBsQHETHBIINi/P/x6bidAwQ0cldfz8TiPB2fttfb+bCI8b9Zee1sMwzAEAAAAACY4OboAAAAAANc/ggUAAAAA0wgWAAAAAEwjWAAAAAAwjWABAAAAwDSCBQAAAADTCBYAAAAATCNYAAAAADCNYAEAAADANIIFAJSQxWJRVFSUo8solr59+6pq1aqOLkOS1KVLF3Xp0sXRZQAAygjBAgD+z+HDh/XMM8+oUaNGcnV1lYeHhzp27KgZM2bojz/+cHR5uIKJEyfqk08+cXQZpqWkpMhiseiNN96wazcMQ88884wsFovGjh1r6hg7duxQVFSUWrVqJXd3d9WvX1+PPPKIfvjhB1P7BVCxVXJ0AQBwLVi1apUefvhhWa1W9enTR61bt1ZOTo62bNmil156SXv37tW8efMcXSYuY+LEiXrooYfUs2dPR5dS6gzD0IABAzRv3jyNGjXKdLCYPHmyvv76az388MNq06aN0tLSNHv2bN1yyy3atm2bWrduXTqFA6hQCBYAKryjR4+qd+/eatCggTZu3Ki6devatg0cOFCHDh3SqlWryrWm/Px85eTkyNXVtVyPC3sXLlyQi4uLnJwcO8H//PPPKz4+XiNHjtT48eNN7y86OlrLly+Xi4uLrS08PFwBAQGaNGmSli5davoYACoeLoUCUOFNmTJFZ8+e1YIFC+xCxSVNmjTRoEGDCrR/8sknat26taxWq1q1aqWEhAS77X379pWfn1+BcWPHjpXFYrFru7RuY9myZWrVqpWsVqsSEhK0aNEiWSwWff3114qOjlbt2rXl7u6uBx54QBkZGcU+xyNHjqh79+5yd3dXvXr1NH78eBmGIeniX8P9/Px0//33Fxh34cIFeXp66plnnrniMZYuXap27drJzc1NNWrUUKdOnbRu3boi+186t5SUFLv2zZs3y2KxaPPmzba2H3/8Ub169ZK3t7dcXV118803q3fv3srMzJR08ft37tw5LV68WBaLRRaLRX379rWN//nnn/Xkk0/Ky8vL9t9r4cKFhR73vffe0yuvvCIfHx+5ubkpKyvriudelgYNGqS4uDjFxMTotddeK5V9dujQwS5USFLTpk3VqlUr7d+/v1SOAaDiYcYCQIX3+eefq1GjRurQoUOxx2zZskUrV67UgAEDVK1aNc2cOVO9evVSamqqbrrppquqY+PGjXr//fcVFRWlWrVqyc/PT8nJyZIu/sW6Ro0aGjNmjFJSUjR9+nRFRUVpxYoVV9xvXl6e7rrrLt12222aMmWKEhISNGbMGP35558aP368LBaLHn/8cU2ZMkWnTp1SzZo1bWM///xzZWVl6fHHH7/sMcaNG6exY8eqQ4cOGj9+vFxcXPTf//5XGzdu1D//+c+r+n5ckpOTo+7duys7O1vPP/+8vL299fPPP+uLL77Q6dOn5enpqSVLluipp55Su3bt9PTTT0uSGjduLElKT0/XbbfdZgtvtWvX1po1a9SvXz9lZWXpxRdftDveq6++KhcXFw0dOlTZ2dkFPoCXp8GDB2vmzJkaPny4Jk6cWGB7fn6+Tp06Vax9eXp6qnLlykVuNwxD6enpatWq1VXXC6CCMwCgAsvMzDQkGffff3+xx0gyXFxcjEOHDtnavv32W0OSMWvWLFtbRESE0aBBgwLjx4wZY/z9168kw8nJydi7d69d+zvvvGNIMsLCwoz8/Hxb++DBgw1nZ2fj9OnTl6
01IiLCkGQ8//zztrb8/HyjR48ehouLi5GRkWEYhmEcPHjQkGTMnTvXbvx9991n+Pn52R3773788UfDycnJeOCBB4y8vDy7bX8d17lzZ6Nz584Fzu3o0aN2YzZt2mRIMjZt2mQYhmF88803hiTjgw8+uOy5uru7GxEREQXa+/XrZ9StW9c4efKkXXvv3r0NT09P4/z583bHbdSoka3NEY4ePWpIMho0aGBIMl566aUr9i3O69L3syhLliwxJBkLFiwo5TMCUFEwYwGgQrt0mUu1atVKNC4sLMz2F3FJatOmjTw8PHTkyJGrrqVz585q2bJloduefvppu8un7rjjDr355ps6duyY2rRpc8V9//X2uJf+cr9q1Spt2LBBvXv3lr+/v9q3b69ly5bp2WeflSSdOnVKa9as0bBhwwpcuvVXn3zyifLz8zV69OgCaxEuN664PD09JUlr167VPffcIzc3t2KPNQxDH330kR555BEZhqGTJ0/atnXv3l3vvfeedu/erY4dO9raIyIiVKVKFdN1m5Weni5J8vf3L7KPt7e31q9fX6z9BQYGFrntwIEDGjhwoEJDQxUREVGyQgHg/xAsAFRoHh4ekqQzZ86UaFz9+vULtNWoUUO///77VdfSsGHDYh+vRo0aklSs4zk5OalRo0Z2bZc+rP51fUOfPn0UFRWlY8eOqUGDBvrggw+Um5urJ5544rL7P3z4sJycnIoMRWY1bNhQ0dHRmjZtmpYtW6Y77rhD9913nx5//HFb6ChKRkaGTp8+rXnz5hV5V68TJ04UOF5xZGRkKC8vr3gn8Te1a9eWs7PzZfsMHz5cq1ev1jPPPKPq1avroYceKtDH1dVVYWFhV1XDJWlpaerRo4c8PT314YcfXrEuACgKwQJAhebh4aF69erp+++/L9G4oj58Gf+3IFoq+q/1RX0YvdxfyYtzPLN69+6twYMHa9myZXr55Ze1dOlShYSEqFmzZqV2jL8qyfdn6tSp6tu3rz799FOtW7dOL7zwgmJjY7Vt2zbdfPPNRR4jPz9fkvT4448X+Zf4v8/4FHe24tZbb9WxY8eK1ffvjh49WujC/r+qWrWq1qxZo06dOumxxx6Th4dHgfUqeXl5xV7EX7NmzQLrRTIzM3X33Xfr9OnT+s9//qN69eqV6DwA4K8IFgAqvH/961+aN2+ekpKSFBoaWmr7rVGjhk6fPl2g/Wo/jF6t/Px8HTlyxO6SmksPQvvrh9uaNWuqR48eWrZsmR577DF9/fXXmj59+hX337hxY+Xn52vfvn0KCgoqdl2XZl3+/j0q6vsTEBCggIAAvfLKK9q6das6duyo+Ph4252SCgsqtWvXVrVq1ZSXl2f6L/t/t2zZsqt+cKK3t3ex+t10001at26dOnbsqAcffFDr16+3+xk9fvx4sWdYNm3aZPfk8wsXLujee+/VDz/8oA0bNpTZjBOAioNgAaDCGzZsmJYtW6annnpKGzdulJeXl932w4cP64svvij0lrOX07hxY2VmZuq7776z/VX8119/1ccff1xqtRfX7NmzNXPmTEkXZzlmz56typUrq1u3bnb9nnjiCT344IN66aWX5OzsrN69e19x3z179tTw4cM1fvx4ffjhh3brLAzDKHJm4tIala+++soWSPLy8gpcspSVlSU3NzdVqvS/f7ICAgLk5OSk7OxsW5u7u3uBkOLs7KxevXpp+fLl+v777ws8+C0jI0O1a9e+4jkW5q/rMsqSj4+P1q9fr9tvv109evTQl19+qYCAAElXv8YiLy9P4eHhSkpK0qefflqqgRpAxUWwAFDhNW7cWMuXL1d4eLhatGhh9+TtrVu36oMPPrB7JkJx9e7dW8OHD9cDDzygF154QefPn9fcuXPl7++v3bt3l/6JFMHV1VUJCQmKiIhQ+/bttWbNGq1atUovv/xygQ/VPXr00E033aQPPvhAd999t+rUqXPF/Tdp0kQjR47Uq6++qjvuuEMPPvigrFardu
zYoXr16ik2NrbQca1atdJtt92mmJgY221u33vvPf355592/TZu3KioqCg9/PDD8vf3159//qklS5bYQsMlwcHB2rB
"text/plain": [
"<Figure size 800x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAQNBJREFUeJzt3Xt8zvX/x/HntdmuHdgQNjTN2TAji5xCrVS+Skn0DSOUWGGEJccwKSKnJUTFVyfp4GxMXyGnlJzKYeZbbSZf5rjN9vn94ef6drWD8dm1a7PH/Xa7brdd78/n8/68PpdP3+/1vD7v9+djMQzDEAAAAACY4OLsAgAAAAAUfQQLAAAAAKYRLAAAAACYRrAAAAAAYBrBAgAAAIBpBAsAAAAAphEsAAAAAJhGsAAAAABgGsECAAAAgGkECwC4SRaLRREREc4uI0969uypkiVLOrsMSVKbNm3Upk0bZ5cBAHAQggUA/L+jR4/qhRdeULVq1eTh4SEfHx+1aNFCM2bM0OXLl51dHm5g0qRJWrFihbPLMC0+Pl4Wi0VvvfWWXbthGHrhhRdksVg0duxYU/uIi4uTxWLJ9rV9+3ZTfQMovko4uwAAKAxWrlypzp07y2q1qkePHqpfv77S0tK0ZcsWvfLKK9q/f7/mzZvn7DKRi0mTJumpp55Sx44dnV1KvjMMQ/3799e8efM0atQo08Hiupdffln33HOPXVuNGjXypW8AxQ/BAkCxd/z4cXXt2lV33XWXNm7cqIoVK9qWDRgwQEeOHNHKlSsLtKbMzEylpaXJw8OjQPcLe1euXJG7u7tcXJx7gf+ll15STEyMRo4cqfHjx+dbv61atdJTTz2Vb/0BKN4YCgWg2JsyZYouXLigBQsW2IWK62rUqKGBAwdmaV+xYoXq168vq9WqevXqac2aNXbLe/bsqcDAwCzbjR07VhaLxa7t+ryNJUuWqF69erJarVqzZo0WLVoki8Wi7777TpGRkSpfvry8vb31xBNPKDk5Oc/HeOzYMbVr107e3t6qVKmSxo8fL8MwJF37NTwwMFCPP/54lu2uXLkiX19fvfDCCzfcx0cffaQmTZrIy8tLZcqU0X333ad169bluP71Y4uPj7drvz5MJy4uztb266+/qlOnTvL395eHh4fuvPNOde3aVefOnZN07fO7ePGiFi9ebBvS07NnT9v2v/32m5577jn5+fnZ/r0WLlyY7X6XLVum1157TZUrV5aXl5dSUlJueOyONHDgQM2ePVtRUVGaMGFCvvd//vx5Xb16Nd/7BVD8cMUCQLH39ddfq1q1amrevHmet9myZYuWL1+u/v37q1SpUnrnnXfUqVMnJSQk6I477rilOjZu3KhPPvlEERERKleunAIDA7V3715J136xLlOmjMaMGaP4+HhNnz5dERER+vjjj2/Yb0ZGhh5++GHde++9mjJlitasWaMxY8bo6tWrGj9+vCwWi7p166YpU6bozJkzKlu2rG3br7/+WikpKerWrVuu+xg3bpzGjh2r5s2ba/z48XJ3d9f333+vjRs36qGHHrqlz+O6tLQ0tWvXTqmpqXrppZfk7++v3377Td98843Onj0rX19fffjhh+rTp4+aNGmi559/XpJUvXp1SVJSUpLuvfdeW3grX768Vq9erd69eyslJUWDBg2y29/rr78ud3d3DR06VKmpqXJ3dzdVvxmDBw/WO++8o+HDh2vSpElZlmdmZurMmTN56svX11dubm52bb169dKFCxfk6uqqVq1a6c0331RoaGi+1A6gGDIAoBg7d+6cIcl4/PHH87yNJMPd3d04cuSIre3HH380JBkzZ860tYWHhxt33XVXlu3HjBlj/P1/fiUZLi4uxv79++3a33//fUOSERYWZmRmZtraBw8ebLi6uhpnz57Ntdbw8HBDkvHSSy/Z2jIzM4327dsb7u7uRnJysmEYhnH48GFDkjF37ly77R977DEjMDDQbt9/9+
uvvxouLi7GE088YWRkZNgt++t2rVu3Nlq3bp3l2I4fP263zaZNmwxJxqZNmwzDMIwffvjBkGR8+umnuR6rt7e3ER4enqW9d+/eRsWKFY3Tp0/btXft2tXw9fU1Ll26ZLffatWq2dqc4fjx44Yk46677jIkGa+88soN183L6/rnaRiG8d133xmdOnUyFixYYHz55ZdGdHS0cccddxgeHh7Gnj17CuAoAdyOuGIBoFi7PsylVKlSN7VdWFiY7RdxSWrQoIF8fHx07NixW66ldevWqlu3brbLnn/+ebvhU61atdLbb7+tEydOqEGDBjfs+6+3x73+y/3KlSu1YcMGde3aVbVq1VLTpk21ZMkS9evXT5J05swZrV69WsOGDcsydOuvVqxYoczMTI0ePTrLXITctssrX19fSdLatWv16KOPysvLK8/bGoahzz//XE8//bQMw9Dp06dty9q1a6dly5Zpz549atGiha09PDxcnp6epus2KykpSZJUq1atHNfx9/fX+vXr89RfSEiI7e/mzZvbXaF77LHH9NRTT6lBgwaKiorKMqwPAPKCYAGgWPPx8ZF0bZz5zahSpUqWtjJlyui///3vLddStWrVPO+vTJkykpSn/bm4uKhatWp2bde/rP51fkOPHj0UERGhEydO6K677tKnn36q9PR0de/ePdf+jx49KhcXlxxDkVlVq1ZVZGSkpk2bpiVLlqhVq1Z67LHH1K1bN1voyElycrLOnj2refPm5XhXr1OnTmXZX14kJycrIyMjbwfxN+XLl5erq2uu6wwfPlyrVq3SCy+8oNKlS2c7ydrDw0NhYWG3VMPf1ahRQ48//riWL1+ujIyMG9YHAH9HsABQrPn4+KhSpUr6+eefb2q7nL50Gf8/IVrK+df6nL6M5vYreV72Z1bXrl01ePBgLVmyRK+++qo++ugjhYaGqnbt2vm2j7+6mc9n6tSp6tmzp7788kutW7dOL7/8sqKjo7V9+3bdeeedOe4jMzNTktStWzeFh4dnu87fr/jk9WrFPffcoxMnTuRp3b87fvx4thP7/6pkyZJavXq17rvvPj377LPy8fHJMl8lIyMjz5P4y5Yte8P5IgEBAUpLS9PFixdtoRsA8opgAaDY+8c//qF58+Zp27ZtatasWb71W6ZMGZ09ezZL+61+Gb1VmZmZOnbsmN2Qml9++UWS7L7cli1bVu3bt9eSJUv07LPP6rvvvtP06dNv2H/16tWVmZmpAwcOqGHDhnmu6/pVl79/Rjl9PsHBwQoODtZrr72mrVu3qkWLFoqJibHdKSm7oFK+fHmVKlVKGRkZ+fbL/nVLliy55Qcn+vv752m9O+64Q+vWrVOLFi305JNPav369Xbn6MmTJ/N8hWXTpk03fPL5sWPH5OHhUWie1g6gaCFYACj2hg0bpiVLlqhPnz7auHGj/Pz87JYfPXpU33zzTba3nM1N9erVde7cOf3000+2X8X/+OMPffHFF/lWe17NmjVL77zzjqRrVzlmzZolNzc3PfDAA3brde/eXU8++aReeeUVubq6qmvXrjfsu2PHjho+fLjGjx+vzz77zG6ehWEYOV6ZuD5H5dtvv7UFkoyMjCxDllJSUuTl5aUSJf73f1nBwcFycXFRamqqrc3b2ztLSHF1dVWnTp20dOlS/fzzz6pfv77d8uTkZJUvX/6Gx5idv87LcKTKlStr/fr1atmypdq3b6/NmzcrODhY0q3PscjuuH/88Ud99dVXeuSRR5z+3A4ARRPBAkCxV716dS1dulRdunRRUFCQ3ZO3t27dqk8//dTumQh51bVrVw0fPlxPPPGEXn75ZV26dElz585VrVq1tGfPnvw/kBx4eHhozZo1Cg8PV9OmTbV69WqtXLlSr776apYvl+3bt9cdd9yhTz/9VI888ogqVKhww/5r1KihkSNH6vXXX1erVq305JNPymq1aufOnapUqZKio6Oz3a5evXq69957FRUVZbvN7bJly7I8U2Hjxo
2KiIhQ586dVatWLV29elUffvihLTRc17hxY23YsEHTpk1TpUqVVLVqVTVt2lSTJ0/Wpk2b1LRpU/Xt21d169bVmTN
"text/plain": [
"<Figure size 800x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"for k in [2, 5]:\n",
" tmp = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" churn_hard=(\"churn_hard\", \"mean\"),\n",
" churn_soft=(\"churn_soft\", \"mean\"),\n",
" churn_warning=(\"churn_warning\", \"mean\")\n",
" )\n",
" )\n",
"\n",
" tmp.plot(kind=\"bar\", figsize=(8, 4))\n",
" plt.title(f\"Churn by cluster — K={k}\")\n",
" plt.ylabel(\"Rate\")\n",
" plt.xlabel(\"Clusters\")\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0370454-561e-48c5-ad3b-28a356a2abac",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}