Project_Carmignac/brouillon/clus11mars-Copy1 (5).ipynb

5502 lines
718 KiB
Plaintext
Raw Normal View History

2026-04-05 17:52:42 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2ef771e3-f905-4c82-97ab-7f879551824c",
"metadata": {},
"outputs": [],
"source": [
"# Meeting: 11 March\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f645c749-321e-46c4-ae5e-0ba2c1967f81",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: networkx in /opt/python/lib/python3.13/site-packages (3.6.1)\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.0.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Install into the running kernel's environment; pinned to the version this notebook last ran with.\n",
"%pip install --quiet networkx==3.6.1"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "909d6019-1d2e-4d5f-9249-86748e5c8f69",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import s3fs\n",
"\n",
"# SECURITY: never hard-code S3 credentials in a notebook -- they end up in version control.\n",
"# The access key, secret key and session token previously committed in this cell must be\n",
"# treated as compromised and revoked. Export the variables below in the environment\n",
"# before starting the kernel instead.\n",
"S3_ENDPOINT_URL = 'https://' + 'minio-simple.lab.groupe-genes.fr'\n",
"_REQUIRED_S3_VARS = (\"AWS_ACCESS_KEY_ID\", \"AWS_SECRET_ACCESS_KEY\", \"AWS_SESSION_TOKEN\")\n",
"_missing_s3_vars = [name for name in _REQUIRED_S3_VARS if not os.environ.get(name)]\n",
"if _missing_s3_vars:\n",
"    # Fail early with an actionable message instead of an opaque authentication error later.\n",
"    raise RuntimeError(\n",
"        \"Missing S3 credentials in the environment: \" + \", \".join(_missing_s3_vars)\n",
"    )\n",
"\n",
"# Same region value the notebook set before.\n",
"os.environ[\"AWS_DEFAULT_REGION\"] = 'us-east-1'\n",
"\n",
"# Filesystem handle for the MinIO endpoint, authenticated from the environment.\n",
"fs = s3fs.S3FileSystem(\n",
"    client_kwargs={'endpoint_url': S3_ENDPOINT_URL},\n",
"    key=os.environ[\"AWS_ACCESS_KEY_ID\"],\n",
"    secret=os.environ[\"AWS_SECRET_ACCESS_KEY\"],\n",
"    token=os.environ[\"AWS_SESSION_TOKEN\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "38108d9e-a00c-4026-afd8-706b7131566e",
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"from sklearn.preprocessing import StandardScaler, RobustScaler\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.mixture import GaussianMixture\n",
"from sklearn.metrics import silhouette_score, davies_bouldin_score, pairwise_distances\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.neighbors import kneighbors_graph\n",
"from sklearn.manifold import MDS\n",
"\n",
"import networkx as nx\n",
"\n",
"# Plot theme plus generous pandas display limits (up to 200 rows / columns shown).\n",
"sns.set_style(style=\"whitegrid\")\n",
"pd.options.display.max_columns = 200\n",
"pd.options.display.max_rows = 200\n",
"\n",
"# Notebook-wide constants. NOTE(review): EPS looks like a small numerical guard and\n",
"# RANDOM_STATE a shared seed -- confirm against the cells that use them.\n",
"EPS = 1e-9\n",
"RANDOM_STATE = 42"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "558c8d6d-9a8d-4c82-9765-620f7ce8d116",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - Subscription</th>\n",
" <th>Quantity - Redemption</th>\n",
" <th>Quantity - NetFlows</th>\n",
" <th>Value Ccy - Subscription</th>\n",
" <th>Value Ccy - Redemption</th>\n",
" <th>Value Ccy - NetFlows</th>\n",
" <th>Value € - Subscription</th>\n",
" <th>Value € - Redemption</th>\n",
" <th>Value € - NetFlows</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>200127202</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>EQUITY</td>\n",
" <td>INVESTISSEMENT</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO INVESTISSEMENT</td>\n",
" <td>F</td>\n",
" <td>EUR</td>\n",
" <td>LU0992625839</td>\n",
" <td>2020-11-05</td>\n",
" <td>1636.000</td>\n",
" <td>0.000</td>\n",
" <td>1636.000</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" <td>280983.00</td>\n",
" <td>0.00</td>\n",
" <td>280983.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>DIVERSIFIED</td>\n",
" <td>PATRIMOINE</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PATRIMOINE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-09</td>\n",
" <td>144.690</td>\n",
" <td>0.000</td>\n",
" <td>144.690</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" <td>99985.13</td>\n",
" <td>0.00</td>\n",
" <td>99985.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>EQUITY</td>\n",
" <td>INVESTISSEMENT</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC INVESTISSEMENT</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2016-10-26</td>\n",
" <td>0.000</td>\n",
" <td>-8.321</td>\n",
" <td>-8.321</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" <td>0.00</td>\n",
" <td>-9384.76</td>\n",
" <td>-9384.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>EQUITY</td>\n",
" <td>INVESTISSEMENT</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC INVESTISSEMENT</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2018-10-18</td>\n",
" <td>0.000</td>\n",
" <td>-22.083</td>\n",
" <td>-22.083</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" <td>0.00</td>\n",
" <td>-25227.40</td>\n",
" <td>-25227.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>003</td>\n",
" <td>166</td>\n",
" <td>166</td>\n",
" <td>406533</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>EQUITY</td>\n",
" <td>INVESTISSEMENT</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC INVESTISSEMENT</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010148981</td>\n",
" <td>2019-04-08</td>\n",
" <td>0.000</td>\n",
" <td>-465.992</td>\n",
" <td>-465.992</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" <td>0.00</td>\n",
" <td>-563775.76</td>\n",
" <td>-563775.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574456</th>\n",
" <td>2574456</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC SÉCURITÉ</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>FR0010149120</td>\n",
" <td>2015-06-12</td>\n",
" <td>0.000</td>\n",
" <td>-20.000</td>\n",
" <td>-20.000</td>\n",
" <td>0.00</td>\n",
" <td>-34294.40</td>\n",
" <td>-34294.40</td>\n",
" <td>0.00</td>\n",
" <td>-34294.40</td>\n",
" <td>-34294.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574457</th>\n",
" <td>2574457</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC SÉCURITÉ</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>FR0010149120</td>\n",
" <td>2015-09-18</td>\n",
" <td>328.726</td>\n",
" <td>0.000</td>\n",
" <td>328.726</td>\n",
" <td>564028.07</td>\n",
" <td>0.00</td>\n",
" <td>564028.07</td>\n",
" <td>564028.07</td>\n",
" <td>0.00</td>\n",
" <td>564028.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574458</th>\n",
" <td>2574458</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC SÉCURITÉ</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>FR0010149120</td>\n",
" <td>2015-09-25</td>\n",
" <td>4.443</td>\n",
" <td>0.000</td>\n",
" <td>4.443</td>\n",
" <td>7603.66</td>\n",
" <td>0.00</td>\n",
" <td>7603.66</td>\n",
" <td>7603.66</td>\n",
" <td>0.00</td>\n",
" <td>7603.66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574459</th>\n",
" <td>2574459</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC SÉCURITÉ</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>FR0010149120</td>\n",
" <td>2015-11-09</td>\n",
" <td>0.000</td>\n",
" <td>-440.000</td>\n",
" <td>-440.000</td>\n",
" <td>0.00</td>\n",
" <td>-754696.80</td>\n",
" <td>-754696.80</td>\n",
" <td>0.00</td>\n",
" <td>-754696.80</td>\n",
" <td>-754696.80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2574460</th>\n",
" <td>2574460</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2016-01-11</td>\n",
" <td>3595.000</td>\n",
" <td>0.000</td>\n",
" <td>3595.000</td>\n",
" <td>358385.55</td>\n",
" <td>0.00</td>\n",
" <td>358385.55</td>\n",
" <td>358385.55</td>\n",
" <td>0.00</td>\n",
" <td>358385.55</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2574461 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Agreement - Code Company - Id \\\n",
"0 0 003 166 \n",
"1 1 003 166 \n",
"2 2 003 166 \n",
"3 3 003 166 \n",
"4 4 003 166 \n",
"... ... ... ... \n",
"2574456 2574456 PRIVATE CLIENT PRIVATE CLIENT \n",
"2574457 2574457 PRIVATE CLIENT PRIVATE CLIENT \n",
"2574458 2574458 PRIVATE CLIENT PRIVATE CLIENT \n",
"2574459 2574459 PRIVATE CLIENT PRIVATE CLIENT \n",
"2574460 2574460 PRIVATE CLIENT PRIVATE CLIENT \n",
"\n",
" Company - Ultimate Parent Id Registrar Account - ID \\\n",
"0 166 200127202 \n",
"1 166 406533 \n",
"2 166 406533 \n",
"3 166 406533 \n",
"4 166 406533 \n",
"... ... ... \n",
"2574456 PRIVATE CLIENT PRIVATE CLIENT \n",
"2574457 PRIVATE CLIENT PRIVATE CLIENT \n",
"2574458 PRIVATE CLIENT PRIVATE CLIENT \n",
"2574459 PRIVATE CLIENT PRIVATE CLIENT \n",
"2574460 PRIVATE CLIENT PRIVATE CLIENT \n",
"\n",
" Registrar Account - Region RegistrarAccount - Country \\\n",
"0 FRANCE FRANCE \n",
"1 FRANCE FRANCE \n",
"2 FRANCE FRANCE \n",
"3 FRANCE FRANCE \n",
"4 FRANCE FRANCE \n",
"... ... ... \n",
"2574456 LUXEMBOURG LUXEMBOURG \n",
"2574457 LUXEMBOURG LUXEMBOURG \n",
"2574458 LUXEMBOURG LUXEMBOURG \n",
"2574459 LUXEMBOURG LUXEMBOURG \n",
"2574460 LUXEMBOURG LUXEMBOURG \n",
"\n",
" Product - Asset Type Product - Strategy Product - Legal Status \\\n",
"0 EQUITY INVESTISSEMENT SICAV \n",
"1 DIVERSIFIED PATRIMOINE FCP \n",
"2 EQUITY INVESTISSEMENT FCP \n",
"3 EQUITY INVESTISSEMENT FCP \n",
"4 EQUITY INVESTISSEMENT FCP \n",
"... ... ... ... \n",
"2574456 FIXED INCOME SÉCURITÉ FCP \n",
"2574457 FIXED INCOME SÉCURITÉ FCP \n",
"2574458 FIXED INCOME SÉCURITÉ FCP \n",
"2574459 FIXED INCOME SÉCURITÉ FCP \n",
"2574460 FIXED INCOME SÉCURITÉ SICAV \n",
"\n",
" Product - Is Dedie ? Product - Fund \\\n",
"0 NO CARMIGNAC PORTFOLIO INVESTISSEMENT \n",
"1 NO CARMIGNAC PATRIMOINE \n",
"2 NO CARMIGNAC INVESTISSEMENT \n",
"3 NO CARMIGNAC INVESTISSEMENT \n",
"4 NO CARMIGNAC INVESTISSEMENT \n",
"... ... ... \n",
"2574456 NO CARMIGNAC SÉCURITÉ \n",
"2574457 NO CARMIGNAC SÉCURITÉ \n",
"2574458 NO CARMIGNAC SÉCURITÉ \n",
"2574459 NO CARMIGNAC SÉCURITÉ \n",
"2574460 NO CARMIGNAC PORTFOLIO SÉCURITÉ \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency \\\n",
"0 F EUR \n",
"1 A EUR \n",
"2 A EUR \n",
"3 A EUR \n",
"4 A EUR \n",
"... ... ... \n",
"2574456 AW & AW-R EUR \n",
"2574457 AW & AW-R EUR \n",
"2574458 AW & AW-R EUR \n",
"2574459 AW & AW-R EUR \n",
"2574460 AW & AW-R EUR \n",
"\n",
" Product - Isin Centralisation Date Quantity - Subscription \\\n",
"0 LU0992625839 2020-11-05 1636.000 \n",
"1 FR0010135103 2015-03-09 144.690 \n",
"2 FR0010148981 2016-10-26 0.000 \n",
"3 FR0010148981 2018-10-18 0.000 \n",
"4 FR0010148981 2019-04-08 0.000 \n",
"... ... ... ... \n",
"2574456 FR0010149120 2015-06-12 0.000 \n",
"2574457 FR0010149120 2015-09-18 328.726 \n",
"2574458 FR0010149120 2015-09-25 4.443 \n",
"2574459 FR0010149120 2015-11-09 0.000 \n",
"2574460 LU1299306321 2016-01-11 3595.000 \n",
"\n",
" Quantity - Redemption Quantity - NetFlows Value Ccy - Subscription \\\n",
"0 0.000 1636.000 280983.00 \n",
"1 0.000 144.690 99985.13 \n",
"2 -8.321 -8.321 0.00 \n",
"3 -22.083 -22.083 0.00 \n",
"4 -465.992 -465.992 0.00 \n",
"... ... ... ... \n",
"2574456 -20.000 -20.000 0.00 \n",
"2574457 0.000 328.726 564028.07 \n",
"2574458 0.000 4.443 7603.66 \n",
"2574459 -440.000 -440.000 0.00 \n",
"2574460 0.000 3595.000 358385.55 \n",
"\n",
" Value Ccy - Redemption Value Ccy - NetFlows Value € - Subscription \\\n",
"0 0.00 280983.00 280983.00 \n",
"1 0.00 99985.13 99985.13 \n",
"2 -9384.76 -9384.76 0.00 \n",
"3 -25227.40 -25227.40 0.00 \n",
"4 -563775.76 -563775.76 0.00 \n",
"... ... ... ... \n",
"2574456 -34294.40 -34294.40 0.00 \n",
"2574457 0.00 564028.07 564028.07 \n",
"2574458 0.00 7603.66 7603.66 \n",
"2574459 -754696.80 -754696.80 0.00 \n",
"2574460 0.00 358385.55 358385.55 \n",
"\n",
" Value € - Redemption Value € - NetFlows \n",
"0 0.00 280983.00 \n",
"1 0.00 99985.13 \n",
"2 -9384.76 -9384.76 \n",
"3 -25227.40 -25227.40 \n",
"4 -563775.76 -563775.76 \n",
"... ... ... \n",
"2574456 -34294.40 -34294.40 \n",
"2574457 0.00 564028.07 \n",
"2574458 0.00 7603.66 \n",
"2574459 -754696.80 -754696.80 \n",
"2574460 0.00 358385.55 \n",
"\n",
"[2574461 rows x 25 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Flows table: per the preview below it carries subscription / redemption / net-flow\n",
"# quantities and values per registrar account, product and centralisation date.\n",
"df_flows = pd.read_csv(filepath_or_buffer=\"flows.csv\")\n",
"df_flows"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b1b88d12-7909-435b-b5a8-7814d5ad09af",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Agreement - Code</th>\n",
" <th>Company - Id</th>\n",
" <th>Company - Ultimate Parent Id</th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Registrar Account - Region</th>\n",
" <th>RegistrarAccount - Country</th>\n",
" <th>Product - Asset Type</th>\n",
" <th>Product - Strategy</th>\n",
" <th>Product - Legal Status</th>\n",
" <th>Product - Is Dedie ?</th>\n",
" <th>Product - Fund</th>\n",
" <th>Product - Shareclass Type</th>\n",
" <th>Product - Shareclass Currency</th>\n",
" <th>Product - Isin</th>\n",
" <th>Centralisation Date</th>\n",
" <th>Quantity - AUM</th>\n",
" <th>Value - AUM CCY</th>\n",
" <th>Value - AUM €</th>\n",
" <th>repair_flag</th>\n",
" <th>n_repairs</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>166.0</td>\n",
" <td>166.0</td>\n",
" <td>200000647</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>DIVERSIFIED</td>\n",
" <td>PATRIMOINE</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PATRIMOINE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-03-31</td>\n",
" <td>35.368</td>\n",
" <td>24648.6666</td>\n",
" <td>24648.6666</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>166.0</td>\n",
" <td>166.0</td>\n",
" <td>200000647</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>DIVERSIFIED</td>\n",
" <td>PATRIMOINE</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PATRIMOINE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22413.0553</td>\n",
" <td>22413.0553</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>166.0</td>\n",
" <td>166.0</td>\n",
" <td>200000647</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>DIVERSIFIED</td>\n",
" <td>PATRIMOINE</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PATRIMOINE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2015-12-31</td>\n",
" <td>35.368</td>\n",
" <td>22051.2406</td>\n",
" <td>22051.2406</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>166.0</td>\n",
" <td>166.0</td>\n",
" <td>200000647</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>DIVERSIFIED</td>\n",
" <td>PATRIMOINE</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PATRIMOINE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-03-31</td>\n",
" <td>35.368</td>\n",
" <td>21626.1173</td>\n",
" <td>21626.1173</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>166.0</td>\n",
" <td>166.0</td>\n",
" <td>200000647</td>\n",
" <td>FRANCE</td>\n",
" <td>FRANCE</td>\n",
" <td>DIVERSIFIED</td>\n",
" <td>PATRIMOINE</td>\n",
" <td>FCP</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PATRIMOINE</td>\n",
" <td>A</td>\n",
" <td>EUR</td>\n",
" <td>FR0010135103</td>\n",
" <td>2016-11-30</td>\n",
" <td>35.368</td>\n",
" <td>22489.4502</td>\n",
" <td>22489.4502</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5589910</th>\n",
" <td>4880294</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2020-10-31</td>\n",
" <td>3099.000</td>\n",
" <td>318422.2500</td>\n",
" <td>318422.2500</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5589911</th>\n",
" <td>4880294</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2020-10-31</td>\n",
" <td>3099.000</td>\n",
" <td>318422.2500</td>\n",
" <td>318422.2500</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5589912</th>\n",
" <td>4880295</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2021-07-31</td>\n",
" <td>2835.000</td>\n",
" <td>297618.3000</td>\n",
" <td>297618.3000</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5589913</th>\n",
" <td>4880295</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
" <td>AW &amp; AW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1299306321</td>\n",
" <td>2021-07-31</td>\n",
" <td>2835.000</td>\n",
" <td>297618.3000</td>\n",
" <td>297618.3000</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5589914</th>\n",
" <td>4880296</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>FIXED INCOME</td>\n",
" <td>SÉCURITÉ</td>\n",
" <td>SICAV</td>\n",
" <td>NO</td>\n",
" <td>CARMIGNAC PORTFOLIO SÉCURITÉ</td>\n",
" <td>FW &amp; FW-R</td>\n",
" <td>EUR</td>\n",
" <td>LU1792391911</td>\n",
" <td>2020-07-31</td>\n",
" <td>2916.394</td>\n",
" <td>287410.6287</td>\n",
" <td>287410.6287</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5589915 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Agreement - Code Company - Id \\\n",
"0 0 3 166.0 \n",
"1 1 3 166.0 \n",
"2 2 3 166.0 \n",
"3 3 3 166.0 \n",
"4 4 3 166.0 \n",
"... ... ... ... \n",
"5589910 4880294 PRIVATE CLIENT PRIVATE CLIENT \n",
"5589911 4880294 PRIVATE CLIENT PRIVATE CLIENT \n",
"5589912 4880295 PRIVATE CLIENT PRIVATE CLIENT \n",
"5589913 4880295 PRIVATE CLIENT PRIVATE CLIENT \n",
"5589914 4880296 PRIVATE CLIENT PRIVATE CLIENT \n",
"\n",
" Company - Ultimate Parent Id Registrar Account - ID \\\n",
"0 166.0 200000647 \n",
"1 166.0 200000647 \n",
"2 166.0 200000647 \n",
"3 166.0 200000647 \n",
"4 166.0 200000647 \n",
"... ... ... \n",
"5589910 PRIVATE CLIENT PRIVATE CLIENT \n",
"5589911 PRIVATE CLIENT PRIVATE CLIENT \n",
"5589912 PRIVATE CLIENT PRIVATE CLIENT \n",
"5589913 PRIVATE CLIENT PRIVATE CLIENT \n",
"5589914 PRIVATE CLIENT PRIVATE CLIENT \n",
"\n",
" Registrar Account - Region RegistrarAccount - Country \\\n",
"0 FRANCE FRANCE \n",
"1 FRANCE FRANCE \n",
"2 FRANCE FRANCE \n",
"3 FRANCE FRANCE \n",
"4 FRANCE FRANCE \n",
"... ... ... \n",
"5589910 SWITZERLAND SWITZERLAND \n",
"5589911 SWITZERLAND SWITZERLAND \n",
"5589912 SWITZERLAND SWITZERLAND \n",
"5589913 SWITZERLAND SWITZERLAND \n",
"5589914 SWITZERLAND SWITZERLAND \n",
"\n",
" Product - Asset Type Product - Strategy Product - Legal Status \\\n",
"0 DIVERSIFIED PATRIMOINE FCP \n",
"1 DIVERSIFIED PATRIMOINE FCP \n",
"2 DIVERSIFIED PATRIMOINE FCP \n",
"3 DIVERSIFIED PATRIMOINE FCP \n",
"4 DIVERSIFIED PATRIMOINE FCP \n",
"... ... ... ... \n",
"5589910 FIXED INCOME SÉCURITÉ SICAV \n",
"5589911 FIXED INCOME SÉCURITÉ SICAV \n",
"5589912 FIXED INCOME SÉCURITÉ SICAV \n",
"5589913 FIXED INCOME SÉCURITÉ SICAV \n",
"5589914 FIXED INCOME SÉCURITÉ SICAV \n",
"\n",
" Product - Is Dedie ? Product - Fund \\\n",
"0 NO CARMIGNAC PATRIMOINE \n",
"1 NO CARMIGNAC PATRIMOINE \n",
"2 NO CARMIGNAC PATRIMOINE \n",
"3 NO CARMIGNAC PATRIMOINE \n",
"4 NO CARMIGNAC PATRIMOINE \n",
"... ... ... \n",
"5589910 NO CARMIGNAC PORTFOLIO SÉCURITÉ \n",
"5589911 NO CARMIGNAC PORTFOLIO SÉCURITÉ \n",
"5589912 NO CARMIGNAC PORTFOLIO SÉCURITÉ \n",
"5589913 NO CARMIGNAC PORTFOLIO SÉCURITÉ \n",
"5589914 NO CARMIGNAC PORTFOLIO SÉCURITÉ \n",
"\n",
" Product - Shareclass Type Product - Shareclass Currency \\\n",
"0 A EUR \n",
"1 A EUR \n",
"2 A EUR \n",
"3 A EUR \n",
"4 A EUR \n",
"... ... ... \n",
"5589910 AW & AW-R EUR \n",
"5589911 AW & AW-R EUR \n",
"5589912 AW & AW-R EUR \n",
"5589913 AW & AW-R EUR \n",
"5589914 FW & FW-R EUR \n",
"\n",
" Product - Isin Centralisation Date Quantity - AUM Value - AUM CCY \\\n",
"0 FR0010135103 2015-03-31 35.368 24648.6666 \n",
"1 FR0010135103 2015-11-30 35.368 22413.0553 \n",
"2 FR0010135103 2015-12-31 35.368 22051.2406 \n",
"3 FR0010135103 2016-03-31 35.368 21626.1173 \n",
"4 FR0010135103 2016-11-30 35.368 22489.4502 \n",
"... ... ... ... ... \n",
"5589910 LU1299306321 2020-10-31 3099.000 318422.2500 \n",
"5589911 LU1299306321 2020-10-31 3099.000 318422.2500 \n",
"5589912 LU1299306321 2021-07-31 2835.000 297618.3000 \n",
"5589913 LU1299306321 2021-07-31 2835.000 297618.3000 \n",
"5589914 LU1792391911 2020-07-31 2916.394 287410.6287 \n",
"\n",
" Value - AUM € repair_flag n_repairs \n",
"0 24648.6666 False 0.0 \n",
"1 22413.0553 False 0.0 \n",
"2 22051.2406 False 0.0 \n",
"3 21626.1173 False 0.0 \n",
"4 22489.4502 False 0.0 \n",
"... ... ... ... \n",
"5589910 318422.2500 False 0.0 \n",
"5589911 318422.2500 False 0.0 \n",
"5589912 297618.3000 False 0.0 \n",
"5589913 297618.3000 False 0.0 \n",
"5589914 287410.6287 False 0.0 \n",
"\n",
"[5589915 rows x 21 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# AUM (holdings) table. NOTE(review): the repair_flag / n_repairs columns in the preview\n",
"# suggest this is a cleaned version of a raw stock file -- confirm with whoever produced it.\n",
"df_aum = pd.read_csv(filepath_or_buffer=\"stock_repaired.csv\")\n",
"df_aum"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4c799ab2-b16e-4cbe-85ee-818002c758c4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['CARMIGNAC PATRIMOINE', 'CARMIGNAC INVESTISSEMENT',\n",
" 'CARMIGNAC PORTFOLIO INVESTISSEMENT',\n",
" 'CARMIGNAC EURO-INVESTISSEMENT', 'CARMIGNAC INNOVATION',\n",
" 'CARMIGNAC ABSOLUTE RETURN EUROPE',\n",
" 'CARMIGNAC PORTFOLIO CLIMATE TRANSITION',\n",
" 'CARMIGNAC EURO-ENTREPRENEURS', 'CARMIGNAC EMERGENTS',\n",
" 'CARMIGNAC PORTFOLIO EMERGING PATRIMOINE',\n",
" 'CARMIGNAC PORTFOLIO GRANDE EUROPE',\n",
" 'CARMIGNAC INVESTISSEMENT LATITUDE', 'CARMIGNAC COURT TERME',\n",
" 'CARMIGNAC SÉCURITÉ', 'CARMIGNAC MULTI EXPERTISE',\n",
" 'CARMIGNAC PORTFOLIO INFOTECH', 'CARMIGNAC PROFIL RÉACTIF 100',\n",
" 'CARMIGNAC PORTFOLIO GLOBAL BOND', 'CARMIGNAC PROFIL RÉACTIF 75',\n",
" 'CARMIGNAC PORTFOLIO PATRIMOINE EUROPE', 'CARMIGNAC CREDIT 2027',\n",
" 'CARMIGNAC PORTFOLIO ASIA DISCOVERY',\n",
" 'CARMIGNAC PORTFOLIO TECH SOLUTIONS',\n",
" 'CARMIGNAC PORTFOLIO FLEXIBLE BOND', 'CARMIGNAC PORTFOLIO CREDIT',\n",
" 'CARMIGNAC PORTFOLIO MARKET NEUTRAL',\n",
" 'CARMIGNAC PORTFOLIO EM DEBT',\n",
" 'CARMIGNAC PORTFOLIO LONG-SHORT EUROPEAN EQUITIES',\n",
" 'CARMIGNAC PORTFOLIO LONG-SHORT GLOBAL EQUITIES',\n",
" 'CARMIGNAC PORTFOLIO PATRIMOINE',\n",
" 'CARMIGNAC PORTFOLIO EURO-ENTREPRENEURS',\n",
" 'CARMIGNAC PORTFOLIO GRANDCHILDREN',\n",
" 'CARMIGNAC PORTFOLIO INVESTISSEMENT LATITUDE',\n",
" 'CARMIGNAC PORTFOLIO EMERGENTS', 'CARMIGNAC PORTFOLIO SÉCURITÉ',\n",
" 'CARMIGNAC PORTFOLIO INFLATION SOLUTION', 'CARMIGNAC CREDIT 2029',\n",
" 'CARMIGNAC CREDIT 2031', 'CARMIGNAC PORTFOLIO CAPITAL CUBE',\n",
" 'CARMIGNAC PORTFOLIO CHINA NEW ECONOMY', 'CARMIGNAC CREDIT 2025',\n",
" 'CARMIGNAC EPARGNE ACTIONS MONDE ISR',\n",
" 'CARMIGNAC PORTFOLIO FAMILY GOVERNED',\n",
" 'CARMIGNAC S.A. SICAV - PART II UCI PRIVATE EVERGREEN',\n",
" 'CARMIGNAC PORTFOLIO HUMAN XPERIENCE',\n",
" 'CARMIGNAC CHINA NEW ECONOMY', 'CARMIGNAC PORTFOLIO CHINA',\n",
" 'CARMIGNAC ALTS ICAV CARMIGNAC CREDIT OPPORTUNITIES',\n",
" 'CARMIGNAC PORTFOLIO MERGER ARBITRAGE PLUS',\n",
" 'CARMIGNAC PORTFOLIO ABSOLUTE RETURN EUROPE',\n",
" 'CARMIGNAC PORTFOLIO FLEXIBLE ALLOCATION 2024',\n",
" 'FP CARMIGNAC EUROPEAN LEADERS',\n",
" 'FP CARMIGNAC GLOBAL EQUITY COMPOUNDERS',\n",
" 'CARMIGNAC PORTFOLIO MERGER ARBITRAGE',\n",
" 'SOLYS - CARMIGNAC EQUITY SELECTION',\n",
" 'FP CARMIGNAC EMERGING MARKETS', 'MAPFRE CARMIGNAC F.P.',\n",
" 'FP CARMIGNAC GLOBAL BOND',\n",
" 'CARMIGNAC ALTS ICAV EUROPEAN LONG SHORT',\n",
" 'CARMIGNAC PORTFOLIO ACTIVE RISK ALLOCATION',\n",
" 'FP CARMIGNAC EMERGING PATRIMOINE', 'FP CARMIGNAC PATRIMOINE',\n",
" 'FP CARMIGNAC EMERGING DISCOVERY',\n",
" 'CREDIT SUISSE CARMIGNAC EMERGING MARKETS MULTI-ASSET FUND',\n",
" 'FONDITALIA CARMIGNAC ACTIVE ALLOCATION',\n",
" 'CARMIGNAC GLOBAL ACTIVE',\n",
" 'LUX IM - CARMIGNAC EMERGING FLEXIBLE BOND',\n",
" 'CARMIGNAC PORTFOLIO EVOLUTION', 'UFF GRANDE EUROPE 0-100',\n",
" 'CFP 1', 'CARMIGNAC PORTFOLIO SUSTAINABLE BOND',\n",
" 'CARMIGNAC PORTFOLIO CROSS ASSET OPPORTUNITIES',\n",
" 'CARMIGNAC PORTFOLIO ALPHA THEMES',\n",
" 'CARMIGNAC PORTFOLIO GLOBAL MARKET NEUTRAL'], dtype=object)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct fund names present in the AUM table, in order of first appearance.\n",
"pd.unique(df_aum['Product - Fund'])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "09068a72-80a4-4239-947a-a73f6de10a57",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_days</th>\n",
" <th>n_transactions</th>\n",
" <th>total_netflows</th>\n",
" <th>mean_flow</th>\n",
" <th>std_flow</th>\n",
" <th>total_subscription</th>\n",
" <th>total_redemption</th>\n",
" <th>churn_ratio</th>\n",
" <th>churn_flag</th>\n",
" <th>activity_score</th>\n",
" <th>flow_volatility</th>\n",
" <th>inertia_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100000028</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>-1.092380e+02</td>\n",
" <td>-36.412667</td>\n",
" <td>49.280511</td>\n",
" <td>0.000000e+00</td>\n",
" <td>-1.092380e+02</td>\n",
" <td>-1.092380e+08</td>\n",
" <td>0</td>\n",
" <td>1.386294</td>\n",
" <td>49.280511</td>\n",
" <td>0.998921</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100000042</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-6.601150e+02</td>\n",
" <td>-660.115000</td>\n",
" <td>NaN</td>\n",
" <td>0.000000e+00</td>\n",
" <td>-6.601150e+02</td>\n",
" <td>-6.601150e+08</td>\n",
" <td>0</td>\n",
" <td>0.693147</td>\n",
" <td>0.000000</td>\n",
" <td>0.999640</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100000065</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1.746460e+02</td>\n",
" <td>-174.646000</td>\n",
" <td>NaN</td>\n",
" <td>0.000000e+00</td>\n",
" <td>-1.746460e+02</td>\n",
" <td>-1.746460e+08</td>\n",
" <td>0</td>\n",
" <td>0.693147</td>\n",
" <td>0.000000</td>\n",
" <td>0.999640</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>100000069</td>\n",
" <td>65</td>\n",
" <td>73</td>\n",
" <td>-7.479755e+03</td>\n",
" <td>-102.462397</td>\n",
" <td>2168.971331</td>\n",
" <td>3.332040e+04</td>\n",
" <td>-4.080016e+04</td>\n",
" <td>-1.224480e+00</td>\n",
" <td>0</td>\n",
" <td>4.304065</td>\n",
" <td>2168.971331</td>\n",
" <td>0.976619</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>100000073</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>-1.334020e+02</td>\n",
" <td>-133.402000</td>\n",
" <td>NaN</td>\n",
" <td>0.000000e+00</td>\n",
" <td>-1.334020e+02</td>\n",
" <td>-1.334020e+08</td>\n",
" <td>0</td>\n",
" <td>0.693147</td>\n",
" <td>0.000000</td>\n",
" <td>0.999640</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6837</th>\n",
" <td>422905</td>\n",
" <td>208</td>\n",
" <td>212</td>\n",
" <td>-4.329218e+03</td>\n",
" <td>-20.420840</td>\n",
" <td>331.677297</td>\n",
" <td>9.699140e+03</td>\n",
" <td>-1.402836e+04</td>\n",
" <td>-1.446351e+00</td>\n",
" <td>0</td>\n",
" <td>5.361292</td>\n",
" <td>331.677297</td>\n",
" <td>0.925180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6838</th>\n",
" <td>422906</td>\n",
" <td>1146</td>\n",
" <td>1556</td>\n",
" <td>4.455099e+03</td>\n",
" <td>2.863174</td>\n",
" <td>201.071555</td>\n",
" <td>6.078686e+04</td>\n",
" <td>-5.633177e+04</td>\n",
" <td>-9.267095e-01</td>\n",
" <td>0</td>\n",
" <td>7.350516</td>\n",
" <td>201.071555</td>\n",
" <td>0.587770</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6839</th>\n",
" <td>8307</td>\n",
" <td>204</td>\n",
" <td>221</td>\n",
" <td>2.168303e+04</td>\n",
" <td>98.113249</td>\n",
" <td>2217.940406</td>\n",
" <td>1.204399e+05</td>\n",
" <td>-9.875688e+04</td>\n",
" <td>-8.199681e-01</td>\n",
" <td>0</td>\n",
" <td>5.402677</td>\n",
" <td>2217.940406</td>\n",
" <td>0.926619</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6840</th>\n",
" <td>OFF DISTRIBUTION</td>\n",
" <td>2656</td>\n",
" <td>27679</td>\n",
" <td>1.319043e+08</td>\n",
" <td>4765.499704</td>\n",
" <td>391347.475503</td>\n",
" <td>3.388942e+08</td>\n",
" <td>-2.069900e+08</td>\n",
" <td>-6.107804e-01</td>\n",
" <td>0</td>\n",
" <td>10.228465</td>\n",
" <td>391347.475503</td>\n",
" <td>0.044604</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6841</th>\n",
" <td>PRIVATE CLIENT</td>\n",
" <td>2690</td>\n",
" <td>32363</td>\n",
" <td>-4.181221e+05</td>\n",
" <td>-12.919758</td>\n",
" <td>7572.830139</td>\n",
" <td>1.354277e+07</td>\n",
" <td>-1.396089e+07</td>\n",
" <td>-1.030874e+00</td>\n",
" <td>0</td>\n",
" <td>10.384802</td>\n",
" <td>7572.830139</td>\n",
" <td>0.032374</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6842 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_days n_transactions total_netflows \\\n",
"0 100000028 3 3 -1.092380e+02 \n",
"1 100000042 1 1 -6.601150e+02 \n",
"2 100000065 1 1 -1.746460e+02 \n",
"3 100000069 65 73 -7.479755e+03 \n",
"4 100000073 1 1 -1.334020e+02 \n",
"... ... ... ... ... \n",
"6837 422905 208 212 -4.329218e+03 \n",
"6838 422906 1146 1556 4.455099e+03 \n",
"6839 8307 204 221 2.168303e+04 \n",
"6840 OFF DISTRIBUTION 2656 27679 1.319043e+08 \n",
"6841 PRIVATE CLIENT 2690 32363 -4.181221e+05 \n",
"\n",
" mean_flow std_flow total_subscription total_redemption \\\n",
"0 -36.412667 49.280511 0.000000e+00 -1.092380e+02 \n",
"1 -660.115000 NaN 0.000000e+00 -6.601150e+02 \n",
"2 -174.646000 NaN 0.000000e+00 -1.746460e+02 \n",
"3 -102.462397 2168.971331 3.332040e+04 -4.080016e+04 \n",
"4 -133.402000 NaN 0.000000e+00 -1.334020e+02 \n",
"... ... ... ... ... \n",
"6837 -20.420840 331.677297 9.699140e+03 -1.402836e+04 \n",
"6838 2.863174 201.071555 6.078686e+04 -5.633177e+04 \n",
"6839 98.113249 2217.940406 1.204399e+05 -9.875688e+04 \n",
"6840 4765.499704 391347.475503 3.388942e+08 -2.069900e+08 \n",
"6841 -12.919758 7572.830139 1.354277e+07 -1.396089e+07 \n",
"\n",
" churn_ratio churn_flag activity_score flow_volatility inertia_ratio \n",
"0 -1.092380e+08 0 1.386294 49.280511 0.998921 \n",
"1 -6.601150e+08 0 0.693147 0.000000 0.999640 \n",
"2 -1.746460e+08 0 0.693147 0.000000 0.999640 \n",
"3 -1.224480e+00 0 4.304065 2168.971331 0.976619 \n",
"4 -1.334020e+08 0 0.693147 0.000000 0.999640 \n",
"... ... ... ... ... ... \n",
"6837 -1.446351e+00 0 5.361292 331.677297 0.925180 \n",
"6838 -9.267095e-01 0 7.350516 201.071555 0.587770 \n",
"6839 -8.199681e-01 0 5.402677 2217.940406 0.926619 \n",
"6840 -6.107804e-01 0 10.228465 391347.475503 0.044604 \n",
"6841 -1.030874e+00 0 10.384802 7572.830139 0.032374 \n",
"\n",
"[6842 rows x 13 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_client = pd.read_csv(\"client_behavior_clean.csv\")\n",
"df_client"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "791d8a11-fa46-400b-bbf1-55c8c055f524",
"metadata": {},
"outputs": [],
"source": [
"#external data projet-bdc-data /carmignac /Data Modélisation /Nav\n",
"PATH_NAV = \"s3://projet-bdc-data/carmignac/Data Modélisation/Nav/NAV_Bench_data.csv\" #Cest la table de valorisation / performance du produit.\n",
"PATH_RATES = \"s3://projet-bdc-data/carmignac/Data Modélisation/market data/esterRates.csv\"\n",
"\n",
"# optional competitors\n",
"PATH_COMP_FLOWS = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/daily_estimated_flows.csv\"\n",
"PATH_COMP_PERF = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/weekly_perf_full.csv\"\n",
"PATH_PEERS = \"s3://projet-bdc-carmignac-g3/peers/CAD_peers.csv\""
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "415015e5-4cdc-4ea9-9c0e-701c58314873",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"flows: (2574461, 25)\n",
"aum: (5589915, 21)\n",
"nav: (623914, 5)\n",
"rates: (2826, 2)\n",
"comp_flows: (963003, 6)\n",
"comp_perf: (2370192, 5)\n",
"peers: (31, 12)\n"
]
}
],
"source": [
"df_nav = pd.read_csv(PATH_NAV, sep=\";\")\n",
"df_rates = pd.read_csv(PATH_RATES,sep=\";\")\n",
"\n",
"df_comp_flows = pd.read_csv(PATH_COMP_FLOWS,sep=\";\")\n",
"df_comp_perf = pd.read_csv(PATH_COMP_PERF,sep=\";\")\n",
"df_peers = pd.read_csv(PATH_PEERS,sep=\"|\")\n",
"\n",
"print(\"flows:\", df_flows.shape)\n",
"print(\"aum:\", df_aum.shape)\n",
"print(\"nav:\", df_nav.shape)\n",
"print(\"rates:\", df_rates.shape)\n",
"print(\"comp_flows:\", df_comp_flows.shape)\n",
"print(\"comp_perf:\", df_comp_perf.shape)\n",
"print(\"peers:\", df_peers.shape)\n",
"\n",
"\n",
"#dbe bel ekhr un dataset avec une ligne pour : un client - un produit Carmignac - un mois "
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "0a52ab96-1c47-4530-83d0-148e72f70d9b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Dat</th>\n",
" <th>Isin</th>\n",
" <th>Aum Eur</th>\n",
" <th>Price (TF PartPrice)</th>\n",
" <th>PriceBench</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>623909</th>\n",
" <td>10/16/2025</td>\n",
" <td>FR001400KIF0</td>\n",
" <td>108424691,27</td>\n",
" <td>148,73</td>\n",
" <td>462,49002312</td>\n",
" </tr>\n",
" <tr>\n",
" <th>623910</th>\n",
" <td>10/17/2025</td>\n",
" <td>FR001400KIF0</td>\n",
" <td>107947215,67</td>\n",
" <td>148,08</td>\n",
" <td>462,6446111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>623911</th>\n",
" <td>10/20/2025</td>\n",
" <td>FR001400KIF0</td>\n",
" <td>109597120,24</td>\n",
" <td>150,34</td>\n",
" <td>468,73416853</td>\n",
" </tr>\n",
" <tr>\n",
" <th>623912</th>\n",
" <td>10/21/2025</td>\n",
" <td>FR001400KIF0</td>\n",
" <td>110216503,12</td>\n",
" <td>151,19</td>\n",
" <td>470,33788616</td>\n",
" </tr>\n",
" <tr>\n",
" <th>623913</th>\n",
" <td>10/22/2025</td>\n",
" <td>FR001400KIF0</td>\n",
" <td>109692584,22</td>\n",
" <td>150,47</td>\n",
" <td>468,13202429</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Dat Isin Aum Eur Price (TF PartPrice) \\\n",
"623909 10/16/2025 FR001400KIF0 108424691,27 148,73 \n",
"623910 10/17/2025 FR001400KIF0 107947215,67 148,08 \n",
"623911 10/20/2025 FR001400KIF0 109597120,24 150,34 \n",
"623912 10/21/2025 FR001400KIF0 110216503,12 151,19 \n",
"623913 10/22/2025 FR001400KIF0 109692584,22 150,47 \n",
"\n",
" PriceBench \n",
"623909 462,49002312 \n",
"623910 462,6446111 \n",
"623911 468,73416853 \n",
"623912 470,33788616 \n",
"623913 468,13202429 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_nav.tail()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e15c0a1d-d636-48f5-83ab-ff43b49b0e44",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Yld to Maturity</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2821</th>\n",
" <td>16/10/2025</td>\n",
" <td>1.928</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2822</th>\n",
" <td>17/10/2025</td>\n",
" <td>1.928</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2823</th>\n",
" <td>20/10/2025</td>\n",
" <td>1.928</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2824</th>\n",
" <td>21/10/2025</td>\n",
" <td>1.927</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2825</th>\n",
" <td>22/10/2025</td>\n",
" <td>1.928</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Yld to Maturity\n",
"2821 16/10/2025 1.928\n",
"2822 17/10/2025 1.928\n",
"2823 20/10/2025 1.928\n",
"2824 21/10/2025 1.927\n",
"2825 22/10/2025 1.928"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_rates.tail() #Table de marché macro, ici probablement un taux de référence."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ede9c196-1d10-48fc-b99f-7369a2fb3d9b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>shareClass_name</th>\n",
" <th>SecId_MS</th>\n",
" <th>ISIN</th>\n",
" <th>FundId</th>\n",
" <th>Date</th>\n",
" <th>Estimated Fund-level Net Flow (Daily)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>962998</th>\n",
" <td>TM Lansdowne European Spec Sit I Acc GBP</td>\n",
" <td>F00000VH4K</td>\n",
" <td>GB00BTJRQ064</td>\n",
" <td>FS0000BQP3</td>\n",
" <td>2025-06-27</td>\n",
" <td>183110.412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>962999</th>\n",
" <td>Waverton European Dividend Gr B GBP Inc</td>\n",
" <td>F000011TLC</td>\n",
" <td>IE00BF5KV626</td>\n",
" <td>FS0000E90E</td>\n",
" <td>2025-06-27</td>\n",
" <td>16425.603</td>\n",
" </tr>\n",
" <tr>\n",
" <th>963000</th>\n",
" <td>Premier Miton European Opports B Acc</td>\n",
" <td>F00000WMCF</td>\n",
" <td>GB00BZ2K2M84</td>\n",
" <td>FS0000C8WZ</td>\n",
" <td>2025-06-27</td>\n",
" <td>-612606.416</td>\n",
" </tr>\n",
" <tr>\n",
" <th>963001</th>\n",
" <td>Incrementum Active Commodity Fund R EUR</td>\n",
" <td>F00000SVVR</td>\n",
" <td>LI0226274319</td>\n",
" <td>FS0000AMCV</td>\n",
" <td>2025-06-27</td>\n",
" <td>-5123.607</td>\n",
" </tr>\n",
" <tr>\n",
" <th>963002</th>\n",
" <td>AXAWF Inflation Plus A Cap EUR</td>\n",
" <td>F00001CSX2</td>\n",
" <td>LU2257473269</td>\n",
" <td>FS0000H62L</td>\n",
" <td>2025-06-27</td>\n",
" <td>393490.630</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" shareClass_name SecId_MS ISIN \\\n",
"962998 TM Lansdowne European Spec Sit I Acc GBP F00000VH4K GB00BTJRQ064 \n",
"962999 Waverton European Dividend Gr B GBP Inc F000011TLC IE00BF5KV626 \n",
"963000 Premier Miton European Opports B Acc F00000WMCF GB00BZ2K2M84 \n",
"963001 Incrementum Active Commodity Fund R EUR F00000SVVR LI0226274319 \n",
"963002 AXAWF Inflation Plus A Cap EUR F00001CSX2 LU2257473269 \n",
"\n",
" FundId Date Estimated Fund-level Net Flow (Daily) \n",
"962998 FS0000BQP3 2025-06-27 183110.412 \n",
"962999 FS0000E90E 2025-06-27 16425.603 \n",
"963000 FS0000C8WZ 2025-06-27 -612606.416 \n",
"963001 FS0000AMCV 2025-06-27 -5123.607 \n",
"963002 FS0000H62L 2025-06-27 393490.630 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_comp_flows.tail()\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "71407c1d-dda2-4ec6-9df4-499d4502e560",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>perfPeriod</th>\n",
" <th>shareClass_name</th>\n",
" <th>return</th>\n",
" <th>percentile</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2370187</th>\n",
" <td>2015-12-30</td>\n",
" <td>WeeklyRet</td>\n",
" <td>BNP Paribas Emerging Eq Cl Eur C</td>\n",
" <td>-1.623478</td>\n",
" <td>83.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2370188</th>\n",
" <td>2015-12-30</td>\n",
" <td>WeeklyRet</td>\n",
" <td>Capital Group EM Debt (LUX) B</td>\n",
" <td>-0.162338</td>\n",
" <td>88.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2370189</th>\n",
" <td>2015-12-30</td>\n",
" <td>WeeklyRet</td>\n",
" <td>BGF Global Allocation A2 EUR Hedged</td>\n",
" <td>0.387712</td>\n",
" <td>44.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2370190</th>\n",
" <td>2015-12-30</td>\n",
" <td>WeeklyRet</td>\n",
" <td>Exane Funds 2 Exane Pleiade B EUR Acc</td>\n",
" <td>0.082896</td>\n",
" <td>60.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2370191</th>\n",
" <td>2015-12-30</td>\n",
" <td>WeeklyRet</td>\n",
" <td>Invesco Euro Short Term Bond A EUR Acc</td>\n",
" <td>0.034302</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date perfPeriod shareClass_name \\\n",
"2370187 2015-12-30 WeeklyRet BNP Paribas Emerging Eq Cl Eur C \n",
"2370188 2015-12-30 WeeklyRet Capital Group EM Debt (LUX) B \n",
"2370189 2015-12-30 WeeklyRet BGF Global Allocation A2 EUR Hedged \n",
"2370190 2015-12-30 WeeklyRet Exane Funds 2 Exane Pleiade B EUR Acc \n",
"2370191 2015-12-30 WeeklyRet Invesco Euro Short Term Bond A EUR Acc \n",
"\n",
" return percentile \n",
"2370187 -1.623478 83.0 \n",
"2370188 -0.162338 88.0 \n",
"2370189 0.387712 44.0 \n",
"2370190 0.082896 60.0 \n",
"2370191 0.034302 35.0 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_comp_perf.tail() #Performance des concurrents, avec rang relatif.\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "05b87839-dc25-4f0d-8bf8-f43ac8c932ee",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>ISIN</th>\n",
" <th>SecId_MS</th>\n",
" <th>FundId</th>\n",
" <th>Global Broad Category Group</th>\n",
" <th>Global Category</th>\n",
" <th>Morningstar Category</th>\n",
" <th>Index Fund</th>\n",
" <th>Enhanced Index</th>\n",
" <th>Inception Date</th>\n",
" <th>Inception Date of Fund's Oldest Share Class</th>\n",
" <th>Domicile</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Vontobel mtx EM Ldrs ex Chn S USD Acc</td>\n",
" <td>LU2601939452</td>\n",
" <td>F00001G9PT</td>\n",
" <td>FS0000IAHL</td>\n",
" <td>Equity</td>\n",
" <td>Global Emerging Markets Equity</td>\n",
" <td>EAA Fund Global Emerging Markets ex-China Equity</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2023-09-20</td>\n",
" <td>2023-09-20</td>\n",
" <td>Luxembourg</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>PineBridge Asia exJapan SmCap Eq A</td>\n",
" <td>IE00B12V2V27</td>\n",
" <td>FOGBR05LNR</td>\n",
" <td>FSGBR06C4W</td>\n",
" <td>Equity</td>\n",
" <td>Asia ex-Japan Equity</td>\n",
" <td>EAA Fund Asia ex-Japan Small/Mid-Cap Equity</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2006-04-19</td>\n",
" <td>1994-09-01</td>\n",
" <td>Ireland</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>abrdn-Asian Smaller Companies A Acc USD</td>\n",
" <td>LU0231459107</td>\n",
" <td>F0GBR06X7H</td>\n",
" <td>FSGBR05GSY</td>\n",
" <td>Equity</td>\n",
" <td>Asia ex-Japan Equity</td>\n",
" <td>EAA Fund Asia ex-Japan Small/Mid-Cap Equity</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2004-05-14</td>\n",
" <td>2004-05-14</td>\n",
" <td>Luxembourg</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Allianz Asian Small Cap Equity A EUR</td>\n",
" <td>LU2420271673</td>\n",
" <td>F00001DBJJ</td>\n",
" <td>FS0000ASMB</td>\n",
" <td>Equity</td>\n",
" <td>Asia ex-Japan Equity</td>\n",
" <td>EAA Fund Asia ex-Japan Small/Mid-Cap Equity</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2022-01-05</td>\n",
" <td>2014-05-13</td>\n",
" <td>Luxembourg</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>Allianz Asia Ex China Equity A (USD)</td>\n",
" <td>LU0348788117</td>\n",
" <td>F000000F7V</td>\n",
" <td>FSUSA08CND</td>\n",
" <td>Equity</td>\n",
" <td>Asia ex-Japan Equity</td>\n",
" <td>EAA Fund Asia ex-Japan Equity</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2008-10-03</td>\n",
" <td>2008-10-03</td>\n",
" <td>Luxembourg</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Name ISIN SecId_MS \\\n",
"26 Vontobel mtx EM Ldrs ex Chn S USD Acc LU2601939452 F00001G9PT \n",
"27 PineBridge Asia exJapan SmCap Eq A IE00B12V2V27 FOGBR05LNR \n",
"28 abrdn-Asian Smaller Companies A Acc USD LU0231459107 F0GBR06X7H \n",
"29 Allianz Asian Small Cap Equity A EUR LU2420271673 F00001DBJJ \n",
"30 Allianz Asia Ex China Equity A (USD) LU0348788117 F000000F7V \n",
"\n",
" FundId Global Broad Category Group Global Category \\\n",
"26 FS0000IAHL Equity Global Emerging Markets Equity \n",
"27 FSGBR06C4W Equity Asia ex-Japan Equity \n",
"28 FSGBR05GSY Equity Asia ex-Japan Equity \n",
"29 FS0000ASMB Equity Asia ex-Japan Equity \n",
"30 FSUSA08CND Equity Asia ex-Japan Equity \n",
"\n",
" Morningstar Category Index Fund \\\n",
"26 EAA Fund Global Emerging Markets ex-China Equity No \n",
"27 EAA Fund Asia ex-Japan Small/Mid-Cap Equity No \n",
"28 EAA Fund Asia ex-Japan Small/Mid-Cap Equity No \n",
"29 EAA Fund Asia ex-Japan Small/Mid-Cap Equity No \n",
"30 EAA Fund Asia ex-Japan Equity No \n",
"\n",
" Enhanced Index Inception Date Inception Date of Fund's Oldest Share Class \\\n",
"26 No 2023-09-20 2023-09-20 \n",
"27 No 2006-04-19 1994-09-01 \n",
"28 No 2004-05-14 2004-05-14 \n",
"29 No 2022-01-05 2014-05-13 \n",
"30 No 2008-10-03 2008-10-03 \n",
"\n",
" Domicile \n",
"26 Luxembourg \n",
"27 Ireland \n",
"28 Luxembourg \n",
"29 Luxembourg \n",
"30 Luxembourg "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_peers.tail() #Ça permet de mesurer la pression concurrentielle."
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "9128c36b-33fd-44b2-a622-d3059f482c02",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['Unnamed: 0', 'Agreement - Code', 'Company - Id',\n",
" 'Company - Ultimate Parent Id', 'Registrar Account - ID',\n",
" 'Registrar Account - Region', 'RegistrarAccount - Country',\n",
" 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status',\n",
" 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type',\n",
" 'Product - Shareclass Currency', 'Product - Isin',\n",
" 'Centralisation Date', 'Quantity - Subscription',\n",
" 'Quantity - Redemption', 'Quantity - NetFlows',\n",
" 'Value Ccy - Subscription', 'Value Ccy - Redemption',\n",
" 'Value Ccy - NetFlows', 'Value € - Subscription',\n",
" 'Value € - Redemption', 'Value € - NetFlows'],\n",
" dtype='object')\n",
"Index(['Unnamed: 0', 'Agreement - Code', 'Company - Id',\n",
" 'Company - Ultimate Parent Id', 'Registrar Account - ID',\n",
" 'Registrar Account - Region', 'RegistrarAccount - Country',\n",
" 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status',\n",
" 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type',\n",
" 'Product - Shareclass Currency', 'Product - Isin',\n",
" 'Centralisation Date', 'Quantity - AUM', 'Value - AUM CCY',\n",
" 'Value - AUM €', 'repair_flag', 'n_repairs'],\n",
" dtype='object')\n",
"Index(['Registrar Account - ID', 'n_days', 'n_transactions', 'total_netflows',\n",
" 'mean_flow', 'std_flow', 'total_subscription', 'total_redemption',\n",
" 'churn_ratio', 'churn_flag', 'activity_score', 'flow_volatility',\n",
" 'inertia_ratio'],\n",
" dtype='object')\n",
"Index(['Dat', 'Isin', 'Aum Eur', 'Price (TF PartPrice)', 'PriceBench'], dtype='object')\n",
"Index(['Date', 'Yld to Maturity'], dtype='object')\n",
"Index(['shareClass_name', 'SecId_MS', 'ISIN', 'FundId', 'Date',\n",
" 'Estimated Fund-level Net Flow (Daily)'],\n",
" dtype='object')\n",
"Index(['Date', 'perfPeriod', 'shareClass_name', 'return', 'percentile'], dtype='object')\n",
"Index(['Name', 'ISIN', 'SecId_MS', 'FundId', 'Global Broad Category Group',\n",
" 'Global Category', 'Morningstar Category', 'Index Fund',\n",
" 'Enhanced Index', 'Inception Date',\n",
" 'Inception Date of Fund's Oldest Share Class', 'Domicile'],\n",
" dtype='object')\n"
]
}
],
"source": [
"for d in [df_flows,df_aum,df_client,df_nav,df_rates,df_comp_flows,df_comp_perf,df_peers]:\n",
" print (d.columns)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "7e82983d-aaf3-40c1-9186-2cb13c6274d7",
"metadata": {},
"outputs": [],
"source": [
"ID_COL = \"Registrar Account - ID\"\n",
"ISIN_COL = \"Product - Isin\"\n",
"\n",
"FLOW_DATE_COL = \"Centralisation Date\"\n",
"AUM_DATE_COL = \"Centralisation Date\"\n",
"\n",
"FLOW_QTY_COL = \"Quantity - NetFlows\"\n",
"FLOW_SUB_COL = \"Quantity - Subscription\"\n",
"FLOW_RED_COL = \"Quantity - Redemption\"\n",
"\n",
"AUM_QTY_COL = \"Quantity - AUM\"\n",
"AUM_VAL_COL = \"Value - AUM €\"\n",
"\n",
"REGION_COL = \"Registrar Account - Region\"\n",
"COUNTRY_COL = \"RegistrarAccount - Country\"\n",
"\n",
"NAV_DATE_COL = \"Dat\"\n",
"NAV_ISIN_COL = \"Isin\"\n",
"NAV_PRICE_COL = \"Price (TF PartPrice)\"\n",
"NAV_BENCH_COL = \"PriceBench\"\n",
"\n",
"RATE_DATE_COL = \"Date\"\n",
"RATE_VAL_COL = \"Yld to Maturity\""
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "6dd153e1-7e6f-47b7-81b1-c77fa763a087",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' Pourquoi le mois ?\\nParce que :\\nles flux journaliers sont trop bruités\\nle churn et les comportements de portefeuille se lisent mieux au mois #le churn signifie quun client quitte le fonds\\nla plupart des comportements dallocation sont plus lisibles à cette fréquence'"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"for df, date_col in [\n",
" (df_flows, FLOW_DATE_COL),\n",
" (df_aum, AUM_DATE_COL),\n",
" (df_nav, NAV_DATE_COL),\n",
" (df_rates, RATE_DATE_COL),\n",
"]:\n",
" df[date_col] = pd.to_datetime(df[date_col], errors=\"coerce\")\n",
"\n",
"df_flows[\"month\"] = df_flows[FLOW_DATE_COL].dt.to_period(\"M\").dt.to_timestamp()\n",
"df_aum[\"month\"] = df_aum[AUM_DATE_COL].dt.to_period(\"M\").dt.to_timestamp()\n",
"df_nav[\"month\"] = df_nav[NAV_DATE_COL].dt.to_period(\"M\").dt.to_timestamp()\n",
"df_rates[\"month\"] = df_rates[RATE_DATE_COL].dt.to_period(\"M\").dt.to_timestamp()\n",
"\n",
"for col in [FLOW_QTY_COL, FLOW_SUB_COL, FLOW_RED_COL]:\n",
" df_flows[col] = pd.to_numeric(df_flows[col], errors=\"coerce\")\n",
"\n",
"for col in [AUM_QTY_COL, AUM_VAL_COL]:\n",
" df_aum[col] = pd.to_numeric(df_aum[col], errors=\"coerce\")\n",
"\n",
"for col in [NAV_PRICE_COL, NAV_BENCH_COL]:\n",
" df_nav[col] = pd.to_numeric(df_nav[col], errors=\"coerce\")\n",
"\n",
"df_rates[RATE_VAL_COL] = pd.to_numeric(df_rates[RATE_VAL_COL], errors=\"coerce\")\n",
"\n",
"for df, col in [(df_flows, ISIN_COL), (df_aum, ISIN_COL)]:\n",
" df[col] = df[col].astype(str).str.strip()\n",
"\n",
"df_nav[NAV_ISIN_COL] = df_nav[NAV_ISIN_COL].astype(str).str.strip()\n",
"\n",
"''' Pourquoi le mois ?\n",
"Parce que :\n",
"les flux journaliers sont trop bruités\n",
"le churn et les comportements de portefeuille se lisent mieux au mois #le churn signifie quun client quitte le fonds\n",
"la plupart des comportements dallocation sont plus lisibles à cette fréquence'''"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "ac43bb83-5800-4000-af9f-8cd4cae9e9d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4906626, 18)\n"
]
},
{
"data": {
"text/plain": [
"'\\nOn agrège séparément :\\nles flows au niveau client-produit-mois\\nles AUM au niveau client-produit-mois\\n\\nPuis on les fusionne.\\n\\nDatasets créés : df_flows_rel_m\\nTable des transactions mensuelles au niveau : Registrar Account - ID × Product - Isin × month'"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'''On veut sassurer que lunivers des produits Carmignac détenus par les clients est bien cohérent avec lunivers des NAV \n",
"utilisés pour calculer les performances.'''\n",
"\n",
"#pour merge flux et aum c est un full\n",
"'''\n",
"Si un mois existe :\n",
"dans les flux mais pas dans les encours → la clé est gardée\n",
"dans les encours mais pas dans les flux → la clé est gardée\n",
"dans les deux → une seule ligne de clé\n",
"👉 Cest donc une logique proche dun full outer join, mais construite manuellement.\n",
"'''\n",
"df_flows_rel_m = (\n",
" df_flows\n",
" .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
" .assign(\n",
" gross_flow_qty=lambda x: x[FLOW_QTY_COL].abs(),\n",
" sub_qty=lambda x: x[FLOW_SUB_COL].fillna(0),\n",
" red_qty=lambda x: x[FLOW_RED_COL].fillna(0)\n",
" )\n",
" .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
" .agg(\n",
" net_flow_qty=(FLOW_QTY_COL, \"sum\"),\n",
" gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
" sub_qty=(\"sub_qty\", \"sum\"),\n",
" red_qty=(\"red_qty\", \"sum\"),\n",
" n_tx=(FLOW_QTY_COL, \"size\"),\n",
" region=(REGION_COL, \"last\"),\n",
" country=(COUNTRY_COL, \"last\")\n",
" )\n",
")\n",
"\n",
"df_aum_rel_m = (\n",
" df_aum\n",
" .dropna(subset=[ID_COL, ISIN_COL, \"month\"])\n",
" .groupby([ID_COL, ISIN_COL, \"month\"], as_index=False)\n",
" .agg(\n",
" aum_qty=(AUM_QTY_COL, \"sum\"),\n",
" aum_val=(AUM_VAL_COL, \"sum\"),\n",
" region=(REGION_COL, \"last\"),\n",
" country=(COUNTRY_COL, \"last\")\n",
" )\n",
")\n",
"\n",
"keys = pd.concat([\n",
" df_flows_rel_m[[ID_COL, ISIN_COL, \"month\"]],\n",
" df_aum_rel_m[[ID_COL, ISIN_COL, \"month\"]]\n",
"]).drop_duplicates()\n",
"\n",
"df_rel_m = (\n",
" keys\n",
" .merge(df_aum_rel_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\", suffixes=(\"\", \"_aum\"))\n",
" .merge(df_flows_rel_m, on=[ID_COL, ISIN_COL, \"month\"], how=\"left\", suffixes=(\"\", \"_flow\"))\n",
")\n",
"\n",
"for c in [\"aum_qty\", \"aum_val\", \"net_flow_qty\", \"gross_flow_qty\", \"sub_qty\", \"red_qty\", \"n_tx\"]:\n",
" df_rel_m[c] = df_rel_m[c].fillna(0)\n",
"\n",
"df_rel_m[\"region\"] = df_rel_m[\"region\"].fillna(df_rel_m.get(\"region_flow\"))\n",
"df_rel_m[\"country\"] = df_rel_m[\"country\"].fillna(df_rel_m.get(\"country_flow\"))\n",
"\n",
"df_rel_m[\"active_rel_month\"] = (df_rel_m[\"gross_flow_qty\"] > 0).astype(int)\n",
"df_rel_m[\"holding_rel_month\"] = (df_rel_m[\"aum_qty\"] > 0).astype(int)\n",
"df_rel_m[\"flow_to_aum_rel\"] = df_rel_m[\"net_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + EPS)\n",
"df_rel_m[\"turnover_rel\"] = df_rel_m[\"gross_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + EPS)\n",
"\n",
"print(df_rel_m.shape)\n",
"df_rel_m.head()\n",
"\n",
"\n",
"'''\n",
"On agrège séparément :\n",
"les flows au niveau client-produit-mois\n",
"les AUM au niveau client-produit-mois\n",
"\n",
"Puis on les fusionne.\n",
"\n",
"Datasets créés : df_flows_rel_m\n",
"Table des transactions mensuelles au niveau : Registrar Account - ID × Product - Isin × month'''"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2597a326-88f7-493f-830b-31826112eefa",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Product - Isin</th>\n",
" <th>month</th>\n",
" <th>ret_fund_m</th>\n",
" <th>ret_bench_m</th>\n",
" <th>active_return_m</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>617756</th>\n",
" <td>FR0007486709</td>\n",
" <td>2012-11-01</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>613737</th>\n",
" <td>FR0010135103</td>\n",
" <td>2010-03-01</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>613810</th>\n",
" <td>FR0010135103</td>\n",
" <td>2010-06-01</td>\n",
" <td>0.070565</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>614192</th>\n",
" <td>FR0010135103</td>\n",
" <td>2011-12-01</td>\n",
" <td>-0.024482</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>614258</th>\n",
" <td>FR0010135103</td>\n",
" <td>2012-03-01</td>\n",
" <td>0.028958</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Product - Isin month ret_fund_m ret_bench_m active_return_m\n",
"617756 FR0007486709 2012-11-01 NaN NaN NaN\n",
"613737 FR0010135103 2010-03-01 NaN NaN NaN\n",
"613810 FR0010135103 2010-06-01 0.070565 NaN NaN\n",
"614192 FR0010135103 2011-12-01 -0.024482 NaN NaN\n",
"614258 FR0010135103 2012-03-01 0.028958 NaN NaN"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Aggregate NAV data to monthly frequency by keeping the last available observation of each\n",
"# (ISIN, month); the same convention is used for the rates series.\n",
"# NOTE(review): rows are ordered by ISIN and month only, so \"last\" within a month follows the\n",
"# incoming row order of df_nav - confirm df_nav is already in date order.\n",
"df_nav_m = (\n",
"    df_nav\n",
"    .dropna(subset=[NAV_ISIN_COL, \"month\", NAV_PRICE_COL])\n",
"    .sort_values([NAV_ISIN_COL, \"month\"])\n",
"    .groupby([NAV_ISIN_COL, \"month\"], as_index=False)\n",
"    .tail(1)\n",
"    .copy()\n",
")\n",
"\n",
"# fill_method=None: never forward-fill a missing level before differencing. The implicit pad is\n",
"# deprecated in pandas and would report a 0% benchmark return for a month whose level is missing.\n",
"# NOTE(review): returns are taken between consecutive available rows, so a gap in months yields a\n",
"# multi-month return labelled as monthly - confirm this is acceptable.\n",
"df_nav_m[\"ret_fund_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].pct_change(fill_method=None)\n",
"df_nav_m[\"ret_bench_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_BENCH_COL].pct_change(fill_method=None)\n",
"# Active return = fund return in excess of its benchmark (NaN when either side is missing).\n",
"df_nav_m[\"active_return_m\"] = df_nav_m[\"ret_fund_m\"] - df_nav_m[\"ret_bench_m\"]\n",
"\n",
"# Align the key name with the relationship table and keep only the columns merged downstream.\n",
"df_nav_m = df_nav_m.rename(columns={NAV_ISIN_COL: ISIN_COL})\n",
"df_nav_m = df_nav_m[[ISIN_COL, \"month\", \"ret_fund_m\", \"ret_bench_m\", \"active_return_m\"]]\n",
"\n",
"df_nav_m.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "73cb228a-4c21-4407-a3f5-526c4b88639f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>month</th>\n",
" <th>Yld to Maturity</th>\n",
" <th>delta_rate_m</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2014-12-01</td>\n",
" <td>0.144</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>2015-01-01</td>\n",
" <td>0.086</td>\n",
" <td>-0.058</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>2015-02-01</td>\n",
" <td>0.064</td>\n",
" <td>-0.022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>2015-03-01</td>\n",
" <td>0.050</td>\n",
" <td>-0.014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>2015-04-01</td>\n",
" <td>-0.027</td>\n",
" <td>-0.077</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" month Yld to Maturity delta_rate_m\n",
"0 2014-12-01 0.144 NaN\n",
"22 2015-01-01 0.086 -0.058\n",
"43 2015-02-01 0.064 -0.022\n",
"65 2015-03-01 0.050 -0.014\n",
"86 2015-04-01 -0.027 -0.077"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Monthly rate series: last available observation of each month, plus its month-over-month change.\n",
"df_rates_m = (\n",
"    df_rates\n",
"    .dropna(subset=[\"month\", RATE_VAL_COL])\n",
"    .sort_values(RATE_DATE_COL)\n",
"    .groupby(\"month\", as_index=False)\n",
"    .tail(1)\n",
"    # Rows are in date order here, so diff() is the change versus the previous available month.\n",
"    .assign(delta_rate_m=lambda monthly: monthly[RATE_VAL_COL].diff())\n",
"    .loc[:, [\"month\", RATE_VAL_COL, \"delta_rate_m\"]]\n",
")\n",
"\n",
"df_rates_m.head()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "1e1f0fa9-9c62-4a5e-8ee1-2605c76fb80a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\nCréer un dataset complet contenant : fsionner ala left uaane hasab rel m shufyo bkhdu inter\\nclient + produit + performance + environnement macro.'"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the complete relationship-month dataset:\n",
"# client + product + fund performance (df_nav_m) + macro environment (df_rates_m).\n",
"market_cols = [\"ret_fund_m\", \"ret_bench_m\", \"active_return_m\", \"delta_rate_m\"]\n",
"\n",
"df_rel_m = (\n",
"    df_rel_m\n",
"    # Remove columns left by a previous execution so that re-running this cell does not\n",
"    # produce _x/_y duplicates (and a KeyError on the fill step below).\n",
"    .drop(columns=market_cols, errors=\"ignore\")\n",
"    # Left joins keep every row of df_rel_m; validate fails loudly if a lookup table has\n",
"    # duplicate keys, which would otherwise silently multiply rows.\n",
"    .merge(df_nav_m, on=[ISIN_COL, \"month\"], how=\"left\", validate=\"many_to_one\")\n",
"    .merge(df_rates_m[[\"month\", \"delta_rate_m\"]], on=\"month\", how=\"left\", validate=\"many_to_one\")\n",
")\n",
"\n",
"# NOTE(review): missing returns / rate changes are imputed with 0, which makes \"no data\"\n",
"# indistinguishable from a true zero in downstream features - confirm this is intended.\n",
"df_rel_m[market_cols] = df_rel_m[market_cols].fillna(0)\n",
"\n",
"df_rel_m.head()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "41a4ac43-9035-46f3-8cba-5ba0d88ec98a",
"metadata": {},
"outputs": [],
"source": [
"def weighted_mean(x, w):\n",
"    \"\"\"Weighted average of x with weights w, skipping unusable pairs.\n",
"\n",
"    A pair is used only when both the value and its weight are finite and the weight is\n",
"    non-negative. Returns NaN when no pair is usable or the usable weights sum to zero.\n",
"    \"\"\"\n",
"    values = np.asarray(x, dtype=float)\n",
"    weights = np.asarray(w, dtype=float)\n",
"    usable = np.isfinite(values) & np.isfinite(weights) & (weights >= 0)\n",
"    kept_values, kept_weights = values[usable], weights[usable]\n",
"    if kept_weights.size == 0 or kept_weights.sum() == 0:\n",
"        return np.nan\n",
"    return np.average(kept_values, weights=kept_weights)\n",
"\n",
"def hhi_from_weights(w):\n",
"    \"\"\"Herfindahl-Hirschman concentration index of a set of weights.\n",
"\n",
"    Non-finite weights (NaN, +/-inf) are ignored, in line with how weighted_mean filters its\n",
"    weights, so a single missing exposure no longer turns the whole index into NaN. Negative\n",
"    weights are clipped to zero.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    w : array-like of exposures (any shape; all elements are pooled).\n",
"\n",
"    Returns\n",
"    -------\n",
"    Sum of squared shares, or NaN when there is no positive finite weight.\n",
"    \"\"\"\n",
"    w = np.asarray(w, dtype=float)\n",
"    # Boolean indexing also flattens the input, which leaves the sums below unchanged.\n",
"    w = np.clip(w[np.isfinite(w)], 0, None)\n",
"    s = w.sum()\n",
"    if s <= 0:\n",
"        return np.nan\n",
"    p = w / s\n",
"    return np.sum(p**2)\n",
"\n",
"def compute_trend(y):\n",
"    \"\"\"Slope of a least-squares line fitted to y against its positions 0..len(y)-1.\n",
"\n",
"    Non-finite observations are left out of the fit but keep their position on the axis.\n",
"    Returns NaN when fewer than four finite observations are available.\n",
"    \"\"\"\n",
"    series = np.asarray(y, dtype=float)\n",
"    n_obs = len(series)\n",
"    finite = np.isfinite(series)\n",
"    if n_obs < 4 or finite.sum() < 4:\n",
"        return np.nan\n",
"    positions = np.arange(n_obs).reshape(-1, 1)\n",
"    model = LinearRegression()\n",
"    model.fit(positions[finite], series[finite])\n",
"    return model.coef_[0]\n",
"\n",
"def compute_beta(y, x):\n",
"    \"\"\"Sensitivity of y to a single regressor x, as the slope of a least-squares fit.\n",
"\n",
"    Only observations where both series are finite are used; returns NaN when fewer than\n",
"    six such observations remain.\n",
"    \"\"\"\n",
"    response = np.asarray(y, dtype=float)\n",
"    driver = np.asarray(x, dtype=float)\n",
"    paired = np.isfinite(response) & np.isfinite(driver)\n",
"    if paired.sum() < 6:\n",
"        return np.nan\n",
"    design = driver[paired].reshape(-1, 1)\n",
"    return LinearRegression().fit(design, response[paired]).coef_[0]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "79facbea-b77c-4cf9-95f7-526e1dedf9b0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1002117, 21)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>month</th>\n",
" <th>aum_qty</th>\n",
" <th>aum_val</th>\n",
" <th>net_flow_qty</th>\n",
" <th>gross_flow_qty</th>\n",
" <th>sub_qty</th>\n",
" <th>red_qty</th>\n",
" <th>n_tx</th>\n",
" <th>n_isin_held</th>\n",
" <th>n_isin_active</th>\n",
" <th>delta_rate_m</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>active_month</th>\n",
" <th>flow_to_aum_m</th>\n",
" <th>turnover_m</th>\n",
" <th>sub_share_m</th>\n",
" <th>red_share_m</th>\n",
" <th>aum_peak_to_date</th>\n",
" <th>aum_drawdown</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7905</td>\n",
" <td>2015-01-01</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-0.058</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7905</td>\n",
" <td>2015-02-01</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-0.022</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7905</td>\n",
" <td>2015-03-01</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-0.014</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7905</td>\n",
" <td>2015-04-01</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-0.077</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7905</td>\n",
" <td>2015-05-01</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-0.053</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID month aum_qty aum_val net_flow_qty \\\n",
"0 7905 2015-01-01 0.0 0.0 0.0 \n",
"1 7905 2015-02-01 0.0 0.0 0.0 \n",
"2 7905 2015-03-01 0.0 0.0 0.0 \n",
"3 7905 2015-04-01 0.0 0.0 0.0 \n",
"4 7905 2015-05-01 0.0 0.0 0.0 \n",
"\n",
" gross_flow_qty sub_qty red_qty n_tx n_isin_held n_isin_active \\\n",
"0 0.0 0.0 0.0 0.0 0 0 \n",
"1 0.0 0.0 0.0 0.0 0 0 \n",
"2 0.0 0.0 0.0 0.0 0 0 \n",
"3 0.0 0.0 0.0 0.0 0 0 \n",
"4 0.0 0.0 0.0 0.0 0 0 \n",
"\n",
" delta_rate_m region country active_month flow_to_aum_m \\\n",
"0 -0.058 LUXEMBOURG LUXEMBOURG 0 0.0 \n",
"1 -0.022 LUXEMBOURG LUXEMBOURG 0 0.0 \n",
"2 -0.014 LUXEMBOURG LUXEMBOURG 0 0.0 \n",
"3 -0.077 LUXEMBOURG LUXEMBOURG 0 0.0 \n",
"4 -0.053 LUXEMBOURG LUXEMBOURG 0 0.0 \n",
"\n",
" turnover_m sub_share_m red_share_m aum_peak_to_date aum_drawdown \n",
"0 0.0 0.0 0.0 0.0 1.0 \n",
"1 0.0 0.0 0.0 0.0 1.0 \n",
"2 0.0 0.0 0.0 0.0 1.0 \n",
"3 0.0 0.0 0.0 0.0 1.0 \n",
"4 0.0 0.0 0.0 0.0 1.0 "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# =========================\n",
"# ULTRA LIGHT VERSION\n",
"# =========================\n",
"# Client x month panel: relationship-level quantities summed over each client's products.\n",
"\n",
"df_month = (\n",
"    df_rel_m\n",
"    # Per-row flags; their sums count the ISINs held / traded by the client in the month.\n",
"    .assign(\n",
"        isin_held_flag=lambda rel: (rel[\"aum_qty\"] > 0).astype(int),\n",
"        isin_active_flag=lambda rel: (rel[\"gross_flow_qty\"] > 0).astype(int),\n",
"    )\n",
"    .groupby([ID_COL, \"month\"], as_index=False)\n",
"    .agg(\n",
"        aum_qty=(\"aum_qty\", \"sum\"),\n",
"        aum_val=(\"aum_val\", \"sum\"),\n",
"        net_flow_qty=(\"net_flow_qty\", \"sum\"),\n",
"        gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
"        sub_qty=(\"sub_qty\", \"sum\"),\n",
"        red_qty=(\"red_qty\", \"sum\"),\n",
"        n_tx=(\"n_tx\", \"sum\"),\n",
"        n_isin_held=(\"isin_held_flag\", \"sum\"),\n",
"        n_isin_active=(\"isin_active_flag\", \"sum\"),\n",
"        # presumably constant within a client-month, so the first value is representative - verify\n",
"        delta_rate_m=(\"delta_rate_m\", \"first\"),\n",
"        region=(\"region\", \"first\"),\n",
"        country=(\"country\", \"first\"),\n",
"    )\n",
"    .sort_values([ID_COL, \"month\"])\n",
"    .reset_index(drop=True)\n",
")\n",
"\n",
"# Activity flag and flow ratios; EPS keeps the denominators non-zero.\n",
"df_month[\"active_month\"] = (df_month[\"gross_flow_qty\"] > 0).astype(int)\n",
"df_month[\"flow_to_aum_m\"] = df_month[\"net_flow_qty\"] / (df_month[\"aum_qty\"].abs() + EPS)\n",
"df_month[\"turnover_m\"] = df_month[\"gross_flow_qty\"] / (df_month[\"aum_qty\"].abs() + EPS)\n",
"df_month[\"sub_share_m\"] = df_month[\"sub_qty\"] / (df_month[\"gross_flow_qty\"] + EPS)\n",
"df_month[\"red_share_m\"] = df_month[\"red_qty\"] / (df_month[\"gross_flow_qty\"] + EPS)\n",
"\n",
"# Running peak of holdings per client (rows are sorted by client and month above).\n",
"df_month[\"aum_peak_to_date\"] = df_month.groupby(ID_COL)[\"aum_qty\"].cummax()\n",
"# Drawdown relative to the running peak. While the peak is still 0 (nothing held yet),\n",
"# 1 - 0 / (0 + EPS) would evaluate to a 100% drawdown, so those months are set to 0 instead.\n",
"df_month[\"aum_drawdown\"] = (\n",
"    1 - (df_month[\"aum_qty\"] / (df_month[\"aum_peak_to_date\"] + EPS))\n",
").where(df_month[\"aum_peak_to_date\"] > 0, 0.0)\n",
"\n",
"print(df_month.shape)\n",
"df_month.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "1a2d234f-42f8-4cd1-b50e-7fc3f8c829d2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>Product - Isin</th>\n",
" <th>rel_n_months</th>\n",
" <th>rel_active_months</th>\n",
" <th>rel_holding_months</th>\n",
" <th>rel_aum_mean</th>\n",
" <th>rel_turnover_mean</th>\n",
" <th>rel_turnover_vol</th>\n",
" <th>rel_flow_to_aum_vol</th>\n",
" <th>rel_n_tx</th>\n",
" <th>rel_full_exit_count</th>\n",
" <th>rel_entry_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7905</td>\n",
" <td>FR0010135103</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7905</td>\n",
" <td>FR0010147603</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7905</td>\n",
" <td>FR0010148981</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>7905</td>\n",
" <td>FR0010148999</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7905</td>\n",
" <td>FR0010149096</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID Product - Isin rel_n_months rel_active_months \\\n",
"0 7905 FR0010135103 80 0 \n",
"1 7905 FR0010147603 80 0 \n",
"2 7905 FR0010148981 80 0 \n",
"3 7905 FR0010148999 80 0 \n",
"4 7905 FR0010149096 80 0 \n",
"\n",
" rel_holding_months rel_aum_mean rel_turnover_mean rel_turnover_vol \\\n",
"0 0 0.0 0.0 0.0 \n",
"1 0 0.0 0.0 0.0 \n",
"2 0 0.0 0.0 0.0 \n",
"3 0 0.0 0.0 0.0 \n",
"4 0 0.0 0.0 0.0 \n",
"\n",
" rel_flow_to_aum_vol rel_n_tx rel_full_exit_count rel_entry_count \n",
"0 0.0 0.0 0 0 \n",
"1 0.0 0.0 0 0 \n",
"2 0.0 0.0 0 0 \n",
"3 0.0 0.0 0 0 \n",
"4 0.0 0.0 0 0 "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Relationship-level (client x ISIN) features, built from the month-by-month history.\n",
"tmp = (\n",
"    df_rel_m\n",
"    .sort_values([ID_COL, ISIN_COL, \"month\"])\n",
"    # Holdings of the previous row within the same client x ISIN history.\n",
"    .assign(prev_aum=lambda rel: rel.groupby([ID_COL, ISIN_COL])[\"aum_qty\"].shift(1))\n",
"    .assign(\n",
"        # Full exit: a positive position followed by a zero or negative one.\n",
"        full_exit_event=lambda rel: (rel[\"prev_aum\"].gt(0) & rel[\"aum_qty\"].le(0)).astype(int),\n",
"        # Entry: no previous position (or none recorded) followed by a positive one.\n",
"        entry_event=lambda rel: (rel[\"prev_aum\"].fillna(0).le(0) & rel[\"aum_qty\"].gt(0)).astype(int),\n",
"    )\n",
")\n",
"\n",
"# Output column -> (source column, aggregation).\n",
"rel_feat_spec = {\n",
"    \"rel_n_months\": (\"month\", \"nunique\"),\n",
"    \"rel_active_months\": (\"active_rel_month\", \"sum\"),\n",
"    \"rel_holding_months\": (\"holding_rel_month\", \"sum\"),\n",
"    \"rel_aum_mean\": (\"aum_qty\", \"mean\"),\n",
"    \"rel_turnover_mean\": (\"turnover_rel\", \"mean\"),\n",
"    \"rel_turnover_vol\": (\"turnover_rel\", \"std\"),\n",
"    \"rel_flow_to_aum_vol\": (\"flow_to_aum_rel\", \"std\"),\n",
"    \"rel_n_tx\": (\"n_tx\", \"sum\"),\n",
"    \"rel_full_exit_count\": (\"full_exit_event\", \"sum\"),\n",
"    \"rel_entry_count\": (\"entry_event\", \"sum\"),\n",
"}\n",
"df_rel_feat = tmp.groupby([ID_COL, ISIN_COL], as_index=False).agg(**rel_feat_spec)\n",
"\n",
"df_rel_feat.head()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "9b61ce6a-1f54-423a-8a54-9897fe1bce20",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(17236, 34)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>aum_qty_max</th>\n",
" <th>aum_qty_last</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>n_tx_total</th>\n",
" <th>net_flow_vol</th>\n",
" <th>turnover_mean</th>\n",
" <th>turnover_vol</th>\n",
" <th>flow_to_aum_mean</th>\n",
" <th>flow_to_aum_vol</th>\n",
" <th>avg_n_isin_held</th>\n",
" <th>max_n_isin_held</th>\n",
" <th>sub_share_mean</th>\n",
" <th>red_share_mean</th>\n",
" <th>delta_rate_mean</th>\n",
" <th>aum_drawdown_last</th>\n",
" <th>aum_drawdown_max</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>n_isin_total</th>\n",
" <th>rel_turnover_mean_avg</th>\n",
" <th>rel_turnover_vol_avg</th>\n",
" <th>rel_flow_to_aum_vol_avg</th>\n",
" <th>full_exit_count</th>\n",
" <th>entry_count</th>\n",
" <th>avg_holding_months_per_isin</th>\n",
" <th>max_holding_months_per_isin</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7905</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>-0.008925</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>12</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7912</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>-0.008925</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>5</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7962</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>-0.008925</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>4</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8307</td>\n",
" <td>130</td>\n",
" <td>77</td>\n",
" <td>0.592308</td>\n",
" <td>22613.710831</td>\n",
" <td>22712.98</td>\n",
" <td>59145.06</td>\n",
" <td>487.0</td>\n",
" <td>27252.124</td>\n",
" <td>177077.31</td>\n",
" <td>1362.133154</td>\n",
" <td>161.0</td>\n",
" <td>3508.455222</td>\n",
" <td>0.04922</td>\n",
" <td>0.108358</td>\n",
" <td>0.00648</td>\n",
" <td>0.103427</td>\n",
" <td>11.784615</td>\n",
" <td>16</td>\n",
" <td>0.260828</td>\n",
" <td>-0.36224</td>\n",
" <td>0.013723</td>\n",
" <td>0.991766</td>\n",
" <td>0.991766</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>29</td>\n",
" <td>4.749062e+09</td>\n",
" <td>3.095092e+10</td>\n",
" <td>3.095092e+10</td>\n",
" <td>27</td>\n",
" <td>31</td>\n",
" <td>52.827586</td>\n",
" <td>130</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8354</td>\n",
" <td>64</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>-0.010063</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>1</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
"0 7905 80 0 0.000000 0.000000 \n",
"1 7912 80 0 0.000000 0.000000 \n",
"2 7962 80 0 0.000000 0.000000 \n",
"3 8307 130 77 0.592308 22613.710831 \n",
"4 8354 64 0 0.000000 0.000000 \n",
"\n",
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
"0 0.00 0.00 0.0 0.000 \n",
"1 0.00 0.00 0.0 0.000 \n",
"2 0.00 0.00 0.0 0.000 \n",
"3 22712.98 59145.06 487.0 27252.124 \n",
"4 0.00 0.00 0.0 0.000 \n",
"\n",
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
"0 0.00 0.000000 0.0 0.000000 \n",
"1 0.00 0.000000 0.0 0.000000 \n",
"2 0.00 0.000000 0.0 0.000000 \n",
"3 177077.31 1362.133154 161.0 3508.455222 \n",
"4 0.00 0.000000 0.0 0.000000 \n",
"\n",
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
"0 0.00000 0.000000 0.00000 0.000000 \n",
"1 0.00000 0.000000 0.00000 0.000000 \n",
"2 0.00000 0.000000 0.00000 0.000000 \n",
"3 0.04922 0.108358 0.00648 0.103427 \n",
"4 0.00000 0.000000 0.00000 0.000000 \n",
"\n",
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
"0 0.000000 0 0.000000 0.00000 \n",
"1 0.000000 0 0.000000 0.00000 \n",
"2 0.000000 0 0.000000 0.00000 \n",
"3 11.784615 16 0.260828 -0.36224 \n",
"4 0.000000 0 0.000000 0.00000 \n",
"\n",
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
"0 -0.008925 1.000000 1.000000 LUXEMBOURG \n",
"1 -0.008925 1.000000 1.000000 LUXEMBOURG \n",
"2 -0.008925 1.000000 1.000000 LUXEMBOURG \n",
"3 0.013723 0.991766 0.991766 SWITZERLAND \n",
"4 -0.010063 1.000000 1.000000 SWITZERLAND \n",
"\n",
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
"0 LUXEMBOURG 12 0.000000e+00 0.000000e+00 \n",
"1 LUXEMBOURG 5 0.000000e+00 0.000000e+00 \n",
"2 LUXEMBOURG 4 0.000000e+00 0.000000e+00 \n",
"3 SWITZERLAND 29 4.749062e+09 3.095092e+10 \n",
"4 SWITZERLAND 1 0.000000e+00 0.000000e+00 \n",
"\n",
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
"0 0.000000e+00 0 0 \n",
"1 0.000000e+00 0 0 \n",
"2 0.000000e+00 0 0 \n",
"3 3.095092e+10 27 31 \n",
"4 0.000000e+00 0 0 \n",
"\n",
" avg_holding_months_per_isin max_holding_months_per_isin \n",
"0 0.000000 0 \n",
"1 0.000000 0 \n",
"2 0.000000 0 \n",
"3 52.827586 130 \n",
"4 0.000000 0 "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Client-level summary of the per-product (relationship) features.\n",
"# Each spec maps an output column to (source column, aggregation).\n",
"rel_client_spec = {\n",
"    \"n_isin_total\": (ISIN_COL, \"nunique\"),\n",
"    \"rel_turnover_mean_avg\": (\"rel_turnover_mean\", \"mean\"),\n",
"    \"rel_turnover_vol_avg\": (\"rel_turnover_vol\", \"mean\"),\n",
"    \"rel_flow_to_aum_vol_avg\": (\"rel_flow_to_aum_vol\", \"mean\"),\n",
"    \"full_exit_count\": (\"rel_full_exit_count\", \"sum\"),\n",
"    \"entry_count\": (\"rel_entry_count\", \"sum\"),\n",
"    \"avg_holding_months_per_isin\": (\"rel_holding_months\", \"mean\"),\n",
"    \"max_holding_months_per_isin\": (\"rel_holding_months\", \"max\"),\n",
"}\n",
"df_rel_client = df_rel_feat.groupby(ID_COL, as_index=False).agg(**rel_client_spec)\n",
"\n",
"# Client-level summary of the monthly panel.\n",
"client_spec = {\n",
"    # history length and activity\n",
"    \"n_months\": (\"month\", \"nunique\"),\n",
"    \"n_active_months\": (\"active_month\", \"sum\"),\n",
"    \"flow_freq\": (\"active_month\", \"mean\"),\n",
"    # holdings level\n",
"    \"aum_qty_mean\": (\"aum_qty\", \"mean\"),\n",
"    \"aum_qty_median\": (\"aum_qty\", \"median\"),\n",
"    \"aum_qty_max\": (\"aum_qty\", \"max\"),\n",
"    \"aum_qty_last\": (\"aum_qty\", \"last\"),\n",
"    # flow volumes\n",
"    \"net_flow_qty_sum\": (\"net_flow_qty\", \"sum\"),\n",
"    \"gross_flow_qty_sum\": (\"gross_flow_qty\", \"sum\"),\n",
"    \"gross_flow_qty_mean\": (\"gross_flow_qty\", \"mean\"),\n",
"    \"n_tx_total\": (\"n_tx\", \"sum\"),\n",
"    # flow variability and intensity\n",
"    \"net_flow_vol\": (\"net_flow_qty\", \"std\"),\n",
"    \"turnover_mean\": (\"turnover_m\", \"mean\"),\n",
"    \"turnover_vol\": (\"turnover_m\", \"std\"),\n",
"    \"flow_to_aum_mean\": (\"flow_to_aum_m\", \"mean\"),\n",
"    \"flow_to_aum_vol\": (\"flow_to_aum_m\", \"std\"),\n",
"    # product breadth\n",
"    \"avg_n_isin_held\": (\"n_isin_held\", \"mean\"),\n",
"    \"max_n_isin_held\": (\"n_isin_held\", \"max\"),\n",
"    # subscription / redemption mix\n",
"    \"sub_share_mean\": (\"sub_share_m\", \"mean\"),\n",
"    \"red_share_mean\": (\"red_share_m\", \"mean\"),\n",
"    # macro exposure and drawdown\n",
"    \"delta_rate_mean\": (\"delta_rate_m\", \"mean\"),\n",
"    \"aum_drawdown_last\": (\"aum_drawdown\", \"last\"),\n",
"    \"aum_drawdown_max\": (\"aum_drawdown\", \"max\"),\n",
"    # static attributes\n",
"    \"region\": (\"region\", \"last\"),\n",
"    \"country\": (\"country\", \"last\"),\n",
"}\n",
"df_client = (\n",
"    df_month\n",
"    .groupby(ID_COL, as_index=False)\n",
"    .agg(**client_spec)\n",
"    # Left join keeps every client of the monthly panel.\n",
"    .merge(df_rel_client, on=ID_COL, how=\"left\")\n",
")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "48d1b22b-f0ca-448e-a330-6b7d64fe48b3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Registrar Account - ID', 'n_months', 'n_active_months', 'flow_freq',\n",
" 'aum_qty_mean', 'aum_qty_median', 'aum_qty_max', 'aum_qty_last',\n",
" 'net_flow_qty_sum', 'gross_flow_qty_sum', 'gross_flow_qty_mean',\n",
" 'n_tx_total', 'net_flow_vol', 'turnover_mean', 'turnover_vol',\n",
" 'flow_to_aum_mean', 'flow_to_aum_vol', 'avg_n_isin_held',\n",
" 'max_n_isin_held', 'sub_share_mean', 'red_share_mean',\n",
" 'delta_rate_mean', 'aum_drawdown_last', 'aum_drawdown_max', 'region',\n",
" 'country', 'n_isin_total', 'rel_turnover_mean_avg',\n",
" 'rel_turnover_vol_avg', 'rel_flow_to_aum_vol_avg', 'full_exit_count',\n",
" 'entry_count', 'avg_holding_months_per_isin',\n",
" 'max_holding_months_per_isin'],\n",
" dtype='object')"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inventory of the client-level feature columns.\n",
"df_client.columns"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "a9b34e2b-503f-41a1-9629-670ceb7615ba",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(17236, 34)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the client-level feature table.\n",
"df_client.shape"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "934507d6-8aaf-43e2-8a2d-d6cfea0b6af1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(17236, 38)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Registrar Account - ID</th>\n",
" <th>n_months</th>\n",
" <th>n_active_months</th>\n",
" <th>flow_freq</th>\n",
" <th>aum_qty_mean</th>\n",
" <th>aum_qty_median</th>\n",
" <th>aum_qty_max</th>\n",
" <th>aum_qty_last</th>\n",
" <th>net_flow_qty_sum</th>\n",
" <th>gross_flow_qty_sum</th>\n",
" <th>gross_flow_qty_mean</th>\n",
" <th>n_tx_total</th>\n",
" <th>net_flow_vol</th>\n",
" <th>turnover_mean</th>\n",
" <th>turnover_vol</th>\n",
" <th>flow_to_aum_mean</th>\n",
" <th>flow_to_aum_vol</th>\n",
" <th>avg_n_isin_held</th>\n",
" <th>max_n_isin_held</th>\n",
" <th>sub_share_mean</th>\n",
" <th>red_share_mean</th>\n",
" <th>delta_rate_mean</th>\n",
" <th>aum_drawdown_last</th>\n",
" <th>aum_drawdown_max</th>\n",
" <th>region</th>\n",
" <th>country</th>\n",
" <th>n_isin_total</th>\n",
" <th>rel_turnover_mean_avg</th>\n",
" <th>rel_turnover_vol_avg</th>\n",
" <th>rel_flow_to_aum_vol_avg</th>\n",
" <th>full_exit_count</th>\n",
" <th>entry_count</th>\n",
" <th>avg_holding_months_per_isin</th>\n",
" <th>max_holding_months_per_isin</th>\n",
" <th>flow_trend_12m</th>\n",
" <th>aum_trend_12m</th>\n",
" <th>drawdown_trend_12m</th>\n",
" <th>beta_rate</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7905</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>-0.008925</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>12</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7912</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>-0.008925</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>5</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7962</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>-0.008925</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>LUXEMBOURG</td>\n",
" <td>4</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8307</td>\n",
" <td>130</td>\n",
" <td>77</td>\n",
" <td>0.592308</td>\n",
" <td>22613.710831</td>\n",
" <td>22712.98</td>\n",
" <td>59145.06</td>\n",
" <td>487.0</td>\n",
" <td>27252.124</td>\n",
" <td>177077.31</td>\n",
" <td>1362.133154</td>\n",
" <td>161.0</td>\n",
" <td>3508.455222</td>\n",
" <td>0.04922</td>\n",
" <td>0.108358</td>\n",
" <td>0.00648</td>\n",
" <td>0.103427</td>\n",
" <td>11.784615</td>\n",
" <td>16</td>\n",
" <td>0.260828</td>\n",
" <td>-0.36224</td>\n",
" <td>0.013723</td>\n",
" <td>0.991766</td>\n",
" <td>0.991766</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>29</td>\n",
" <td>4.749062e+09</td>\n",
" <td>3.095092e+10</td>\n",
" <td>3.095092e+10</td>\n",
" <td>27</td>\n",
" <td>31</td>\n",
" <td>52.827586</td>\n",
" <td>130</td>\n",
" <td>-0.003463</td>\n",
" <td>-1142.587413</td>\n",
" <td>0.019318</td>\n",
" <td>-0.069433</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8354</td>\n",
" <td>64</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.00</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.00000</td>\n",
" <td>-0.010063</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>SWITZERLAND</td>\n",
" <td>1</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Registrar Account - ID n_months n_active_months flow_freq aum_qty_mean \\\n",
"0 7905 80 0 0.000000 0.000000 \n",
"1 7912 80 0 0.000000 0.000000 \n",
"2 7962 80 0 0.000000 0.000000 \n",
"3 8307 130 77 0.592308 22613.710831 \n",
"4 8354 64 0 0.000000 0.000000 \n",
"\n",
" aum_qty_median aum_qty_max aum_qty_last net_flow_qty_sum \\\n",
"0 0.00 0.00 0.0 0.000 \n",
"1 0.00 0.00 0.0 0.000 \n",
"2 0.00 0.00 0.0 0.000 \n",
"3 22712.98 59145.06 487.0 27252.124 \n",
"4 0.00 0.00 0.0 0.000 \n",
"\n",
" gross_flow_qty_sum gross_flow_qty_mean n_tx_total net_flow_vol \\\n",
"0 0.00 0.000000 0.0 0.000000 \n",
"1 0.00 0.000000 0.0 0.000000 \n",
"2 0.00 0.000000 0.0 0.000000 \n",
"3 177077.31 1362.133154 161.0 3508.455222 \n",
"4 0.00 0.000000 0.0 0.000000 \n",
"\n",
" turnover_mean turnover_vol flow_to_aum_mean flow_to_aum_vol \\\n",
"0 0.00000 0.000000 0.00000 0.000000 \n",
"1 0.00000 0.000000 0.00000 0.000000 \n",
"2 0.00000 0.000000 0.00000 0.000000 \n",
"3 0.04922 0.108358 0.00648 0.103427 \n",
"4 0.00000 0.000000 0.00000 0.000000 \n",
"\n",
" avg_n_isin_held max_n_isin_held sub_share_mean red_share_mean \\\n",
"0 0.000000 0 0.000000 0.00000 \n",
"1 0.000000 0 0.000000 0.00000 \n",
"2 0.000000 0 0.000000 0.00000 \n",
"3 11.784615 16 0.260828 -0.36224 \n",
"4 0.000000 0 0.000000 0.00000 \n",
"\n",
" delta_rate_mean aum_drawdown_last aum_drawdown_max region \\\n",
"0 -0.008925 1.000000 1.000000 LUXEMBOURG \n",
"1 -0.008925 1.000000 1.000000 LUXEMBOURG \n",
"2 -0.008925 1.000000 1.000000 LUXEMBOURG \n",
"3 0.013723 0.991766 0.991766 SWITZERLAND \n",
"4 -0.010063 1.000000 1.000000 SWITZERLAND \n",
"\n",
" country n_isin_total rel_turnover_mean_avg rel_turnover_vol_avg \\\n",
"0 LUXEMBOURG 12 0.000000e+00 0.000000e+00 \n",
"1 LUXEMBOURG 5 0.000000e+00 0.000000e+00 \n",
"2 LUXEMBOURG 4 0.000000e+00 0.000000e+00 \n",
"3 SWITZERLAND 29 4.749062e+09 3.095092e+10 \n",
"4 SWITZERLAND 1 0.000000e+00 0.000000e+00 \n",
"\n",
" rel_flow_to_aum_vol_avg full_exit_count entry_count \\\n",
"0 0.000000e+00 0 0 \n",
"1 0.000000e+00 0 0 \n",
"2 0.000000e+00 0 0 \n",
"3 3.095092e+10 27 31 \n",
"4 0.000000e+00 0 0 \n",
"\n",
" avg_holding_months_per_isin max_holding_months_per_isin flow_trend_12m \\\n",
"0 0.000000 0 0.000000 \n",
"1 0.000000 0 0.000000 \n",
"2 0.000000 0 0.000000 \n",
"3 52.827586 130 -0.003463 \n",
"4 0.000000 0 0.000000 \n",
"\n",
" aum_trend_12m drawdown_trend_12m beta_rate \n",
"0 0.000000 0.000000 0.000000 \n",
"1 0.000000 0.000000 0.000000 \n",
"2 0.000000 0.000000 0.000000 \n",
"3 -1142.587413 0.019318 -0.069433 \n",
"4 0.000000 0.000000 0.000000 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def compute_trend(y):\n",
" y = np.asarray(y, dtype=float)\n",
" if len(y) < 4:\n",
" return np.nan\n",
" x = np.arange(len(y)).reshape(-1, 1)\n",
" mask = np.isfinite(y)\n",
" if mask.sum() < 4:\n",
" return np.nan\n",
" reg = LinearRegression().fit(x[mask], y[mask])\n",
" return reg.coef_[0]\n",
"\n",
"def compute_beta(y, x):\n",
" y = np.asarray(y, dtype=float)\n",
" x = np.asarray(x, dtype=float)\n",
" mask = np.isfinite(y) & np.isfinite(x)\n",
" if mask.sum() < 6:\n",
" return np.nan\n",
" reg = LinearRegression().fit(x[mask].reshape(-1, 1), y[mask])\n",
" return reg.coef_[0]\n",
"\n",
"rows = []\n",
"\n",
"for acc, g in df_month.groupby(ID_COL):\n",
" g = g.sort_values(\"month\")\n",
"\n",
" flow = g[\"flow_to_aum_m\"].values\n",
" aum = g[\"aum_qty\"].values\n",
" delta_rate = g[\"delta_rate_m\"].values\n",
" drawdown = g[\"aum_drawdown\"].values\n",
"\n",
" rows.append({\n",
" ID_COL: acc,\n",
" \"flow_trend_12m\": compute_trend(flow[-12:]),\n",
" \"aum_trend_12m\": compute_trend(aum[-12:]),\n",
" \"drawdown_trend_12m\": compute_trend(drawdown[-12:]),\n",
" \"beta_rate\": compute_beta(flow, delta_rate),\n",
" })\n",
"\n",
"df_beta = pd.DataFrame(rows)\n",
"\n",
"df_client = df_client.merge(df_beta, on=ID_COL, how=\"left\")\n",
"\n",
"print(df_client.shape)\n",
"df_client.head()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "3f78e685-b3e7-4c02-81d2-fc480d524814",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Nb clients = 9656\n",
"Nb features = 34\n",
"['log_aum_qty_mean', 'flow_freq', 'gross_flow_to_aum', 'turnover_vol', 'flow_to_aum_vol', 'activity_intensity', 'log_n_tx_total', 'avg_n_isin_held', 'n_isin_total', 'avg_holding_months_per_isin', 'exit_rate_per_isin', 'flow_direction_balance', 'redemption_bias', 'aum_drawdown_last', 'country_grp_FRANCE', 'country_grp_GERMANY', 'country_grp_ITALY', 'country_grp_LATAM', 'country_grp_LUXEMBOURG', 'country_grp_Other', 'country_grp_SPAIN', 'country_grp_SWITZERLAND', 'country_grp_UNITED KINGDOM', 'country_grp_UNITED STATES', 'region_grp_FRANCE', 'region_grp_GERMANY', 'region_grp_ITALY', 'region_grp_LATAM', 'region_grp_LUXEMBOURG', 'region_grp_NORDICS', 'region_grp_Other', 'region_grp_SPAIN', 'region_grp_SWITZERLAND', 'region_grp_UNITED KINGDOM']\n"
]
}
],
"source": [
"dfc = df_client.copy()\n",
"\n",
"dfc[\"gross_flow_to_aum\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"aum_qty_mean\"].abs() + EPS)\n",
"dfc[\"avg_ticket\"] = dfc[\"gross_flow_qty_sum\"] / (dfc[\"n_tx_total\"] + EPS)\n",
"dfc[\"flow_direction_balance\"] = dfc[\"net_flow_qty_sum\"] / (dfc[\"gross_flow_qty_sum\"] + EPS)\n",
"dfc[\"redemption_bias\"] = dfc[\"red_share_mean\"] - dfc[\"sub_share_mean\"]\n",
"dfc[\"activity_intensity\"] = dfc[\"n_tx_total\"] / (dfc[\"n_months\"] + EPS)\n",
"dfc[\"exit_rate_per_isin\"] = dfc[\"full_exit_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"entry_rate_per_isin\"] = dfc[\"entry_count\"] / (dfc[\"n_isin_total\"] + EPS)\n",
"dfc[\"aum_final_to_peak\"] = dfc[\"aum_qty_last\"] / (dfc[\"aum_qty_max\"] + EPS)\n",
"\n",
"for col in [\"aum_qty_mean\", \"gross_flow_qty_sum\", \"n_tx_total\", \"avg_ticket\"]:\n",
" dfc[f\"log_{col}\"] = np.log1p(dfc[col].clip(lower=0))\n",
"\n",
"dfc = dfc[(dfc[\"n_months\"] >= 6) & (dfc[\"aum_qty_mean\"] > 0)].copy()\n",
"\n",
"top_countries = dfc[\"country\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"top_regions = dfc[\"region\"].fillna(\"Unknown\").value_counts().head(10).index\n",
"\n",
"dfc[\"country_grp\"] = np.where(dfc[\"country\"].isin(top_countries), dfc[\"country\"], \"Other\")\n",
"dfc[\"region_grp\"] = np.where(dfc[\"region\"].isin(top_regions), dfc[\"region\"], \"Other\")\n",
"\n",
"base_features = [\n",
" \"log_aum_qty_mean\",\n",
" \"flow_freq\",\n",
" \"gross_flow_to_aum\",\n",
" \"turnover_vol\",\n",
" \"flow_to_aum_vol\",\n",
" \"activity_intensity\",\n",
" \"log_n_tx_total\",\n",
" \"avg_n_isin_held\",\n",
" \"n_isin_total\",\n",
" \"avg_holding_months_per_isin\",\n",
" \"exit_rate_per_isin\",\n",
" \"flow_direction_balance\",\n",
" \"redemption_bias\",\n",
" \"aum_drawdown_last\",\n",
"]\n",
"\n",
"base_features = [c for c in base_features if c in dfc.columns]\n",
"\n",
"X_num = dfc[base_features].replace([np.inf, -np.inf], np.nan).fillna(dfc[base_features].median())\n",
"X_cat = pd.get_dummies(dfc[[\"country_grp\", \"region_grp\"]].fillna(\"Unknown\"), drop_first=True)\n",
"\n",
"X = pd.concat([X_num.reset_index(drop=True), X_cat.reset_index(drop=True)], axis=1)\n",
"\n",
"scaler = RobustScaler()\n",
"X_scaled = scaler.fit_transform(X)\n",
"\n",
"print(\"Nb clients =\", X.shape[0])\n",
"print(\"Nb features =\", X.shape[1])\n",
"print(X.columns.tolist())"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "7e865952-a1d2-4f39-bbbc-306c8ed7857c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>k</th>\n",
" <th>inertia</th>\n",
" <th>silhouette</th>\n",
" <th>davies_bouldin</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>4.524683e+27</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>5.112656e+26</td>\n",
" <td>0.999550</td>\n",
" <td>0.261256</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>2.261592e+26</td>\n",
" <td>0.999409</td>\n",
" <td>0.421666</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1.116792e+26</td>\n",
" <td>0.999430</td>\n",
" <td>0.246382</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>4.183978e+25</td>\n",
" <td>0.999075</td>\n",
" <td>0.293760</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>2.520739e+25</td>\n",
" <td>0.999065</td>\n",
" <td>0.338481</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>1.141968e+25</td>\n",
" <td>0.998960</td>\n",
" <td>0.257299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>5.503246e+24</td>\n",
" <td>0.998934</td>\n",
" <td>0.138057</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>1.305120e+24</td>\n",
" <td>0.998801</td>\n",
" <td>0.064579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>5.648568e+23</td>\n",
" <td>0.998694</td>\n",
" <td>0.126169</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" k inertia silhouette davies_bouldin\n",
"0 1 4.524683e+27 NaN NaN\n",
"1 2 5.112656e+26 0.999550 0.261256\n",
"2 3 2.261592e+26 0.999409 0.421666\n",
"3 4 1.116792e+26 0.999430 0.246382\n",
"4 5 4.183978e+25 0.999075 0.293760\n",
"5 6 2.520739e+25 0.999065 0.338481\n",
"6 7 1.141968e+25 0.998960 0.257299\n",
"7 8 5.503246e+24 0.998934 0.138057\n",
"8 9 1.305120e+24 0.998801 0.064579\n",
"9 10 5.648568e+23 0.998694 0.126169"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rows = []\n",
"\n",
"for k in range(1, 11):\n",
" km = KMeans(n_clusters=k, n_init=50, random_state=42)\n",
" labels = km.fit_predict(X_scaled)\n",
"\n",
" row = {\n",
" \"k\": k,\n",
" \"inertia\": km.inertia_\n",
" }\n",
"\n",
" if k >= 2:\n",
" row[\"silhouette\"] = silhouette_score(X_scaled, labels)\n",
" row[\"davies_bouldin\"] = davies_bouldin_score(X_scaled, labels)\n",
" else:\n",
" row[\"silhouette\"] = np.nan\n",
" row[\"davies_bouldin\"] = np.nan\n",
"\n",
" rows.append(row)\n",
"\n",
"df_kdiag = pd.DataFrame(rows)\n",
"df_kdiag"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "04c350be-d871-4de8-93d6-3775d78e4725",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAAGGCAYAAAAzegNcAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAA2JlJREFUeJzs3Xd4VNXWx/HvzKSTAgkJNQECkgAJvYamiIhgRSkqooCo2LmvCnaw4fWq1wqiIAiigFJEQaxXmgFCD0F6C6GFBEhvM/P+ETIaCJBAkpNMfp/n4ZGc2bPP2meQOZy1914mu91uR0REREREREREREREpIIzGx2AiIiIiIiIiIiIiIhIcSipISIiIiIiIiIiIiIilYKSGiIiIiIiIiIiIiIiUikoqSEiIiIiIiIiIiIiIpWCkhoiIiIiIiIiIiIiIlIpKKkhIiIiIiIiIiIiIiKVgpIaIiIiIiIiIiIiIiJSKSipISIiIiIiIiIiIiIilYKSGiIiIiIiIiIiIiIiUikoqSEiIiIiUsX16tWLcePGOX5eu3YtYWFhrF271nHsnnvu4cYbbzQiPBEREXFSH374IWFhYUaHUa7GjRtHr169Ltnu8OHDhIWFsWDBAsexqni9RIqipIZIGQkLC+PDDz90/FzwxZOcnGxgVM7v3OsuIiJS1e3cuZPHH3+ca665hsjISLp3787w4cOZNWuW0aGVuj179vDhhx9y+PDh816bPXt2oYcCIiIiAgsWLCAsLMzxKzIykm7dujFy5EhmzpxJWlqa0SGWunHjxhUac/PmzenZsydjxoxhz549RocnIsXgYnQAIpXJggULePbZZy/4+ty5c2ndunX5BVQGZs2axXvvvceaNWtwdXUtsk1YWBh33303L730UjlHl2/58uVs3bqVxx57zJDzi4iIVBYbN25k2LBh1K1bl4EDBxIYGMjRo0fZsmULM2fO5J577gFg2bJlmEwmg6O9cnv27OGjjz6iY8eO1K9fv9BrX3/9NTVq1GDAgAEGRSciIlJxPf7449SvX5+8vDxOnjzJunXreOONN5gxYwaTJk0iPDy8TM47evRoHnjggTLp+2Lc3Nx47bXXALBarRw6dIg5c+awcuVKlixZQq1atco9puIw6nqJVDRKaohchoIv+3OFhIQYEE3p+uOPP+jatesFExoVwfLly5k9e3aRSY2tW7disVgMiEpERKTi+eSTT/Dx8eHbb7/F19e30GtJSUmO37u5uZV3aCIiIlKB9OjRg8jISMfPDz74INHR0Tz00EM8/PDDLF26FA8Pj1I/r4uLCy4u5f940sXFhVtuuaXQsdatW/Pggw+yfPlyBg0aVO4xFYdR10ukotH2UyKXoUePHtxyyy3n/fL39zc6tCuSmZlJTEwMV199tdGhFCkjI+OSbdzd3fUFLyIictahQ4do0qTJeQkNgICAAMfvz62pcTF79uzhnnvuoVWrVnTv3p3PPvvsvDZJSUk899xzREVFERkZyc0338zChQsLtSmqbgcUvX80wN69e3n88cfp2LEjkZGRDBgwgN9++83x+oIFC3jiiScAGDZsmGNLibVr19KrVy92797NunXrHMcLVqkApKSk8Prrr9OzZ08iIiK47rrr+PTTT7HZbMW6JiIiIs6oS5cuPPzwwyQkJLB48WIAduzYwbhx47j22muJjIyka9euPPvss5w6dcrxvmXLlhEWFsa6devO63POnDmEhYWxa9cu4MI1Ir777jsGDBhAy5Yt6dixI2PGjOHo0aOF2hw4cIDHHnuMrl27EhkZSY8ePRgzZgypqamXNd6aNWsCnDdRMj4+3nEP0qpVKwYNGsQff/xRqE3BNl7nboF5ofudc6WkpDBu3DjatWtH+/btGTt2bJHjKOp6hYWF8corr/Drr79y4403EhERQf/+/VmxYkVxhy5S6SipIVLOTp06xRNPPEHbtm3p1KkTr732GtnZ2YXa5OXl8fHHH9O7d2
8iIiLo1asX7777Ljk5OY42EydOpFOnTtjtdsexV199lbCwMGbOnOk4dvLkScLCwvjqq68uGVt0dDQ5OTn06NGjRGMq+JJeunQpkydPdszwuPfeezl48OB57bds2cLIkSNp164drVq1YujQoWzYsKFQm4Iv6j179vB///d/dOjQgbvuuotx48Yxe/ZsgEJ7YBY4t6ZGQkIC48eP5/rrr6dly5Z06tSJxx9/vMi9tkVERJxNvXr1iIuLczw4uFJnzpzh/vvvJzw8nLFjxxIaGsrbb7/N8uXLHW2ysrK45557WLx4MTfddBPPPPMMPj4+jBs3ji+++OKyzrt7924GDx7M3r17GTVqFOPGjcPLy4tHHnmEX375BYAOHTo4EhUPPfQQb731Fm+99RaNGzfmueeeo3bt2oSGhjqOP/TQQ0D+pI6hQ4eyePFibr31Vl544QXatm3Lu+++y8SJE6/wiomIiFRuBasZVq1aBcCff/5JfHw8AwYM4MUXX6Rfv34sXbqUBx54wPF84uqrr8bLy4sff/zxvP6WLl3KVVddRdOmTS94zsmTJzN27FgaNGjAuHHjGDZsGNHR0dx9992kpKQAkJOTw8iRI9m8eTNDhw7lpZdeYtCgQcTHxzvaXEpycjLJycmcPHmSTZs2MXHiRKpXr84111zjaHPy5EmGDBnCqlWruPPOOxkzZgzZ2dmMHj3acQ9ypex2Ow8//DDfffcdN998M08++STHjh1j7Nixxe5jw4YNjB8/nn79+vH000+TnZ3N448/XijZJOJMNJ1Z5DKkpaWdV/DbZDJRo0aNS773ySefpF69evzf//0fmzdvZtasWaSkpPDWW2852rzwwgssXLiQ66+/nuHDh7N161amTJnC3r17+fjjjwFo3749M2bMYPfu3Y6bgfXr12M2m1m/fj3Dhg1zHIP8f+hfyvLly2nRooVjdkJJffbZZ5hMJkaMGEFaWhpTp07lqaee4ptvvnG0iY6OZtSoUURERPDoo49iMplYsGAB9957L1999RUtW7Ys1OcTTzxBgwYNGDNmDHa7nebNm3PixAlWr15d6JpdSGxsLJs2baJ///7Url2bhIQEvv76a4YNG8aSJUvw9PS8rLGKiIhUBiNGjGDUqFHceuuttGzZknbt2tGlSxc6dep0WVtNnjhxgn//+9/ceuutANxxxx306tWL+fPn07NnTyC/xtjevXv5z3/+w8033wzAkCFDuOeee3jvvfe4/fbb8fb2LtF5X3/9derUqcP8+fMdW2Xddddd3Hnnnbz99ttcd911BAcH0759e2bNmkVUVBSdOnVyvL93796899571KhR47ytJqZPn058fDwLFy6kYcOGjniDgoKYNm0aI0aMoE6dOiW+ViIiIs6gdu3a+Pj4EB8fD+R//44YMaJQm9atW/Ovf/2LDRs20L59ezw8POjVqxc//fQTL7zwgmPlQ2JiIjExMTz66KMXPF9CQgIffvghTz75pGMCAkCfPn247bbb+Oqrr3jooYfYu3cvhw8f5v3336dv376Odhfr+58yMjLo0qVLoWO1atXi888/L7QLx6effsrJkyeZPXs27du3B2DgwIHcfPPNTJw4kWuvvRaz+crmjP/222/ExMTw9NNPc//99wNw5513Op7rFMfevXtZunSpY1v0Tp06ccstt7BkyRKGDh16RfGJVERKaohchvvuu++8Y25ubsTGxl7yvfXr12fy5MkA3H333Xh7e/PVV18xYsQIwsPD2bFjBwsXLmTgwIGOolV33303/v7+fP7556xZs4bOnTvTrl07ID9p0bRpU1JTU9m1axd9+vRxJDIKXq9evTpNmjS5ZGwrVqy4ouKZ2dnZLFq0yPGwwdfXl9dff51du3bRtGlT7HY748ePp1OnTkydOtVRkHTIkCH079+f9957j88//7xQn+Hh4bzzzjuFjjVs2JDVq1ef91CiKFdffXWhGxyAa665hsGDB/PTTz85HsqIiIg4o65duz
Jnzhw+/fRTVq1axaZNm5g6dSr+/v689tprXHvttSXqz8vLq9D3r5ubG5GRkY4HHZB/PxEYGMiNN97oOObq6so999z
"text/plain": [
"<Figure size 1600x400 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, axes = plt.subplots(1, 3, figsize=(16, 4))\n",
"\n",
"axes[0].plot(df_kdiag[\"k\"], df_kdiag[\"inertia\"], marker=\"o\")\n",
"axes[0].set_title(\"Elbow / Inertia\")\n",
"axes[0].set_xlabel(\"K\")\n",
"\n",
"axes[1].plot(df_kdiag[\"k\"], df_kdiag[\"silhouette\"], marker=\"o\")\n",
"axes[1].set_title(\"Silhouette\")\n",
"axes[1].set_xlabel(\"K\")\n",
"\n",
"axes[2].plot(df_kdiag[\"k\"], df_kdiag[\"davies_bouldin\"], marker=\"o\")\n",
"axes[2].set_title(\"Davies-Bouldin\")\n",
"axes[2].set_xlabel(\"K\")\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "a7249fea-2ad6-4d32-af6f-c9a234cbfe1c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"K=2 | silhouette=0.9996 | davies_bouldin=0.2613\n",
"K=5 | silhouette=0.9991 | davies_bouldin=0.2938\n",
"K=10 | silhouette=0.9987 | davies_bouldin=0.1262\n"
]
}
],
"source": [
"RESULTS = {}\n",
"\n",
"for k in [2, 5, 10]:\n",
" km = KMeans(n_clusters=k, n_init=50, random_state=42)\n",
" labels = km.fit_predict(X_scaled)\n",
" dfc[f\"cluster_k{k}\"] = labels\n",
"\n",
" RESULTS[k] = {\n",
" \"model\": km,\n",
" \"labels\": labels,\n",
" \"silhouette\": silhouette_score(X_scaled, labels),\n",
" \"davies_bouldin\": davies_bouldin_score(X_scaled, labels)\n",
" }\n",
"\n",
"for k in [2, 5, 10]:\n",
" print(f\"K={k} | silhouette={RESULTS[k]['silhouette']:.4f} | davies_bouldin={RESULTS[k]['davies_bouldin']:.4f}\")"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "0dd70e26-f831-4140-8815-eebb40c9aba7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=2 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9651</td>\n",
" <td>200.835000</td>\n",
" <td>2.524501</td>\n",
" <td>0.058824</td>\n",
" <td>2.0</td>\n",
" <td>0.683333</td>\n",
" <td>2.0</td>\n",
" <td>14.0</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-5.714286e-02</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>75884.615385</td>\n",
" <td>4.651799</td>\n",
" <td>0.094595</td>\n",
" <td>11.0</td>\n",
" <td>1.000000</td>\n",
" <td>1.0</td>\n",
" <td>36.0</td>\n",
" <td>0.666667</td>\n",
" <td>0.208726</td>\n",
" <td>-4.423077e+12</td>\n",
" <td>0.964708</td>\n",
" <td>0.035292</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k2 \n",
"0 9651 200.835000 2.524501 0.058824 \n",
"1 5 75884.615385 4.651799 0.094595 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k2 \n",
"0 2.0 0.683333 2.0 \n",
"1 11.0 1.000000 1.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k2 \n",
"0 14.0 1.000000 \n",
"1 36.0 0.666667 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k2 \n",
"0 0.000000 -5.714286e-02 \n",
"1 0.208726 -4.423077e+12 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \n",
"cluster_k2 \n",
"0 1.000000 0.000000 \n",
"1 0.964708 0.035292 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=5 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k5</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9649</td>\n",
" <td>200.729972</td>\n",
" <td>2.524501</td>\n",
" <td>0.058824</td>\n",
" <td>2.0</td>\n",
" <td>0.683333</td>\n",
" <td>2.0</td>\n",
" <td>14.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-5.714286e-02</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>94722.757683</td>\n",
" <td>5.784578</td>\n",
" <td>0.070374</td>\n",
" <td>10.0</td>\n",
" <td>0.586798</td>\n",
" <td>2.0</td>\n",
" <td>35.166667</td>\n",
" <td>1.500000</td>\n",
" <td>0.001110</td>\n",
" <td>-4.646436e+12</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>116019.862250</td>\n",
" <td>4.326850</td>\n",
" <td>0.082639</td>\n",
" <td>7.5</td>\n",
" <td>1.168750</td>\n",
" <td>2.0</td>\n",
" <td>35.833333</td>\n",
" <td>0.333333</td>\n",
" <td>0.682622</td>\n",
" <td>-2.943535e+12</td>\n",
" <td>0.482354</td>\n",
" <td>0.517646</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>152151.798643</td>\n",
" <td>3.417115</td>\n",
" <td>0.052149</td>\n",
" <td>7.5</td>\n",
" <td>1.381561</td>\n",
" <td>3.0</td>\n",
" <td>45.750000</td>\n",
" <td>0.875000</td>\n",
" <td>0.199417</td>\n",
" <td>-9.113122e+11</td>\n",
" <td>0.940068</td>\n",
" <td>0.059932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>75884.615385</td>\n",
" <td>4.651799</td>\n",
" <td>0.282051</td>\n",
" <td>15.0</td>\n",
" <td>1.000000</td>\n",
" <td>1.0</td>\n",
" <td>39.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.201133</td>\n",
" <td>-6.666667e+12</td>\n",
" <td>0.685315</td>\n",
" <td>0.314685</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med flow_freq_med \\\n",
"cluster_k5 \n",
"0 9649 200.729972 2.524501 0.058824 \n",
"1 2 94722.757683 5.784578 0.070374 \n",
"2 2 116019.862250 4.326850 0.082639 \n",
"4 2 152151.798643 3.417115 0.052149 \n",
"3 1 75884.615385 4.651799 0.282051 \n",
"\n",
" n_tx_total_med avg_n_isin_held_med n_isin_total_med \\\n",
"cluster_k5 \n",
"0 2.0 0.683333 2.0 \n",
"1 10.0 0.586798 2.0 \n",
"2 7.5 1.168750 2.0 \n",
"4 7.5 1.381561 3.0 \n",
"3 15.0 1.000000 1.0 \n",
"\n",
" avg_holding_months_per_isin_med exit_rate_per_isin_med \\\n",
"cluster_k5 \n",
"0 14.000000 1.000000 \n",
"1 35.166667 1.500000 \n",
"2 35.833333 0.333333 \n",
"4 45.750000 0.875000 \n",
"3 39.000000 0.000000 \n",
"\n",
" flow_direction_balance_med redemption_bias_med \\\n",
"cluster_k5 \n",
"0 0.000000 -5.714286e-02 \n",
"1 0.001110 -4.646436e+12 \n",
"2 0.682622 -2.943535e+12 \n",
"4 0.199417 -9.113122e+11 \n",
"3 0.201133 -6.666667e+12 \n",
"\n",
" aum_drawdown_last_med aum_final_to_peak_med \n",
"cluster_k5 \n",
"0 1.000000 0.000000 \n",
"1 1.000000 0.000000 \n",
"2 0.482354 0.517646 \n",
"4 0.940068 0.059932 \n",
"3 0.685315 0.314685 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== K=10 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>aum_qty_mean_med</th>\n",
" <th>gross_flow_to_aum_med</th>\n",
" <th>flow_freq_med</th>\n",
" <th>n_tx_total_med</th>\n",
" <th>avg_n_isin_held_med</th>\n",
" <th>n_isin_total_med</th>\n",
" <th>avg_holding_months_per_isin_med</th>\n",
" <th>exit_rate_per_isin_med</th>\n",
" <th>flow_direction_balance_med</th>\n",
" <th>redemption_bias_med</th>\n",
" <th>aum_drawdown_last_med</th>\n",
" <th>aum_final_to_peak_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k10</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9639</td>\n",
" <td>200.644385</td>\n",
" <td>2.528102</td>\n",
" <td>0.058824</td>\n",
" <td>2.0</td>\n",
" <td>0.683333</td>\n",
" <td>2.0</td>\n",
" <td>14.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-5.667766e-02</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>11281.487500</td>\n",
" <td>1.646680</td>\n",
" <td>0.056000</td>\n",
" <td>7.0</td>\n",
" <td>0.637500</td>\n",
" <td>3.0</td>\n",
" <td>43.222222</td>\n",
" <td>1.000000</td>\n",
" <td>-0.260413</td>\n",
" <td>-2.225352e+11</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>3</td>\n",
" <td>16597.286822</td>\n",
" <td>2.348949</td>\n",
" <td>0.015504</td>\n",
" <td>3.0</td>\n",
" <td>0.751938</td>\n",
" <td>1.0</td>\n",
" <td>46.000000</td>\n",
" <td>0.333333</td>\n",
" <td>-0.547161</td>\n",
" <td>-6.525000e+10</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>40190.138462</td>\n",
" <td>7.329808</td>\n",
" <td>0.046154</td>\n",
" <td>11.0</td>\n",
" <td>0.538462</td>\n",
" <td>3.0</td>\n",
" <td>23.333333</td>\n",
" <td>1.000000</td>\n",
" <td>-0.206507</td>\n",
" <td>-4.423077e+12</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>75884.615385</td>\n",
" <td>4.651799</td>\n",
" <td>0.282051</td>\n",
" <td>15.0</td>\n",
" <td>1.000000</td>\n",
" <td>1.0</td>\n",
" <td>39.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.201133</td>\n",
" <td>-6.666667e+12</td>\n",
" <td>6.853147e-01</td>\n",
" <td>0.314685</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>20646.937000</td>\n",
" <td>1.000000</td>\n",
" <td>0.027778</td>\n",
" <td>2.0</td>\n",
" <td>1.000000</td>\n",
" <td>1.0</td>\n",
" <td>36.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>-3.347944e+12</td>\n",
" <td>4.840572e-14</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1</td>\n",
" <td>211392.787500</td>\n",
" <td>7.653700</td>\n",
" <td>0.137500</td>\n",
" <td>13.0</td>\n",
" <td>1.337500</td>\n",
" <td>3.0</td>\n",
" <td>35.666667</td>\n",
" <td>0.666667</td>\n",
" <td>0.365245</td>\n",
" <td>-2.539125e+12</td>\n",
" <td>9.647075e-01</td>\n",
" <td>0.035292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>152961.538462</td>\n",
" <td>4.491325</td>\n",
" <td>0.030769</td>\n",
" <td>6.0</td>\n",
" <td>1.630769</td>\n",
" <td>4.0</td>\n",
" <td>53.000000</td>\n",
" <td>0.750000</td>\n",
" <td>-0.149927</td>\n",
" <td>-6.461538e+11</td>\n",
" <td>8.801370e-01</td>\n",
" <td>0.119863</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1</td>\n",
" <td>151342.058824</td>\n",
" <td>2.342905</td>\n",
" <td>0.073529</td>\n",
" <td>9.0</td>\n",
" <td>1.132353</td>\n",
" <td>2.0</td>\n",
" <td>38.500000</td>\n",
" <td>1.000000</td>\n",
" <td>0.548762</td>\n",
" <td>-1.176471e+12</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1</td>\n",
" <td>149255.376905</td>\n",
" <td>4.239348</td>\n",
" <td>0.094595</td>\n",
" <td>9.0</td>\n",
" <td>0.635135</td>\n",
" <td>1.0</td>\n",
" <td>47.000000</td>\n",
" <td>2.000000</td>\n",
" <td>0.208726</td>\n",
" <td>-4.869795e+12</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients aum_qty_mean_med gross_flow_to_aum_med \\\n",
"cluster_k10 \n",
"0 9639 200.644385 2.528102 \n",
"6 7 11281.487500 1.646680 \n",
"9 3 16597.286822 2.348949 \n",
"1 1 40190.138462 7.329808 \n",
"2 1 75884.615385 4.651799 \n",
"3 1 20646.937000 1.000000 \n",
"5 1 211392.787500 7.653700 \n",
"4 1 152961.538462 4.491325 \n",
"7 1 151342.058824 2.342905 \n",
"8 1 149255.376905 4.239348 \n",
"\n",
" flow_freq_med n_tx_total_med avg_n_isin_held_med \\\n",
"cluster_k10 \n",
"0 0.058824 2.0 0.683333 \n",
"6 0.056000 7.0 0.637500 \n",
"9 0.015504 3.0 0.751938 \n",
"1 0.046154 11.0 0.538462 \n",
"2 0.282051 15.0 1.000000 \n",
"3 0.027778 2.0 1.000000 \n",
"5 0.137500 13.0 1.337500 \n",
"4 0.030769 6.0 1.630769 \n",
"7 0.073529 9.0 1.132353 \n",
"8 0.094595 9.0 0.635135 \n",
"\n",
" n_isin_total_med avg_holding_months_per_isin_med \\\n",
"cluster_k10 \n",
"0 2.0 14.000000 \n",
"6 3.0 43.222222 \n",
"9 1.0 46.000000 \n",
"1 3.0 23.333333 \n",
"2 1.0 39.000000 \n",
"3 1.0 36.000000 \n",
"5 3.0 35.666667 \n",
"4 4.0 53.000000 \n",
"7 2.0 38.500000 \n",
"8 1.0 47.000000 \n",
"\n",
" exit_rate_per_isin_med flow_direction_balance_med \\\n",
"cluster_k10 \n",
"0 1.000000 0.000000 \n",
"6 1.000000 -0.260413 \n",
"9 0.333333 -0.547161 \n",
"1 1.000000 -0.206507 \n",
"2 0.000000 0.201133 \n",
"3 0.000000 1.000000 \n",
"5 0.666667 0.365245 \n",
"4 0.750000 -0.149927 \n",
"7 1.000000 0.548762 \n",
"8 2.000000 0.208726 \n",
"\n",
" redemption_bias_med aum_drawdown_last_med aum_final_to_peak_med \n",
"cluster_k10 \n",
"0 -5.667766e-02 1.000000e+00 0.000000 \n",
"6 -2.225352e+11 1.000000e+00 0.000000 \n",
"9 -6.525000e+10 1.000000e+00 0.000000 \n",
"1 -4.423077e+12 1.000000e+00 0.000000 \n",
"2 -6.666667e+12 6.853147e-01 0.314685 \n",
"3 -3.347944e+12 4.840572e-14 1.000000 \n",
"5 -2.539125e+12 9.647075e-01 0.035292 \n",
"4 -6.461538e+11 8.801370e-01 0.119863 \n",
"7 -1.176471e+12 1.000000e+00 0.000000 \n",
"8 -4.869795e+12 1.000000e+00 0.000000 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"profile_vars = [\n",
" \"aum_qty_mean\",\n",
" \"gross_flow_to_aum\",\n",
" \"flow_freq\",\n",
" \"n_tx_total\",\n",
" \"avg_n_isin_held\",\n",
" \"n_isin_total\",\n",
" \"avg_holding_months_per_isin\",\n",
" \"exit_rate_per_isin\",\n",
" \"flow_direction_balance\",\n",
" \"redemption_bias\",\n",
" \"aum_drawdown_last\",\n",
" \"aum_final_to_peak\",\n",
"]\n",
"\n",
"profile_vars = [c for c in profile_vars if c in dfc.columns]\n",
"\n",
"for k in [2, 5, 10]:\n",
" print(f\"\\n===== K={k} =====\")\n",
" prof = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" **{f\"{c}_med\": (c, \"median\") for c in profile_vars}\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
" display(prof)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "9368ab73-fd28-4b0e-9a8a-17b860cbf6d4",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABNgAAAGGCAYAAACpCjxcAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAkp1JREFUeJzs3Xlcjen/P/DXaRNSVCKVLRQKpSLLMGUb+zIYgyxhbBnZUtZGyb5mkKXF2JmyjGVmmsUyqRCyhLFliU8SoahO5/eHX+frzClaTt2du9fz8TiPT1333enVmfM5znnf1/W+JDKZTAYiIiIiIiIiIiIqEg2hAxAREREREREREakzFtiIiIiIiIiIiIiKgQU2IiIiIiIiIiKiYmCBjYiIiIiIiIiIqBhYYCMiIiIiIiIiIioGFtiIiIiIiIiIiIiKgQU2IiIiIiIiIiKiYmCBjYiIiIiIiIiIqBhYYCMiIiIiIiIiIioGFtiIiIjKMBcXF8yePVvoGKXq559/hpWVFR49eiR0FCIiIiKiAmGBjYiISACJiYmYP38+XF1dYWtrC3t7e3zzzTcIDQ3Fu3fvSiVDRkYG1q9fj+jo6FL5ferqyJEjCAkJETpGmWNlZYUffvhBaXzTpk2wsrKCt7c3cnJyinz/d+7cwbJly9CnTx/Y2dmhXbt2GDduHOLj44sTm4iIiKhEsMBGRERUyv766y/06tULx48fx5dffol58+Zh+vTpqFWrFpYvXw5/f/9SyZGRkYHAwEDExMSUyu8rqD59+uDKlSswMzMTOgoA4OjRowgLCxM6hloICgrC6tWr0a9fP/j7+0NDo+hvNQ8cOID9+/fDxsYGs2fPxsiRI3Hv3j0MHjwY//zzjwpTExERERWfltABiIiIypOHDx/C09MTtWrVQmhoKExMTOTHhg4digcPHuCvv/4SLqAKpKeno1KlSkX+eU1NTWhqaqowUdmUkZGBihUrCh1DZbZu3YqVK1eib9++WLx4cbGKawDQo0cPTJ48GZUrV5aPDRgwAN27d8f69evRpk2b4kYmIiIiUhnOYCMiIipFW7duRXp6Ovz9/RWKa7nq1KmDESNG5Pvz69evh5WVldJ4Xn3L4uPj4e7ujlatWqFZs2ZwcXGBt7c3AODRo0dwdnYGAAQGBsLKygpWVlZYv369/Ofv3LmDKVOmwMnJCba2tujfvz8iIyPz/L0xMTFYuHAhnJ2d0aFDh08+Bjt27ECPHj3QvHlzODo6on///jhy5Mgn/5acnBysX78e7dq1Q/PmzTF8+HD8+++/Sj3qcn/2woULCAgIQOvWrdGiRQtMmjQJL168UMjx+++/Y9y4cWjXrh1sbGzQqVMnbNiwAVKpVH7O8OHD8ddff+Hx48fyx8jFxSXfnAAQHR0NKysrhaW3w4cPR8+ePXH16lUMHToUzZs3x6pVqwAAmZmZWLduHTp37gwbGxt06NABy5YtQ2ZmpsL9nj17FkOGDIGDgwPs7OzQtWtX+X0ILTg4GMuXL0fv3r0REBBQ7OIaANjY2CgU1wCgWrVqcHBwwN27d4t9/0RERESqxBlsREREpejPP/+EhYUF7O3tS/T3pKSkwN3dHdWqVcO4ceOgr6+PR48e4bfffgMAGBoaYuHChVi4cCE6d+6Mzp07A4C8eHf79m0MGTIENWrUwNixY1GpUiUcP34ckyZNwvr16+Xn5/L19YWhoSEmTZqE9PT0fHPt27cPfn5+6Nq1K9zc3PD+/XvcvHkTly9fRq9evfL9uZUrV2Lr1q348ssv0b59eyQkJMDd3R3v37/P83w/Pz/o6+tj8uTJePz4MUJDQ/HDDz9gzZo18nPCw8NRqVIljBo1CpUqVcK5c+ewbt06vHnzBl5eXgCA8ePH4/Xr13j69Km8OPnfok9BvXz5EmPHjkWPHj3Qu3dvGBkZIScnBxMmTMCFCxcwaNAgWFpa4tatWwgNDcX9+/fx448/Avjw3+O7776DlZ
UVpkyZAh0dHTx48AAXL14sUhZVCg0NxZIlS9CzZ08sWbIkz+Laf4ub+dHT04OOjs4nz0lOTkbVqlWLEpWIiIioxLDARkREVErevHmDZ8+ewdXVtcR/V1xcHF69eoVt27bB1tZWPu7p6QkAqFSpErp27YqFCxfCysoKffr0Ufh5f39/mJqa4uDBg/KCx7fffoshQ4ZgxYoVSgU2AwMDhISEfHZp519//YWGDRti3bp1Bf5bnj9/jpCQEPkMs1yBgYEKM+4+VrVqVWzfvh0SiQTAhxlwO3bswOvXr1GlShUAH4p2urq68p8ZMmQI5s+fj927d8PT0xM6Ojpo27YtwsLCkJaWpvQYFVZycjJ8fX3xzTffyMcOHTqEf/75Bzt27ICDg4N8vGHDhliwYAEuXrwIe3t7nD17FllZWdiyZQsMDQ2LlUOVcmf39ezZE8uWLcv3v3/ubMnPCQgIQP/+/fM9fv78eVy6dAkTJkwoUl4iIiKiksICGxERUSl58+YNgKLPgCqM3CLSX3/9BWtra2hraxf4Z1++fIlz585hypQp8sy52rVrh/Xr1+PZs2eoUaOGfHzQoEEF6pumr6+Pp0+f4sqVK2jWrFmB8kRFRSE7OxvffvutwviwYcPyLbANGjRIXlwDAAcHB4SEhODx48ewtrYGAIXi2ps3b5CZmQkHBwfs3bsXd+/elZ+nKjo6OkrFoxMnTsDS0hL169dXmOXVunVrAB+Wm9rb20NfXx8AEBkZiQEDBqhkCaYqPH/+HABgbm7+yf/+wcHBBbq/Bg0a5HssJSUF06dPh7m5OcaMGVO4oEREREQljAU2IiKiUqKnpwcAePv2bYn/LicnJ3Tt2hWBgYEICQmBk5MTOnXqhF69en12CV5iYiJkMhnWrl2LtWvX5nlOSkqKQoHN3Ny8QLnGjh2Lf/75BwMHDkSdOnXQtm1b9OzZEy1btsz3Z548eQIAqF27tsJ41apVYWBgkOfP1KpVS+H73AJVWlqafOz27dtYs2YNzp07p1RIfP36dYH+nsKoUaOG0mP/4MED3LlzJ98ZXikpKQCA7t27Y//+/Zg7dy5WrlwJZ2dndO7cGd26dftkse3ly5fIysoqUl4DA4PPPlf69u2L//3vf9i0aROqVauGkSNH5nlecTckSE9Px3fffYe3b99i165dpVKkJiIiIioMFtiIiIhKiZ6eHkxMTHD79u0i38fHs7I+9nFj/tzz1q1bh0uXLuHPP//E6dOn4ePjg+DgYOzdu/eTBYqcnBwAwOjRo9G+ffs8z/lvsatChQoFym9paYkTJ07gr7/+wunTp/Hrr79i165dmDRpEqZMmVKg+yiI/IpOMpkMwIdC27Bhw6Cnp4cpU6agdu3aqFChAq5du4YVK1bIH4NPye+/RX4/+/GMuY/PbdSokby/23/VrFlT/rM7d+5EdHS0/LE7duwY9u7di+3bt+c7e8zDwwMxMTGf/VvyEhYWhlatWn3yHC0tLaxduxZjxozBkiVLUKVKFQwYMEDpvOTk5AL9zipVqig9TpmZmfDw8MDNmzexbds2NGrUqOB/BBEREVEpYYGNiIioFH355ZfYu3cv4uLiYGdnV+if/3gmVu7XwP/N8vqvFi1aoEWLFvD09MSRI0cwY8YMHDt2DAMHDsy3QGRhYQEA0NbWLvbMo7xUqlQJ3bt3R/fu3eXFk02bNuG7777Ls1CXOxstMTFRng0AUlNT8erVqyJliImJwcuXLxEYGAhHR0f5+H93BAXyL6TlPv7/ne32+PHjAueoXbs2EhIS4OzsnO/vyaWhoQFnZ2c4OzvD29sbmzZtwurVqxEdHZ3vfycvLy+FWXuFUdAlshUqVMDGjRvh5uaGefPmQV9fX6lHX7t27Qp0X//twZaTkwMvLy9ERUVhzZo1cHJyKvgfQERERFSKWGAjIiIqRWPGjMGRI0cwd+5chIaGwtjYWOF4YmIi/vzzT4wYMSLPn8+dORYbGyvfLCE9PR0REREK57
169Qr6+voKRZvGjRsD+DAjCAAqVqwIAEoFGCMjIzg5OWHv3r0YNmwYTExMFI6/ePGiyI32U1NTUa1aNfn3Ojo6sLS
"text/plain": [
"<Figure size 1400x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMsAAAGGCAYAAABlkwa3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAlAxJREFUeJzs3Xlczdn/B/DXbbO1KaLFGsquVIQZphjGvgwzJrKEsWVkSzHUkOxrBmEKY2eyjWVG8zWWoULIEsaWfZIsKdLt/v4w3Z87FS33drqfXs/H4z6mzufT7eW6c937/pzzPjKFQqEAERERERERERERQUd0ACIiIiIiIiIiouKCxTIiIiIiIiIiIqJ/sVhGRERERERERET0LxbLiIiIiIiIiIiI/sViGRERERERERER0b9YLCMiIiIiIiIiIvoXi2VERERERERERET/YrGMiIiIiIiIiIjoXyyWERERERERERER/YvFMiIiomLMzc0NkydPFh2jSP3yyy+ws7PDvXv3REchIiIiohKIxTIiIiIBEhISMG3aNLi7u6Nhw4ZwdHTE119/jXXr1uH169dFkiEtLQ3Lli1DVFRUkfw+bbV3716Eh4eLjlHs2NnZ4Ycffsg2vnLlStjZ2cHPzw+ZmZkFvv979+7Bzs4ux9uvv/5amOhEREREH6QnOgAREVFJc+TIEXz33XcwMDBAt27dUKdOHbx9+xZnzpzBvHnz8Pfff2PGjBkaz5GWloaQkBCMHj0azZo10/jvy6tu3bqhU6dOMDAwEB0FALBv3z5cv34dAwcOFB2l2AsNDcWiRYvQo0cPBAUFQUen8NdlO3fujE8//VRlrEmTJoW+XyIiIqLcsFhGRERUhO7evQsfHx9YWVlh3bp1sLCwUB7z8PDAnTt3cOTIEXEB1SA1NRVly5Yt8M/r6upCV1dXjYmKp7S0NJQpU0Z0DLVZs2YNFixYgO7du2PWrFlqKZQBQL169dCtWze13BcRERFRXnAZJhERURFas2YNUlNTERQUpFIoy1KtWjUMGDAg159ftmwZ7Ozsso3n1OcrLi4OXl5eaNasGRo1agQ3Nzf4+fkBeLfEzdXVFQAQEhKiXN62bNky5c/fuHEDY8aMgYuLCxo2bIiePXsiMjIyx98bHR2NgIAAuLq6onXr1h98DDZs2IBOnTqhcePGcHZ2Rs+ePbF3794P/lkyMzOxbNkytGrVCo0bN0b//v3x999/Z+vplvWzZ86cQXBwMJo3b44mTZpg1KhRePr0qUqOw4cPY9iwYWjVqhUaNGiAtm3bYvny5ZDL5cpz+vfvjyNHjuD+/fvKx8jNzS3XnAAQFRUFOzs7leWt/fv3R+fOnXHx4kV4eHigcePGWLhwIQAgPT0dS5cuRbt27dCgQQO0bt0ac+fORXp6usr9njhxAn379oWTkxMcHBzQvn175X2IFhYWhnnz5qFr164IDg5WW6EsS2pqarbHg4iIiEhTOLOMiIioCP3vf/9DlSpV4OjoqNHfk5SUBC8vL5QvXx7Dhg2DsbEx7t27h99//x0AYGZmhoCAAAQEBKBdu3Zo164dACgLcdevX0ffvn1RqVIlDB06FGXLlsWBAwcwatQoLFu2THl+lsDAQJiZmWHUqFFITU3NNde2bdswc+ZMtG/fHp6ennjz5g2uXr2K8+fPo0uXLrn+3IIFC7BmzRp89tln+OSTTxAfHw8vLy+8efMmx/NnzpwJY2NjjB49Gvfv38e6devwww8/YPHixcpzIiIiULZsWQwaNAhly5bFqVOnsHTpUqSkpMDX1xcAMHz4cLx8+RKPHj1SFhrLlSv3kUc/Z8+ePcPQoUPRqVMndO3aFebm5sjMzMSIESNw5swZ9OnTB7a2trh27RrWrVuH27dv48cffwTw7u/j22+/hZ2dHcaMGQMDAwPcuXMHZ8+eLVAWdVq3bh1mz56Nzp07Y/bs2TkWyv5bqMyNoaFhtuW3ISEhmDt3LmQyGe
rXrw8fHx+0atVKLdmJiIiIcsJiGRERURFJSUnB48eP4e7urvHfFRsbi+fPn2Pt2rVo2LChctzHxwcAULZsWbRv3x4BAQGws7PLtswtKCgIlpaW2Llzp7J48c0336Bv376YP39+tmKZiYkJwsPDP7p88siRI6hduzaWLl2a5z/LkydPEB4erpz5lSUkJERlJtz7TE1N8dNPP0EmkwF4NzNtw4YNePnyJYyMjAC8K8CVLl1a+TN9+/bFtGnTsHnzZvj4+MDAwAAtW7bE+vXr8eLFi0IvBUxMTERgYCC+/vpr5dju3bvx119/YcOGDXByclKO165dG9OnT8fZs2fh6OiIEydO4O3bt1i9ejXMzMwKlUOdsmbdde7cGXPnzs317z9rFuPHBAcHo2fPngAAHR0dtGrVCm3btkWlSpVw9+5dhIeHY+jQoVixYgXatGmjrj8GERERkQoWy4iIiIpISkoKgILPTMqPrILQkSNHYG9vD319/Tz/7LNnz3Dq1CmMGTNGmTlLq1atsGzZMjx+/BiVKlVSjvfp0ydPfcaMjY3x6NEjXLhwAY0aNcpTnpMnTyIjIwPffPONyni/fv1yLZb16dNHWSgDACcnJ4SHh+P+/fuwt7cHAJVCWUpKCtLT0+Hk5IStW7fi5s2byvPUxcDAQFkIynLw4EHY2tqiZs2aKrOvmjdvDuDdkk5HR0cYGxsDACIjI9GrVy+1L3MsqCdPngAAbGxsPvj3HxYWlqf7q1WrlvJrKysrrF27VuV41uYPs2fPZrGMiIiINIbFMiIioiJiaGgIAHj16pXGf5eLiwvat2+PkJAQhIeHw8XFBW3btkWXLl0+ustkQkICFAoFlixZgiVLluR4TlJSkkqxzMbGJk+5hg4dir/++gu9e/dGtWrV0LJlS3Tu3BlNmzbN9WcePHgAAKhatarKuKmpKUxMTHL8GSsrK5Xvs4pNL168UI5dv34dixcvxqlTp7IVBV++fJmnP09+VKpUKdtjf+fOHdy4cSPXmVdJSUkAgI4dO2L79u2YOnUqFixYAFdXV7Rr1w4dOnT4YOHs2bNnePv2bYHympiYfPS50r17d/zzzz9YuXIlypcvn+uOoS1atChQhv8yNTVFz549ERoaikePHqFy5cpquV8iIiKi97FYRkREVEQMDQ1hYWGB69evF/g+3p8t9b73m9Jnnbd06VKcO3cO//vf/3Ds2DH4+/sjLCwMW7du/eDstszMTADA4MGD8cknn+R4zn8LV6VKlcpTfltbWxw8eBBHjhzBsWPH8Ntvv2HTpk0YNWoUxowZk6f7yIvcCkgKhQLAu6JZv379YGhoiDFjxqBq1aooVaoULl26hPnz5ysfgw/J7e8it599fybb++fWqVNH2Q/tv7KKQaVLl8bGjRsRFRWlfOz279+PrVu34qeffsp1Vpe3tzeio6M/+mfJyfr169GsWbMPnqOnp4clS5ZgyJAhmD17NoyMjNCrV69s5yUmJubpdxoZGeX4OL0v6zF59uwZi2VERESkESyWERERFaHPPvsMW7duRWxsLBwcHPL98+/PkMr6Gvj/2Vf/1aRJEzRp0gQ+Pj7Yu3cvJkyYgP3796N37965FnuqVKkCANDX11fbjKD3lS1bFh07dkTHjh2Rnp4Ob29vrFy5Et9++22ORbesWWIJCQnKbACQnJyM58+fFyhDdHQ0nj17hpCQEDg7OyvH/7uzJZB7USzr8f/vLLT79+/nOUfVqlURHx8PV1fXXH9PFh0dHbi6usLV1RV+fn5YuXIlFi1ahKioqFz/nnx9fVVm0+VHXpehlipVCitWrICnpye+//57GBsbZ+tpl9eG/O/3LMtN1t9RcerdRkRERNLCYhkREVERGjJkCPbu3YupU6di3bp1qFChgsrxhIQE/O9//8OAAQNy/PmsGV0xMTHKjQJSU1Oxa9culfOeP38OY2NjlQJM3bp1AQDp6ekAgDJlygBAtmKKubk5XFxcsHXrVvTr1w8WFh
Yqx58+fVrgQkVycjLKly+v/N7AwAC2trY4evQo3r59m2OxzNXVFXp6eti8eTNatmypHN+4cWOBMgD/P/Msa6YZ8O5
"text/plain": [
"<Figure size 1400x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABNgAAAGGCAYAAACpCjxcAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAoFtJREFUeJzs3XdYVNf6NuBnaIIiKGCh2VDAggoCijWCRqNijRqjYjexRrCiRkFF7BUbFhBjN2JJLDmSYgkCduwGCxY0CFhBkWG+P/ycnxNAYRhYsHnu65rrwNqbmWcIZ5x591rvkikUCgWIiIiIiIiIiIhILVqiAxARERERERERERVnLLARERERERERERHlAwtsRERERERERERE+cACGxERERERERERUT6wwEZERERERERERJQPLLARERERERERERHlAwtsRERERERERERE+cACGxERERERERERUT6wwEZERERERERERJQPLLAREREVEe7u7pgyZYroGIVq7969sLOzw4MHD0RHISIiIiJSGwtsREREBSw+Ph4zZsyAh4cHHBwc4OTkhG+++QabN2/GmzdvCiVDWloaVq5ciaioqEJ5vOLq4MGDCA0NFR2jyLGzs8OsWbOyjK9duxZ2dnbw9fVFZmZmvh5jzZo1+P7779G0aVPY2dlh5cqVOZ775MkT/PDDD3B2doaTkxNGjBiB+/fv5+vxiYiIiPKDBTYiIqIC9Oeff8LT0xOHDx9G69at8eOPP2L8+PGwsLDAwoULERAQUCg50tLSEBQUhOjo6EJ5vNzq0qULLl26BEtLS9FRAAC//PILwsLCRMcoFoKDg7F06VJ069YNAQEB0NLK39vKZcuW4fLly6hdu/Ynz3v9+jW8vLwQExOD7777DmPHjsW1a9fQr18/pKSk5CsDERERkbp0RAcgIiKSqvv378Pb2xsWFhbYvHkzKlasqDzWt29f3Lt3D3/++ae4gBqQmpqK0qVLq/3z2tra0NbW1mCioiktLQ0GBgaiY2jMhg0bsHjxYnTt2hVz587Nd3ENACIiImBlZYXk5GS4ubnleN62bdtw9+5d7N69G/Xr1wcAtGjRAp6enggJCYGPj0++sxARERHlFWewERERFZANGzYgNTUVAQEBKsW1D6pWrYoBAwbk+PMrV66EnZ1dlvHs+pbFxsZiyJAhaNy4MerXrw93d3f4+voCAB48eKAsWAQFBcHOzi7LEry4uDiMHTsWrq6ucHBwQPfu3REREZHt40ZHR8PPzw9ubm5o1arVJ38HW7ZsQceOHdGgQQO4uLige/fuOHjw4CefS2ZmJlauXInmzZujQYMG6N+/P/75558sPeo+/OzZs2cRGBiIJk2aoGHDhhg1ahSSk5NVchw7dgzDhw9H8+bNUa9ePbRp0warVq2CXC5XntO/f3/8+eefePjwofJ35O7unmNOAIiKioKdnZ3K0tv+/fujU6dOuHz5Mvr27YsGDRpgyZIlAID09HSsWLECbdu2Rb169dCqVSssWLAA6enpKvd76tQp9OnTB87OznB0dES7du2U9yFaSEgIFi5ciM6dOyMwMFAjxTUAsLKyytV5R48ehYODg7K4BgA2NjZwc3PD4cOHNZKFiIiIKK84g42IiKiA/PHHH7C2toaTk1OBPk5SUhKGDBmC8uXLY/jw4TAyMsKDBw/wv//9DwBgYmICPz8/+Pn5oW3btmjbti0AKIt3t27dQp8+fVCpUiUMGzYMpUuXxuHDhzFq1CisXLlSef4H/v7+MDExwahRo5Camppjrl27dmHOnDlo164dvLy88PbtW9y4cQMXL16Ep6dnjj+3ePFibNiwAa1bt0aLFi1w/fp1DBkyBG/fvs32/Dlz5sDIyAijR4/Gw4cPsXnzZsyaNQvLli1TnhMeHo7SpUtj0KBBKF26NE6fPo0VK1bg1atXmDx5MgDg+++/x8uXL/H48WNlcbJMmTKf+e1n79mzZxg2bBg6duyIzp
07w9TUFJmZmRgxYgTOnj2LXr16wcbGBjdv3sTmzZtx9+5drF69GsD7/x7fffcd7OzsMHbsWOjp6eHevXs4d+6cWlk0afPmzZg3bx46deqEefPmZVtc+29xMyeGhobQ09PL0+NnZmbixo0b6NGjR5ZjDg4OOHnyJF69egVDQ8M83S8RERFRfrHARkREVABevXqFJ0+ewMPDo8Af6/z583j+/Dk2btwIBwcH5bi3tzcAoHTp0mjXrh38/PxgZ2eHLl26qPx8QEAAzM3N8fPPPysLHt9++y369OmDRYsWZSmwGRsbIzQ09LNLO//880/UqlULK1asyPVzefr0KUJDQ5UzzD4ICgrKsel9uXLlsGnTJshkMgDvizBbtmzBy5cvUbZsWQDvi3b6+vrKn+nTpw9mzJiB7du3w9vbG3p6emjWrBnCwsLw4sWLLL+jvEpMTIS/vz+++eYb5dj+/fvx999/Y8uWLXB2dlaO16pVCzNnzsS5c+fg5OSEU6dO4d27d1i/fj1MTEzylUOTPszu69SpExYsWJDjf/9PLe/8WGBgILp3756nDM+ePUN6ejoqVKiQ5diHsX///ZcFNiIiIip0LLAREREVgFevXgFQfwZUXnwoIv3555+wt7eHrq5urn/22bNnOH36NMaOHavM/EHz5s2xcuVKPHnyBJUqVVKO9+rVK1d904yMjPD48WNcunRJZTnfp0RGRiIjIwPffvutyni/fv1yLLD16tVLWVwDAGdnZ4SGhuLhw4ewt7cHAJXi2qtXr5Ceng5nZ2fs3LkTt2/fVp6nKXp6elmKR0eOHIGNjQ1q1KihMsurSZMmAN4vN3VycoKRkRGA9z3JevToobElmPn19OlTAO+Xcn7qv39ISEiu7q9mzZp5zvBhFmN2M99KlSqlcg4RERFRYWKBjYiIqAB8mEHz+vXrAn8sV1dXtGvXDkFBQQgNDYWrqyvatGkDT0/Pzy7Bi4+Ph0KhwPLly7F8+fJsz0lKSlIpsOW2V9awYcPw999/o2fPnqhatSqaNWuGTp06oVGjRjn+zKNHjwAAVapUURkvV64cjI2Ns/0ZCwsLle8/FKhevHihHLt16xaWLVuG06dPZykkvnz5MlfPJy8qVaqU5Xd/7949xMXF5TjDKykpCQDQoUMH7N69G9OnT8fixYvh5uaGtm3bon379p8stj179gzv3r1TK6+xsfFn/1a6du2Kf//9F2vXrkX58uUxcODAbM9r2rSpWhly40MR7b8964D/K6x9OIeIiIioMLHARkREVAAMDQ1RsWJF3Lp1S+37+HhW1sc+bsz/4bwVK1bgwoUL+OOPP3DixAlMnToVISEh2Llz5ydn0WVmZgIABg8ejBYtWmR7zn+LXbktYNjY2ODIkSP4888/ceLECfz222/Ytm0bRo0ahbFjx+bqPnIjp6KTQqEA8L7Q1q9fPxgaGmLs2LGoUqUKSpUqhStXrmDRokXK38Gn5PTfIqef/XjG3Mfn2traKvu7/VflypWVP7t161ZERUUpf3eHDh3Czp07sWnTphxnj40ZMwbR0dGffS7ZCQsLQ+PGjT95jo6ODpYvX46hQ4di3rx5KFu2bLa90BITE3P1mGXLls329/Qp5cqVg56eXraP8WEsuw1FiIiIiAoaC2xEREQFpHXr1ti5cyfOnz8PR0fHPP/8xzOxPnwN/N8sr/9q2LAhGjZsCG9vbxw8eBATJkzAoUOH0LNnzxwLRNbW1gAAXV3dApl5VLp0aXTo0AEdOnRAeno6xowZg7Vr1+K7777LtlD3YTZafHy8MhsApKSk4Pnz52pliI6OxrNnzxAUFAQXFxfl+H93BAVyLqR9+P3/d7bbw4cPc52jSpUquH79Otzc3HJ8nA+0tLTg5uYGNzc3+Pr6Yu3atVi6dCmioqJy/O80efJklVl7eZHbJbKlSpXCmjVr4OXlhR9//BFGRkZZevQ1b948V/elTg82LS0t2Nra4vLly1mOXbp0CdbW1uy/Rk
REREKwwEZERFRAhg4dioMHD2L69OnYvHkzzMzMVI7Hx8fjjz/+wIABA7L9+Q8zx2JiYpSbJaSmpmLfvn0q5z1//hx
"text/plain": [
"<Figure size 1400x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def robust_zscore_col(s):\n",
" med = np.nanmedian(s)\n",
" mad = np.nanmedian(np.abs(s - med))\n",
" if mad == 0 or np.isnan(mad):\n",
" return np.zeros(len(s))\n",
" return (s - med) / (1.4826 * mad)\n",
"\n",
"for k in [2, 5, 10]:\n",
" prof = dfc.groupby(f\"cluster_k{k}\")[profile_vars].median()\n",
" prof_z = prof.copy()\n",
"\n",
" for c in prof.columns:\n",
" prof_z[c] = robust_zscore_col(prof[c].values)\n",
"\n",
" plt.figure(figsize=(14, 4))\n",
" sns.heatmap(prof_z, cmap=\"RdBu_r\", center=0)\n",
" plt.title(f\"Cluster signatures — K={k}\")\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "6e7fa1e2-cec7-41be-943f-d06b25e1e175",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuYAAAKyCAYAAACZhhuyAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAsEJJREFUeJzs3XlcVdX+//E3qEgooigGOIt6tECFIkVR0rSy8jqQaZmmUWk45Ix200S9Ys45JXbRHDFz6qo5l90sTS2xNELLAWcRUxFUFM7vj36d7z2hyIED5xx9PR+P83i491p7r88+2r0fPqy1tpPRaDQKAAAAgE052zoAAAAAACTmAAAAgF0gMQcAAADsAIk5AAAAYAdIzAEAAAA7QGIOAAAA2AEScwAAAMAOkJgDAAAAdoDEHAAeML/++qtmzpyps2fP2joUAMD/IDEHHMTMmTNlMBhsHQZyMXz4cLVs2dKmMdzr30laWpr69u2rK1euyMfHpwgjAwDcC4k5YAOrV6+WwWAwfQICAhQaGqqIiAgtWrRI165ds8o458+f18yZM5WYmGiV+z0orl+/rpkzZ+r777+3aRxz587Vtm3brHrPESNG6JFHHtG7775r1fsWlu+//14Gg0GbNm0yO5+ZmalevXqpbt26WrlyZYHG2LVrl0aMGKFnnnlGDRo00FNPPaV//vOfunDhQoHuCwCWIjEHbKh///6aOHGiRo8erW7dukmSxo8fr3/84x/69ddfzfq+/fbb+umnnyy6/4ULFzRr1iwScwtdv35ds2bN0p49eyy6buzYsTkSyIKIjY21ODHP7d/JqVOn5O/vr0mTJsnZ2XH/5//WrVvq37+/vv76a40dO1Yvvvhige43adIk7dmzR61atdJ7772n559/Xhs3blSHDh2UkpJipagB4N6K2zoA4EHWvHlzBQQEmI579eqlXbt2qXfv3oqMjNQXX3whV1dXSVLx4sVVvDj/ydqjjIwMubm5qUSJEjaPIbd/J5UrV1bv3r2LODLrunXrlgYMGKAdO3ZozJgx6tSpU4HvOWLECD322GNmP6w0a9ZMr776qpYsWaKBAwcWeAwAyAvHLZkA96mQkBBFRkbq9OnT+s9//mM6f6e5w99++61efvllPf744woMDNQzzzyjqVOnSvpzCsBflcQRI0aYps2sXr1akrRv3z71799fTz75pPz9/RUWFqbx48frxo0bZmMMHz5cgYGBOn/+vCIjIxUYGKjGjRvrgw8+UFZWllnf7OxsLVy4UG3btlVAQIAaN26siIgI/fzzz2b9Pv/8c3Xs2FH169fXE088oYEDB+ZpIeJf38GxY8c0ZMgQPfbYY2rcuLGmT58uo9Gos2fP6u2331ZQUJCaNm2q+fPnm12fmZmpDz/8UB07dtRjjz2mhg0b6pVXXtHu3btNfU6dOqWQkBBJ0qxZs0zf28yZM82+j+TkZL355psKDAzUkCFDTG3/O8d8xowZqlu3rnbt2mUWx8iRI+Xv75/jtyL/y2AwKCMjQ2vWrDHFMHz4cLPv4bffftPgwYMVHBysV155xazt7/L6nR84cEARERF67LHH1KBBA7366qv64Ycf7v6XUoRu376tQYMGafv27Ro9erReeuklq9w3ODg4x28QgoODVbZsWR09etQqYwBAXlB+A+xQu3btNHXqVO3cufOuyceRI0fUq1cvGQwG9e/fXy4uLjpx4oR+/PFHSZKfn5/69++vGTNmqHPnznrsscckSUFBQZKkTZs26caNG3r55ZdVtmxZ/fTTT1qyZInOnTunGTNmmI2VlZWliIgI1a9fX8OGDdOuXbs0f/58ValSxZQQStI///lPrV69Ws2bN9eLL76orKws7du3TwcOHDD9ZuCjjz7Shx9+qDZt2ujFF1/UpUuXtGTJEnXt2lVr165VmTJl7vn9DBw4UH5+fho8eLC+/vprffTRRypbtqyWL1+uxo0ba8iQIVq3bp0++OADBQ
QEKDg4WJJ07do1ffbZZ3rhhRfUqVMnpaena+XKlXrjjTf02WefqV69evL09NTo0aM1evRotW7dWq1bt5Yks2T39u3bpuQ1KirK9FuNv3v77bf11Vdf6Z///Kf+85//qHTp0vrmm2+0YsUKvfPOO6pbt+5dn3HixIl67733VL9+fdO/gapVq5r1eeedd1StWjUNHDhQRqPxrvfK63e+a9cuvfnmm/L391ffvn3l5OSk1atX67XXXtOyZctUv379e/7dFJasrCwNGjRIW7du1ahRo9SlS5ccfW7duqW0tLQ83a9s2bK5TudJT09Xenq6ypUrl++YAcBiRgBFbtWqVcY6deoYf/rpp7v2eeyxx4zt27c3Hc+YMcNYp04d0/GCBQuMderUMaampt71Hj/99JOxTp06xlWrVuVou379eo5zsbGxRoPBYDx9+rTpXFRUlLFOnTrGWbNmmfVt3769sUOHDqbjXbt2GevUqWMcO3ZsjvtmZ2cbjUaj8dSpU8Z69eoZP/roI7P2pKQk4yOPPJLj/N/99R2MHDnSdO727dvG5s2bGw0GgzE2NtZ0/sqVK8b69esbo6KizPrevHnT7J5XrlwxNmnSxDhixAjTudTUVGOdOnWMM2bMyBHDX9/H5MmT79jWokWLHM/26KOPGv/5z38ar1y5YmzWrJmxY8eOxlu3buX6rEaj0diwYUOz+P/+PQwaNOiubX/J63eenZ1tfPrpp42vv/666e/LaPzz30nLli2NPXv2vGe8hWH37t3GOnXqGFu0aGGsU6eOccmSJffsm5fPyZMncx139uzZxjp16hi/++47az8SANwVFXPATrm5uSk9Pf2u7X9VObdv367w8HCLF/P9b5U3IyNDN27cUGBgoIxGo3755Rf5+vqa9X/55ZfNjh977DGzqTZbtmyRk5OT+vbtm2MsJycnSdLWrVuVnZ2tNm3a6NKlS6b2ChUqqFq1avr+++/zNAf6fxf7FStWTP7+/jp37pzZ+TJlyqhGjRo6efKkWd9ixYpJ+nPazdWrV5WdnS1/f3/98ssv9xz3f/39+7ibOnXqqH///poyZYqSkpL0xx9/aP78+VZZL3CnqvHf5fU7T0xM1PHjx/X222/rjz/+MLtHSEiIPv/8c2VnZ9ts0ejFixdVvHhxVa5c+a596tatqwULFuTpfl5eXndt27t3r2bPnq02bdqYpjUBQFEgMQfsVEZGhsqXL3/X9ueee06fffaZ3nvvPU2ZMkUhISFq3bq1nn322TwlT2fOnNGMGTP05Zdf6sqVK2Ztf9+usWTJkvL09DQ75+HhYXZdcnKyKlasqLJly951zOPHj8toNOrpp5++Y3tek9W//9Dg7u5+xxjd3d11+fJls3Nr1qzR/PnzdezYMd26dct0PreE705xent757l/RESENmzYoJ9++kmDBg1SrVq18nxtbvISc16/8+PHj0uSoqKi7nqvtLQ0eXh43LGtILuX5JYk/2Xo0KFauHCh3nnnHcXFxZmmZv0vDw8PNWnSJN9xSNLvv/+uvn37qnbt2ho3blyB7gXgT3v37lVcXJwOHjyolJQUzZ49W61atcrz9Tdv3tT777+vQ4cO6ffff9eTTz6pOXPmmPXZt2+fJk+erGPHjun69evy9fVVly5d1KNHDys/TeEiMQfs0Llz55SWlpZjTvH/cnV11dKlS/X9999rx44d+uabb/TFF1/o008/1fz5802V4TvJyspSz549deXKFb3xxhuqWbOm3NzcdP78eQ0fPlzZ2dlm/XO7lyWys7Pl5OSkjz/++I73dHNzy9N97vSDx91iNP7P3OvPP/9cw4cPV6tWrRQREaHy5curWLFiio2NNaus34uLi4tFleOTJ0/qxIkTkqTDhw/n+bp7KVmy5D375PU7/+t7GjZsmOrVq3fHe+X29xMaGpqXkO8oKSnpnn28vLy0YMECvfzyy+rVq5eWLFmSY45+ZmZmjh8y78bT0zPH93H27FlFRESodO
nSmjdvnkqXLp33hwBwVxkZGTIYDAoPD7/jb1XvJSsrSyVLllS3bt20efPmO/Zxc3PTq6++KoPBoIceekg//PCD3n/
"text/plain": [
"<Figure size 800x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuYAAAKyCAYAAACZhhuyAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAsBlJREFUeJzs3XlcVdX+//E3qEgooigGKE6oRwtUKEWUJE0ru3mdMi3TNCoNh5yxbppoV8w5p8RCc8TMqavmXHazNLXE0gjtOuAsYiqCiML5/dGv8+2EIgcOnHP09Xw8zuPh3mvtvT77pPd++LDW2k5Go9EoAAAAADblbOsAAAAAAJCYAwAAAHaBxBwAAACwAyTmAAAAgB0gMQcAAADsAIk5AAAAYAdIzAEAAAA7QGIOAAAA2AEScwC4z/z666+aOXOmzp49a+tQAAB/QWIOOIiZM2fKYDDYOgzkYeTIkWrVqpVNY7jb35O0tDT1799fV65ckY+PTzFGBgC4GxJzwAZWr14tg8Fg+gQGBiosLEwRERFatGiRrl27ZpVxzp8/r5kzZyoxMdEq97tfXL9+XTNnztT3339v0zjmzp2rbdu2WfWeb731lh566CG9/fbbVr1vUfn+++9lMBi0adMms/NZWVnq06eP6tWrp5UrVxZqjL//e/zrJyUlpVD3BgBLlLR1AMD9bODAgapatapu3bqlixcvas+ePRo/frw++eQTzZkzR/Xq1TP1feONN/T6669bdP8LFy5o1qxZqlKliurXr2/t8O9Z169f16xZs9S/f3+FhITk+7px48bJaDRaLY7Y2Fg99dRTat26db6vyevvyalTpxQQEKDevXvL2dlx6zI3b97UwIED9fXXX2vcuHF67rnnrHLfP/89/lW5cuWscm8AyA8Sc8CGWrRoocDAQNNxnz59tGvXLvXt21eRkZH64osv5OrqKkkqWbKkSpbkn6w9ysjIkJubm0qVKmXzGPL6e1K1alX17du3mCOzrps3b2rQoEHasWOHxo4dqy5duljt3n//9wgAxc1xSybAPSo0NFSRkZE6ffq0/vOf/5jO327u8LfffqsXXnhBjz76qIKCgvTUU09p6tSpkv6YAvBnJfGtt94y/Wp+9erVkqR9+/Zp4MCBevzxxxUQEKDw8HCNHz9emZmZZmOMHDlSQUFBOn/+vCIjIxUUFKSmTZvq/fffV3Z2tlnfnJwcLVy4UO3atVNgYKCaNm2qiIgI/fzzz2b9Pv/8c3Xq1EkNGjRQkyZNNHjw4HwtRPzzOzh27JiGDRumRx55RE2bNtX06dNlNBp19uxZvfHGGwoODlbz5s01f/58s+uzsrL0wQcfqFOnTnrkkUfUqFEjvfjii9q9e7epz6lTpxQaGipJmjVrlul7mzlzptn3kZycrNdee01BQUEaNmyYqe2vc8xnzJihevXqadeuXWZxjBo1SgEBAfr111/v+KwGg0EZGRlas2aNKYaRI0eafQ+//fabhg4dqsaNG+vFF180a/u7/H7nBw4cUEREhB555BE1bNhQL730kn744Yc7/0cpRrdu3dKQIUO0fft2jRkzRs8//7zVx7h27Vquv9cAUFwovwF2qH379po6dap27tx5x+TjyJEj6tOnjwwGgwYOHCgXFxedOHFCP/74oyTJ399fAwcO1IwZM9S1a1c98sgjkqTg4GBJ0qZNm5SZmakXXnhB5cuX108//aQlS5bo3LlzmjFjhtlY2dnZioiIUIMGDTRixAjt2rVL8+fPl5+fnykhlKR//etfWr16tVq0aKHnnntO2dnZ2rdvnw4cOGCqRH744Yf64IMP1LZtWz333HO6dOmSlixZou7du2vt2rX5mjowePBg+fv7a+jQofr666/14Ycfqnz58lq+fLmaNm2qYcOGad26dXr//fcVGBioxo0bS/oj6frss8/07LPPqkuXLkpPT9fKlSv16quv6rPPPlP9+vXl6empMWPGaMyYMWrTpo
3atGkjSWbJ7q1bt0zJa1RUlOm3Gn/3xhtv6KuvvtK//vUv/ec//1HZsmX1zTffaMWKFXrzzTfNpir93cSJE/XOO++oQYMGpr8D1apVM+vz5ptvqnr16ho8eHCeU2jy+53v2rVLr732mgICAtS/f385OTlp9erVevnll7Vs2TI1aNDgrv9tikp2draGDBmirVu3avTo0erWrVuuPjdv3lRaWlq+7le+fPlc03l69uypjIwMlSpVSmFhYRo5cqRq1KhhjfABIH+MAIrdqlWrjHXr1jX+9NNPd+zzyCOPGDt06GA6njFjhrFu3bqm4wULFhjr1q1rTE1NveM9fvrpJ2PdunWNq1atytV2/fr1XOdiY2ONBoPBePr0adO5qKgoY926dY2zZs0y69uhQwdjx44dTce7du0y1q1b1zhu3Lhc983JyTEajUbjqVOnjPXr1zd++OGHZu1JSUnGhx56KNf5v/vzOxg1apTp3K1bt4wtWrQwGgwGY2xsrOn8lStXjA0aNDBGRUWZ9b1x44bZPa9cuWJs1qyZ8a233jKdS01NNdatW9c4Y8aMXDH8+X1Mnjz5tm0tW7bM9WwPP/yw8V//+pfxypUrxscee8zYqVMn482bN/N8VqPRaGzUqJFZ/H//HoYMGXLHtj/l9zvPyckxPvnkk8ZXXnnF9N/LaPzj70mrVq2MvXv3vmu8RWH37t3GunXrGlu2bGmsW7euccmSJXftm5/PyZMnTddt2LDBOHLkSOOaNWuMW7duNU6bNs3YsGFDY0hIiPHMmTPF8ZgAYDQajUYq5oCdcnNzU3p6+h3b/6xybt++XZ07d7Z4Md9fq7wZGRnKzMxUUFCQjEajfvnlF/n6+pr1f+GFF8yOH3nkEbOpNlu2bJGTk5P69++faywnJydJ0tatW5WTk6O2bdvq0qVLpvZKlSqpevXq+v777/M1B/qvi/1KlCihgIAAnTt3zux8uXLlVLNmTZ08edKsb4kSJST9Me3m6tWrysnJUUBAgH755Ze7jvtXf/8+7qRu3boaOHCgpkyZoqSkJP3++++aP3++VdYL3K5q/Hf5/c4TExN1/PhxvfHGG/r999/N7hEaGqrPP/9cOTk5Nls0evHiRZUsWTLX4sy/qlevnhYsWJCv+3l5eZn+/Mwzz+iZZ54xHbdu3VphYWF66aWX9OGHH2rs2LEFDxwALEBiDtipjIwMVaxY8Y7tzzzzjD777DO98847mjJlikJDQ9WmTRs9/fTT+Uqezpw5oxkzZujLL7/UlStXzNr+vl1j6dKl5enpaXbOw8PD7Lrk5GRVrlxZ5cuXv+OYx48fl9Fo1JNPPnnb9vwmq3//ocHd3f22Mbq7u+vy5ctm59asWaP58+fr2LFjunnzpul8Xgnf7eL09vbOd/+IiAht2LBBP/30k4YMGaLatWvn+9q85Cfm/H7nx48flyRFRUXd8V5paWny8PC4bVththX8a5J8J8OHD9fChQv15ptvKi4uzjQ16688PDzUrFmzAsfxV48++qgaNmyYa30AAMvt3btXcXFxOnjwoFJSUjR79myLdpu6ceOG3n33XR06dEj/+9//9Pjjj2vOnDlmffbt26fJkyfr2LFjun79unx9fdWtWzf16tXLyk9TtEjMATt07tw5paWl5ZpT/Feurq5aunSpvv/+e+3YsUPffPONvvjiC3366aeaP3++qTJ8O9nZ2erdu7euXLmiV199VbVq1ZKbm5vOnz+vkSNHKicnx6x/XveyRE5OjpycnPTRRx/d9p5ubm75us/tfvC4U4zGv8y9/vzzzzVy5Ei1bt1aERERqlixokqUKKHY2FizyvrduLi4WFQ5PnnypE6cOCFJOnz4cL6vu5vSpUvftU9+v/M/v6cRI0bccWvNvP77hIWF5Sfk20pKSrprHy8vLy1YsEAvvPCC+vTpoyVLluSao5+VlZXrh8w78fT0vOvfa29vbx07dixf9wNwZxkZGTIYDOrcufNtf6t6N9nZ2S
pdurR69OihzZs337aPm5ubXnrpJRkMBj3wwAP64Ycf9O677+qBBx5Q165dC/sIxYbEHLBDn3/+uaS7JzvOzs4KDQ1
"text/plain": [
"<Figure size 800x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAuYAAAKyCAYAAACZhhuyAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAr4xJREFUeJzs3Xl8Tdf+//F3gkhDhBCNEEFwqAQJGiGV0mqrrSIoqpSmLY2hpooOVHAlNddU0RtqDK6pRc2t3qs1tqKlaXANMbREFJGIkJzfH34933sawklO5Bx9PR+P83h0r7X23p992nsfn/PJWms7GI1GowAAAAAUKceiDgAAAAAAiTkAAABgE0jMAQAAABtAYg4AAADYABJzAAAAwAaQmAMAAAA2gMQcAAAAsAEk5gAAAIANIDEHgIfUr7/+qhkzZui3334r6lAAAPeBxBywMTNmzJDBYCjqMJCHESNGqFWrVkUaw73+O0lLS1P//v115coVVapU6QFGBgDILxJzoBCtXr1aBoPB9PH391dISIjCw8O1cOFCXbt2zSr3OX/+vGbMmKHExESrXO/v4vr165oxY4b27NlTpHHMmTNH27Zts+o133vvPT322GN6//33rXrdwrJnzx4ZDAZt2rTJrD0rK0t9+vRRnTp1tHLlygLd48KFC5o0aZJ69OihgIAAGQyGPP/d//jjj+rWrZsaNGig5s2ba9y4cUpPTy9QDACQFxJz4AEYOHCgJkyYoNGjR6tHjx6SpPHjx+ull17Sr7/+ajb27bff1k8//WTR9S9cuKCZM2eSmFvo+vXrmjlzpvbu3WvReWPHjs2VQBZEbGysxYl5Xv+dnDlzRn5+fpo4caIcHe33/+Zv3rypgQMH6ttvv9XYsWPVqVOnAl3vxIkT+uyzz3ThwoV7/lUqMTFRvXr1UmZmpkaMGKFOnTpp+fLleueddwoUAwDkpXhRBwD8HbRo0UL+/v6m4z59+mjXrl3q27evIiIi9NVXX8nZ2VmSVLx4cRUvzv80bVFGRoZcXFxUokSJIo8hr/9OqlSpor59+z7gyKzr5s2bGjRokHbs2KExY8aoc+fOBb5mvXr1tGfPHpUtW1abNm3SgQMH7jp2ypQpKlOmjBYtWqTSpUtLuv29fvjhh9q5c6dCQkIKHA8A/JX9llIAOxccHKyIiAidPXtWX375pan9TnOHv/vuO3Xr1k2NGzdWQECAnn32WU2ZMkXS7SkAf1YS33vvPdO0mdWrV0uS9u/fr4EDB+rJJ5+Un5+fQkNDNX78eGVmZprdY8SIEQoICND58+cVERGhgIAANW3aVB9//LGys7PNxubk5GjBggVq27at/P391bRpU4WHh+vnn382G/fFF18oLCxM9evX1+OPP67Bgwff10LEP7+DEydOaNiwYWrUqJGaNm2qadOmyWg06rffftPbb7+twMBANW/eXPPmzTM7PysrS5988onCwsLUqFEjNWzYUK+88op2795tGnPmzBkFBwdLkmbOnGn63mbMmGH2fSQnJ+vNN99UQECAhg0bZur73znm06dPV506dbRr1y6zOEaOHCk/P79cfxX5XwaDQRkZGVqzZo0phhEjRph9D8eOHdPQoUPVpEkTvfLKK2Z9f3W/3/nBgwcVHh6uRo0aqUGDBnr11Vf1ww8/3P1fygN069YtDRkyRNu3b9fo0aP18ssvW+W6pUuXVtmyZe857tq1a/r+++/10ksvmZJySWrXrp1cXFy0ceNGq8QDAH9FWQ4oQu3atdOUKVO0c+fOuyYfR48eVZ8+fWQwGDRw4EA5OTnp1KlT+vHHHyVJvr6+GjhwoKZPn64uXbqoUaNGkqTAwEBJ0qZNm5SZmalu3bqpbNmy+umnn7R48WL9/vvvmj59utm9srOzFR4ervr162v48OHatWuX5s2bJ29vb1NCKEkffPCBVq9erRYtWqhTp07Kzs7W/v37dfDgQdNfBj799FN98sknatOmjT
p16qRLly5p8eLF6t69u9auXasyZcrc8/sZPHiwfH19NXToUH377bf69NNPVbZsWS1btkxNmzbVsGHDtG7dOn388cfy9/dXkyZNJN1OrP71r3/pxRdfVOfOnZWenq6VK1fqjTfe0L/+9S/VrVtX7u7uGj16tEaPHq3WrVurdevWkmSW7N66dcuUvEZGRpr+qvFXb7/9tr755ht98MEH+vLLL1W6dGn95z//0YoVK/TOO++oTp06d33GCRMm6MMPP1T9+vVN/w1UrVrVbMw777wjHx8fDR48WEaj8a7Xut/vfNeuXXrzzTfl5+en/v37y8HBQatXr9Zrr72mpUuXqn79+vf8d1NYsrOzNWTIEG3dulWjRo1S165dc425efOm0tLS7ut6ZcuWtXg6T1JSkm7duiU/Pz+zdicnJ9WtW5cpYwAKjxFAoVm1apWxdu3axp9++umuYxo1amRs37696Xj69OnG2rVrm47nz59vrF27tjE1NfWu1/jpp5+MtWvXNq5atSpX3/Xr13O1xcbGGg0Gg/Hs2bOmtsjISGPt2rWNM2fONBvbvn17Y4cOHUzHu3btMtauXds4duzYXNfNyckxGo1G45kzZ4x169Y1fvrpp2b9SUlJxsceeyxX+1/9+R2MHDnS1Hbr1i1jixYtjAaDwRgbG2tqv3LlirF+/frGyMhIs7E3btwwu+aVK1eMzZo1M7733numttTUVGPt2rWN06dPzxXDn9/HpEmT7tjXsmXLXM9Wr1494wcffGC8cuWK8YknnjCGhYUZb968meezGo1GY8OGDc3i/+v3MGTIkLv2/el+v/OcnBzjM888Y3z99ddN/76Mxtv/nbRq1crYu3fve8ZbGHbv3m2sXbu2sWXLlsbatWsbFy9efM+x9/M5ffr0Ha+xceNGY+3atY27d+++a9++ffty9Q0cONDYvHnz/D8oAOSBijlQxFxcXPLc6eHPKuf27dvVsWNHi6t//1vlzcjIUGZmpgICAmQ0GvXLL7/Iy8vLbHy3bt3Mjhs1amQ21WbLli1ycHBQ//79c93LwcFBkrR161bl5OSoTZs2unTpkqm/QoUK8vHx0Z49e+5rDvT/LvYrVqyY/Pz89Pvvv5u1lylTRtWrV9fp06fNxhYrVkzS7Wk3V69eVU5Ojvz8/PTLL7/c877/66/fx93Url1bAwcO1OTJk5WUlKQ//vhD8+bNs8p6gTtVjf/qfr/zxMREnTx5Um+//bb++OMPs2sEBwfriy++UE5OTpEtGr148aKKFy+uKlWq3HVMnTp1NH/+/Pu6noeHh8Ux/DnNy8nJKVdfyZIlc00DAwBrITEHilhGRobKly9/1/7nn39e//rXv/Thhx9q8uTJCg4OVuvWrfXcc8/dV/J07tw5TZ8+XV9//bWuXLli1vfX7RpLliwpd3d3szY3Nzez85KTk1WxYsU85+qePHlSRqNRzzzzzB377zdZ/euPBldX1zvG6OrqqsuXL5u1rVmzRvPmzdOJEyd08+ZNU3teCd+d4vT09Lzv8eHh4dqwYYN++uknDRkyRDVr1rzvc/NyPzHf73d+8uRJSVJkZORdr5WWliY3N7c79qWkpNwzlru5nyT53Xff1YIFC/TOO+8oLi7ONDXrf7m5ualZs2b5juNe/vwxm5WVlavvxo0bd53SBODO9u3bp7i4OB06dEgpKSmaNWuWnn766fs+/8aNG/roo490+PBh/fe//9WTTz6p2bNnm43Zv3+/Jk2apBMnTuj69evy8vJS165d1atXLys/TeEiMQeK0O+//660tLRcc4r/l7Ozs5YsWaI9e/Zox44d+s9//qOvvvpKy5cv17x580yV4TvJzs5W7969deXKFb3xxhuqUaOGXFxcdP78eY0YMUI5OTlm4/O6liVycnLk4OCgzz777I7XdHFxua/r3OmHx91iNP7P3OsvvvhCI0aM0NNPP63w8HCVL19exYoVU2xsrFll/V6cnJwsqhyfPn1ap06dkiQdOX
Lkvs+7l5IlS95zzP1+539+T8OHD1fdunXveK28/v0UZDeSpKSke47x8PDQ/Pnz1a1bN/Xp00eLFy/ONUc/Kysr14/
"text/plain": [
"<Figure size 800x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def plot_distance_matrix_sorted(X_scaled, labels, max_points=400, title=\"Distance matrix\"):\n",
" n = X_scaled.shape[0]\n",
" idx = np.arange(n)\n",
"\n",
" if n > max_points:\n",
" rng = np.random.default_rng(42)\n",
" idx = rng.choice(idx, size=max_points, replace=False)\n",
"\n",
" X_sub = X_scaled[idx]\n",
" labels_sub = np.asarray(labels)[idx]\n",
"\n",
" order = np.lexsort((np.arange(len(labels_sub)), labels_sub))\n",
" X_sub = X_sub[order]\n",
"\n",
" D = pairwise_distances(X_sub)\n",
"\n",
" plt.figure(figsize=(8, 7))\n",
" sns.heatmap(D, cmap=\"viridis\")\n",
" plt.title(title)\n",
" plt.tight_layout()\n",
" plt.show()\n",
"\n",
"for k in [2, 5, 10]:\n",
" plot_distance_matrix_sorted(\n",
" X_scaled,\n",
" dfc[f\"cluster_k{k}\"].values,\n",
" title=f\"Distance matrix triée — K={k}\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "3c052322-6566-4567-b084-81148fa65538",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAZI1JREFUeJzt3XlcVGXDxvHfDIuIu7jvmoGoGC654JZkppblkmamlZm52/q4PJWlZWhpqbnnmtnukpVpmY+piZalaeaS+64ILikgMHPeP07MK4HKAHJguL6fDyVnzpxzz3AznOvcm80wDAMREREREZFMsFtdABERERERyf0ULEREREREJNMULEREREREJNMULEREREREJNMULEREREREJNMULEREREREJNMULEREREREJNMULEREREREJNMULEREREREJNMULETEbe+99x5BQUFWFyPX6tWrF7169bK6GGnSzzb7BAUF8d5772XoueHh4YwYMSKLS5TSiBEjCA8Pv6XnEBHPomAhksctXbqUoKAg11dISAjNmjWjT58+fPDBB1y+fDlLznPmzBnee+89du/enSXHk+wTHh6e4QtgkYwKDw+nX79+qbYvX76c4OBg+vTpw9WrVzN8/FOnTjF16lQeeugh7rzzTho1akSvXr3YtGlTZootkqd5W10AEckZhg4dSoUKFUhKSuLcuXP8/PPPvPnmmyxYsIDp06dTo0YN174DBgzg6aefduv4Z8+eZerUqZQvX57g4OCsLr5IrrNjxw68vLwy9NxVq1Zhs9myuEQ534oVKxg5ciRhYWFMnz6dfPnyZfhYP/zwA++//z6tW7emU6dOJCUl8eWXX9K7d2/efPNNunTpkoUlF8kbFCxEBIAWLVoQEhLi+r5fv35ERkbSv39/Bg4cyMqVK/Hz8wPA29sbb299fIi4y+l0kpiYSL58+TJ1Uezr65uFpcodvvnmG0aMGEHjxo0zHSoAGjVqxP/+9z+KFy/u2vbII4/w4IMPMmXKFAULkQxQVygRua4mTZowcOBATpw4wYoVK1zb0+qH/9NPP/HII4/QoEED6taty7333ss777wDwJYtW3jooYcAGDlypKvb1dKlSwHYunUrQ4cO5a677qJ27dq0bNmSN998k/j4+BTnGDFiBHXr1uXMmTMMHDiQunXr0rhxY8aPH4/D4Uixr9PpZOHChXTo0IGQkBAaN25Mnz592LlzZ4r9vvzySzp37kydOnVo2LAhzz33HKdOnUqxz+HDhxkyZAhNmzYlJCSEFi1a8Nxzz/H333/f9D389NNPad26NXXq1OGhhx5i69atqfZJSEhg8uTJdO7cmfr16xMaGkqPHj3YvHlziv2OHz9OUFAQc+fOdR23du3adOnShR07dqTYNyoqipEjR9KiRQtq165Ns2bNGDBgAMePH79pmdPj0qVLjB07lpYtW1K7dm3uueceZs+ejdPpTLHfN998Q+fOnalbty716tWjQ4cOLFy40PV4cle8X375hVGjRtGoUSPq1avHsGHDuHjxYopjrVmzhqeffppmzZpRu3ZtWrduzbRp01L97AF+//13+vbty5133kloaGiq8wIcOHCAoUOH0rBhQ0JCQujcuTM//PBDul5/bGws48aNc73+e++9l7lz52IYRor9goKCGDNmDCtWrOC+++4jJCSEDRs2uB77dxezLVu20LlzZ0JCQmjdujWffPJJmr9v/x5jkfw+/vrrr0RERNC4cWNCQ0MZNGgQMTExGX4fc4qVK1fyn//8h4YNGzJjxoxMhwqA22+/PUWoADOwtWzZktOnT2dZN1CRvES3HEXkhh588EHeeecdNm7cSLdu3dLc56+//qJfv34EBQUxdOhQfH19OXLkCL/99hsAt912G0OHDmXKlCk8/PDD1K9fH4B69eoBZreO+Ph4HnnkEYoWLcqOHTv48MMPOX36NFOmTElxLofDQZ8+fahTpw7Dhg0jMjKSefPmUb
FiRXr06OHa76WXXmLp0qW0aNGChx56CIfDwdatW/n9999dLTMzZsxg8uTJtGvXjoceeoiYmBg+/PBDHn30UZYvX07hwoVJSEigT58+JCQk0LNnT0qUKMGZM2dYt24dly5dolChQtd97z7//HNGjRpF3bp1efzxxzl27BgDBgygSJEilC1b1rXf5cuX+fzzz7n//vvp2rUrV65c4YsvvuCpp57i888/T9V17Ouvv+bKlSs8/PDD2Gw25syZw5AhQ1izZg0+Pj4ADBkyhP3799OzZ0/Kly9PTEwMP/30E6dOnaJChQrp+tlfT1xcHD179uTMmTN0796dsmXLsm3bNt555x2ioqJ46aWXADNsPv/88zRp0oQXX3wRgIMHD/Lbb7/x+OOPpzjmmDFjKFy4MIMHD+bQoUN8/PHHnDx5kkWLFrm6/Cxbtgx/f3969+6Nv78/mzdvZsqUKVy+fJnhw4e7jvXTTz/Rr18/SpUqxWOPPUaJEiU4cOAA69atc533r7/+4pFHHqF06dL07dsXf39/vv32WwYNGsR7773HPffcc93XbxgGAwYMcAXm4OBgNmzYwFtvvcWZM2f473//m2L/zZs38+233/Loo49SrFgxypcvn+Zx//zzT5566ilKlizJkCFDcDqdTJs2LdXF74288cYbrvfxxIkTLFy4kDFjxjBp0iTXPul9H3OK1atX85///IcGDRowc+ZMV8vptS5evJiuYJQ/f37y589/w32ioqLStZ+IpMEQkTxtyZIlRmBgoLFjx47r7lO/fn2jY8eOru+nTJliBAYGur6fP3++ERgYaERHR1/3GDt27DACAwONJUuWpHosLi4u1bZZs2YZQUFBxokTJ1zbhg8fbgQGBhpTp05NsW/Hjh2NTp06ub6PjIw0AgMDjddffz3VcZ1Op2EYhnH8+HEjODjYmDFjRorH9+7da9SsWdO1/c8//zQCAwONb7/99rqvLS0JCQlGkyZNjAcffNC4evWqa/unn35qBAYGGj179nRtS0pKSrGPYRjGxYsXjbCwMGPkyJGubceOHTMCAwONhg0bGhcuXHBtX7NmjREYGGisXbvW9dzAwEBjzpw5bpXZMFL/bNMybdo0IzQ01Dh06FCK7RMmTDCCg4ONkydPGoZhGG+88YZRr149Iykp6brHSq5/nTp1MhISElzb33//fSMwMNBYs2aNa1ta9eSVV14x7rjjDtf7l5SUZISHhxutWrUyLl68mGLf5J+9YRjG448/btx///0p3nen02k8/PDDRps2bW74+r///nsjMDDQmD59eortQ4YMMYKCgowjR464tgUGBho1atQw/vrrr1THCQwMNKZMmeL6vl+/fsYdd9xhnD592rXt8OHDRs2aNVP9TFq1amUMHz7c9X3y+/jEE0+keJ1vvvmmERwcbFy6dMm1LT3vo2GYv2+tWrW64XtxK7Vq1cpo1qyZUbNmTaNnz55GbGzsDfcNDAy86de173daDh8+bISEhBj/+c9/svrliOQJ6golIjfl7+/PlStXrvt44cKFAXMw5L+7wqTHtXcgY2NjiYmJoW7duhiGwZ9//plq/0ceeSTF9/Xr10/Rxee7777DZrMxePDgVM9Nvvv9/fff43Q6adeuHTExMa6vEiVKULlyZbZs2QJAwYIFAdi4cSNxcXHpfk1//PEH0dHRdO/ePUV/+E6dOqVq5fDy8nLt43Q6uXDhAklJSdSuXTvN19++fXuKFCni+r5BgwYAHDt2DDDfTx8fH37++edU3YmywqpVq6hfvz6FCxdO8d6FhYXhcDj45ZdfALNexMXF8dNPP930mA8//LCrtQXMn7G3tzc//vija9u19eTy5cvExMTQoEED4uLiOHjwIGDe9T9+/DiPPfaYq14mS/7ZX7hwgc2bN9OuXTvXcWJiYjh//jzNmjXj8OHDnDlz5rplXb9+PV5eXqmmDH7yyScxDIP169en2H7nnXdSvXr1G75+h8NBZGQkd999N6VLl3
Ztr1y5Ms2bN7/hc6/VrVu3FIO6GzRogMPh4MSJE65t6Xkfc4qLFy+SlJREmTJl0mypSPb2228zf/78m3517Njxuse
"text/plain": [
"<Figure size 800x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAW3xJREFUeJzt3XlcFPXjx/E3p4AIeOB9Z6yoGB6Vt0lmavkttcNK7VDTvCrrl3SnlWCllalp5ZVHlop2WFrkt69aWNqpeeYtmnJ4cgjszu+PhZUVUGCRxeX1fDxW2dmZ2c/MfnZ23jOf+YybYRiGAAAAAMAB7s4uAAAAAICrH8ECAAAAgMMIFgAAAAAcRrAAAAAA4DCCBQAAAACHESwAAAAAOIxgAQAAAMBhBAsAAAAADiNYAAAAAHAYwQJAkb333nsymUzOLsZVa9CgQRo0aJCzi5EvPtvSYzKZ9N577xVr2oiICEVGRpZwiexFRkYqIiLiir4HANdCsADKuZiYGJlMJtsjLCxMnTp10pAhQ/Txxx/r3LlzJfI+x48f13vvvacdO3aUyPxQeiIiIoq9AwwUV0REhIYPH55n+KpVqxQaGqohQ4bo/PnzDr1H7m1f7scHH3zg0HyB8srT2QUAUDaMHTtWdevWVVZWlhITE/XLL79o0qRJmj9/vmbOnKmmTZvaxn3sscf06KOPFmn+J06c0PTp01WnTh2FhoaWdPGBq85ff/0lDw+PYk27Zs0aubm5lXCJyr4vvvhCzz77rDp06KCZM2eqQoUKDs+zY8eOuuOOO+yGNWvWzOH5AuURwQKAJKlLly4KCwuzPR8+fLji4uI0YsQIjRw5Ul9//bV8fHwkSZ6envL0ZPMBFJXFYlFmZqYqVKjg0E6xt7d3CZbq6rB69WpFRkaqXbt2JRYqJKlhw4Z5ggWA4qEpFIACtW/fXiNHjlR8fLy++OIL2/D82uH/+OOPuu+++9S2bVu1atVKt956q6ZOnSpJ+vnnn3XXXXdJkp599llbc4OYmBhJ0pYtWzR27FjddNNNatGihbp27apJkyYpPT3d7j0iIyPVqlUrHT9+XCNHjlSrVq3Url07TZ48WWaz2W5ci8WiBQsWqE+fPgoLC1O7du00ZMgQbd261W68zz//XP369VPLli11ww036Mknn9SxY8fsxjlw4IDGjBmjjh07KiwsTF26dNGTTz6ps2fPXnYdfvrpp+revbtatmypu+66S1u2bMkzTkZGht59913169dPbdq0UXh4uO6//35t2rTJbrwjR47IZDJpzpw5tvm2aNFC/fv3119//WU3bkJCgp599ll16dJFLVq0UKdOnfTYY4/pyJEjly1zYZw5c0avv/66unbtqhYtWuiWW27RBx98IIvFYjfe6tWr1a9fP7Vq1UqtW7dWnz59tGDBAtvrOU3xNm/erJdeekk33nijWrdurWeeeUanT5+2m1dsbKweffRRderUSS1atFD37t01Y8aMPJ+9JP35558aNmyYrr/+eoWHh+d5X0nau3evxo4dqxtuuEFhYWHq16+fvv/++0Itf2pqqqKjo23Lf+utt2rOnDkyDMNuPJPJpIkTJ+qLL77QbbfdprCwMG3YsMH22sVNzH7++Wf169dPYWFh6t69u5YuXZrv9+3iayxy1uOvv/6qqKgotWvXTuHh4Ro1apSSk5OLvR7Liq+//lr/93//pxtuuEHvv/9+iYWKHOnp6Q43qwLAGQsAl3HHHXdo6tSp2rhxo+655558x9mzZ4+GDx8uk8mksWPHytvbWwcPHtRvv/0mSbrmmms0duxYTZs2Tffee6/atGkjSWrdurUka7OO9PR03XfffQoKCtJff/2lRYsW6d9//9W0adPs3stsNmvIkCFq2bKlnnnmGcXFxWnu3LmqV6+e7r//ftt4zz//vGJiYtSlSxfdddddMpvN2rJli/7880/bmZn3339f7777rnr16qW77rpLycnJWrRokR
544AGtWrVKAQEBysjI0JAhQ5SRkaGBAweqWrVqOn78uH744QedOXNGlSpVKnDdLVu2TC+99JJatWqlBx98UIcPH9Zjjz2mwMBA1apVyzbeuXPntGzZMt1+++26++67lZKSouXLl2vo0KFatmxZnqZjX331lVJSUnTvvffKzc1NH330kcaMGaPY2Fh5eXlJksaMGaN//vlHAwcOVJ06dZScnKwff/xRx44dU926dQv12RckLS1NAwcO1PHjxzVgwADVqlVLv//+u6ZOnaqEhAQ9//zzkqxhc9y4cWrfvr2efvppSdK+ffv022+/6cEHH7Sb58SJExUQEKDRo0dr//79+uSTT3T06FEtXLjQ1uRn5cqV8vPz08MPPyw/Pz9t2rRJ06ZN07lz5zR+/HjbvH788UcNHz5c1atX1+DBg1WtWjXt3btXP/zwg+199+zZo/vuu081atTQsGHD5Ofnp2+++UajRo3Se++9p1tuuaXA5TcMQ4899pgtMIeGhmrDhg164403dPz4cT333HN242/atEnffPONHnjgAVWuXFl16tTJd77bt2/X0KFDFRwcrDFjxshisWjGjBmqUqVKoT+b1157zbYe4+PjtWDBAk2cOFHvvPOObZzCrseyYu3atfq///s/tW3bVrNmzbKdOc3t9OnThQpGvr6+8vX1tRu2cuVKLVmyRIZh6JprrtFjjz2mPn36lFj5gXLFAFCurVixwggJCTH++uuvAsdp06aNceedd9qeT5s2zQgJCbE9nzdvnhESEmIkJSUVOI+//vrLCAkJMVasWJHntbS0tDzDZs+ebZhMJiM+Pt42bPz48UZISIgxffp0u3HvvPNOo2/fvrbncXFxRkhIiPHqq6/mma/FYjEMwzCOHDlihIaGGu+//77d67t27TKaNWtmG759+3YjJCTE+OabbwpctvxkZGQY7du3N+644w7j/PnztuGffvqpERISYgwcONA2LCsry24cwzCM06dPGx06dDCeffZZ27DDhw8bISEhxg033GCcOnXKNjw2NtYICQkx1q1bZ5s2JCTE+Oijj4pUZsPI+9nmZ8aMGUZ4eLixf/9+u+FvvfWWERoaahw9etQwDMN47bXXjNatWxtZWVkFziun/vXt29fIyMiwDf/www+NkJAQIzY21jYsv3ry4osvGtddd51t/WVlZRkRERFGt27djNOnT9uNm/PZG4ZhPPjgg8btt99ut94tFotx7733Gj169Ljk8n/33XdGSEiIMXPmTLvhY8aMMUwmk3Hw4EHbsJCQEKNp06bGnj178swnJCTEmDZtmu358OHDjeuuu874999/bcMOHDhgNGvWLM9n0q1bN2P8+PG25znr8aGHHrJbzkmTJhmhoaHGmTNnbMMKsx4Nw/p969at2yXXxZXUrVs3o1OnTkazZs2MgQMHGqmpqZccNyQk5LKP3OvbMAzj3nvvNebPn2/ExsYaS5YsMW6//XYjJCTEWLx48ZVePMAl0RQKwGX5+fkpJSWlwNcDAgIkSd9//32epjCFkfsIZGpqqpKTk9WqVSsZhqHt27fnGf++++6ze96mTRu7Jj7ffvut3NzcNHr06DzT5hz9/u6772SxWNSrVy8lJyfbHtWqVVODBg30888/S5L8/f0lSRs3blRaWlqhl2nbtm1KSkrSgAED7NrD9+3bN89ZDg8PD9s4FotFp06dUlZWllq0aJHv8vfu3VuBgYG2523btpUkHT58WJJ1fXp5eemXX37J05yoJKxZs0Zt2rRRQECA3brr0KGDzGazNm/eLMlaL9LS0vTjjz9edp733nuv7WyLZP2MPT099b///c82LHc9OXfunJKTk9W2bVulpaVp3759kqxH/Y8cOaLBgwfb6mWOnM/+1KlT2rRpk3r16mWbT3Jysk6ePKlOnTrpwIEDOn78eIFlXb9+vTw8PPJ0GfzII4/IMAytX7/ebvj111+vJk2aXHL5zWaz4uLidPPNN6tGjRq24Q0aNFDnzp
0vOW1u99xzj91F3W3btpXZbFZ8fLxtWGHWY1lx+vRpZWVlqWbNmvmeqcjx5ptvat68eZd93HnnnXbTLV26VA8++KB
"text/plain": [
"<Figure size 800x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def intra_inter_distances(X_scaled, labels):\n",
" D = pairwise_distances(X_scaled)\n",
" labels = np.asarray(labels)\n",
"\n",
" intra, inter = [], []\n",
" n = len(labels)\n",
"\n",
" for i in range(n):\n",
" for j in range(i+1, n):\n",
" if labels[i] == labels[j]:\n",
" intra.append(D[i, j])\n",
" else:\n",
" inter.append(D[i, j])\n",
"\n",
" return np.array(intra), np.array(inter)\n",
"\n",
"for k in [2, 5]:\n",
" intra, inter = intra_inter_distances(X_scaled, dfc[f\"cluster_k{k}\"].values)\n",
"\n",
" plt.figure(figsize=(8, 4))\n",
" sns.kdeplot(intra, label=\"Intra-cluster\", fill=True)\n",
" sns.kdeplot(inter, label=\"Inter-cluster\", fill=True)\n",
" plt.title(f\"Distances dans l'espace original — K={k}\")\n",
" plt.xlabel(\"Distance\")\n",
" plt.legend()\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "b4a85ccf-34e1-4788-adfc-35da95ba774f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"churn_hard 0.801471\n",
"churn_soft 0.840928\n",
"churn_warning 0.403065\n",
"dtype: float64"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfc[\"churn_hard\"] = (dfc[\"aum_final_to_peak\"] < 0.10).astype(int)\n",
"\n",
"dfc[\"churn_soft\"] = (\n",
" (dfc[\"aum_final_to_peak\"] < 0.40) &\n",
" (dfc[\"aum_drawdown_last\"] > 0.40)\n",
").astype(int)\n",
"\n",
"dfc[\"churn_warning\"] = (\n",
" (dfc[\"flow_direction_balance\"] < 0) &\n",
" (dfc[\"aum_drawdown_last\"] > 0.20)\n",
").astype(int)\n",
"\n",
"dfc[[\"churn_hard\", \"churn_soft\", \"churn_warning\"]].mean()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "e990eea6-b569-4fe9-9196-42ec4ccc0e17",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== CHURN PAR CLUSTER K=2 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>churn_hard_rate</th>\n",
" <th>churn_soft_rate</th>\n",
" <th>churn_warning_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9651</td>\n",
" <td>0.801575</td>\n",
" <td>0.840949</td>\n",
" <td>0.403171</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>0.600000</td>\n",
" <td>0.800000</td>\n",
" <td>0.200000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
"cluster_k2 \n",
"0 9651 0.801575 0.840949 0.403171\n",
"1 5 0.600000 0.800000 0.200000"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== CHURN PAR CLUSTER K=5 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>churn_hard_rate</th>\n",
" <th>churn_soft_rate</th>\n",
" <th>churn_warning_rate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k5</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9649</td>\n",
" <td>0.801637</td>\n",
" <td>0.840916</td>\n",
" <td>0.403151</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>0.500000</td>\n",
" <td>0.500000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>0.500000</td>\n",
" <td>1.000000</td>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients churn_hard_rate churn_soft_rate churn_warning_rate\n",
"cluster_k5 \n",
"0 9649 0.801637 0.840916 0.403151\n",
"1 2 1.000000 1.000000 0.500000\n",
"2 2 0.500000 0.500000 0.000000\n",
"4 2 0.500000 1.000000 0.500000\n",
"3 1 0.000000 1.000000 0.000000"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"for k in [2, 5]:\n",
" out = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" churn_hard_rate=(\"churn_hard\", \"mean\"),\n",
" churn_soft_rate=(\"churn_soft\", \"mean\"),\n",
" churn_warning_rate=(\"churn_warning\", \"mean\")\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
" print(f\"\\n===== CHURN PAR CLUSTER K={k} =====\")\n",
" display(out)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "4ad61286-89b0-473f-811d-f3affee994f0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== NIVEAU 2 / EXPLICATION — K=2 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>beta_rate_med</th>\n",
" <th>delta_rate_mean_med</th>\n",
" <th>aum_trend_12m_med</th>\n",
" <th>flow_trend_12m_med</th>\n",
" <th>drawdown_trend_12m_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9651</td>\n",
" <td>0.000000</td>\n",
" <td>-0.004333</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>0.355197</td>\n",
" <td>0.022432</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients beta_rate_med delta_rate_mean_med aum_trend_12m_med \\\n",
"cluster_k2 \n",
"0 9651 0.000000 -0.004333 0.0 \n",
"1 5 0.355197 0.022432 0.0 \n",
"\n",
" flow_trend_12m_med drawdown_trend_12m_med \n",
"cluster_k2 \n",
"0 0.0 0.0 \n",
"1 0.0 0.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===== NIVEAU 2 / EXPLICATION — K=5 =====\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_clients</th>\n",
" <th>beta_rate_med</th>\n",
" <th>delta_rate_mean_med</th>\n",
" <th>aum_trend_12m_med</th>\n",
" <th>flow_trend_12m_med</th>\n",
" <th>drawdown_trend_12m_med</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster_k5</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9649</td>\n",
" <td>0.000000e+00</td>\n",
" <td>-0.004333</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>-5.372138e+11</td>\n",
" <td>0.018078</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>4.439995e-01</td>\n",
" <td>0.013204</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-1.546122e-33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>1.972259e-02</td>\n",
" <td>0.024369</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.546122e-33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>-7.467253e-02</td>\n",
" <td>0.051667</td>\n",
" <td>-769.230769</td>\n",
" <td>-0.017094</td>\n",
" <td>5.379236e-03</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_clients beta_rate_med delta_rate_mean_med aum_trend_12m_med \\\n",
"cluster_k5 \n",
"0 9649 0.000000e+00 -0.004333 0.000000 \n",
"1 2 -5.372138e+11 0.018078 0.000000 \n",
"2 2 4.439995e-01 0.013204 0.000000 \n",
"4 2 1.972259e-02 0.024369 0.000000 \n",
"3 1 -7.467253e-02 0.051667 -769.230769 \n",
"\n",
" flow_trend_12m_med drawdown_trend_12m_med \n",
"cluster_k5 \n",
"0 0.000000 0.000000e+00 \n",
"1 0.000000 0.000000e+00 \n",
"2 0.000000 -1.546122e-33 \n",
"4 0.000000 1.546122e-33 \n",
"3 -0.017094 5.379236e-03 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#niv 2 explciation eocno \n",
"explain_vars = [\n",
" \"beta_rate\",\n",
" \"delta_rate_mean\",\n",
" \"aum_trend_12m\",\n",
" \"flow_trend_12m\",\n",
" \"drawdown_trend_12m\",\n",
"]\n",
"\n",
"explain_vars = [c for c in explain_vars if c in dfc.columns]\n",
"\n",
"for k in [2, 5]:\n",
" print(f\"\\n===== NIVEAU 2 / EXPLICATION — K={k} =====\")\n",
" out = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" n_clients=(ID_COL, \"count\"),\n",
" **{f\"{c}_med\": (c, \"median\") for c in explain_vars}\n",
" )\n",
" .sort_values(\"n_clients\", ascending=False)\n",
" )\n",
" display(out)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "0ae8a553-1ec2-4789-8b6c-c70909495b18",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAASQxJREFUeJzt3X1cFWX+//E35wiC9whqlveaRxRQ1LxhNc3MDDUT0yypvEvzrjLLm9QUy7DMTGzzDkUkt7JQy9W10rLcDa11cSVTM3G9LxEsU1DwwO8Pf55vR0A5DDAgr+fjwSPPzDVzfYZgOO8z1zXjlp2dnS0AAAAAMMBidgEAAAAASj+CBQAAAADDCBYAAAAADCNYAAAAADCMYAEAAADAMIIFAAAAAMMIFgAAAAAMI1gAAAAAMIxgAQAAAMAwggUA3ITNZtPs2bPNLuOWMmXKFHXr1s3sMgAAhYhgAaDMOnbsmF5++WXde++9CggIUOvWrTVo0CDFxMTo0qVLZpcHA9asWaN169aZXUahyCvYLlmyRDabTVOnTlVWVlaB93/48GG98cYb6tu3r4KCgtSpUyeNHDlSiYmJRsoGUAaVM7sAADDD9u3b9eyzz8rDw0N9+/ZV06ZNlZmZqd27d2vevHn6+eef9corr5hdJgro/fffl7e3t0JDQ80upUgsW7ZMCxYsUL9+/TRnzhxZLAX/nPDjjz/Wxx9/rB49euixxx7TH3/8oQ8//FCPPPKIoqKiFBwcXIiVA7iVESwAlDnHjx/XhAkTdPvttysmJkY1a9Z0rBs8eLCOHj2q7du3F2tNWVlZyszMVPny5Yu134IoTbUWpitXrigrK0seHh6m1hEVFaX58+froYce0muvvWYoVEhSr169NG7cOFWsWNGxrH///goJCdGiRYsIFgDyjaFQAMqcqKgopaWlac6cOU6h4pr69evrySefzLF869at6t27t/z9/dWrVy998803TuvzmjewaNEi2Ww2p2XXhrd8+umn6tWrlwICArRjxw6tW7dONptNu3fvVkREhDp06KBWrVpp7NixSk1NvemxTZkyRUFBQTp+/LiGDx+uVq1aqVOnTnrnnXeUnZ3t1HbFihUaNGiQ2rdvr8DAQIWGhmrLli059plXrTfy9ddfKywsTEFBQWrdurX69++vjRs35tl+165dstls2rVrl9PyEydOyGazOQ1rSk5O1tSpU3X33XfL399fnTp10ujRo3XixAlJUrdu3XTo0CF99913stlsstlsevzxxx3bnz9/XnPmzFGXLl3k7++v++67T8uWLXMaTnSt3xUrVmjVqlXq3r27AgICdPjw4Rsed1GLjo7WvHnz9OCDDyoiIsJwqJAkf39/p1AhSd7e3mrbtq2SkpIM7x9A2cEVCwBlzldffaW6deuqdevW+d5m9+7d+vzzz/XYY4+pYsWKio2N1TPPPKOvvvpK3t7eBapj586d+sc//qHBgwfL29tbd9xxh86fPy9JevXVV1WlShWNGzdOJ0+eVExMjGbPnq233377pvu12+0aMWKEWrZsqRdffFE7duzQokWLZLfb9eyzzzrarV69Wt26dVOfPn2UmZmpTZs26dlnn9XSpUvVtWvXm9aal3Xr1umll17SnXfeqVGjRqly5crav3+/duzYoT59+hToe/Vn48eP188//6ywsDDdcccdSk1N1b/+9S+dPn1aderU0UsvvaRXXnlFFSpU0NNPPy1J8vX1lSSlp6crLCxMv/76qwYNGqTatWsrISFBb731lpKTkzVt2rQcx3L58mUNHDhQHh4eqlq1quH6CyomJkZz585V7969NXfu3FxDRX7CpyRVqlTppldekpOTVa1atYKUCqCMIlgAKFMuXLigX3/9Vffee69L2x0+fFibN29WvXr1JEnt27dX3759tWnTJoWFhRWoliNHjmjjxo1q0qSJY9n+/fslSdWqVdPKlSvl5uYm6erwo9jYWP3xxx+qXL
nyDfd7+fJlde7cWdOnT5ckPfbYY3r66ae1fPlyPf7446pevbok6bPPPpOnp6dju8GDBys0NFTR0dE5gkVutebmjz/+0KuvvqrAwEDFxsY6DZe6/opJQZw/f14JCQmaNGmShg8f7lg+atQox7+7d++ut99+W97e3urbt6/T9tHR0Tp+/LjWr1+vBg0aSJIGDRqkmjVrasWKFRo2bJhq167taP/LL7/oiy++cHzPzLJ9+3adPHlSvXv31htvvCGr1Zpru44dO+ZrfxERETecf/Lvf/9be/bs0ejRowtUL4CyiWABoEy5cOGCJOUY+nEzwcHBjlAhSc2aNVOlSpV0/PjxAtdy11135flGfeDAgY5QIUlt27bVqlWrdPLkSTVr1uym+x48eLDj325ubho8eLC2b9+u+Ph49erVS5KcQsXvv/8uu92uNm3aaNOmTS7V+mf/+te/dPHiRY0cOTLHHIw/H09BeXp6yt3dXd99950efvhhl68gbNmyRW3atFGVKlWcPt0PDg7WsmXL9P333+vBBx90LO/Ro4fpoUKSzp49K0mqU6dOnqFCuhqc8uNG/y9TUlI0ceJE1alTRyNGjHCtUABlGsECQJlSqVIlSdLFixdd2u7Pn2JfU7VqVcfQpYKoU6dOnutuv/12p9dVqlSRpHz1Z7FYVLduXadlDRs2lCSdPHnSseyrr77S4sWLtX//fmVkZDiW5xYAblTrnx07dkySdOedd+arvas8PDz0wgsv6PXXX9df/vIXtWzZUl27dtVDDz2kGjVq3HT7o0eP6uDBg3l+sn/9UKL8Hvdvv/2mzMzMfLW9XtWqVW86LOmhhx7SmTNntGTJEnl7e2vIkCG5tjM60TotLU2jRo3SxYsX9be//c3lAA6gbCNYAChTKlWqpJo1a+rQoUMubZfXp8R/Ht6T1yfydrs91+V/vmJwvbwm5RbGcCLp6lCX0aNH66677tLMmTNVo0YNubu7Ky4uTn//+99dqrUw5PW9y+35DEOGDFG3bt20detW/fOf/9TChQu1bNkyxcTEqHnz5jfsJysrS3/5y1/y/CT+2vCoa/J73OPHj9d3332Xr7bXW716tdq3b3/DNuXKldPChQs1YsQIzZ07V5UrV1b//v1ztEtOTs5Xn5UrV85xbBkZGRo/frwOHjyoFStWqGnTpvk/CAAQwQJAGXTPPffoww8/VEJCgoKCggptv1WqVMn1isKpU6cKrY/8yMrK0vHjxx1XKaSrcyQkOSZdf/bZZypfvrxWrFjh9Gl5XFycob6vDRc7dOiQ6tevn+/trl2R+eOPP5yW//kKy/X9DBs2TMOGDdP//vc/PfTQQ1q5cqXefPNNSXkHlXr16iktLa3Qb6E6efLkAl+9ys/QNkkqX768Fi9erCeeeEIzZsxQlSpVdN999zm16dSpU772df0ci6ysLE2ePFnx8fF6++231a5du/wfAAD8fwQLAGXOiBEjtHHjRk2fPl0xMTGOOwZdc+zYMX311Ve53nL2RurVq6c//vhDBw4ccLxZPHPmjL744otCqz2/1qxZ45i8nZ2drTVr1sjd3d0xBMhqtcrNzc3pasqJEye0bds2Q/126tRJFStW1NKlS9W5c+cck7fzesN/xx13yGq16vvvv1f37t0dy99//32ndunp6bJYLE77rVevnipWrOg0nMvLyyvXN/oPPPCAFi1apB07dqhz585O686fP68KFSqoXDnX/zT6+/u7vE1BVKpUSVFRUXrsscf0/PPPa9myZU7Dugo6x+KVV17R5s2bNXv2bPXo0aNQawZQdhAsAJQ59erV05tvvqkJEyYoJCTE8eTtjIwMJSQkaMuWLQV6YnNISIjefPNNjRs3To8//rguXbqk999/Xw0bNtS+ffuK4EhyV758ee3YsUOTJ09WYGCgduzYoe3bt+vpp592TETu0qWLoqOjNWLECPXu3VspKSn629/+pnr16ungwYMF7rtSpUqaOnWqpk+frocffli9e/
dWlSpVdODAAV26dEmvv/56rttVrlxZPXv21HvvvSc3NzfVrVtX27dvV0pKilO7//3vfxoyZIh69uypJk2ayGq1auv
"text/plain": [
"<Figure size 800x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAARmJJREFUeJzt3XlclOX+//H3MLK5I6CZ+5KggoqaC0fTTMvcckmPppVbmVtlmmKaWxquqWi5h2h8NT2Y1dGsrExOoXkMj+RxS01xSUUsU0BgmN8f/pgjAgrcwLC8no9Hj+Pcy3V/ZuY6w7znvq77NlmtVqsAAAAAwAAHexcAAAAAoPAjWAAAAAAwjGABAAAAwDCCBQAAAADDCBYAAAAADCNYAAAAADCMYAEAAADAMIIFAAAAAMMIFgAAAAAMI1gAwAN4eXlp1qxZ9i6jSAkICFCHDh3sXQYAIBcRLAAUW+fOndO0adP0xBNPyNfXV02bNlX//v0VEhKihIQEe5cHA0JDQ7Vt2zZ7l5ErMgu2K1eulJeXlyZPnqyUlJQct3/+/Hl5eXll+N+OHTuMlA6gmClh7wIAwB727Nmj1157TU5OTnrmmWdUr149JSUl6eDBg1qwYIF+/fVXvfPOO/YuEzm0adMmubm5qXfv3vYuJU+sXr1aixcvVq9evTRnzhw5OBj/nbBbt2567LHH0ixr0qSJ4XYBFB8ECwDFTnR0tMaNG6eHH35YISEhqlixom3dwIEDdfbsWe3Zsydfa0pJSVFSUpKcnZ3z9bg5UZhqzU3JyclKSUmRk5OTXetYu3atFi1apJ49e+rdd9/NlVAhSQ0aNNAzzzyTK20BKJ4YCgWg2Fm7dq3i4uI0Z86cNKEiVY0aNfTiiy+mW757925169ZNPj4+6tq1q/bu3ZtmfWbzBpYtWyYvL680y1KHt3z22Wfq2rWrfH19FR4erm3btsnLy0sHDx5UYGCgWrVqpSZNmmj06NGKjY194HMLCAiQn5+foqOjNWzYMDVp0kRt2rTR8uXLZbVa02y7bt069e/fXy1btlSjRo3Uu3dv7dq1K12bmdV6P99//70GDRokPz8/NW3aVH369NHnn3+e6fb79++Xl5eX9u/fn2Z56jCdu4c1Xb16VZMnT9Zjjz0mHx8ftWnTRiNHjtT58+clSR06dNDJkyf1008/2Yb0PP/887b9b9y4oTlz5qhdu3by8fFRp06dtHr16jTDiVKPu27dOq1fv14dO3aUr6+vTp06dd/nndeCg4O1YMEC9ejRQ4GBgbkWKlLFxcUpMTExV9sEUHxwxgJAsfPdd9+pWrVqatq0aZb3OXjwoL766is999xzKlWqlDZu3KhXX31V3333ndzc3HJUx759+/TFF19o4MCBcnNzU5UqVXTjxg1J0uzZs1W2bFmNGTNGFy5cUEhIiGbNmqUlS5Y8sF2LxaLhw4ercePGevPNNxUeHq5ly5bJYrHotddes223YcMGdejQQd27d1dSUpJ27Nih1157TatWrVL79u0fWGtmtm3bprfeekuPPPKIRowYoTJlyujo0aMKDw9X9+7dc/Ra3W3s2LH69ddfNWjQIFWpUkWxsbH64YcfdOnSJVWtWlVvvfWW3nnnHZUsWVKvvPKKJMnDw0OSFB8fr0GDBuny5cvq37+/KleurMjISL333nu6evWqpkyZku653L59W/369ZOTk5PKlStnuP6cCgkJ0dy5c9WtWzfNnTs3w1CRlfApSaVLl0535mX58uWaP3++TCaTGjZsqHHjxqlNmza5UjuA4oFgAaBYuXnzpi5fvqwnnngiW/udOnVKO3fuVPXq1SVJLVu21DPPPKMdO3Zo0KBBOarlzJkz+vzzz1W3bl3bsqNHj0qSypcvrw8//FAmk0nSneFHGzdu1F9//aUyZcrct93bt2+rbdu2mjp1qiTpueee0yuvvKI1a9bo+eefV4UKFSRJX375pVxcXGz7DRw4UL1791
ZwcHC6YJFRrRn566+/NHv2bDVq1EgbN25MM1zq3jMmOXHjxg1FRkZq4sSJGjZsmG35iBEjbP/u2LGjlixZIjc3t3RDe4KDgxUdHa1PPvlENWvWlCT1799fFStW1Lp16zR06FBVrlzZtv3vv/+ur7/+2vaa2cuePXt04cIFdevWTfPnz5fZbM5wu9atW2epvcDAQNv8EwcHB7Vp00YdO3ZUpUqVFB0drfXr1+ull17SihUr0vUFAMgMwQJAsXLz5k1JUqlSpbK1n7+/vy1USJK3t7dKly6t6OjoHNfy6KOPZvpFvV+/frZQIUnNmzfX+vXrdeHCBXl7ez+w7YEDB9r+bTKZNHDgQO3Zs0cRERHq2rWrJKUJFX/++acsFouaNWuW4ZWA7lfr3X744QfdunVLL7/8cro5GHc/n5xycXGRo6OjfvrpJz377LPZPoOwa9cuNWvWTGXLlk3z676/v79Wr16tAwcOqEePHrblTz75pN1DhSTFxMRIkqpWrZppqJDuBKesuPu9fPjhh7Vu3bo065955hl17dpVc+fOJVgAyDKCBYBipXTp0pKkW7duZWu/u3/FTlWuXDnb0KWcqFq1aqbrHn744TSPy5YtK0lZOp6Dg4OqVauWZlmtWrUkSRcuXLAt++6777RixQodPXo0zbj6jALA/Wq927lz5yRJjzzySJa2zy4nJydNmDBB8+bN09/+9jc1btxY7du3V8+ePeXp6fnA/c+ePavjx49n+sv+vUOJsvq8//jjDyUlJWVp23uVK1fugRPCe/bsqStXrmjlypVyc3PT4MGDM9zO398/RzXcq3z58urdu7dWr16t33//XQ899FCutAugaCNYAChWSpcurYoVK+rkyZPZ2i+zX4nvHt6T2S/yFoslw+V3nzG4V2aTcnNjOJEk/fvf/9bIkSP16KOPavr06fL09JSjo6PCwsL0z3/+M1u15obMXruM7s8wePBgdejQQbt379a//vUvLV26VKtXr1ZISIgaNGhw3+OkpKTob3/7m4YPH57h+tThUamy+rzHjh2rn376KUvb3mvDhg1q2bLlfbcpUaKEli5dquHDh2vu3LkqU6aM+vTpk267q1evZumYZcqUeeBzSw0Tf/zxB8ECQJYQLAAUO48//rg+/vhjRUZGys/PL9faLVu2bIZnFC5evJhrx8iKlJQURUdH285SSHfmSEiyTbr+8ssv5ezsrHXr1qX5tTwsLMzQsVOHi508eVI1atTI8n6pZ2T++uuvNMvvPsNy73GGDh2qoUOH6rffflPPnj314YcfauHChZIyDyrVq1dXXFxcrv2yn2rSpEk5PnuVlaFtkuTs7KwVK1bohRde0Ntvv62yZcuqU6dOabbJ6mTru+dYZCb1KlsFYSgYgMKBYAGg2Bk+fLg+//xzTZ06VSEhIbYrBqU6d+6cvvvuuwwvOXs/1atX119//aVjx47ZvixeuXJFX3/9da7VnlWhoaG2ydtWq1WhoaFydHS0DQEym80ymUxpzqacP39e33zzjaHjtmnTRqVKldKqVavUtm3bdJO3M/vCX6VKFZnNZh04cEAdO3a0Ld+0aVOa7eLj4+Xg4JCm3erVq6tUqVJphnO5urpm+EX/6aef1rJlyxQeHq62bdumWXfjxg2VLFlSJUpk/0+jj49PtvfJidKlS2vt2rV67rnn9MYbb2j16tVphnXlZI5FbGxsuvBw+fJlhYWFycvLK8NLMgNARggWAIqd6tWra+HChRo3bpy6dOliu/N2YmKiIiMjtWvXrhzdsblLly5auHChxowZo+eff14JCQnatGmTatWqpSNHjuTBM8mYs7OzwsPDNWnSJDVq1Ejh4eHas2ePXnnlFdsXyHbt2ik4OFjDhw9Xt27ddO3aNf3f//2fqlevruPHj+f42KVLl9bkyZM1depUPfvss+rWrZvKli2rY8eOKSEhQfPmzctwvzJlyqhz58766KOPZDKZVK1aNe3Zs0fXrl1Ls91vv/2mwY
MHq3Pnzqpbt67MZrN2796tmJgY26R0SWrYsKE2bdqkDz74QDVq1FCFChXUunVrDRs2TN9++61eeeUV9erVSw0bNlR
"text/plain": [
"<Figure size 800x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"for k in [2, 5]:\n",
" tmp = (\n",
" dfc.groupby(f\"cluster_k{k}\")\n",
" .agg(\n",
" churn_hard=(\"churn_hard\", \"mean\"),\n",
" churn_soft=(\"churn_soft\", \"mean\"),\n",
" churn_warning=(\"churn_warning\", \"mean\")\n",
" )\n",
" )\n",
"\n",
" tmp.plot(kind=\"bar\", figsize=(8, 4))\n",
" plt.title(f\"Churn par cluster — K={k}\")\n",
" plt.ylabel(\"Rate\")\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5fc62ca1-a145-4669-8798-33d72352d4d5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}