diff --git a/ClusteringV4_400plus_Ancien_finMars.ipynb b/ClusteringV4_400plus_Ancien_finMars.ipynb
new file mode 100644
index 0000000..22a824e
--- /dev/null
+++ b/ClusteringV4_400plus_Ancien_finMars.ipynb
@@ -0,0 +1,6631 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d9272d36",
+ "metadata": {},
+ "source": [
+ "# Clustering des 400+ gros comptes Carmignac — version enrichie\n",
+ "\n",
+ "Ce notebook repart du notebook initial et ajoute :\n",
+ "\n",
+ "- une base plus propre et plus lisible ;\n",
+ "- des **features comportementales** supplémentaires ;\n",
+ "- un **clustering global** sur les 400+ gros comptes ;\n",
+ "- un **clustering par fonds** (`Product - Isin`) sur les plus gros fonds ;\n",
+ "- un cadre prêt à intégrer des **données de performance Carmignac** si elles sont disponibles.\n",
+ "\n",
+ "## Principes retenus\n",
+ "\n",
+ "- univers = comptes avec **AUM total ≥ 5 M€** au **31/10/2025**\n",
+ "- exclusion de **Off Distribution** et **Private Clients**\n",
+ "- **pas de clipping d'outliers** pour l'instant\n",
+ "- un fonds est identifié par **`Product - Isin`**\n",
+ "- les résultats doivent être **interprétables métier**, pas seulement \"bons\" mathématiquement"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c6b14319",
+ "metadata": {},
+ "source": [
+ "## 1. Imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "1aa70c18",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import warnings\n",
+ "warnings.filterwarnings(\"ignore\")\n",
+ "\n",
+ "import re\n",
+ "import math\n",
+ "import json\n",
+ "from pathlib import Path\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "\n",
+ "from sklearn.preprocessing import RobustScaler\n",
+ "from sklearn.impute import SimpleImputer\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.cluster import KMeans, AgglomerativeClustering\n",
+ "from sklearn.mixture import GaussianMixture\n",
+ "from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score\n",
+ "from sklearn.decomposition import PCA\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "\n",
+ "pd.set_option(\"display.max_columns\", 200)\n",
+ "pd.set_option(\"display.max_rows\", 200)\n",
+ "\n",
+ "RANDOM_STATE = 42\n",
+ "EPS = 1e-9"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "98deb42c",
+ "metadata": {},
+ "source": [
+ "## 2. Paths"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "a462ff7a-1e4d-44f7-af6c-031b47779ce1",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " reg_orig | \n",
+ " reg_used | \n",
+ " Agreement - Code | \n",
+ " Company - Id | \n",
+ " Company - Ultimate Parent Id | \n",
+ " Registrar Account - Region | \n",
+ " RegistrarAccount - Country | \n",
+ " Product - Asset Type | \n",
+ " Product - Strategy | \n",
+ " Product - Legal Status | \n",
+ " Product - Is Dedie ? | \n",
+ " Product - Fund | \n",
+ " Product - Shareclass Type | \n",
+ " Product - Shareclass Currency | \n",
+ " Product - Isin | \n",
+ " Centralisation Date | \n",
+ " Quantity - AUM | \n",
+ " Value - AUM CCY | \n",
+ " Value - AUM € | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18872 | \n",
+ " 18872 | \n",
+ " L104 | \n",
+ " 2257.0 | \n",
+ " 33675.0 | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " FR0010135103 | \n",
+ " 2015-01-31 | \n",
+ " 49094.915 | \n",
+ " 3.242523e+07 | \n",
+ " 3.242523e+07 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18872 | \n",
+ " 18872 | \n",
+ " L104 | \n",
+ " 2257.0 | \n",
+ " 33675.0 | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " Equity | \n",
+ " Investissement Latitude | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Investissement Latitude | \n",
+ " A | \n",
+ " EUR | \n",
+ " FR0010147603 | \n",
+ " 2015-01-31 | \n",
+ " 1717.000 | \n",
+ " 4.767422e+05 | \n",
+ " 4.767422e+05 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18872 | \n",
+ " 18872 | \n",
+ " L104 | \n",
+ " 2257.0 | \n",
+ " 33675.0 | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " Equity | \n",
+ " Investissement | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Investissement | \n",
+ " A | \n",
+ " EUR | \n",
+ " FR0010148981 | \n",
+ " 2015-01-31 | \n",
+ " 8254.870 | \n",
+ " 9.862671e+06 | \n",
+ " 9.862671e+06 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 18872 | \n",
+ " 18872 | \n",
+ " L104 | \n",
+ " 2257.0 | \n",
+ " 33675.0 | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " Equity | \n",
+ " Euro-Entrepreneurs | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Euro-Entrepreneurs | \n",
+ " A | \n",
+ " EUR | \n",
+ " FR0010149112 | \n",
+ " 2015-01-31 | \n",
+ " 278.923 | \n",
+ " 7.664525e+04 | \n",
+ " 7.664525e+04 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18872 | \n",
+ " 18872 | \n",
+ " L104 | \n",
+ " 2257.0 | \n",
+ " 33675.0 | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " Fixed Income | \n",
+ " Sécurité | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Sécurité | \n",
+ " AW & AW-R | \n",
+ " EUR | \n",
+ " FR0010149120 | \n",
+ " 2015-01-31 | \n",
+ " 1807.267 | \n",
+ " 3.078318e+06 | \n",
+ " 3.078318e+06 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1088393 | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " France | \n",
+ " France | \n",
+ " Diversified | \n",
+ " Inflation Solution | \n",
+ " SICAV | \n",
+ " NO | \n",
+ " Carmignac Portfolio Inflation Solution | \n",
+ " F | \n",
+ " EUR | \n",
+ " LU2715954330 | \n",
+ " 2025-10-31 | \n",
+ " 81065.419 | \n",
+ " 9.533293e+06 | \n",
+ " 9.533293e+06 | \n",
+ "
\n",
+ " \n",
+ " | 1088394 | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " France | \n",
+ " France | \n",
+ " Diversified | \n",
+ " Inflation Solution | \n",
+ " SICAV | \n",
+ " NO | \n",
+ " Carmignac Portfolio Inflation Solution | \n",
+ " A | \n",
+ " EUR | \n",
+ " LU2715954504 | \n",
+ " 2025-10-31 | \n",
+ " 6853.363 | \n",
+ " 7.978685e+05 | \n",
+ " 7.978685e+05 | \n",
+ "
\n",
+ " \n",
+ " | 1088395 | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " France | \n",
+ " France | \n",
+ " Private Assets | \n",
+ " Evergreen | \n",
+ " SICAV | \n",
+ " NO | \n",
+ " Carmignac S.A. SICAV - PART II UCI Private Eve... | \n",
+ " A | \n",
+ " EUR | \n",
+ " LU2799473124 | \n",
+ " 2025-10-31 | \n",
+ " 4212.234 | \n",
+ " 5.263608e+05 | \n",
+ " 5.263608e+05 | \n",
+ "
\n",
+ " \n",
+ " | 1088396 | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " France | \n",
+ " France | \n",
+ " Equity | \n",
+ " Tech Solutions | \n",
+ " SICAV | \n",
+ " NO | \n",
+ " Carmignac Portfolio Tech Solutions | \n",
+ " A | \n",
+ " EUR | \n",
+ " LU2809794220 | \n",
+ " 2025-10-31 | \n",
+ " 31469.523 | \n",
+ " 4.438147e+06 | \n",
+ " 4.438147e+06 | \n",
+ "
\n",
+ " \n",
+ " | 1088397 | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " France | \n",
+ " France | \n",
+ " Equity | \n",
+ " Tech Solutions | \n",
+ " SICAV | \n",
+ " NO | \n",
+ " Carmignac Portfolio Tech Solutions | \n",
+ " F | \n",
+ " EUR | \n",
+ " LU2809794576 | \n",
+ " 2025-10-31 | \n",
+ " 554.301 | \n",
+ " 7.871629e+04 | \n",
+ " 7.871629e+04 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1088398 rows × 19 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " reg_orig reg_used Agreement - Code Company - Id \\\n",
+ "0 18872 18872 L104 2257.0 \n",
+ "1 18872 18872 L104 2257.0 \n",
+ "2 18872 18872 L104 2257.0 \n",
+ "3 18872 18872 L104 2257.0 \n",
+ "4 18872 18872 L104 2257.0 \n",
+ "... ... ... ... ... \n",
+ "1088393 Private Client Private Client Private Client Private Client \n",
+ "1088394 Private Client Private Client Private Client Private Client \n",
+ "1088395 Private Client Private Client Private Client Private Client \n",
+ "1088396 Private Client Private Client Private Client Private Client \n",
+ "1088397 Private Client Private Client Private Client Private Client \n",
+ "\n",
+ " Company - Ultimate Parent Id Registrar Account - Region \\\n",
+ "0 33675.0 Switzerland \n",
+ "1 33675.0 Switzerland \n",
+ "2 33675.0 Switzerland \n",
+ "3 33675.0 Switzerland \n",
+ "4 33675.0 Switzerland \n",
+ "... ... ... \n",
+ "1088393 Private Client France \n",
+ "1088394 Private Client France \n",
+ "1088395 Private Client France \n",
+ "1088396 Private Client France \n",
+ "1088397 Private Client France \n",
+ "\n",
+ " RegistrarAccount - Country Product - Asset Type \\\n",
+ "0 Switzerland Diversified \n",
+ "1 Switzerland Equity \n",
+ "2 Switzerland Equity \n",
+ "3 Switzerland Equity \n",
+ "4 Switzerland Fixed Income \n",
+ "... ... ... \n",
+ "1088393 France Diversified \n",
+ "1088394 France Diversified \n",
+ "1088395 France Private Assets \n",
+ "1088396 France Equity \n",
+ "1088397 France Equity \n",
+ "\n",
+ " Product - Strategy Product - Legal Status Product - Is Dedie ? \\\n",
+ "0 Patrimoine FCP NO \n",
+ "1 Investissement Latitude FCP NO \n",
+ "2 Investissement FCP NO \n",
+ "3 Euro-Entrepreneurs FCP NO \n",
+ "4 Sécurité FCP NO \n",
+ "... ... ... ... \n",
+ "1088393 Inflation Solution SICAV NO \n",
+ "1088394 Inflation Solution SICAV NO \n",
+ "1088395 Evergreen SICAV NO \n",
+ "1088396 Tech Solutions SICAV NO \n",
+ "1088397 Tech Solutions SICAV NO \n",
+ "\n",
+ " Product - Fund \\\n",
+ "0 Carmignac Patrimoine \n",
+ "1 Carmignac Investissement Latitude \n",
+ "2 Carmignac Investissement \n",
+ "3 Carmignac Euro-Entrepreneurs \n",
+ "4 Carmignac Sécurité \n",
+ "... ... \n",
+ "1088393 Carmignac Portfolio Inflation Solution \n",
+ "1088394 Carmignac Portfolio Inflation Solution \n",
+ "1088395 Carmignac S.A. SICAV - PART II UCI Private Eve... \n",
+ "1088396 Carmignac Portfolio Tech Solutions \n",
+ "1088397 Carmignac Portfolio Tech Solutions \n",
+ "\n",
+ " Product - Shareclass Type Product - Shareclass Currency \\\n",
+ "0 A EUR \n",
+ "1 A EUR \n",
+ "2 A EUR \n",
+ "3 A EUR \n",
+ "4 AW & AW-R EUR \n",
+ "... ... ... \n",
+ "1088393 F EUR \n",
+ "1088394 A EUR \n",
+ "1088395 A EUR \n",
+ "1088396 A EUR \n",
+ "1088397 F EUR \n",
+ "\n",
+ " Product - Isin Centralisation Date Quantity - AUM Value - AUM CCY \\\n",
+ "0 FR0010135103 2015-01-31 49094.915 3.242523e+07 \n",
+ "1 FR0010147603 2015-01-31 1717.000 4.767422e+05 \n",
+ "2 FR0010148981 2015-01-31 8254.870 9.862671e+06 \n",
+ "3 FR0010149112 2015-01-31 278.923 7.664525e+04 \n",
+ "4 FR0010149120 2015-01-31 1807.267 3.078318e+06 \n",
+ "... ... ... ... ... \n",
+ "1088393 LU2715954330 2025-10-31 81065.419 9.533293e+06 \n",
+ "1088394 LU2715954504 2025-10-31 6853.363 7.978685e+05 \n",
+ "1088395 LU2799473124 2025-10-31 4212.234 5.263608e+05 \n",
+ "1088396 LU2809794220 2025-10-31 31469.523 4.438147e+06 \n",
+ "1088397 LU2809794576 2025-10-31 554.301 7.871629e+04 \n",
+ "\n",
+ " Value - AUM € \n",
+ "0 3.242523e+07 \n",
+ "1 4.767422e+05 \n",
+ "2 9.862671e+06 \n",
+ "3 7.664525e+04 \n",
+ "4 3.078318e+06 \n",
+ "... ... \n",
+ "1088393 9.533293e+06 \n",
+ "1088394 7.978685e+05 \n",
+ "1088395 5.263608e+05 \n",
+ "1088396 4.438147e+06 \n",
+ "1088397 7.871629e+04 \n",
+ "\n",
+ "[1088398 rows x 19 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "PATH_AUM = \"s3://projet-bdc-carmignac-g3/paco/AUM_paths.csv\"\n",
+ "df_aum = pd.read_csv(PATH_AUM, sep=\",\")\n",
+ "df_aum"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8be2ec30",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# =========================\n",
+ "# PATHS PRINCIPAUX\n",
+ "# =========================\n",
+ "PATH_FLOWS = \"s3://projet-bdc-data/carmignac/Flows ENSAE V2 -20251105.csv\"\n",
+ "PATH_NAV = \"s3://projet-bdc-data/carmignac/Data Modélisation/Nav/NAV_Bench_data.csv\"\n",
+ "PATH_RATES = \"s3://projet-bdc-data/carmignac/Data Modélisation/market data/esterRates.csv\"\n",
+ "\n",
+ "# Optionnels\n",
+ "PATH_COMP_FLOWS = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/daily_estimated_flows.csv\"\n",
+ "PATH_COMP_PERF = \"s3://projet-bdc-data/carmignac/Data Modélisation/competitors/weekly_perf_full.csv\"\n",
+ "PATH_PEERS = \"s3://projet-bdc-carmignac-g3/peers/CAD_peers.csv\"\n",
+ "\n",
+ "\n",
+ "PATH_CARMIGNAC_PERF = None\n",
+ "ADDITIONAL_PERF_PATHS = []"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "85f76368-fd8f-42fb-bb76-76e957bc221b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Registrar Account - ID | \n",
+ " reg_used | \n",
+ " Agreement - Code | \n",
+ " Company - Id | \n",
+ " Company - Ultimate Parent Id | \n",
+ " Registrar Account - Region | \n",
+ " RegistrarAccount - Country | \n",
+ " Product - Asset Type | \n",
+ " Product - Strategy | \n",
+ " Product - Legal Status | \n",
+ " Product - Is Dedie ? | \n",
+ " Product - Fund | \n",
+ " Product - Shareclass Type | \n",
+ " Product - Shareclass Currency | \n",
+ " Product - Isin | \n",
+ " Centralisation Date | \n",
+ " Quantity - AUM | \n",
+ " Value - AUM CCY | \n",
+ " Value - AUM € | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1088396 | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " France | \n",
+ " France | \n",
+ " Equity | \n",
+ " Tech Solutions | \n",
+ " SICAV | \n",
+ " NO | \n",
+ " Carmignac Portfolio Tech Solutions | \n",
+ " A | \n",
+ " EUR | \n",
+ " LU2809794220 | \n",
+ " 2025-10-31 | \n",
+ " 31469.523 | \n",
+ " 4.438147e+06 | \n",
+ " 4.438147e+06 | \n",
+ "
\n",
+ " \n",
+ " | 1088397 | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " Private Client | \n",
+ " France | \n",
+ " France | \n",
+ " Equity | \n",
+ " Tech Solutions | \n",
+ " SICAV | \n",
+ " NO | \n",
+ " Carmignac Portfolio Tech Solutions | \n",
+ " F | \n",
+ " EUR | \n",
+ " LU2809794576 | \n",
+ " 2025-10-31 | \n",
+ " 554.301 | \n",
+ " 7.871629e+04 | \n",
+ " 7.871629e+04 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Registrar Account - ID reg_used Agreement - Code \\\n",
+ "1088396 Private Client Private Client Private Client \n",
+ "1088397 Private Client Private Client Private Client \n",
+ "\n",
+ " Company - Id Company - Ultimate Parent Id \\\n",
+ "1088396 Private Client Private Client \n",
+ "1088397 Private Client Private Client \n",
+ "\n",
+ " Registrar Account - Region RegistrarAccount - Country \\\n",
+ "1088396 France France \n",
+ "1088397 France France \n",
+ "\n",
+ " Product - Asset Type Product - Strategy Product - Legal Status \\\n",
+ "1088396 Equity Tech Solutions SICAV \n",
+ "1088397 Equity Tech Solutions SICAV \n",
+ "\n",
+ " Product - Is Dedie ? Product - Fund \\\n",
+ "1088396 NO Carmignac Portfolio Tech Solutions \n",
+ "1088397 NO Carmignac Portfolio Tech Solutions \n",
+ "\n",
+ " Product - Shareclass Type Product - Shareclass Currency \\\n",
+ "1088396 A EUR \n",
+ "1088397 F EUR \n",
+ "\n",
+ " Product - Isin Centralisation Date Quantity - AUM Value - AUM CCY \\\n",
+ "1088396 LU2809794220 2025-10-31 31469.523 4.438147e+06 \n",
+ "1088397 LU2809794576 2025-10-31 554.301 7.871629e+04 \n",
+ "\n",
+ " Value - AUM € \n",
+ "1088396 4.438147e+06 \n",
+ "1088397 7.871629e+04 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_aum.columns #reg_orig comme cle de donnee\n",
+ "df_aum = df_aum.rename(columns={\"reg_orig\": \"Registrar Account - ID\"})\n",
+ "df_aum.tail(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da4bd8b3",
+ "metadata": {},
+ "source": [
+ "## 3. Fonctions utilitaires"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "929b77fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def first_existing_col(df, candidates):\n",
+ " if df is None:\n",
+ " return None\n",
+ " for c in candidates:\n",
+ " if c in df.columns:\n",
+ " return c\n",
+ " return None\n",
+ "\n",
+ "def normalize_month(s):\n",
+ " return pd.to_datetime(s, errors=\"coerce\").dt.to_period(\"M\").dt.to_timestamp()\n",
+ "\n",
+ "def robust_entropy(weights):\n",
+ " arr = np.asarray(weights, dtype=float)\n",
+ " arr = arr[np.isfinite(arr)]\n",
+ " arr = arr[arr > 0]\n",
+ " if len(arr) == 0:\n",
+ " return np.nan\n",
+ " p = arr / arr.sum()\n",
+ " return -(p * np.log(p + EPS)).sum()\n",
+ "\n",
+ "def herfindahl(weights):\n",
+ " arr = np.asarray(weights, dtype=float)\n",
+ " arr = arr[np.isfinite(arr)]\n",
+ " arr = arr[arr > 0]\n",
+ " if len(arr) == 0:\n",
+ " return np.nan\n",
+ " p = arr / arr.sum()\n",
+ " return np.sum(p ** 2)\n",
+ "\n",
+ "def compute_trend(y):\n",
+ " y = np.asarray(y, dtype=float)\n",
+ " if len(y) < 4:\n",
+ " return np.nan\n",
+ " x = np.arange(len(y)).reshape(-1, 1)\n",
+ " mask = np.isfinite(y)\n",
+ " if mask.sum() < 4:\n",
+ " return np.nan\n",
+ " reg = LinearRegression().fit(x[mask], y[mask])\n",
+ " return float(reg.coef_[0])\n",
+ "\n",
+ "def compute_beta(y, x):\n",
+ " y = np.asarray(y, dtype=float)\n",
+ " x = np.asarray(x, dtype=float)\n",
+ " mask = np.isfinite(y) & np.isfinite(x)\n",
+ " if mask.sum() < 4:\n",
+ " return np.nan\n",
+ " xv = x[mask].reshape(-1, 1)\n",
+ " reg = LinearRegression().fit(xv, y[mask])\n",
+ " return float(reg.coef_[0])\n",
+ "\n",
+ "def compute_corr(x, y):\n",
+ " x = pd.Series(x, dtype=float)\n",
+ " y = pd.Series(y, dtype=float)\n",
+ " ok = x.notna() & y.notna()\n",
+ " if ok.sum() < 4:\n",
+ " return np.nan\n",
+ " return x[ok].corr(y[ok])\n",
+ "\n",
+ "def safe_div(a, b):\n",
+ " return a / (b + EPS)\n",
+ "\n",
+ "\n",
+ "def adaptive_floor(values, q=0.10, min_floor=1.0):\n",
+ " s = pd.to_numeric(pd.Series(values), errors=\"coerce\")\n",
+ " s = s[np.isfinite(s) & (s > 0)]\n",
+ " if len(s) == 0:\n",
+ " return float(min_floor)\n",
+ " return float(max(s.quantile(q), min_floor))\n",
+ "\n",
+ "def cluster_balance_summary(labels):\n",
+ " vc = pd.Series(labels).value_counts().sort_index()\n",
+ " n = int(vc.sum())\n",
+ " return {\n",
+ " \"n_clusters\": int(len(vc)),\n",
+ " \"min_cluster_size\": int(vc.min()),\n",
+ " \"max_cluster_size\": int(vc.max()),\n",
+ " \"dominant_cluster_share\": float(vc.max() / n) if n > 0 else np.nan,\n",
+ " \"singleton_clusters\": int((vc == 1).sum()),\n",
+ " }\n",
+ "\n",
+ "def diagnose_small_cluster_drivers(df, feature_cols, label_col=\"cluster_fund\", top_n=15):\n",
+ " if df is None or df.empty or label_col not in df.columns:\n",
+ " return pd.DataFrame()\n",
+ " vc = df[label_col].value_counts()\n",
+ " if vc.empty:\n",
+ " return pd.DataFrame()\n",
+ " small_cluster = vc.idxmin()\n",
+ " mask_small = df[label_col] == small_cluster\n",
+ " rows = []\n",
+ " for col in feature_cols:\n",
+ " if col not in df.columns:\n",
+ " continue\n",
+ " s = pd.to_numeric(df[col], errors=\"coerce\")\n",
+ " if s.notna().sum() < 4:\n",
+ " continue\n",
+ " small_med = s[mask_small].median()\n",
+ " rest_med = s[~mask_small].median()\n",
+ " q25 = s.quantile(0.25)\n",
+ " q75 = s.quantile(0.75)\n",
+ " iqr = q75 - q25\n",
+ " scale = iqr if pd.notna(iqr) and iqr > 0 else s.std()\n",
+ " if pd.isna(scale) or scale <= 0:\n",
+ " scale = 1.0\n",
+ " robust_gap = abs(small_med - rest_med) / (scale + EPS)\n",
+ " rows.append({\n",
+ " \"feature\": col,\n",
+ " \"small_cluster_label\": small_cluster,\n",
+ " \"small_cluster_size\": int(mask_small.sum()),\n",
+ " \"small_cluster_median\": small_med,\n",
+ " \"rest_median\": rest_med,\n",
+ " \"abs_gap\": abs(small_med - rest_med),\n",
+ " \"robust_gap_iqr\": robust_gap,\n",
+ " \"q99\": s.quantile(0.99),\n",
+ " \"max\": s.max(),\n",
+ " })\n",
+ " out = pd.DataFrame(rows)\n",
+ " if out.empty:\n",
+ " return out\n",
+ " return out.sort_values([\"robust_gap_iqr\", \"abs_gap\"], ascending=False).head(top_n).reset_index(drop=True)\n",
+ "\n",
+ "def add_reading_comment(diag_df):\n",
+ " if diag_df is None or diag_df.empty:\n",
+ " print(\"Aucun diagnostic disponible.\")\n",
+ " return\n",
+ " msg = []\n",
+ " if \"min_cluster_size\" in diag_df.columns:\n",
+ " min_min = pd.to_numeric(diag_df[\"min_cluster_size\"], errors=\"coerce\").min()\n",
+ " if pd.notna(min_min) and min_min <= 1:\n",
+ " msg.append(\"- Des singletons apparaissent : le clustering isole au moins un client extrême.\")\n",
+ " if {\"max_cluster_size\", \"n_clusters\"}.issubset(diag_df.columns):\n",
+ " pass\n",
+ " if \"dominant_cluster_share\" in diag_df.columns:\n",
+ " dom = pd.to_numeric(diag_df[\"dominant_cluster_share\"], errors=\"coerce\").max()\n",
+ " if pd.notna(dom) and dom > 0.8:\n",
+ " msg.append(\"- Un cluster domine très largement la population : la séparation est surtout masse principale vs outliers.\")\n",
+ " if \"silhouette\" in diag_df.columns:\n",
+ " sil = pd.to_numeric(diag_df[\"silhouette\"], errors=\"coerce\").max()\n",
+ " if pd.notna(sil) and sil > 0.9:\n",
+ " msg.append(\"- Une silhouette très élevée avec des singletons est souvent artificielle : elle récompense l'isolement d'outliers plus qu'une segmentation métier riche.\")\n",
+ " if not msg:\n",
+ " msg.append(\"- Les métriques ne signalent pas de déséquilibre extrême évident.\")\n",
+ " print(\"\\n\".join(msg))\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5479d218",
+ "metadata": {},
+ "source": [
+ "## 4. Chargement des données"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "26ba7a06",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "flows: (2574461, 24)\n",
+ "aum: (1088398, 19)\n",
+ "nav: (623914, 5)\n",
+ "rates: (2826, 2)\n",
+ "comp_perf: (2370192, 5)\n",
+ "peers: (31, 12)\n",
+ "perf_carm: None\n"
+ ]
+ }
+ ],
+ "source": [
+ "df_flows = pd.read_csv(PATH_FLOWS, sep=\";\")\n",
+ "df_nav = pd.read_csv(PATH_NAV, sep=\";\")\n",
+ "df_rates = pd.read_csv(PATH_RATES, sep=\";\")\n",
+ "\n",
+ "df_comp_flows = pd.read_csv(PATH_COMP_FLOWS, sep=\";\")\n",
+ "df_comp_perf = pd.read_csv(PATH_COMP_PERF, sep=\";\")\n",
+ "\n",
+ "# parfois le fichier peers est séparé par |\n",
+ "df_peers = pd.read_csv(PATH_PEERS, sep=\"|\")\n",
+ "if df_peers is None:\n",
+ " df_peers = pd.read_csv(PATH_PEERS, sep=\";\")\n",
+ "\n",
+ "df_perf_carm = pd.read_csv(PATH_CARMIGNAC_PERF, sep=\";\") if PATH_CARMIGNAC_PERF else None\n",
+ "\n",
+ "extra_perf = []\n",
+ "for p in ADDITIONAL_PERF_PATHS:\n",
+ " df_tmp = pd.read_csv(p, sep=\";\")\n",
+ " if df_tmp is not None:\n",
+ " extra_perf.append(df_tmp)\n",
+ "\n",
+ "print(\"flows:\", None if df_flows is None else df_flows.shape)\n",
+ "print(\"aum:\", None if df_aum is None else df_aum.shape)\n",
+ "print(\"nav:\", None if df_nav is None else df_nav.shape)\n",
+ "print(\"rates:\", None if df_rates is None else df_rates.shape)\n",
+ "print(\"comp_perf:\", None if df_comp_perf is None else df_comp_perf.shape)\n",
+ "print(\"peers:\", None if df_peers is None else df_peers.shape)\n",
+ "print(\"perf_carm:\", None if df_perf_carm is None else df_perf_carm.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c6890a35",
+ "metadata": {},
+ "source": [
+ "## 5. Définition des colonnes-clés"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "b9ad4a32",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'ID_COL': 'Registrar Account - ID',\n",
+ " 'ISIN_COL': 'Product - Isin',\n",
+ " 'FLOW_DATE_COL': 'Centralisation Date',\n",
+ " 'AUM_DATE_COL': 'Centralisation Date',\n",
+ " 'NAV_DATE_COL': 'Dat',\n",
+ " 'NAV_ISIN_COL': 'Isin',\n",
+ " 'NAV_PRICE_COL': 'Price (TF PartPrice)',\n",
+ " 'RATE_DATE_COL': 'Date',\n",
+ " 'RATE_VAL_COL': 'Yld to Maturity',\n",
+ " 'REGION_COL': 'Registrar Account - Region',\n",
+ " 'COUNTRY_COL': 'RegistrarAccount - Country'}"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "ID_COL = \"Registrar Account - ID\"\n",
+ "ISIN_COL = \"Product - Isin\"\n",
+ "\n",
+ "FLOW_DATE_COL = \"Centralisation Date\"\n",
+ "AUM_DATE_COL = \"Centralisation Date\"\n",
+ "\n",
+ "FLOW_QTY_COL = \"Quantity - NetFlows\"\n",
+ "FLOW_SUB_COL = \"Quantity - Subscription\"\n",
+ "FLOW_RED_COL = \"Quantity - Redemption\"\n",
+ "\n",
+ "AUM_QTY_COL = \"Quantity - AUM\"\n",
+ "AUM_VAL_COL = \"Value - AUM €\"\n",
+ "\n",
+ "REGION_COL = \"Registrar Account - Region\"\n",
+ "COUNTRY_COL = \"RegistrarAccount - Country\"\n",
+ "\n",
+ "NAV_DATE_COL = \"Dat\"\n",
+ "NAV_ISIN_COL = \"Isin\"\n",
+ "NAV_PRICE_COL = \"Price (TF PartPrice)\"\n",
+ "NAV_BENCH_COL = \"PriceBench\"\n",
+ "\n",
+ "RATE_DATE_COL = \"Date\"\n",
+ "RATE_VAL_COL = \"Yld to Maturity\"\n",
+ "\n",
+ "display({\n",
+ " \"ID_COL\": ID_COL,\n",
+ " \"ISIN_COL\": ISIN_COL,\n",
+ " \"FLOW_DATE_COL\": FLOW_DATE_COL,\n",
+ " \"AUM_DATE_COL\": AUM_DATE_COL,\n",
+ " \"NAV_DATE_COL\": NAV_DATE_COL,\n",
+ " \"NAV_ISIN_COL\": NAV_ISIN_COL,\n",
+ " \"NAV_PRICE_COL\": NAV_PRICE_COL,\n",
+ " \"RATE_DATE_COL\": RATE_DATE_COL,\n",
+ " \"RATE_VAL_COL\": RATE_VAL_COL,\n",
+ " \"REGION_COL\": REGION_COL,\n",
+ " \"COUNTRY_COL\": COUNTRY_COL,\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "901bed00",
+ "metadata": {},
+ "source": [
+ "## 6. Préparation des dates et types"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "68a2e8e2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Agreement - Code | \n",
+ " Company - Id | \n",
+ " Company - Ultimate Parent Id | \n",
+ " Registrar Account - ID | \n",
+ " Registrar Account - Region | \n",
+ " RegistrarAccount - Country | \n",
+ " Product - Asset Type | \n",
+ " Product - Strategy | \n",
+ " Product - Legal Status | \n",
+ " Product - Is Dedie ? | \n",
+ " Product - Fund | \n",
+ " Product - Shareclass Type | \n",
+ " Product - Shareclass Currency | \n",
+ " Product - Isin | \n",
+ " Centralisation Date | \n",
+ " Quantity - Subscription | \n",
+ " Quantity - Redemption | \n",
+ " Quantity - NetFlows | \n",
+ " Value Ccy - Subscription | \n",
+ " Value Ccy - Redemption | \n",
+ " Value Ccy - NetFlows | \n",
+ " Value € - Subscription | \n",
+ " Value € - Redemption | \n",
+ " Value € - NetFlows | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 003 | \n",
+ " 166 | \n",
+ " 166 | \n",
+ " 200127202 | \n",
+ " France | \n",
+ " France | \n",
+ " Equity | \n",
+ " Investissement | \n",
+ " SICAV | \n",
+ " NO | \n",
+ " Carmignac Portfolio Investissement | \n",
+ " F | \n",
+ " EUR | \n",
+ " LU0992625839 | \n",
+ " 2020-11-05 | \n",
+ " 1636.00 | \n",
+ " 0.0 | \n",
+ " 1636.00 | \n",
+ " 280983.00 | \n",
+ " 0.0 | \n",
+ " 280983.00 | \n",
+ " 280983.00 | \n",
+ " 0.0 | \n",
+ " 280983.00 | \n",
+ " 2020-11-01 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 003 | \n",
+ " 166 | \n",
+ " 166 | \n",
+ " 406533 | \n",
+ " France | \n",
+ " France | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " FR0010135103 | \n",
+ " 2015-03-09 | \n",
+ " 144.69 | \n",
+ " 0.0 | \n",
+ " 144.69 | \n",
+ " 99985.13 | \n",
+ " 0.0 | \n",
+ " 99985.13 | \n",
+ " 99985.13 | \n",
+ " 0.0 | \n",
+ " 99985.13 | \n",
+ " 2015-03-01 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Agreement - Code Company - Id Company - Ultimate Parent Id \\\n",
+ "0 003 166 166 \n",
+ "1 003 166 166 \n",
+ "\n",
+ " Registrar Account - ID Registrar Account - Region \\\n",
+ "0 200127202 France \n",
+ "1 406533 France \n",
+ "\n",
+ " RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
+ "0 France Equity Investissement \n",
+ "1 France Diversified Patrimoine \n",
+ "\n",
+ " Product - Legal Status Product - Is Dedie ? \\\n",
+ "0 SICAV NO \n",
+ "1 FCP NO \n",
+ "\n",
+ " Product - Fund Product - Shareclass Type \\\n",
+ "0 Carmignac Portfolio Investissement F \n",
+ "1 Carmignac Patrimoine A \n",
+ "\n",
+ " Product - Shareclass Currency Product - Isin Centralisation Date \\\n",
+ "0 EUR LU0992625839 2020-11-05 \n",
+ "1 EUR FR0010135103 2015-03-09 \n",
+ "\n",
+ " Quantity - Subscription Quantity - Redemption Quantity - NetFlows \\\n",
+ "0 1636.00 0.0 1636.00 \n",
+ "1 144.69 0.0 144.69 \n",
+ "\n",
+ " Value Ccy - Subscription Value Ccy - Redemption Value Ccy - NetFlows \\\n",
+ "0 280983.00 0.0 280983.00 \n",
+ "1 99985.13 0.0 99985.13 \n",
+ "\n",
+ " Value € - Subscription Value € - Redemption Value € - NetFlows month \n",
+ "0 280983.00 0.0 280983.00 2020-11-01 \n",
+ "1 99985.13 0.0 99985.13 2015-03-01 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Registrar Account - ID | \n",
+ " reg_used | \n",
+ " Agreement - Code | \n",
+ " Company - Id | \n",
+ " Company - Ultimate Parent Id | \n",
+ " Registrar Account - Region | \n",
+ " RegistrarAccount - Country | \n",
+ " Product - Asset Type | \n",
+ " Product - Strategy | \n",
+ " Product - Legal Status | \n",
+ " Product - Is Dedie ? | \n",
+ " Product - Fund | \n",
+ " Product - Shareclass Type | \n",
+ " Product - Shareclass Currency | \n",
+ " Product - Isin | \n",
+ " Centralisation Date | \n",
+ " Quantity - AUM | \n",
+ " Value - AUM CCY | \n",
+ " Value - AUM € | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18872 | \n",
+ " 18872 | \n",
+ " L104 | \n",
+ " 2257.0 | \n",
+ " 33675.0 | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " FR0010135103 | \n",
+ " 2015-01-31 | \n",
+ " 49094.915 | \n",
+ " 3.242523e+07 | \n",
+ " 3.242523e+07 | \n",
+ " 2015-01-01 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18872 | \n",
+ " 18872 | \n",
+ " L104 | \n",
+ " 2257.0 | \n",
+ " 33675.0 | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " Equity | \n",
+ " Investissement Latitude | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Investissement Latitude | \n",
+ " A | \n",
+ " EUR | \n",
+ " FR0010147603 | \n",
+ " 2015-01-31 | \n",
+ " 1717.000 | \n",
+ " 4.767422e+05 | \n",
+ " 4.767422e+05 | \n",
+ " 2015-01-01 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Registrar Account - ID reg_used Agreement - Code Company - Id \\\n",
+ "0 18872 18872 L104 2257.0 \n",
+ "1 18872 18872 L104 2257.0 \n",
+ "\n",
+ " Company - Ultimate Parent Id Registrar Account - Region \\\n",
+ "0 33675.0 Switzerland \n",
+ "1 33675.0 Switzerland \n",
+ "\n",
+ " RegistrarAccount - Country Product - Asset Type Product - Strategy \\\n",
+ "0 Switzerland Diversified Patrimoine \n",
+ "1 Switzerland Equity Investissement Latitude \n",
+ "\n",
+ " Product - Legal Status Product - Is Dedie ? \\\n",
+ "0 FCP NO \n",
+ "1 FCP NO \n",
+ "\n",
+ " Product - Fund Product - Shareclass Type \\\n",
+ "0 Carmignac Patrimoine A \n",
+ "1 Carmignac Investissement Latitude A \n",
+ "\n",
+ " Product - Shareclass Currency Product - Isin Centralisation Date \\\n",
+ "0 EUR FR0010135103 2015-01-31 \n",
+ "1 EUR FR0010147603 2015-01-31 \n",
+ "\n",
+ " Quantity - AUM Value - AUM CCY Value - AUM € month \n",
+ "0 49094.915 3.242523e+07 3.242523e+07 2015-01-01 \n",
+ "1 1717.000 4.767422e+05 4.767422e+05 2015-01-01 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "\n",
+ "for df, date_col in [\n",
+ " (df_flows, FLOW_DATE_COL),\n",
+ " (df_aum, AUM_DATE_COL),\n",
+ " (df_nav, NAV_DATE_COL),\n",
+ " (df_rates, RATE_DATE_COL),\n",
+ "]:\n",
+ " if df is not None and date_col is not None and date_col in df.columns:\n",
+ " df[date_col] = pd.to_datetime(df[date_col], errors=\"coerce\")\n",
+ " df[\"month\"] = df[date_col].dt.to_period(\"M\").dt.to_timestamp()\n",
+ "\n",
+ "for df in [df_flows, df_aum]:\n",
+ " if df is not None:\n",
+ " df[ID_COL] = df[ID_COL].astype(str).str.strip()\n",
+ " df[ISIN_COL] = df[ISIN_COL].astype(str).str.strip()\n",
+ "\n",
+ "if df_nav is not None and NAV_ISIN_COL is not None:\n",
+ " df_nav[NAV_ISIN_COL] = df_nav[NAV_ISIN_COL].astype(str).str.strip()\n",
+ "\n",
+ "display(df_flows.head(2))\n",
+ "display(df_aum.head(2))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8b073eb2",
+ "metadata": {},
+ "source": [
+ "## 7. Sélection des 400+ gros comptes au 31/10/2025\n",
+ "\n",
+ "Conforme aux messages Carmignac :\n",
+ "- on travaille à la date de référence **31/10/2025**\n",
+ "- on exclut **Off Distribution** et **Private Clients**\n",
+ "- on garde les comptes avec **AUM total ≥ 5 M€**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "049f4a35-5769-48db-8148-f057dddfbc44",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Registrar Account - ID', 'reg_used', 'Agreement - Code',\n",
+ " 'Company - Id', 'Company - Ultimate Parent Id',\n",
+ " 'Registrar Account - Region', 'RegistrarAccount - Country',\n",
+ " 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status',\n",
+ " 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type',\n",
+ " 'Product - Shareclass Currency', 'Product - Isin',\n",
+ " 'Centralisation Date', 'Quantity - AUM', 'Value - AUM CCY',\n",
+ " 'Value - AUM €', 'month'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_aum.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "bb71018b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Nombre de comptes >= 5M€ : 432\n",
+ "Couverture encours : 100.0 %\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Registrar Account - ID | \n",
+ " aum_qty_total | \n",
+ " aum_val_total | \n",
+ " weight_31102025 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 419 | \n",
+ " 420350 | \n",
+ " 2626183.491 | \n",
+ " 1.623308e+09 | \n",
+ " 0.047517 | \n",
+ "
\n",
+ " \n",
+ " | 355 | \n",
+ " 364765 | \n",
+ " 6715556.496 | \n",
+ " 1.383209e+09 | \n",
+ " 0.040489 | \n",
+ "
\n",
+ " \n",
+ " | 160 | \n",
+ " 200127454 | \n",
+ " 4513772.201 | \n",
+ " 8.784361e+08 | \n",
+ " 0.025713 | \n",
+ "
\n",
+ " \n",
+ " | 352 | \n",
+ " 312933 | \n",
+ " 2728411.596 | \n",
+ " 8.379604e+08 | \n",
+ " 0.024529 | \n",
+ "
\n",
+ " \n",
+ " | 200 | \n",
+ " 200127809 | \n",
+ " 3473162.647 | \n",
+ " 8.342839e+08 | \n",
+ " 0.024421 | \n",
+ "
\n",
+ " \n",
+ " | 418 | \n",
+ " 420259 | \n",
+ " 3600860.888 | \n",
+ " 8.296663e+08 | \n",
+ " 0.024286 | \n",
+ "
\n",
+ " \n",
+ " | 357 | \n",
+ " 364907 | \n",
+ " 2794302.254 | \n",
+ " 8.151083e+08 | \n",
+ " 0.023860 | \n",
+ "
\n",
+ " \n",
+ " | 400 | \n",
+ " 366441 | \n",
+ " 1944206.261 | \n",
+ " 7.707213e+08 | \n",
+ " 0.022560 | \n",
+ "
\n",
+ " \n",
+ " | 358 | \n",
+ " 364929 | \n",
+ " 2135207.537 | \n",
+ " 7.479766e+08 | \n",
+ " 0.021895 | \n",
+ "
\n",
+ " \n",
+ " | 381 | \n",
+ " 365538 | \n",
+ " 2695348.456 | \n",
+ " 7.200408e+08 | \n",
+ " 0.021077 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Registrar Account - ID aum_qty_total aum_val_total weight_31102025\n",
+ "419 420350 2626183.491 1.623308e+09 0.047517\n",
+ "355 364765 6715556.496 1.383209e+09 0.040489\n",
+ "160 200127454 4513772.201 8.784361e+08 0.025713\n",
+ "352 312933 2728411.596 8.379604e+08 0.024529\n",
+ "200 200127809 3473162.647 8.342839e+08 0.024421\n",
+ "418 420259 3600860.888 8.296663e+08 0.024286\n",
+ "357 364907 2794302.254 8.151083e+08 0.023860\n",
+ "400 366441 1944206.261 7.707213e+08 0.022560\n",
+ "358 364929 2135207.537 7.479766e+08 0.021895\n",
+ "381 365538 2695348.456 7.200408e+08 0.021077"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "\n",
+ "TARGET_DATE = pd.Timestamp(\"2025-10-01\")\n",
+ "EXCLUDED_IDS = [\"Off Distribution\", \"Private Clients\"]\n",
+ "AUM_THRESHOLD_EUR = 5_000_000\n",
+ "\n",
+ "df_top_accounts = (\n",
+ " df_aum[\n",
+ " (df_aum[\"month\"] == TARGET_DATE) &\n",
+ " (~df_aum[ID_COL].isin(EXCLUDED_IDS))\n",
+ " ]\n",
+ " .groupby(ID_COL, as_index=False)\n",
+ " .agg(\n",
+ " aum_qty_total=(AUM_QTY_COL, \"sum\"),\n",
+ " aum_val_total=(AUM_VAL_COL, \"sum\"),\n",
+ " )\n",
+ " .sort_values(\"aum_val_total\", ascending=False)\n",
+ ")\n",
+ "\n",
+ "df_top_accounts[\"weight_31102025\"] = df_top_accounts[\"aum_val_total\"] / df_top_accounts[\"aum_val_total\"].sum()\n",
+ "\n",
+ "top_accounts = df_top_accounts[df_top_accounts[\"aum_val_total\"] >= AUM_THRESHOLD_EUR].copy()\n",
+ "TOP_IDS = set(top_accounts[ID_COL].tolist())\n",
+ "\n",
+ "print(\"Nombre de comptes >= 5M€ :\", len(TOP_IDS))\n",
+ "print(\"Couverture encours :\", round(top_accounts[\"aum_val_total\"].sum() / df_top_accounts[\"aum_val_total\"].sum() * 100, 2), \"%\")\n",
+ "display(top_accounts.head(10))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "06fcabb4",
+ "metadata": {},
+ "source": [
+ "## 8. Filtrage des données sur les 400+ gros comptes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "98e51fed",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "df_flows: (1501724, 25)\n",
+ "df_aum: (1088398, 20)\n",
+ "Nb comptes flows: 427\n",
+ "Nb comptes aum: 432\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "df_flows = df_flows[df_flows[ID_COL].isin(TOP_IDS)].copy()\n",
+ "df_aum = df_aum[df_aum[ID_COL].isin(TOP_IDS)].copy()\n",
+ "\n",
+ "print(\"df_flows:\", df_flows.shape)\n",
+ "print(\"df_aum:\", df_aum.shape)\n",
+ "print(\"Nb comptes flows:\", df_flows[ID_COL].nunique())\n",
+ "print(\"Nb comptes aum:\", df_aum[ID_COL].nunique())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "cdfa3b92-286c-40c3-aac8-f7972be9d5ca",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Nb comptes AUM : 432\n",
+ "Nb comptes flows : 427\n",
+ "Intersection : 427\n",
+ "AUM seulement : 5\n",
+ "Flows seulement : 0\n",
+ "Exemples AUM seulement : ['200142554', '200139346', '200131477', '200127798', '200130743']\n",
+ "Exemples Flows seulement : []\n"
+ ]
+ }
+ ],
+ "source": [
+ "aum_ids = set(df_aum[\"Registrar Account - ID\"].dropna().unique())\n",
+ "flow_ids = set(df_flows[\"Registrar Account - ID\"].dropna().unique())\n",
+ "\n",
+ "print(\"Nb comptes AUM :\", len(aum_ids))\n",
+ "print(\"Nb comptes flows :\", len(flow_ids))\n",
+ "print(\"Intersection :\", len(aum_ids & flow_ids))\n",
+ "print(\"AUM seulement :\", len(aum_ids - flow_ids))\n",
+ "print(\"Flows seulement :\", len(flow_ids - aum_ids))\n",
+ "\n",
+ "print(\"Exemples AUM seulement :\", list(aum_ids - flow_ids)[:10])\n",
+ "print(\"Exemples Flows seulement :\", list(flow_ids - aum_ids)[:10])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "fefe8a12-a98f-4220-bfbe-c0f2cd6d4ff2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "df_flows columns: Index(['Agreement - Code', 'Company - Id', 'Company - Ultimate Parent Id',\n",
+ " 'Registrar Account - ID', 'Registrar Account - Region',\n",
+ " 'RegistrarAccount - Country', 'Product - Asset Type',\n",
+ " 'Product - Strategy', 'Product - Legal Status', 'Product - Is Dedie ?',\n",
+ " 'Product - Fund', 'Product - Shareclass Type',\n",
+ " 'Product - Shareclass Currency', 'Product - Isin',\n",
+ " 'Centralisation Date', 'Quantity - Subscription',\n",
+ " 'Quantity - Redemption', 'Quantity - NetFlows',\n",
+ " 'Value Ccy - Subscription', 'Value Ccy - Redemption',\n",
+ " 'Value Ccy - NetFlows', 'Value € - Subscription',\n",
+ " 'Value € - Redemption', 'Value € - NetFlows', 'month'],\n",
+ " dtype='object')\n",
+ "df_aum columns: Index(['Registrar Account - ID', 'reg_used', 'Agreement - Code',\n",
+ " 'Company - Id', 'Company - Ultimate Parent Id',\n",
+ " 'Registrar Account - Region', 'RegistrarAccount - Country',\n",
+ " 'Product - Asset Type', 'Product - Strategy', 'Product - Legal Status',\n",
+ " 'Product - Is Dedie ?', 'Product - Fund', 'Product - Shareclass Type',\n",
+ " 'Product - Shareclass Currency', 'Product - Isin',\n",
+ " 'Centralisation Date', 'Quantity - AUM', 'Value - AUM CCY',\n",
+ " 'Value - AUM €', 'month'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"df_flows columns:\", df_flows.columns)\n",
+ "print(\"df_aum columns:\", df_aum.columns)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "94d7c460",
+ "metadata": {},
+ "source": [
+ "## 9. Base fine mensuelle flows : `client × ISIN × month`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "4ac20a75-bf00-4f61-b0ec-d1c18d4fa665",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " column | \n",
+ " n_groups_observed | \n",
+ " pct_groups_1_value | \n",
+ " pct_groups_gt1_value | \n",
+ " max_n_unique | \n",
+ " mean_n_unique | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Centralisation Date | \n",
+ " 275762 | \n",
+ " 0.323591 | \n",
+ " 0.676409 | \n",
+ " 23 | \n",
+ " 5.443382 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Value € - NetFlows | \n",
+ " 275762 | \n",
+ " 0.323888 | \n",
+ " 0.676112 | \n",
+ " 37 | \n",
+ " 5.435386 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Value Ccy - NetFlows | \n",
+ " 275762 | \n",
+ " 0.323902 | \n",
+ " 0.676098 | \n",
+ " 37 | \n",
+ " 5.435190 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Quantity - NetFlows | \n",
+ " 275762 | \n",
+ " 0.325879 | \n",
+ " 0.674121 | \n",
+ " 37 | \n",
+ " 5.361130 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Value € - Redemption | \n",
+ " 275762 | \n",
+ " 0.409785 | \n",
+ " 0.590215 | \n",
+ " 34 | \n",
+ " 4.152294 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Value Ccy - Redemption | \n",
+ " 275762 | \n",
+ " 0.409788 | \n",
+ " 0.590212 | \n",
+ " 34 | \n",
+ " 4.152120 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " Quantity - Redemption | \n",
+ " 275762 | \n",
+ " 0.410916 | \n",
+ " 0.589084 | \n",
+ " 34 | \n",
+ " 4.090861 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " Value € - Subscription | \n",
+ " 275762 | \n",
+ " 0.443324 | \n",
+ " 0.556676 | \n",
+ " 31 | \n",
+ " 3.716030 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " Value Ccy - Subscription | \n",
+ " 275762 | \n",
+ " 0.443335 | \n",
+ " 0.556665 | \n",
+ " 31 | \n",
+ " 3.715904 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " Quantity - Subscription | \n",
+ " 275762 | \n",
+ " 0.444307 | \n",
+ " 0.555693 | \n",
+ " 30 | \n",
+ " 3.667652 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " Company - Id | \n",
+ " 275440 | \n",
+ " 0.615590 | \n",
+ " 0.384410 | \n",
+ " 3 | \n",
+ " 1.384432 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " Company - Ultimate Parent Id | \n",
+ " 275440 | \n",
+ " 0.615600 | \n",
+ " 0.384400 | \n",
+ " 2 | \n",
+ " 1.384400 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " Registrar Account - Region | \n",
+ " 275762 | \n",
+ " 0.999307 | \n",
+ " 0.000693 | \n",
+ " 2 | \n",
+ " 1.000693 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " RegistrarAccount - Country | \n",
+ " 275762 | \n",
+ " 0.999307 | \n",
+ " 0.000693 | \n",
+ " 2 | \n",
+ " 1.000693 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " Agreement - Code | \n",
+ " 275762 | \n",
+ " 0.999888 | \n",
+ " 0.000112 | \n",
+ " 2 | \n",
+ " 1.000112 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " Product - Asset Type | \n",
+ " 275529 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " Product - Strategy | \n",
+ " 275761 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " Product - Legal Status | \n",
+ " 275762 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " Product - Is Dedie ? | \n",
+ " 275762 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " Product - Fund | \n",
+ " 275762 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " Product - Shareclass Type | \n",
+ " 275762 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " Product - Shareclass Currency | \n",
+ " 275762 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " column n_groups_observed pct_groups_1_value \\\n",
+ "0 Centralisation Date 275762 0.323591 \n",
+ "1 Value € - NetFlows 275762 0.323888 \n",
+ "2 Value Ccy - NetFlows 275762 0.323902 \n",
+ "3 Quantity - NetFlows 275762 0.325879 \n",
+ "4 Value € - Redemption 275762 0.409785 \n",
+ "5 Value Ccy - Redemption 275762 0.409788 \n",
+ "6 Quantity - Redemption 275762 0.410916 \n",
+ "7 Value € - Subscription 275762 0.443324 \n",
+ "8 Value Ccy - Subscription 275762 0.443335 \n",
+ "9 Quantity - Subscription 275762 0.444307 \n",
+ "10 Company - Id 275440 0.615590 \n",
+ "11 Company - Ultimate Parent Id 275440 0.615600 \n",
+ "12 Registrar Account - Region 275762 0.999307 \n",
+ "13 RegistrarAccount - Country 275762 0.999307 \n",
+ "14 Agreement - Code 275762 0.999888 \n",
+ "15 Product - Asset Type 275529 1.000000 \n",
+ "16 Product - Strategy 275761 1.000000 \n",
+ "17 Product - Legal Status 275762 1.000000 \n",
+ "18 Product - Is Dedie ? 275762 1.000000 \n",
+ "19 Product - Fund 275762 1.000000 \n",
+ "20 Product - Shareclass Type 275762 1.000000 \n",
+ "21 Product - Shareclass Currency 275762 1.000000 \n",
+ "\n",
+ " pct_groups_gt1_value max_n_unique mean_n_unique \n",
+ "0 0.676409 23 5.443382 \n",
+ "1 0.676112 37 5.435386 \n",
+ "2 0.676098 37 5.435190 \n",
+ "3 0.674121 37 5.361130 \n",
+ "4 0.590215 34 4.152294 \n",
+ "5 0.590212 34 4.152120 \n",
+ "6 0.589084 34 4.090861 \n",
+ "7 0.556676 31 3.716030 \n",
+ "8 0.556665 31 3.715904 \n",
+ "9 0.555693 30 3.667652 \n",
+ "10 0.384410 3 1.384432 \n",
+ "11 0.384400 2 1.384400 \n",
+ "12 0.000693 2 1.000693 \n",
+ "13 0.000693 2 1.000693 \n",
+ "14 0.000112 2 1.000112 \n",
+ "15 0.000000 1 1.000000 \n",
+ "16 0.000000 1 1.000000 \n",
+ "17 0.000000 1 1.000000 \n",
+ "18 0.000000 1 1.000000 \n",
+ "19 0.000000 1 1.000000 \n",
+ "20 0.000000 1 1.000000 \n",
+ "21 0.000000 1 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# DIAGNOSTIC RAPIDE D'AGREGATION - DF_FLOWS\n",
+ "# Pour chaque colonne, on regarde combien de valeurs distinctes\n",
+ "#prend dans chaque groupe client x ISIN x mois\n",
+ "# ============================================================\n",
+ "\n",
+ "KEYS = [ID_COL, ISIN_COL, \"month\"]\n",
+ "df_flows_diag_base = df_flows.dropna(subset=KEYS).copy()\n",
+ "flow_cols_to_check = [c for c in df_flows_diag_base.columns if c not in KEYS]\n",
+ "flow_summary_rows = []\n",
+ "\n",
+ "for col in flow_cols_to_check:\n",
+ " s = df_flows_diag_base[col]\n",
+ " # on enlève juste les NA pour cette colonne\n",
+ " tmp = df_flows_diag_base.loc[s.notna(), KEYS + [col]].copy()\n",
+ " # optionnel : nettoyer les strings seulement si la colonne est texte\n",
+ " if tmp[col].dtype == \"object\":\n",
+ " tmp[col] = tmp[col].astype(str).str.strip()\n",
+ " tmp = tmp[tmp[col] != \"\"]\n",
+ "\n",
+ " # nb de valeurs distinctes dans chaque groupe\n",
+ " nun = tmp.groupby(KEYS)[col].nunique()\n",
+ " flow_summary_rows.append({\n",
+ " \"column\": col,\n",
+ " \"n_groups_observed\": int(nun.shape[0]),\n",
+ " \"pct_groups_1_value\": float((nun == 1).mean()) if len(nun) else np.nan,\n",
+ " \"pct_groups_gt1_value\": float((nun > 1).mean()) if len(nun) else np.nan,\n",
+ " \"max_n_unique\": int(nun.max()) if len(nun) else np.nan,\n",
+ " \"mean_n_unique\": float(nun.mean()) if len(nun) else np.nan,\n",
+ " })\n",
+ "\n",
+ "flow_agg_summary = (\n",
+ " pd.DataFrame(flow_summary_rows)\n",
+ " .sort_values([\"pct_groups_gt1_value\", \"max_n_unique\"], ascending=[False, False])\n",
+ " .reset_index(drop=True)\n",
+ ")\n",
+ "\n",
+ "display(flow_agg_summary)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "8cd2d9d3-5ced-44f5-af20-7f501435a43f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " column | \n",
+ " n_groups_observed | \n",
+ " pct_groups_1_value | \n",
+ " pct_groups_gt1_value | \n",
+ " max_n_unique | \n",
+ " mean_n_unique | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RegistrarAccount - Country | \n",
+ " 955241 | \n",
+ " 0.936354 | \n",
+ " 0.063646 | \n",
+ " 10 | \n",
+ " 1.125696 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Registrar Account - Region | \n",
+ " 955241 | \n",
+ " 0.936976 | \n",
+ " 0.063024 | \n",
+ " 9 | \n",
+ " 1.115268 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Quantity - AUM | \n",
+ " 955241 | \n",
+ " 0.940144 | \n",
+ " 0.059856 | \n",
+ " 10 | \n",
+ " 1.090657 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Value - AUM CCY | \n",
+ " 955241 | \n",
+ " 0.940171 | \n",
+ " 0.059829 | \n",
+ " 10 | \n",
+ " 1.090631 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Value - AUM € | \n",
+ " 955241 | \n",
+ " 0.940171 | \n",
+ " 0.059829 | \n",
+ " 10 | \n",
+ " 1.090631 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Agreement - Code | \n",
+ " 955241 | \n",
+ " 0.983530 | \n",
+ " 0.016470 | \n",
+ " 2 | \n",
+ " 1.016470 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " Company - Id | \n",
+ " 954372 | \n",
+ " 0.983682 | \n",
+ " 0.016318 | \n",
+ " 3 | \n",
+ " 1.016322 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " Company - Ultimate Parent Id | \n",
+ " 954372 | \n",
+ " 0.984164 | \n",
+ " 0.015836 | \n",
+ " 3 | \n",
+ " 1.015840 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " reg_used | \n",
+ " 955241 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " Product - Asset Type | \n",
+ " 934524 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " Product - Strategy | \n",
+ " 955210 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " Product - Legal Status | \n",
+ " 955241 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " Product - Is Dedie ? | \n",
+ " 955241 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " Product - Fund | \n",
+ " 955241 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " Product - Shareclass Type | \n",
+ " 955241 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " Product - Shareclass Currency | \n",
+ " 955241 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " Centralisation Date | \n",
+ " 955241 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " column n_groups_observed pct_groups_1_value \\\n",
+ "0 RegistrarAccount - Country 955241 0.936354 \n",
+ "1 Registrar Account - Region 955241 0.936976 \n",
+ "2 Quantity - AUM 955241 0.940144 \n",
+ "3 Value - AUM CCY 955241 0.940171 \n",
+ "4 Value - AUM € 955241 0.940171 \n",
+ "5 Agreement - Code 955241 0.983530 \n",
+ "6 Company - Id 954372 0.983682 \n",
+ "7 Company - Ultimate Parent Id 954372 0.984164 \n",
+ "8 reg_used 955241 1.000000 \n",
+ "9 Product - Asset Type 934524 1.000000 \n",
+ "10 Product - Strategy 955210 1.000000 \n",
+ "11 Product - Legal Status 955241 1.000000 \n",
+ "12 Product - Is Dedie ? 955241 1.000000 \n",
+ "13 Product - Fund 955241 1.000000 \n",
+ "14 Product - Shareclass Type 955241 1.000000 \n",
+ "15 Product - Shareclass Currency 955241 1.000000 \n",
+ "16 Centralisation Date 955241 1.000000 \n",
+ "\n",
+ " pct_groups_gt1_value max_n_unique mean_n_unique \n",
+ "0 0.063646 10 1.125696 \n",
+ "1 0.063024 9 1.115268 \n",
+ "2 0.059856 10 1.090657 \n",
+ "3 0.059829 10 1.090631 \n",
+ "4 0.059829 10 1.090631 \n",
+ "5 0.016470 2 1.016470 \n",
+ "6 0.016318 3 1.016322 \n",
+ "7 0.015836 3 1.015840 \n",
+ "8 0.000000 1 1.000000 \n",
+ "9 0.000000 1 1.000000 \n",
+ "10 0.000000 1 1.000000 \n",
+ "11 0.000000 1 1.000000 \n",
+ "12 0.000000 1 1.000000 \n",
+ "13 0.000000 1 1.000000 \n",
+ "14 0.000000 1 1.000000 \n",
+ "15 0.000000 1 1.000000 \n",
+ "16 0.000000 1 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# DIAGNOSTIC RAPIDE D'AGREGATION - DF_AUM\n",
+ "# Pour chaque colonne, on regarde combien de valeurs distinctes\n",
+ "# prend dans chaque groupe client x ISIN x mois\n",
+ "# ============================================================\n",
+ "\n",
+ "KEYS = [ID_COL, ISIN_COL, \"month\"]\n",
+ "\n",
+ "df_aum_diag_base = df_aum.dropna(subset=KEYS).copy()\n",
+ "\n",
+ "aum_cols_to_check = [c for c in df_aum_diag_base.columns if c not in KEYS]\n",
+ "\n",
+ "aum_summary_rows = []\n",
+ "\n",
+ "for col in aum_cols_to_check:\n",
+ " s = df_aum_diag_base[col]\n",
+ "\n",
+ " # on enlève les NA pour cette colonne\n",
+ " tmp = df_aum_diag_base.loc[s.notna(), KEYS + [col]].copy()\n",
+ "\n",
+ " # nettoyage léger si texte\n",
+ " if tmp[col].dtype == \"object\":\n",
+ " tmp[col] = tmp[col].astype(str).str.strip()\n",
+ " tmp = tmp[tmp[col] != \"\"]\n",
+ "\n",
+ " # nb de valeurs distinctes dans chaque groupe\n",
+ " nun = tmp.groupby(KEYS)[col].nunique()\n",
+ "\n",
+ " aum_summary_rows.append({\n",
+ " \"column\": col,\n",
+ " \"n_groups_observed\": int(nun.shape[0]),\n",
+ " \"pct_groups_1_value\": float((nun == 1).mean()) if len(nun) else np.nan,\n",
+ " \"pct_groups_gt1_value\": float((nun > 1).mean()) if len(nun) else np.nan,\n",
+ " \"max_n_unique\": int(nun.max()) if len(nun) else np.nan,\n",
+ " \"mean_n_unique\": float(nun.mean()) if len(nun) else np.nan,\n",
+ " })\n",
+ "\n",
+ "aum_agg_summary = (\n",
+ " pd.DataFrame(aum_summary_rows)\n",
+ " .sort_values([\"pct_groups_gt1_value\", \"max_n_unique\"], ascending=[False, False])\n",
+ " .reset_index(drop=True)\n",
+ ")\n",
+ "\n",
+ "display(aum_agg_summary)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bc0e24e1-162a-48e8-9486-6494e762ae26",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "cba5b9bf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Registrar Account - ID | \n",
+ " Product - Isin | \n",
+ " month | \n",
+ " net_flow_qty | \n",
+ " gross_flow_qty | \n",
+ " sub_qty | \n",
+ " red_qty | \n",
+ " n_tx | \n",
+ " Product - Asset Type | \n",
+ " Product - Strategy | \n",
+ " Product - Legal Status | \n",
+ " Product - Is Dedie ? | \n",
+ " Product - Fund | \n",
+ " Product - Shareclass Type | \n",
+ " Product - Shareclass Currency | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-01-01 | \n",
+ " 673.990 | \n",
+ " 1045.99 | \n",
+ " 859.990 | \n",
+ " -186.000 | \n",
+ " 9 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-02-01 | \n",
+ " 988.000 | \n",
+ " 1712.00 | \n",
+ " 1350.000 | \n",
+ " -362.000 | \n",
+ " 12 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-03-01 | \n",
+ " 9.710 | \n",
+ " 1561.71 | \n",
+ " 785.710 | \n",
+ " -776.000 | \n",
+ " 12 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-04-01 | \n",
+ " -123.234 | \n",
+ " 1830.19 | \n",
+ " 853.478 | \n",
+ " -976.712 | \n",
+ " 11 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-05-01 | \n",
+ " 121.000 | \n",
+ " 529.00 | \n",
+ " 325.000 | \n",
+ " -204.000 | \n",
+ " 6 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Registrar Account - ID Product - Isin month net_flow_qty \\\n",
+ "0 18872 FR0010135103 2015-01-01 673.990 \n",
+ "1 18872 FR0010135103 2015-02-01 988.000 \n",
+ "2 18872 FR0010135103 2015-03-01 9.710 \n",
+ "3 18872 FR0010135103 2015-04-01 -123.234 \n",
+ "4 18872 FR0010135103 2015-05-01 121.000 \n",
+ "\n",
+ " gross_flow_qty sub_qty red_qty n_tx Product - Asset Type \\\n",
+ "0 1045.99 859.990 -186.000 9 Diversified \n",
+ "1 1712.00 1350.000 -362.000 12 Diversified \n",
+ "2 1561.71 785.710 -776.000 12 Diversified \n",
+ "3 1830.19 853.478 -976.712 11 Diversified \n",
+ "4 529.00 325.000 -204.000 6 Diversified \n",
+ "\n",
+ " Product - Strategy Product - Legal Status Product - Is Dedie ? \\\n",
+ "0 Patrimoine FCP NO \n",
+ "1 Patrimoine FCP NO \n",
+ "2 Patrimoine FCP NO \n",
+ "3 Patrimoine FCP NO \n",
+ "4 Patrimoine FCP NO \n",
+ "\n",
+ " Product - Fund Product - Shareclass Type \\\n",
+ "0 Carmignac Patrimoine A \n",
+ "1 Carmignac Patrimoine A \n",
+ "2 Carmignac Patrimoine A \n",
+ "3 Carmignac Patrimoine A \n",
+ "4 Carmignac Patrimoine A \n",
+ "\n",
+ " Product - Shareclass Currency \n",
+ "0 EUR \n",
+ "1 EUR \n",
+ "2 EUR \n",
+ "3 EUR \n",
+ "4 EUR "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(275762, 15)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# DF_FLOWS_MENSUEL : 1 client x 1 ISIN x 1 mois\n",
+ "# ============================================================\n",
+ "\n",
+ "KEYS = [ID_COL, ISIN_COL, \"month\"]\n",
+ "\n",
+ "df_flows_clean = df_flows.dropna(subset=KEYS).copy()\n",
+ "\n",
+ "# -------------------------\n",
+ "# NUMERIC FLOWS\n",
+ "# -------------------------\n",
+ "df_flows_clean[\"net_flow_qty\"] = pd.to_numeric(df_flows_clean[FLOW_QTY_COL], errors=\"coerce\").fillna(0.0)\n",
+ "df_flows_clean[\"sub_qty\"] = pd.to_numeric(df_flows_clean[FLOW_SUB_COL], errors=\"coerce\").fillna(0.0)\n",
+ "df_flows_clean[\"red_qty\"] = pd.to_numeric(df_flows_clean[FLOW_RED_COL], errors=\"coerce\").fillna(0.0)\n",
+ "\n",
+ "df_flows_clean[\"gross_flow_qty\"] = df_flows_clean[\"sub_qty\"].abs() + df_flows_clean[\"red_qty\"].abs()\n",
+ "\n",
+ "# -------------------------\n",
+ "# COLONNES PRODUIT STABLES\n",
+ "# -------------------------\n",
+ "product_cols = [\n",
+ " \"Product - Asset Type\",\n",
+ " \"Product - Strategy\",\n",
+ " \"Product - Legal Status\",\n",
+ " \"Product - Is Dedie ?\",\n",
+ " \"Product - Fund\",\n",
+ " \"Product - Shareclass Type\",\n",
+ " \"Product - Shareclass Currency\",\n",
+ "]\n",
+ "\n",
+ "# -------------------------\n",
+ "# AGGREGATION\n",
+ "# -------------------------\n",
+ "agg_dict = {\n",
+ " \"net_flow_qty\": (\"net_flow_qty\", \"sum\"),\n",
+ " \"gross_flow_qty\": (\"gross_flow_qty\", \"sum\"),\n",
+ " \"sub_qty\": (\"sub_qty\", \"sum\"),\n",
+ " \"red_qty\": (\"red_qty\", \"sum\"),\n",
+ " \"n_tx\": (FLOW_QTY_COL, \"size\"),\n",
+ "}\n",
+ "\n",
+ "# ajouter produit en \"first\"\n",
+ "for col in product_cols:\n",
+ " if col in df_flows_clean.columns:\n",
+ " agg_dict[col] = (col, \"first\")\n",
+ "\n",
+ "# -------------------------\n",
+ "# GROUPBY FINAL\n",
+ "# -------------------------\n",
+ "df_flows_m = (\n",
+ " df_flows_clean\n",
+ " .groupby(KEYS, as_index=False)\n",
+ " .agg(**agg_dict)\n",
+ ")\n",
+ "\n",
+ "display(df_flows_m.head())\n",
+ "print(df_flows_m.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "94621000",
+ "metadata": {},
+ "source": [
+ "## 10. Base fine mensuelle AUM : `client × ISIN × month`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "b46d886b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Registrar Account - ID | \n",
+ " Product - Isin | \n",
+ " month | \n",
+ " aum_qty | \n",
+ " aum_val | \n",
+ " Product - Asset Type | \n",
+ " Product - Strategy | \n",
+ " Product - Legal Status | \n",
+ " Product - Is Dedie ? | \n",
+ " Product - Fund | \n",
+ " Product - Shareclass Type | \n",
+ " Product - Shareclass Currency | \n",
+ " Registrar Account - Region | \n",
+ " RegistrarAccount - Country | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-01-01 | \n",
+ " 49094.915 | \n",
+ " 3.242523e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-02-01 | \n",
+ " 49797.915 | \n",
+ " 3.368032e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-03-01 | \n",
+ " 50302.627 | \n",
+ " 3.505691e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-04-01 | \n",
+ " 50219.393 | \n",
+ " 3.452433e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-05-01 | \n",
+ " 53685.393 | \n",
+ " 3.699729e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
+ "0 18872 FR0010135103 2015-01-01 49094.915 3.242523e+07 \n",
+ "1 18872 FR0010135103 2015-02-01 49797.915 3.368032e+07 \n",
+ "2 18872 FR0010135103 2015-03-01 50302.627 3.505691e+07 \n",
+ "3 18872 FR0010135103 2015-04-01 50219.393 3.452433e+07 \n",
+ "4 18872 FR0010135103 2015-05-01 53685.393 3.699729e+07 \n",
+ "\n",
+ " Product - Asset Type Product - Strategy Product - Legal Status \\\n",
+ "0 Diversified Patrimoine FCP \n",
+ "1 Diversified Patrimoine FCP \n",
+ "2 Diversified Patrimoine FCP \n",
+ "3 Diversified Patrimoine FCP \n",
+ "4 Diversified Patrimoine FCP \n",
+ "\n",
+ " Product - Is Dedie ? Product - Fund Product - Shareclass Type \\\n",
+ "0 NO Carmignac Patrimoine A \n",
+ "1 NO Carmignac Patrimoine A \n",
+ "2 NO Carmignac Patrimoine A \n",
+ "3 NO Carmignac Patrimoine A \n",
+ "4 NO Carmignac Patrimoine A \n",
+ "\n",
+ " Product - Shareclass Currency Registrar Account - Region \\\n",
+ "0 EUR Switzerland \n",
+ "1 EUR Switzerland \n",
+ "2 EUR Switzerland \n",
+ "3 EUR Switzerland \n",
+ "4 EUR Switzerland \n",
+ "\n",
+ " RegistrarAccount - Country \n",
+ "0 Switzerland \n",
+ "1 Switzerland \n",
+ "2 Switzerland \n",
+ "3 Switzerland \n",
+ "4 Switzerland "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(955241, 14)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# DF_AUM_MENSUEL : 1 client x 1 ISIN x 1 mois\n",
+ "# ============================================================\n",
+ "\n",
+ "df_aum_clean = df_aum.dropna(subset=KEYS).copy()\n",
+ "\n",
+ "# conversion\n",
+ "df_aum_clean[\"aum_qty\"] = pd.to_numeric(df_aum_clean[AUM_QTY_COL], errors=\"coerce\")\n",
+ "df_aum_clean[\"aum_val\"] = pd.to_numeric(df_aum_clean[AUM_VAL_COL], errors=\"coerce\")\n",
+ "\n",
+ "# -------------------------\n",
+ "# AGGREGATION\n",
+ "# -------------------------\n",
+ "agg_dict_aum = {\n",
+ " \"aum_qty\": (\"aum_qty\", \"last\"), # très important\n",
+ " \"aum_val\": (\"aum_val\", \"last\"),\n",
+ "}\n",
+ "\n",
+ "# colonnes produit (stables)\n",
+ "for col in product_cols:\n",
+ " if col in df_aum_clean.columns:\n",
+ " agg_dict_aum[col] = (col, \"first\")\n",
+ "\n",
+ "# region / country (quasi stable)\n",
+ "for col in [REGION_COL, COUNTRY_COL]:\n",
+ " if col in df_aum_clean.columns:\n",
+ " agg_dict_aum[col] = (col, \"first\")\n",
+ "\n",
+ "# -------------------------\n",
+ "# GROUPBY\n",
+ "# -------------------------\n",
+ "df_aum_m = (\n",
+ " df_aum_clean\n",
+ " .sort_values(AUM_DATE_COL)\n",
+ " .groupby(KEYS, as_index=False)\n",
+ " .agg(**agg_dict_aum)\n",
+ ")\n",
+ "\n",
+ "display(df_aum_m.head())\n",
+ "print(df_aum_m.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8576701e",
+ "metadata": {},
+ "source": [
+ "## 11. Fusion flows + AUM : table centrale `df_rel_m`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "6120b573",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Registrar Account - ID | \n",
+ " Product - Isin | \n",
+ " month | \n",
+ " aum_qty | \n",
+ " aum_val | \n",
+ " Product - Asset Type_x | \n",
+ " Product - Strategy_x | \n",
+ " Product - Legal Status_x | \n",
+ " Product - Is Dedie ?_x | \n",
+ " Product - Fund_x | \n",
+ " Product - Shareclass Type_x | \n",
+ " Product - Shareclass Currency_x | \n",
+ " Registrar Account - Region | \n",
+ " RegistrarAccount - Country | \n",
+ " net_flow_qty | \n",
+ " gross_flow_qty | \n",
+ " sub_qty | \n",
+ " red_qty | \n",
+ " n_tx | \n",
+ " Product - Asset Type_y | \n",
+ " Product - Strategy_y | \n",
+ " Product - Legal Status_y | \n",
+ " Product - Is Dedie ?_y | \n",
+ " Product - Fund_y | \n",
+ " Product - Shareclass Type_y | \n",
+ " Product - Shareclass Currency_y | \n",
+ " isin_held_flag | \n",
+ " active_rel_month | \n",
+ " flow_to_aum | \n",
+ " turnover_rel | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-01-01 | \n",
+ " 49094.915 | \n",
+ " 3.242523e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " 673.990 | \n",
+ " 1045.99 | \n",
+ " 859.990 | \n",
+ " -186.000 | \n",
+ " 9.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.013728 | \n",
+ " 0.021305 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-02-01 | \n",
+ " 49797.915 | \n",
+ " 3.368032e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " 988.000 | \n",
+ " 1712.00 | \n",
+ " 1350.000 | \n",
+ " -362.000 | \n",
+ " 12.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.019840 | \n",
+ " 0.034378 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-03-01 | \n",
+ " 50302.627 | \n",
+ " 3.505691e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " 9.710 | \n",
+ " 1561.71 | \n",
+ " 785.710 | \n",
+ " -776.000 | \n",
+ " 12.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.000193 | \n",
+ " 0.031046 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-04-01 | \n",
+ " 50219.393 | \n",
+ " 3.452433e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " -123.234 | \n",
+ " 1830.19 | \n",
+ " 853.478 | \n",
+ " -976.712 | \n",
+ " 11.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " -0.002454 | \n",
+ " 0.036443 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-05-01 | \n",
+ " 53685.393 | \n",
+ " 3.699729e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " 121.000 | \n",
+ " 529.00 | \n",
+ " 325.000 | \n",
+ " -204.000 | \n",
+ " 6.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.002254 | \n",
+ " 0.009854 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
+ "0 18872 FR0010135103 2015-01-01 49094.915 3.242523e+07 \n",
+ "1 18872 FR0010135103 2015-02-01 49797.915 3.368032e+07 \n",
+ "2 18872 FR0010135103 2015-03-01 50302.627 3.505691e+07 \n",
+ "3 18872 FR0010135103 2015-04-01 50219.393 3.452433e+07 \n",
+ "4 18872 FR0010135103 2015-05-01 53685.393 3.699729e+07 \n",
+ "\n",
+ " Product - Asset Type_x Product - Strategy_x Product - Legal Status_x \\\n",
+ "0 Diversified Patrimoine FCP \n",
+ "1 Diversified Patrimoine FCP \n",
+ "2 Diversified Patrimoine FCP \n",
+ "3 Diversified Patrimoine FCP \n",
+ "4 Diversified Patrimoine FCP \n",
+ "\n",
+ " Product - Is Dedie ?_x Product - Fund_x Product - Shareclass Type_x \\\n",
+ "0 NO Carmignac Patrimoine A \n",
+ "1 NO Carmignac Patrimoine A \n",
+ "2 NO Carmignac Patrimoine A \n",
+ "3 NO Carmignac Patrimoine A \n",
+ "4 NO Carmignac Patrimoine A \n",
+ "\n",
+ " Product - Shareclass Currency_x Registrar Account - Region \\\n",
+ "0 EUR Switzerland \n",
+ "1 EUR Switzerland \n",
+ "2 EUR Switzerland \n",
+ "3 EUR Switzerland \n",
+ "4 EUR Switzerland \n",
+ "\n",
+ " RegistrarAccount - Country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
+ "0 Switzerland 673.990 1045.99 859.990 -186.000 \n",
+ "1 Switzerland 988.000 1712.00 1350.000 -362.000 \n",
+ "2 Switzerland 9.710 1561.71 785.710 -776.000 \n",
+ "3 Switzerland -123.234 1830.19 853.478 -976.712 \n",
+ "4 Switzerland 121.000 529.00 325.000 -204.000 \n",
+ "\n",
+ " n_tx Product - Asset Type_y Product - Strategy_y Product - Legal Status_y \\\n",
+ "0 9.0 Diversified Patrimoine FCP \n",
+ "1 12.0 Diversified Patrimoine FCP \n",
+ "2 12.0 Diversified Patrimoine FCP \n",
+ "3 11.0 Diversified Patrimoine FCP \n",
+ "4 6.0 Diversified Patrimoine FCP \n",
+ "\n",
+ " Product - Is Dedie ?_y Product - Fund_y Product - Shareclass Type_y \\\n",
+ "0 NO Carmignac Patrimoine A \n",
+ "1 NO Carmignac Patrimoine A \n",
+ "2 NO Carmignac Patrimoine A \n",
+ "3 NO Carmignac Patrimoine A \n",
+ "4 NO Carmignac Patrimoine A \n",
+ "\n",
+ " Product - Shareclass Currency_y isin_held_flag active_rel_month \\\n",
+ "0 EUR 1 1 \n",
+ "1 EUR 1 1 \n",
+ "2 EUR 1 1 \n",
+ "3 EUR 1 1 \n",
+ "4 EUR 1 1 \n",
+ "\n",
+ " flow_to_aum turnover_rel \n",
+ "0 0.013728 0.021305 \n",
+ "1 0.019840 0.034378 \n",
+ "2 0.000193 0.031046 \n",
+ "3 -0.002454 0.036443 \n",
+ "4 0.002254 0.009854 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(962611, 30)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# DF_REL_M : fusion flows + aum\n",
+ "# ============================================================\n",
+ "\n",
+ "keys = pd.concat([\n",
+ " df_flows_m[KEYS],\n",
+ " df_aum_m[KEYS]\n",
+ "]).drop_duplicates()\n",
+ "\n",
+ "df_rel_m = (\n",
+ " keys\n",
+ " .merge(df_aum_m, on=KEYS, how=\"left\")\n",
+ " .merge(df_flows_m, on=KEYS, how=\"left\")\n",
+ ")\n",
+ "\n",
+ "# -------------------------\n",
+ "# CLEAN NUMERIC\n",
+ "# -------------------------\n",
+ "for c in [\n",
+ " \"aum_qty\", \"aum_val\",\n",
+ " \"net_flow_qty\", \"gross_flow_qty\",\n",
+ " \"sub_qty\", \"red_qty\", \"n_tx\"\n",
+ "]:\n",
+ " if c in df_rel_m.columns:\n",
+ " df_rel_m[c] = pd.to_numeric(df_rel_m[c], errors=\"coerce\").fillna(0.0)\n",
+ "\n",
+ "# -------------------------\n",
+ "# FEATURES BASIQUES\n",
+ "# -------------------------\n",
+ "df_rel_m[\"isin_held_flag\"] = (df_rel_m[\"aum_qty\"] > 0).astype(int)\n",
+ "df_rel_m[\"active_rel_month\"] = (df_rel_m[\"gross_flow_qty\"] > 0).astype(int)\n",
+ "\n",
+ "df_rel_m[\"flow_to_aum\"] = df_rel_m[\"net_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + 1.0)\n",
+ "df_rel_m[\"turnover_rel\"] = df_rel_m[\"gross_flow_qty\"] / (df_rel_m[\"aum_qty\"].abs() + 1.0)\n",
+ "\n",
+ "display(df_rel_m.head())\n",
+ "print(df_rel_m.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2c2ca596",
+ "metadata": {},
+ "source": [
+ "## 12. NAV mensuel et taux mensuels"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "9fe5c3c0-80f6-4fe4-bd67-08323f781c7a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "df_nav_m: (30336, 17)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Isin | \n",
+ " month | \n",
+ " Price (TF PartPrice) | \n",
+ " ret_fund_m | \n",
+ " ret_fund_3m_mean | \n",
+ " ret_fund_6m_mean | \n",
+ " ret_fund_12m_mean | \n",
+ " drawdown_proxy | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 617663 | \n",
+ " FR0007486709 | \n",
+ " 2011-01-01 | \n",
+ " 1534.80 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 617668 | \n",
+ " FR0007486709 | \n",
+ " 2011-02-01 | \n",
+ " 1500.73 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ "
\n",
+ " \n",
+ " | 617672 | \n",
+ " FR0007486709 | \n",
+ " 2011-03-01 | \n",
+ " 1481.29 | \n",
+ " -0.012954 | \n",
+ " -0.017576 | \n",
+ " -0.017576 | \n",
+ " -0.017576 | \n",
+ " -0.034864 | \n",
+ "
\n",
+ " \n",
+ " | 617677 | \n",
+ " FR0007486709 | \n",
+ " 2011-04-01 | \n",
+ " 1517.60 | \n",
+ " 0.024512 | \n",
+ " -0.003547 | \n",
+ " -0.003547 | \n",
+ " -0.003547 | \n",
+ " -0.011207 | \n",
+ "
\n",
+ " \n",
+ " | 617681 | \n",
+ " FR0007486709 | \n",
+ " 2011-05-01 | \n",
+ " 1504.83 | \n",
+ " -0.008415 | \n",
+ " 0.001048 | \n",
+ " -0.004764 | \n",
+ " -0.004764 | \n",
+ " -0.019527 | \n",
+ "
\n",
+ " \n",
+ " | 617686 | \n",
+ " FR0007486709 | \n",
+ " 2011-06-01 | \n",
+ " 1492.92 | \n",
+ " -0.007915 | \n",
+ " 0.002728 | \n",
+ " -0.005394 | \n",
+ " -0.005394 | \n",
+ " -0.027287 | \n",
+ "
\n",
+ " \n",
+ " | 617690 | \n",
+ " FR0007486709 | \n",
+ " 2011-07-01 | \n",
+ " 1473.52 | \n",
+ " -0.012995 | \n",
+ " -0.009775 | \n",
+ " -0.006661 | \n",
+ " -0.006661 | \n",
+ " -0.039927 | \n",
+ "
\n",
+ " \n",
+ " | 617694 | \n",
+ " FR0007486709 | \n",
+ " 2011-08-01 | \n",
+ " 1346.78 | \n",
+ " -0.086012 | \n",
+ " -0.035640 | \n",
+ " -0.017296 | \n",
+ " -0.017996 | \n",
+ " -0.122505 | \n",
+ "
\n",
+ " \n",
+ " | 617699 | \n",
+ " FR0007486709 | \n",
+ " 2011-09-01 | \n",
+ " 1330.69 | \n",
+ " -0.011947 | \n",
+ " -0.036984 | \n",
+ " -0.017128 | \n",
+ " -0.017240 | \n",
+ " -0.132988 | \n",
+ "
\n",
+ " \n",
+ " | 617703 | \n",
+ " FR0007486709 | \n",
+ " 2011-10-01 | \n",
+ " 1372.91 | \n",
+ " 0.031728 | \n",
+ " -0.022077 | \n",
+ " -0.015926 | \n",
+ " -0.011799 | \n",
+ " -0.105480 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Isin month Price (TF PartPrice) ret_fund_m \\\n",
+ "617663 FR0007486709 2011-01-01 1534.80 NaN \n",
+ "617668 FR0007486709 2011-02-01 1500.73 -0.022198 \n",
+ "617672 FR0007486709 2011-03-01 1481.29 -0.012954 \n",
+ "617677 FR0007486709 2011-04-01 1517.60 0.024512 \n",
+ "617681 FR0007486709 2011-05-01 1504.83 -0.008415 \n",
+ "617686 FR0007486709 2011-06-01 1492.92 -0.007915 \n",
+ "617690 FR0007486709 2011-07-01 1473.52 -0.012995 \n",
+ "617694 FR0007486709 2011-08-01 1346.78 -0.086012 \n",
+ "617699 FR0007486709 2011-09-01 1330.69 -0.011947 \n",
+ "617703 FR0007486709 2011-10-01 1372.91 0.031728 \n",
+ "\n",
+ " ret_fund_3m_mean ret_fund_6m_mean ret_fund_12m_mean drawdown_proxy \n",
+ "617663 NaN NaN NaN 0.000000 \n",
+ "617668 -0.022198 -0.022198 -0.022198 -0.022198 \n",
+ "617672 -0.017576 -0.017576 -0.017576 -0.034864 \n",
+ "617677 -0.003547 -0.003547 -0.003547 -0.011207 \n",
+ "617681 0.001048 -0.004764 -0.004764 -0.019527 \n",
+ "617686 0.002728 -0.005394 -0.005394 -0.027287 \n",
+ "617690 -0.009775 -0.006661 -0.006661 -0.039927 \n",
+ "617694 -0.035640 -0.017296 -0.017996 -0.122505 \n",
+ "617699 -0.036984 -0.017128 -0.017240 -0.132988 \n",
+ "617703 -0.022077 -0.015926 -0.011799 -0.105480 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# NAV mensuel + perfs mensuelles corrigées\n",
+ "# ============================================================\n",
+ "\n",
+ "df_nav_m = None\n",
+ "\n",
+ "if df_nav is not None and NAV_ISIN_COL is not None and NAV_PRICE_COL is not None:\n",
+ " df_nav = df_nav.copy()\n",
+ "\n",
+ " # nettoyage prix fonds\n",
+ " df_nav[NAV_PRICE_COL] = (\n",
+ " df_nav[NAV_PRICE_COL]\n",
+ " .astype(str)\n",
+ " .str.replace(\"\\u202f\", \"\", regex=False)\n",
+ " .str.replace(\" \", \"\", regex=False)\n",
+ " .str.replace(\",\", \".\", regex=False)\n",
+ " )\n",
+ " df_nav[NAV_PRICE_COL] = pd.to_numeric(df_nav[NAV_PRICE_COL], errors=\"coerce\")\n",
+ "\n",
+ " # nettoyage benchmark si dispo\n",
+ " if NAV_BENCH_COL is not None and NAV_BENCH_COL in df_nav.columns:\n",
+ " df_nav[NAV_BENCH_COL] = (\n",
+ " df_nav[NAV_BENCH_COL]\n",
+ " .astype(str)\n",
+ " .str.replace(\"\\u202f\", \"\", regex=False)\n",
+ " .str.replace(\" \", \"\", regex=False)\n",
+ " .str.replace(\",\", \".\", regex=False)\n",
+ " )\n",
+ " df_nav[NAV_BENCH_COL] = pd.to_numeric(df_nav[NAV_BENCH_COL], errors=\"coerce\")\n",
+ "\n",
+ " # dernière NAV du mois par ISIN\n",
+ " df_nav_m = (\n",
+ " df_nav\n",
+ " .dropna(subset=[NAV_ISIN_COL, \"month\", NAV_PRICE_COL])\n",
+ " .sort_values([NAV_ISIN_COL, \"month\", NAV_DATE_COL])\n",
+ " .groupby([NAV_ISIN_COL, \"month\"], as_index=False)\n",
+ " .tail(1)\n",
+ " .copy()\n",
+ " )\n",
+ "\n",
+ " # rendement mensuel\n",
+ " df_nav_m[\"ret_fund_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].pct_change()\n",
+ "\n",
+ " # moyenne glissante des rendements mensuels\n",
+ " df_nav_m[\"ret_fund_3m_mean\"] = (\n",
+ " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n",
+ " .transform(lambda s: s.rolling(3, min_periods=1).mean())\n",
+ " )\n",
+ "\n",
+ " df_nav_m[\"ret_fund_6m_mean\"] = (\n",
+ " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n",
+ " .transform(lambda s: s.rolling(6, min_periods=1).mean())\n",
+ " )\n",
+ "\n",
+ " df_nav_m[\"ret_fund_12m_mean\"] = (\n",
+ " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n",
+ " .transform(lambda s: s.rolling(12, min_periods=1).mean())\n",
+ " )\n",
+ "\n",
+ " # volatilité glissante des rendements\n",
+ " df_nav_m[\"ret_vol_3m\"] = (\n",
+ " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n",
+ " .transform(lambda s: s.rolling(3, min_periods=2).std())\n",
+ " )\n",
+ "\n",
+ " df_nav_m[\"ret_vol_6m\"] = (\n",
+ " df_nav_m.groupby(NAV_ISIN_COL)[\"ret_fund_m\"]\n",
+ " .transform(lambda s: s.rolling(6, min_periods=2).std())\n",
+ " )\n",
+ "\n",
+ " # drawdown\n",
+ " df_nav_m[\"drawdown_proxy\"] = (\n",
+ " df_nav_m[NAV_PRICE_COL] /\n",
+ " df_nav_m.groupby(NAV_ISIN_COL)[NAV_PRICE_COL].cummax()\n",
+ " ) - 1\n",
+ "\n",
+ " # benchmark si dispo\n",
+ " if NAV_BENCH_COL is not None and NAV_BENCH_COL in df_nav_m.columns:\n",
+ " df_nav_m[\"ret_bench_m\"] = df_nav_m.groupby(NAV_ISIN_COL)[NAV_BENCH_COL].pct_change()\n",
+ "\n",
+ " df_nav_m[\"active_return_m\"] = df_nav_m[\"ret_fund_m\"] - df_nav_m[\"ret_bench_m\"]\n",
+ "\n",
+ " df_nav_m[\"active_return_3m_mean\"] = (\n",
+ " df_nav_m.groupby(NAV_ISIN_COL)[\"active_return_m\"]\n",
+ " .transform(lambda s: s.rolling(3, min_periods=1).mean())\n",
+ " )\n",
+ "\n",
+ " df_nav_m[\"active_return_6m_mean\"] = (\n",
+ " df_nav_m.groupby(NAV_ISIN_COL)[\"active_return_m\"]\n",
+ " .transform(lambda s: s.rolling(6, min_periods=1).mean())\n",
+ " )\n",
+ "\n",
+ "print(\"df_nav_m:\", None if df_nav_m is None else df_nav_m.shape)\n",
+ "\n",
+ "if df_nav_m is not None:\n",
+ " display(\n",
+ " df_nav_m[\n",
+ " [\n",
+ " NAV_ISIN_COL, \"month\", NAV_PRICE_COL,\n",
+ " \"ret_fund_m\", \"ret_fund_3m_mean\", \"ret_fund_6m_mean\",\n",
+ " \"ret_fund_12m_mean\", \"drawdown_proxy\"\n",
+ " ]\n",
+ " ].head(10)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "d2f87bef",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "df_nav_m: (30336, 17)\n",
+ "df_rates_m: (131, 4)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Isin | \n",
+ " month | \n",
+ " Price (TF PartPrice) | \n",
+ " ret_fund_m | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 617663 | \n",
+ " FR0007486709 | \n",
+ " 2011-01-01 | \n",
+ " 1534.80 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 617668 | \n",
+ " FR0007486709 | \n",
+ " 2011-02-01 | \n",
+ " 1500.73 | \n",
+ " -0.022198 | \n",
+ "
\n",
+ " \n",
+ " | 617672 | \n",
+ " FR0007486709 | \n",
+ " 2011-03-01 | \n",
+ " 1481.29 | \n",
+ " -0.012954 | \n",
+ "
\n",
+ " \n",
+ " | 617677 | \n",
+ " FR0007486709 | \n",
+ " 2011-04-01 | \n",
+ " 1517.60 | \n",
+ " 0.024512 | \n",
+ "
\n",
+ " \n",
+ " | 617681 | \n",
+ " FR0007486709 | \n",
+ " 2011-05-01 | \n",
+ " 1504.83 | \n",
+ " -0.008415 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Isin month Price (TF PartPrice) ret_fund_m\n",
+ "617663 FR0007486709 2011-01-01 1534.80 NaN\n",
+ "617668 FR0007486709 2011-02-01 1500.73 -0.022198\n",
+ "617672 FR0007486709 2011-03-01 1481.29 -0.012954\n",
+ "617677 FR0007486709 2011-04-01 1517.60 0.024512\n",
+ "617681 FR0007486709 2011-05-01 1504.83 -0.008415"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " month | \n",
+ " Yld to Maturity | \n",
+ " delta_rate_m | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2014-12-01 | \n",
+ " 0.144 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 2015-01-01 | \n",
+ " 0.086 | \n",
+ " -0.058 | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " 2015-02-01 | \n",
+ " 0.064 | \n",
+ " -0.022 | \n",
+ "
\n",
+ " \n",
+ " | 65 | \n",
+ " 2015-03-01 | \n",
+ " 0.050 | \n",
+ " -0.014 | \n",
+ "
\n",
+ " \n",
+ " | 86 | \n",
+ " 2015-04-01 | \n",
+ " -0.027 | \n",
+ " -0.077 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " month Yld to Maturity delta_rate_m\n",
+ "0 2014-12-01 0.144 NaN\n",
+ "22 2015-01-01 0.086 -0.058\n",
+ "43 2015-02-01 0.064 -0.022\n",
+ "65 2015-03-01 0.050 -0.014\n",
+ "86 2015-04-01 -0.027 -0.077"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df_rates_m = None\n",
+ "\n",
+ "if df_rates is not None and RATE_DATE_COL is not None and RATE_VAL_COL is not None:\n",
+ " df_rates = df_rates.copy()\n",
+ "\n",
+ " df_rates[RATE_VAL_COL] = (\n",
+ " df_rates[RATE_VAL_COL]\n",
+ " .astype(str)\n",
+ " .str.replace(\"\\u202f\", \"\", regex=False)\n",
+ " .str.replace(\" \", \"\", regex=False)\n",
+ " .str.replace(\",\", \".\", regex=False)\n",
+ " )\n",
+ " df_rates[RATE_VAL_COL] = pd.to_numeric(df_rates[RATE_VAL_COL], errors=\"coerce\")\n",
+ "\n",
+ " df_rates_m = (\n",
+ " df_rates\n",
+ " .dropna(subset=[\"month\", RATE_VAL_COL])\n",
+ " .sort_values(RATE_DATE_COL)\n",
+ " .groupby(\"month\", as_index=False)\n",
+ " .tail(1)\n",
+ " .copy()\n",
+ " )\n",
+ "\n",
+ " df_rates_m[\"delta_rate_m\"] = df_rates_m[RATE_VAL_COL].diff()\n",
+ "\n",
+ "print(\"df_nav_m:\", None if df_nav_m is None else df_nav_m.shape)\n",
+ "print(\"df_rates_m:\", None if df_rates_m is None else df_rates_m.shape)\n",
+ "\n",
+ "if df_nav_m is not None:\n",
+ " display(df_nav_m[[NAV_ISIN_COL, \"month\", NAV_PRICE_COL, \"ret_fund_m\"]].head())\n",
+ "\n",
+ "if df_rates_m is not None:\n",
+ " display(df_rates_m[[\"month\", RATE_VAL_COL, \"delta_rate_m\"]].head())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6c3a4e1f",
+ "metadata": {},
+ "source": [
+ "## 13. Intégration optionnelle des performances Carmignac"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "d6402369-171e-4589-a311-aa440ebf10f2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "df_perf_monthly: (30336, 12)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Product - Isin | \n",
+ " month | \n",
+ " ret_fund_m | \n",
+ " ret_fund_3m_mean | \n",
+ " ret_fund_6m_mean | \n",
+ " ret_fund_12m_mean | \n",
+ " ret_vol_3m | \n",
+ " ret_vol_6m | \n",
+ " drawdown_proxy | \n",
+ " active_return_m | \n",
+ " active_return_3m_mean | \n",
+ " active_return_6m_mean | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 617663 | \n",
+ " FR0007486709 | \n",
+ " 2011-01-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 617668 | \n",
+ " FR0007486709 | \n",
+ " 2011-02-01 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " -0.022198 | \n",
+ " -0.028319 | \n",
+ " -0.028319 | \n",
+ " -0.028319 | \n",
+ "
\n",
+ " \n",
+ " | 617672 | \n",
+ " FR0007486709 | \n",
+ " 2011-03-01 | \n",
+ " -0.012954 | \n",
+ " -0.017576 | \n",
+ " -0.017576 | \n",
+ " -0.017576 | \n",
+ " 0.006537 | \n",
+ " 0.006537 | \n",
+ " -0.034864 | \n",
+ " 0.014369 | \n",
+ " -0.006975 | \n",
+ " -0.006975 | \n",
+ "
\n",
+ " \n",
+ " | 617677 | \n",
+ " FR0007486709 | \n",
+ " 2011-04-01 | \n",
+ " 0.024512 | \n",
+ " -0.003547 | \n",
+ " -0.003547 | \n",
+ " -0.003547 | \n",
+ " 0.024735 | \n",
+ " 0.024735 | \n",
+ " -0.011207 | \n",
+ " -0.006997 | \n",
+ " -0.006982 | \n",
+ " -0.006982 | \n",
+ "
\n",
+ " \n",
+ " | 617681 | \n",
+ " FR0007486709 | \n",
+ " 2011-05-01 | \n",
+ " -0.008415 | \n",
+ " 0.001048 | \n",
+ " -0.004764 | \n",
+ " -0.004764 | \n",
+ " 0.020447 | \n",
+ " 0.020343 | \n",
+ " -0.019527 | \n",
+ " 0.003691 | \n",
+ " 0.003688 | \n",
+ " -0.004314 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Product - Isin month ret_fund_m ret_fund_3m_mean \\\n",
+ "617663 FR0007486709 2011-01-01 NaN NaN \n",
+ "617668 FR0007486709 2011-02-01 -0.022198 -0.022198 \n",
+ "617672 FR0007486709 2011-03-01 -0.012954 -0.017576 \n",
+ "617677 FR0007486709 2011-04-01 0.024512 -0.003547 \n",
+ "617681 FR0007486709 2011-05-01 -0.008415 0.001048 \n",
+ "\n",
+ " ret_fund_6m_mean ret_fund_12m_mean ret_vol_3m ret_vol_6m \\\n",
+ "617663 NaN NaN NaN NaN \n",
+ "617668 -0.022198 -0.022198 NaN NaN \n",
+ "617672 -0.017576 -0.017576 0.006537 0.006537 \n",
+ "617677 -0.003547 -0.003547 0.024735 0.024735 \n",
+ "617681 -0.004764 -0.004764 0.020447 0.020343 \n",
+ "\n",
+ " drawdown_proxy active_return_m active_return_3m_mean \\\n",
+ "617663 0.000000 NaN NaN \n",
+ "617668 -0.022198 -0.028319 -0.028319 \n",
+ "617672 -0.034864 0.014369 -0.006975 \n",
+ "617677 -0.011207 -0.006997 -0.006982 \n",
+ "617681 -0.019527 0.003691 0.003688 \n",
+ "\n",
+ " active_return_6m_mean \n",
+ "617663 NaN \n",
+ "617668 -0.028319 \n",
+ "617672 -0.006975 \n",
+ "617677 -0.006982 \n",
+ "617681 -0.004314 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df_perf_monthly = None\n",
+ "\n",
+ "if df_perf_monthly is None:\n",
+ " df_perf_monthly = (\n",
+ " df_nav_m[\n",
+ " [\n",
+ " NAV_ISIN_COL, \"month\",\n",
+ " \"ret_fund_m\",\n",
+ " \"ret_fund_3m_mean\",\n",
+ " \"ret_fund_6m_mean\",\n",
+ " \"ret_fund_12m_mean\",\n",
+ " \"ret_vol_3m\",\n",
+ " \"ret_vol_6m\",\n",
+ " \"drawdown_proxy\"\n",
+ " ]\n",
+ " + ([ \"active_return_m\", \"active_return_3m_mean\", \"active_return_6m_mean\" ] if \"active_return_m\" in df_nav_m.columns else [])\n",
+ " ]\n",
+ " .rename(columns={NAV_ISIN_COL: ISIN_COL})\n",
+ " .copy()\n",
+ " ) if df_nav_m is not None else None\n",
+ "\n",
+ "print(\"df_perf_monthly:\", None if df_perf_monthly is None else df_perf_monthly.shape)\n",
+ "if df_perf_monthly is not None:\n",
+ " display(df_perf_monthly.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "70a5eae1-8d98-427a-8aab-f10a4c8ad782",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Product - Isin | \n",
+ " month | \n",
+ " ret_fund_m | \n",
+ " ret_fund_3m_mean | \n",
+ " ret_fund_6m_mean | \n",
+ " ret_fund_12m_mean | \n",
+ " ret_vol_3m | \n",
+ " ret_vol_6m | \n",
+ " drawdown_proxy | \n",
+ " active_return_m | \n",
+ " active_return_3m_mean | \n",
+ " active_return_6m_mean | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 617663 | \n",
+ " FR0007486709 | \n",
+ " 2011-01-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 617668 | \n",
+ " FR0007486709 | \n",
+ " 2011-02-01 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ " -0.022198 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " -0.022198 | \n",
+ " -0.028319 | \n",
+ " -0.028319 | \n",
+ " -0.028319 | \n",
+ "
\n",
+ " \n",
+ " | 617672 | \n",
+ " FR0007486709 | \n",
+ " 2011-03-01 | \n",
+ " -0.012954 | \n",
+ " -0.017576 | \n",
+ " -0.017576 | \n",
+ " -0.017576 | \n",
+ " 0.006537 | \n",
+ " 0.006537 | \n",
+ " -0.034864 | \n",
+ " 0.014369 | \n",
+ " -0.006975 | \n",
+ " -0.006975 | \n",
+ "
\n",
+ " \n",
+ " | 617677 | \n",
+ " FR0007486709 | \n",
+ " 2011-04-01 | \n",
+ " 0.024512 | \n",
+ " -0.003547 | \n",
+ " -0.003547 | \n",
+ " -0.003547 | \n",
+ " 0.024735 | \n",
+ " 0.024735 | \n",
+ " -0.011207 | \n",
+ " -0.006997 | \n",
+ " -0.006982 | \n",
+ " -0.006982 | \n",
+ "
\n",
+ " \n",
+ " | 617681 | \n",
+ " FR0007486709 | \n",
+ " 2011-05-01 | \n",
+ " -0.008415 | \n",
+ " 0.001048 | \n",
+ " -0.004764 | \n",
+ " -0.004764 | \n",
+ " 0.020447 | \n",
+ " 0.020343 | \n",
+ " -0.019527 | \n",
+ " 0.003691 | \n",
+ " 0.003688 | \n",
+ " -0.004314 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 89 | \n",
+ " LU3149200233 | \n",
+ " 2025-10-01 | \n",
+ " 0.035073 | \n",
+ " 0.035073 | \n",
+ " 0.035073 | \n",
+ " 0.035073 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.016577 | \n",
+ " 0.016577 | \n",
+ " 0.016577 | \n",
+ "
\n",
+ " \n",
+ " | 35 | \n",
+ " LU3149200746 | \n",
+ " 2025-09-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 51 | \n",
+ " LU3149200746 | \n",
+ " 2025-10-01 | \n",
+ " 0.035261 | \n",
+ " 0.035261 | \n",
+ " 0.035261 | \n",
+ " 0.035261 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " 0.016766 | \n",
+ " 0.016766 | \n",
+ " 0.016766 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " LU3186888858 | \n",
+ " 2025-10-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " LU3198990908 | \n",
+ " 2025-10-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
30336 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Product - Isin month ret_fund_m ret_fund_3m_mean \\\n",
+ "617663 FR0007486709 2011-01-01 NaN NaN \n",
+ "617668 FR0007486709 2011-02-01 -0.022198 -0.022198 \n",
+ "617672 FR0007486709 2011-03-01 -0.012954 -0.017576 \n",
+ "617677 FR0007486709 2011-04-01 0.024512 -0.003547 \n",
+ "617681 FR0007486709 2011-05-01 -0.008415 0.001048 \n",
+ "... ... ... ... ... \n",
+ "89 LU3149200233 2025-10-01 0.035073 0.035073 \n",
+ "35 LU3149200746 2025-09-01 NaN NaN \n",
+ "51 LU3149200746 2025-10-01 0.035261 0.035261 \n",
+ "16 LU3186888858 2025-10-01 NaN NaN \n",
+ "4 LU3198990908 2025-10-01 NaN NaN \n",
+ "\n",
+ " ret_fund_6m_mean ret_fund_12m_mean ret_vol_3m ret_vol_6m \\\n",
+ "617663 NaN NaN NaN NaN \n",
+ "617668 -0.022198 -0.022198 NaN NaN \n",
+ "617672 -0.017576 -0.017576 0.006537 0.006537 \n",
+ "617677 -0.003547 -0.003547 0.024735 0.024735 \n",
+ "617681 -0.004764 -0.004764 0.020447 0.020343 \n",
+ "... ... ... ... ... \n",
+ "89 0.035073 0.035073 NaN NaN \n",
+ "35 NaN NaN NaN NaN \n",
+ "51 0.035261 0.035261 NaN NaN \n",
+ "16 NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN \n",
+ "\n",
+ " drawdown_proxy active_return_m active_return_3m_mean \\\n",
+ "617663 0.000000 NaN NaN \n",
+ "617668 -0.022198 -0.028319 -0.028319 \n",
+ "617672 -0.034864 0.014369 -0.006975 \n",
+ "617677 -0.011207 -0.006997 -0.006982 \n",
+ "617681 -0.019527 0.003691 0.003688 \n",
+ "... ... ... ... \n",
+ "89 0.000000 0.016577 0.016577 \n",
+ "35 0.000000 NaN NaN \n",
+ "51 0.000000 0.016766 0.016766 \n",
+ "16 0.000000 NaN NaN \n",
+ "4 0.000000 NaN NaN \n",
+ "\n",
+ " active_return_6m_mean \n",
+ "617663 NaN \n",
+ "617668 -0.028319 \n",
+ "617672 -0.006975 \n",
+ "617677 -0.006982 \n",
+ "617681 -0.004314 \n",
+ "... ... \n",
+ "89 0.016577 \n",
+ "35 NaN \n",
+ "51 0.016766 \n",
+ "16 NaN \n",
+ "4 NaN \n",
+ "\n",
+ "[30336 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_perf_monthly"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "091fc8ff",
+ "metadata": {},
+ "source": [
+ "## 14. Enrichissement de `df_rel_m` avec performances et taux"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "82872d77",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Registrar Account - ID | \n",
+ " Product - Isin | \n",
+ " month | \n",
+ " aum_qty | \n",
+ " aum_val | \n",
+ " Product - Asset Type_x | \n",
+ " Product - Strategy_x | \n",
+ " Product - Legal Status_x | \n",
+ " Product - Is Dedie ?_x | \n",
+ " Product - Fund_x | \n",
+ " Product - Shareclass Type_x | \n",
+ " Product - Shareclass Currency_x | \n",
+ " Registrar Account - Region | \n",
+ " RegistrarAccount - Country | \n",
+ " net_flow_qty | \n",
+ " gross_flow_qty | \n",
+ " sub_qty | \n",
+ " red_qty | \n",
+ " n_tx | \n",
+ " Product - Asset Type_y | \n",
+ " Product - Strategy_y | \n",
+ " Product - Legal Status_y | \n",
+ " Product - Is Dedie ?_y | \n",
+ " Product - Fund_y | \n",
+ " Product - Shareclass Type_y | \n",
+ " Product - Shareclass Currency_y | \n",
+ " isin_held_flag | \n",
+ " active_rel_month | \n",
+ " flow_to_aum | \n",
+ " turnover_rel | \n",
+ " ret_fund_m | \n",
+ " ret_fund_3m_mean | \n",
+ " ret_fund_6m_mean | \n",
+ " ret_fund_12m_mean | \n",
+ " ret_vol_3m | \n",
+ " ret_vol_6m | \n",
+ " drawdown_proxy | \n",
+ " active_return_m | \n",
+ " active_return_3m_mean | \n",
+ " active_return_6m_mean | \n",
+ " delta_rate_m | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-01-01 | \n",
+ " 49094.915 | \n",
+ " 3.242523e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " 673.990 | \n",
+ " 1045.99 | \n",
+ " 859.990 | \n",
+ " -186.000 | \n",
+ " 9.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.013728 | \n",
+ " 0.021305 | \n",
+ " 0.065542 | \n",
+ " 0.029403 | \n",
+ " 0.020252 | \n",
+ " 0.013566 | \n",
+ " 0.031299 | \n",
+ " 0.023619 | \n",
+ " 0.000000 | \n",
+ " 0.003148 | \n",
+ " -0.000504 | \n",
+ " -0.002776 | \n",
+ " -0.058 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-02-01 | \n",
+ " 49797.915 | \n",
+ " 3.368032e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " 988.000 | \n",
+ " 1712.00 | \n",
+ " 1350.000 | \n",
+ " -362.000 | \n",
+ " 12.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.019840 | \n",
+ " 0.034378 | \n",
+ " 0.025620 | \n",
+ " 0.034295 | \n",
+ " 0.020209 | \n",
+ " 0.015671 | \n",
+ " 0.027938 | \n",
+ " 0.023607 | \n",
+ " 0.000000 | \n",
+ " -0.002505 | \n",
+ " -0.001482 | \n",
+ " -0.002621 | \n",
+ " -0.022 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-03-01 | \n",
+ " 50302.627 | \n",
+ " 3.505691e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " 9.710 | \n",
+ " 1561.71 | \n",
+ " 785.710 | \n",
+ " -776.000 | \n",
+ " 12.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.000193 | \n",
+ " 0.031046 | \n",
+ " 0.030614 | \n",
+ " 0.040592 | \n",
+ " 0.024664 | \n",
+ " 0.017903 | \n",
+ " 0.021751 | \n",
+ " 0.022402 | \n",
+ " 0.000000 | \n",
+ " 0.000187 | \n",
+ " 0.000277 | \n",
+ " -0.001807 | \n",
+ " -0.014 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-04-01 | \n",
+ " 50219.393 | \n",
+ " 3.452433e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " -123.234 | \n",
+ " 1830.19 | \n",
+ " 853.478 | \n",
+ " -976.712 | \n",
+ " 11.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " -0.002454 | \n",
+ " 0.036443 | \n",
+ " -0.023909 | \n",
+ " 0.010775 | \n",
+ " 0.020089 | \n",
+ " 0.016308 | \n",
+ " 0.030141 | \n",
+ " 0.029315 | \n",
+ " -0.023909 | \n",
+ " -0.001525 | \n",
+ " -0.001281 | \n",
+ " -0.000893 | \n",
+ " -0.077 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-05-01 | \n",
+ " 53685.393 | \n",
+ " 3.699729e+07 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " Switzerland | \n",
+ " Switzerland | \n",
+ " 121.000 | \n",
+ " 529.00 | \n",
+ " 325.000 | \n",
+ " -204.000 | \n",
+ " 6.0 | \n",
+ " Diversified | \n",
+ " Patrimoine | \n",
+ " FCP | \n",
+ " NO | \n",
+ " Carmignac Patrimoine | \n",
+ " A | \n",
+ " EUR | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.002254 | \n",
+ " 0.009854 | \n",
+ " 0.003240 | \n",
+ " 0.003315 | \n",
+ " 0.018805 | \n",
+ " 0.014758 | \n",
+ " 0.027262 | \n",
+ " 0.029957 | \n",
+ " -0.020747 | \n",
+ " -0.006286 | \n",
+ " -0.002542 | \n",
+ " -0.002012 | \n",
+ " -0.053 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Registrar Account - ID Product - Isin month aum_qty aum_val \\\n",
+ "0 18872 FR0010135103 2015-01-01 49094.915 3.242523e+07 \n",
+ "1 18872 FR0010135103 2015-02-01 49797.915 3.368032e+07 \n",
+ "2 18872 FR0010135103 2015-03-01 50302.627 3.505691e+07 \n",
+ "3 18872 FR0010135103 2015-04-01 50219.393 3.452433e+07 \n",
+ "4 18872 FR0010135103 2015-05-01 53685.393 3.699729e+07 \n",
+ "\n",
+ " Product - Asset Type_x Product - Strategy_x Product - Legal Status_x \\\n",
+ "0 Diversified Patrimoine FCP \n",
+ "1 Diversified Patrimoine FCP \n",
+ "2 Diversified Patrimoine FCP \n",
+ "3 Diversified Patrimoine FCP \n",
+ "4 Diversified Patrimoine FCP \n",
+ "\n",
+ " Product - Is Dedie ?_x Product - Fund_x Product - Shareclass Type_x \\\n",
+ "0 NO Carmignac Patrimoine A \n",
+ "1 NO Carmignac Patrimoine A \n",
+ "2 NO Carmignac Patrimoine A \n",
+ "3 NO Carmignac Patrimoine A \n",
+ "4 NO Carmignac Patrimoine A \n",
+ "\n",
+ " Product - Shareclass Currency_x Registrar Account - Region \\\n",
+ "0 EUR Switzerland \n",
+ "1 EUR Switzerland \n",
+ "2 EUR Switzerland \n",
+ "3 EUR Switzerland \n",
+ "4 EUR Switzerland \n",
+ "\n",
+ " RegistrarAccount - Country net_flow_qty gross_flow_qty sub_qty red_qty \\\n",
+ "0 Switzerland 673.990 1045.99 859.990 -186.000 \n",
+ "1 Switzerland 988.000 1712.00 1350.000 -362.000 \n",
+ "2 Switzerland 9.710 1561.71 785.710 -776.000 \n",
+ "3 Switzerland -123.234 1830.19 853.478 -976.712 \n",
+ "4 Switzerland 121.000 529.00 325.000 -204.000 \n",
+ "\n",
+ " n_tx Product - Asset Type_y Product - Strategy_y Product - Legal Status_y \\\n",
+ "0 9.0 Diversified Patrimoine FCP \n",
+ "1 12.0 Diversified Patrimoine FCP \n",
+ "2 12.0 Diversified Patrimoine FCP \n",
+ "3 11.0 Diversified Patrimoine FCP \n",
+ "4 6.0 Diversified Patrimoine FCP \n",
+ "\n",
+ " Product - Is Dedie ?_y Product - Fund_y Product - Shareclass Type_y \\\n",
+ "0 NO Carmignac Patrimoine A \n",
+ "1 NO Carmignac Patrimoine A \n",
+ "2 NO Carmignac Patrimoine A \n",
+ "3 NO Carmignac Patrimoine A \n",
+ "4 NO Carmignac Patrimoine A \n",
+ "\n",
+ " Product - Shareclass Currency_y isin_held_flag active_rel_month \\\n",
+ "0 EUR 1 1 \n",
+ "1 EUR 1 1 \n",
+ "2 EUR 1 1 \n",
+ "3 EUR 1 1 \n",
+ "4 EUR 1 1 \n",
+ "\n",
+ " flow_to_aum turnover_rel ret_fund_m ret_fund_3m_mean ret_fund_6m_mean \\\n",
+ "0 0.013728 0.021305 0.065542 0.029403 0.020252 \n",
+ "1 0.019840 0.034378 0.025620 0.034295 0.020209 \n",
+ "2 0.000193 0.031046 0.030614 0.040592 0.024664 \n",
+ "3 -0.002454 0.036443 -0.023909 0.010775 0.020089 \n",
+ "4 0.002254 0.009854 0.003240 0.003315 0.018805 \n",
+ "\n",
+ " ret_fund_12m_mean ret_vol_3m ret_vol_6m drawdown_proxy active_return_m \\\n",
+ "0 0.013566 0.031299 0.023619 0.000000 0.003148 \n",
+ "1 0.015671 0.027938 0.023607 0.000000 -0.002505 \n",
+ "2 0.017903 0.021751 0.022402 0.000000 0.000187 \n",
+ "3 0.016308 0.030141 0.029315 -0.023909 -0.001525 \n",
+ "4 0.014758 0.027262 0.029957 -0.020747 -0.006286 \n",
+ "\n",
+ " active_return_3m_mean active_return_6m_mean delta_rate_m \n",
+ "0 -0.000504 -0.002776 -0.058 \n",
+ "1 -0.001482 -0.002621 -0.022 \n",
+ "2 0.000277 -0.001807 -0.014 \n",
+ "3 -0.001281 -0.000893 -0.077 \n",
+ "4 -0.002542 -0.002012 -0.053 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "if df_perf_monthly is not None:\n",
+ " df_rel_m = df_rel_m.merge(\n",
+ " df_perf_monthly,\n",
+ " on=[ISIN_COL, \"month\"],\n",
+ " how=\"left\"\n",
+ " )\n",
+ "\n",
+ "if df_rates_m is not None:\n",
+ " df_rel_m = df_rel_m.merge(\n",
+ " df_rates_m[[\"month\", \"delta_rate_m\"]],\n",
+ " on=\"month\",\n",
+ " how=\"left\"\n",
+ " )\n",
+ "\n",
+ "for c in [\n",
+ " \"ret_fund_m\",\n",
+ " \"ret_fund_3m_mean\",\n",
+ " \"ret_fund_6m_mean\",\n",
+ " \"ret_fund_12m_mean\",\n",
+ " \"ret_vol_3m\",\n",
+ " \"ret_vol_6m\",\n",
+ " \"drawdown_proxy\",\n",
+ " \"active_return_m\",\n",
+ " \"active_return_3m_mean\",\n",
+ " \"active_return_6m_mean\",\n",
+ " \"delta_rate_m\"\n",
+ "]:\n",
+ " if c in df_rel_m.columns:\n",
+ " df_rel_m[c] = pd.to_numeric(df_rel_m[c], errors=\"coerce\")\n",
+ "\n",
+ "display(df_rel_m.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "03d60451-3c2f-4d50-bb78-d00ba6a50a39",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "432"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_rel_m[\"Registrar Account - ID\"].nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "667c5372-0438-4c30-b4f7-9ffd9e09b149",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "349"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_rel_m[\"Product - Isin\"].nunique() #349"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6f3b149c-899b-4c14-834e-6ec5a66cceca",
+ "metadata": {},
+ "source": [
+ "- ret_fund_m: Rendement mensuel du fond.\n",
+ "- ret_fund_3m_mean : Moyenne des rendements mensuels sur les 3 derniers mois.\n",
+ "- ret_fund_6m_mean : Moyenne des rendements mensuels sur les 6 derniers mois.\n",
+ "- ret_fund_12m_mean : Moyenne des rendements mensuels sur les 12 derniers mois.\n",
+ "- ret_vol_3m, ret_vol_6m : Volatilité récente du fond, utile pour capter si le client agit dans des phases agitées.\n",
+ "- drawdown_proxy : Distance au plus haut historique.\n",
+ "- active_return_m :Surperformance mensuelle vs benchmark.\n",
+ "- active_return_3m_mean, active_return_6m_mean : Surperformance moyenne récente."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "90772aeb-9aeb-4cc1-bbe3-80f7ca794872",
+ "metadata": {},
+ "source": [
+ "## Clustering par fond"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aa6b70d5-7d78-4b72-923f-fd82956f4284",
+ "metadata": {},
+ "source": [
+ "## choisir les fonds et normaliser"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "e860b58d-3f3b-457b-b574-5b94a4582a97",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Registrar Account - ID', 'Product - Isin', 'month', 'aum_qty',\n",
+ " 'aum_val', 'Product - Asset Type_x', 'Product - Strategy_x',\n",
+ " 'Product - Legal Status_x', 'Product - Is Dedie ?_x',\n",
+ " 'Product - Fund_x', 'Product - Shareclass Type_x',\n",
+ " 'Product - Shareclass Currency_x', 'Registrar Account - Region',\n",
+ " 'RegistrarAccount - Country', 'net_flow_qty', 'gross_flow_qty',\n",
+ " 'sub_qty', 'red_qty', 'n_tx', 'Product - Asset Type_y',\n",
+ " 'Product - Strategy_y', 'Product - Legal Status_y',\n",
+ " 'Product - Is Dedie ?_y', 'Product - Fund_y',\n",
+ " 'Product - Shareclass Type_y', 'Product - Shareclass Currency_y',\n",
+ " 'isin_held_flag', 'active_rel_month', 'flow_to_aum', 'turnover_rel',\n",
+ " 'ret_fund_m', 'ret_fund_3m_mean', 'ret_fund_6m_mean',\n",
+ " 'ret_fund_12m_mean', 'ret_vol_3m', 'ret_vol_6m', 'drawdown_proxy',\n",
+ " 'active_return_m', 'active_return_3m_mean', 'active_return_6m_mean',\n",
+ " 'delta_rate_m'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_rel_m.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "2fcb1f21-df60-4549-8876-b93d4126f1c5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Product - Isin | \n",
+ " Product - Isin | \n",
+ " fund_family | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " FR0010135103 | \n",
+ " FR0010135103 | \n",
+ " FR0010135103 | \n",
+ "
\n",
+ " \n",
+ " | 91 | \n",
+ " FR0010147603 | \n",
+ " FR0010147603 | \n",
+ " FR0010147603 | \n",
+ "
\n",
+ " \n",
+ " | 99 | \n",
+ " FR0010148981 | \n",
+ " FR0010148981 | \n",
+ " FR0010148981 | \n",
+ "
\n",
+ " \n",
+ " | 165 | \n",
+ " FR0010149112 | \n",
+ " FR0010149112 | \n",
+ " FR0010149112 | \n",
+ "
\n",
+ " \n",
+ " | 177 | \n",
+ " FR0010149120 | \n",
+ " FR0010149120 | \n",
+ " FR0010149120 | \n",
+ "
\n",
+ " \n",
+ " | 256 | \n",
+ " FR0010149161 | \n",
+ " FR0010149161 | \n",
+ " FR0010149161 | \n",
+ "
\n",
+ " \n",
+ " | 257 | \n",
+ " FR0010149179 | \n",
+ " FR0010149179 | \n",
+ " FR0010149179 | \n",
+ "
\n",
+ " \n",
+ " | 268 | \n",
+ " FR0010149203 | \n",
+ " FR0010149203 | \n",
+ " FR0010149203 | \n",
+ "
\n",
+ " \n",
+ " | 269 | \n",
+ " FR0010149302 | \n",
+ " FR0010149302 | \n",
+ " FR0010149302 | \n",
+ "
\n",
+ " \n",
+ " | 317 | \n",
+ " FR0010306142 | \n",
+ " FR0010306142 | \n",
+ " FR0010306142 | \n",
+ "
\n",
+ " \n",
+ " | 331 | \n",
+ " FR0010312660 | \n",
+ " FR0010312660 | \n",
+ " FR0010312660 | \n",
+ "
\n",
+ " \n",
+ " | 338 | \n",
+ " FR0011269067 | \n",
+ " FR0011269067 | \n",
+ " FR0011269067 | \n",
+ "
\n",
+ " \n",
+ " | 364 | \n",
+ " FR0011269083 | \n",
+ " FR0011269083 | \n",
+ " FR0011269083 | \n",
+ "
\n",
+ " \n",
+ " | 366 | \n",
+ " FR0011269091 | \n",
+ " FR0011269091 | \n",
+ " FR0011269091 | \n",
+ "
\n",
+ " \n",
+ " | 367 | \n",
+ " FR0011269109 | \n",
+ " FR0011269109 | \n",
+ " FR0011269109 | \n",
+ "
\n",
+ " \n",
+ " | 370 | \n",
+ " FR0011269125 | \n",
+ " FR0011269125 | \n",
+ " FR0011269125 | \n",
+ "
\n",
+ " \n",
+ " | 371 | \n",
+ " FR0011269182 | \n",
+ " FR0011269182 | \n",
+ " FR0011269182 | \n",
+ "
\n",
+ " \n",
+ " | 373 | \n",
+ " FR0011269190 | \n",
+ " FR0011269190 | \n",
+ " FR0011269190 | \n",
+ "
\n",
+ " \n",
+ " | 388 | \n",
+ " FR0011269588 | \n",
+ " FR0011269588 | \n",
+ " FR0011269588 | \n",
+ "
\n",
+ " \n",
+ " | 392 | \n",
+ " FR0011269596 | \n",
+ " FR0011269596 | \n",
+ " FR0011269596 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Product - Isin Product - Isin fund_family\n",
+ "0 FR0010135103 FR0010135103 FR0010135103\n",
+ "91 FR0010147603 FR0010147603 FR0010147603\n",
+ "99 FR0010148981 FR0010148981 FR0010148981\n",
+ "165 FR0010149112 FR0010149112 FR0010149112\n",
+ "177 FR0010149120 FR0010149120 FR0010149120\n",
+ "256 FR0010149161 FR0010149161 FR0010149161\n",
+ "257 FR0010149179 FR0010149179 FR0010149179\n",
+ "268 FR0010149203 FR0010149203 FR0010149203\n",
+ "269 FR0010149302 FR0010149302 FR0010149302\n",
+ "317 FR0010306142 FR0010306142 FR0010306142\n",
+ "331 FR0010312660 FR0010312660 FR0010312660\n",
+ "338 FR0011269067 FR0011269067 FR0011269067\n",
+ "364 FR0011269083 FR0011269083 FR0011269083\n",
+ "366 FR0011269091 FR0011269091 FR0011269091\n",
+ "367 FR0011269109 FR0011269109 FR0011269109\n",
+ "370 FR0011269125 FR0011269125 FR0011269125\n",
+ "371 FR0011269182 FR0011269182 FR0011269182\n",
+ "373 FR0011269190 FR0011269190 FR0011269190\n",
+ "388 FR0011269588 FR0011269588 FR0011269588\n",
+ "392 FR0011269596 FR0011269596 FR0011269596"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# 1) Définir une \"famille économique\" de fond à partir de Product - Fund\n",
+ "# ============================================================\n",
+ "\n",
+ "def normalize_fund_name(x):\n",
+ " if pd.isna(x):\n",
+ " return np.nan\n",
+ " s = str(x).strip().upper()\n",
+ " s = s.replace(\"_\", \" \").replace(\"-\", \" \")\n",
+ " s = re.sub(r\"\\s+\", \" \", s).strip()\n",
+ " # enlève une année finale type 2023 / 2024 / 2025\n",
+ " s = re.sub(r\"\\b20\\d{2}\\b$\", \"\", s).strip()\n",
+ " s = re.sub(r\"\\s+\", \" \", s).strip()\n",
+ " return s\n",
+ "\n",
+ "df_rel_m[\"fund_family\"] = df_rel_m[\"Product - Isin\"].apply(normalize_fund_name)\n",
+ "\n",
+ "display(\n",
+ " df_rel_m[[ISIN_COL, \"Product - Isin\", \"fund_family\"]]\n",
+ " .drop_duplicates()\n",
+ " .head(20)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "95004754-7f24-4769-9f64-9cfbab43ddf8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# # fond : Intuition\n",
+ "\n",
+ "# On veut étudier les fonds :\n",
+ "\n",
+ "# les plus importants en encours\n",
+ "# avec assez de clients\n",
+ "# et donc assez de signal pour clusteriser"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "31a67ca0-bdcd-4461-af7d-1184b5368a06",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " fund_family | \n",
+ " aum_val_total | \n",
+ " n_clients | \n",
+ " n_isin | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " FR0010149120 | \n",
+ " 4.884612e+09 | \n",
+ " 282 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " FR0010135103 | \n",
+ " 4.502206e+09 | \n",
+ " 290 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " FR0010148981 | \n",
+ " 3.237251e+09 | \n",
+ " 260 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " FR00140081Y1 | \n",
+ " 1.421801e+09 | \n",
+ " 139 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " LU0992624949 | \n",
+ " 1.290916e+09 | \n",
+ " 151 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " LU1623762843 | \n",
+ " 1.206331e+09 | \n",
+ " 201 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " FR001400KAV4 | \n",
+ " 1.178233e+09 | \n",
+ " 129 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " LU0336084032 | \n",
+ " 1.051440e+09 | \n",
+ " 229 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " FR0010149302 | \n",
+ " 9.536118e+08 | \n",
+ " 255 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " FR0010149161 | \n",
+ " 9.442772e+08 | \n",
+ " 165 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " LU0992631217 | \n",
+ " 9.104539e+08 | \n",
+ " 139 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " LU1299306321 | \n",
+ " 8.665762e+08 | \n",
+ " 111 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " FR001400U4S3 | \n",
+ " 6.569309e+08 | \n",
+ " 120 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " FR0010306142 | \n",
+ " 6.219282e+08 | \n",
+ " 204 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " FR0010312660 | \n",
+ " 3.721387e+08 | \n",
+ " 168 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " fund_family aum_val_total n_clients n_isin\n",
+ "0 FR0010149120 4.884612e+09 282 1\n",
+ "1 FR0010135103 4.502206e+09 290 1\n",
+ "2 FR0010148981 3.237251e+09 260 1\n",
+ "3 FR00140081Y1 1.421801e+09 139 1\n",
+ "4 LU0992624949 1.290916e+09 151 1\n",
+ "5 LU1623762843 1.206331e+09 201 1\n",
+ "6 FR001400KAV4 1.178233e+09 129 1\n",
+ "7 LU0336084032 1.051440e+09 229 1\n",
+ "8 FR0010149302 9.536118e+08 255 1\n",
+ "9 FR0010149161 9.442772e+08 165 1\n",
+ "10 LU0992631217 9.104539e+08 139 1\n",
+ "11 LU1299306321 8.665762e+08 111 1\n",
+ "12 FR001400U4S3 6.569309e+08 120 1\n",
+ "13 FR0010306142 6.219282e+08 204 1\n",
+ "14 FR0010312660 3.721387e+08 168 1"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# 2) Choisir les top fonds (au niveau fund_family)\n",
+ "# ============================================================\n",
+ "\n",
+ "TARGET_DATE = pd.Timestamp(\"2025-10-01\")\n",
+ "\n",
+ "top_fund_families = (\n",
+ " df_rel_m[df_rel_m[\"month\"] == TARGET_DATE]\n",
+ " .groupby(\"fund_family\", as_index=False)\n",
+ " .agg(\n",
+ " aum_val_total=(\"aum_val\", \"sum\"),\n",
+ " n_clients=(ID_COL, \"nunique\"),\n",
+ " n_isin=(ISIN_COL, \"nunique\")\n",
+ " )\n",
+ " .sort_values([\"aum_val_total\", \"n_clients\"], ascending=[False, False])\n",
+ " .reset_index(drop=True)\n",
+ ")\n",
+ "\n",
+ "# garder des fonds assez gros et avec assez de clients\n",
+ "top_fund_families = top_fund_families[top_fund_families[\"n_clients\"] >= 20].head(15).copy()\n",
+ "\n",
+ "display(top_fund_families)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "d08cc033-0bf2-42a7-9f4d-ffee5afab88a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " fund_family | \n",
+ " aum_val_total | \n",
+ " n_clients | \n",
+ " n_isin | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " FR0010149120 | \n",
+ " 4.884612e+09 | \n",
+ " 282 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " FR0010135103 | \n",
+ " 4.502206e+09 | \n",
+ " 290 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " FR0010148981 | \n",
+ " 3.237251e+09 | \n",
+ " 260 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " FR00140081Y1 | \n",
+ " 1.421801e+09 | \n",
+ " 139 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " LU0992624949 | \n",
+ " 1.290916e+09 | \n",
+ " 151 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " LU1623762843 | \n",
+ " 1.206331e+09 | \n",
+ " 201 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " FR001400KAV4 | \n",
+ " 1.178233e+09 | \n",
+ " 129 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " LU0336084032 | \n",
+ " 1.051440e+09 | \n",
+ " 229 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " FR0010149302 | \n",
+ " 9.536118e+08 | \n",
+ " 255 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " FR0010149161 | \n",
+ " 9.442772e+08 | \n",
+ " 165 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " LU0992631217 | \n",
+ " 9.104539e+08 | \n",
+ " 139 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " LU1299306321 | \n",
+ " 8.665762e+08 | \n",
+ " 111 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " FR001400U4S3 | \n",
+ " 6.569309e+08 | \n",
+ " 120 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " FR0010306142 | \n",
+ " 6.219282e+08 | \n",
+ " 204 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " FR0010312660 | \n",
+ " 3.721387e+08 | \n",
+ " 168 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " fund_family aum_val_total n_clients n_isin\n",
+ "0 FR0010149120 4.884612e+09 282 1\n",
+ "1 FR0010135103 4.502206e+09 290 1\n",
+ "2 FR0010148981 3.237251e+09 260 1\n",
+ "3 FR00140081Y1 1.421801e+09 139 1\n",
+ "4 LU0992624949 1.290916e+09 151 1\n",
+ "5 LU1623762843 1.206331e+09 201 1\n",
+ "6 FR001400KAV4 1.178233e+09 129 1\n",
+ "7 LU0336084032 1.051440e+09 229 1\n",
+ "8 FR0010149302 9.536118e+08 255 1\n",
+ "9 FR0010149161 9.442772e+08 165 1\n",
+ "10 LU0992631217 9.104539e+08 139 1\n",
+ "11 LU1299306321 8.665762e+08 111 1\n",
+ "12 FR001400U4S3 6.569309e+08 120 1\n",
+ "13 FR0010306142 6.219282e+08 204 1\n",
+ "14 FR0010312660 3.721387e+08 168 1"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "top_fund_families = top_fund_families[\n",
+ " (top_fund_families[\"n_clients\"] >= 20)\n",
+ "].copy()\n",
+ "top_fund_families #15 plus gros fond en terme de aum val total et n clients"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b56b8070-3d60-4d18-9c2f-f800c88ec804",
+ "metadata": {},
+ "source": [
+ "## Clustering par fund family"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1c765db8-7120-4176-a90c-2fffdbc85b54",
+ "metadata": {},
+ "source": [
+ "### construire la base mensuelle au niveau client × fond"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "5014d562-073b-4ff8-b2c3-cf7343e1badf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Registrar Account - ID | \n",
+ " fund_family | \n",
+ " month | \n",
+ " fund_aum_qty | \n",
+ " fund_aum_val | \n",
+ " net_flow_qty | \n",
+ " gross_flow_qty | \n",
+ " n_tx | \n",
+ " ret_fund_m | \n",
+ " ret_fund_6m_mean | \n",
+ " portfolio_aum_total | \n",
+ " fund_weight | \n",
+ " held_flag | \n",
+ " active_flag | \n",
+ " flow_to_aum | \n",
+ " turnover | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-01-01 | \n",
+ " 49094.915 | \n",
+ " 3.242523e+07 | \n",
+ " 673.990 | \n",
+ " 1045.99 | \n",
+ " 9.0 | \n",
+ " 0.065542 | \n",
+ " 0.020252 | \n",
+ " 179864.637 | \n",
+ " 0.272953 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.013728 | \n",
+ " 0.021305 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-02-01 | \n",
+ " 49797.915 | \n",
+ " 3.368032e+07 | \n",
+ " 988.000 | \n",
+ " 1712.00 | \n",
+ " 12.0 | \n",
+ " 0.025620 | \n",
+ " 0.020209 | \n",
+ " 186761.736 | \n",
+ " 0.266637 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.019840 | \n",
+ " 0.034378 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-03-01 | \n",
+ " 50302.627 | \n",
+ " 3.505691e+07 | \n",
+ " 9.710 | \n",
+ " 1561.71 | \n",
+ " 12.0 | \n",
+ " 0.030614 | \n",
+ " 0.024664 | \n",
+ " 190357.718 | \n",
+ " 0.264252 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.000193 | \n",
+ " 0.031046 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-04-01 | \n",
+ " 50219.393 | \n",
+ " 3.452433e+07 | \n",
+ " -123.234 | \n",
+ " 1830.19 | \n",
+ " 11.0 | \n",
+ " -0.023909 | \n",
+ " 0.020089 | \n",
+ " 191429.324 | \n",
+ " 0.262338 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " -0.002454 | \n",
+ " 0.036443 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18872 | \n",
+ " FR0010135103 | \n",
+ " 2015-05-01 | \n",
+ " 53685.393 | \n",
+ " 3.699729e+07 | \n",
+ " 121.000 | \n",
+ " 529.00 | \n",
+ " 6.0 | \n",
+ " 0.003240 | \n",
+ " 0.018805 | \n",
+ " 189056.475 | \n",
+ " 0.283963 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.002254 | \n",
+ " 0.009854 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Registrar Account - ID fund_family month fund_aum_qty fund_aum_val \\\n",
+ "0 18872 FR0010135103 2015-01-01 49094.915 3.242523e+07 \n",
+ "1 18872 FR0010135103 2015-02-01 49797.915 3.368032e+07 \n",
+ "2 18872 FR0010135103 2015-03-01 50302.627 3.505691e+07 \n",
+ "3 18872 FR0010135103 2015-04-01 50219.393 3.452433e+07 \n",
+ "4 18872 FR0010135103 2015-05-01 53685.393 3.699729e+07 \n",
+ "\n",
+ " net_flow_qty gross_flow_qty n_tx ret_fund_m ret_fund_6m_mean \\\n",
+ "0 673.990 1045.99 9.0 0.065542 0.020252 \n",
+ "1 988.000 1712.00 12.0 0.025620 0.020209 \n",
+ "2 9.710 1561.71 12.0 0.030614 0.024664 \n",
+ "3 -123.234 1830.19 11.0 -0.023909 0.020089 \n",
+ "4 121.000 529.00 6.0 0.003240 0.018805 \n",
+ "\n",
+ " portfolio_aum_total fund_weight held_flag active_flag flow_to_aum \\\n",
+ "0 179864.637 0.272953 1 1 0.013728 \n",
+ "1 186761.736 0.266637 1 1 0.019840 \n",
+ "2 190357.718 0.264252 1 1 0.000193 \n",
+ "3 191429.324 0.262338 1 1 -0.002454 \n",
+ "4 189056.475 0.283963 1 1 0.002254 \n",
+ "\n",
+ " turnover \n",
+ "0 0.021305 \n",
+ "1 0.034378 \n",
+ "2 0.031046 \n",
+ "3 0.036443 \n",
+ "4 0.009854 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(962611, 16)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# 3) Construire la base mensuelle client x fund_family x mois\n",
+ "# ============================================================\n",
+ "\n",
+ "portfolio_month = (\n",
+ " df_rel_m\n",
+ " .groupby([ID_COL, \"month\"], as_index=False)\n",
+ " .agg(portfolio_aum_total=(\"aum_qty\", \"sum\"))\n",
+ ")\n",
+ "\n",
+ "family_month = (\n",
+ " df_rel_m\n",
+ " .groupby([ID_COL, \"fund_family\", \"month\"], as_index=False)\n",
+ " .agg(\n",
+ " fund_aum_qty=(\"aum_qty\", \"sum\"),\n",
+ " fund_aum_val=(\"aum_val\", \"sum\"),\n",
+ " net_flow_qty=(\"net_flow_qty\", \"sum\"),\n",
+ " gross_flow_qty=(\"gross_flow_qty\", \"sum\"),\n",
+ " n_tx=(\"n_tx\", \"sum\"),\n",
+ " ret_fund_m=(\"ret_fund_m\", \"mean\"),\n",
+ " ret_fund_6m_mean=(\"ret_fund_6m_mean\", \"mean\"),\n",
+ " )\n",
+ " .merge(portfolio_month, on=[ID_COL, \"month\"], how=\"left\")\n",
+ ")\n",
+ "\n",
+ "family_month[\"fund_weight\"] = family_month[\"fund_aum_qty\"] / (family_month[\"portfolio_aum_total\"].abs() + 1.0)\n",
+ "family_month[\"held_flag\"] = (family_month[\"fund_aum_qty\"] > 0).astype(int)\n",
+ "family_month[\"active_flag\"] = (family_month[\"gross_flow_qty\"] > 0).astype(int)\n",
+ "family_month[\"flow_to_aum\"] = family_month[\"net_flow_qty\"] / (family_month[\"fund_aum_qty\"].abs() + 1.0)\n",
+ "family_month[\"turnover\"] = family_month[\"gross_flow_qty\"] / (family_month[\"fund_aum_qty\"].abs() + 1.0)\n",
+ "\n",
+ "display(family_month.head())\n",
+ "print(family_month.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b1ce1fc3-9526-4d26-a791-9d46561972ef",
+ "metadata": {},
+ "source": [
+ "### Features :\n",
+ "L’idée est de capter :\n",
+ "\n",
+ " fréquence\n",
+ " intensité\n",
+ " taille\n",
+ " timing d’entrée/sortie\n",
+ " relation à la performance\n",
+ " rôle du fond dans le portefeuille\n",
+ " spécificités produit"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "b65c3b5b-d270-409d-9c4d-5218262a5d92",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# 4) Construire le dataset client x fond pour le clustering\n",
+ "# ============================================================\n",
+ "\n",
+ "def build_fund_dataset(fund_family, min_months=3):\n",
+ " g = family_month[family_month[\"fund_family\"] == fund_family].sort_values([ID_COL, \"month\"]).copy()\n",
+ "\n",
+ " if g.empty:\n",
+ " return None\n",
+ "\n",
+ " g[\"prev_held\"] = g.groupby(ID_COL)[\"held_flag\"].shift(1).fillna(0).astype(int)\n",
+ " g[\"entry_event\"] = ((g[\"prev_held\"] == 0) & (g[\"held_flag\"] == 1)).astype(int)\n",
+ " g[\"exit_event\"] = ((g[\"prev_held\"] == 1) & (g[\"held_flag\"] == 0)).astype(int)\n",
+ " g[\"buy_month\"] = (g[\"net_flow_qty\"] > 0).astype(int)\n",
+ "\n",
+ " # rolling simples\n",
+ " g[\"turnover_6m_mean\"] = (\n",
+ " g.groupby(ID_COL)[\"turnover\"]\n",
+ " .transform(lambda s: s.rolling(6, min_periods=1).mean())\n",
+ " )\n",
+ "\n",
+ " g[\"flow_to_aum_6m_mean\"] = (\n",
+ " g.groupby(ID_COL)[\"flow_to_aum\"]\n",
+ " .transform(lambda s: s.rolling(6, min_periods=1).mean())\n",
+ " )\n",
+ "\n",
+ " df_fund = (\n",
+ " g.groupby(ID_COL, as_index=False)\n",
+ " .agg(\n",
+ " n_months_obs=(\"month\", \"nunique\"),\n",
+ " fund_aum_mean=(\"fund_aum_qty\", \"mean\"),\n",
+ " fund_aum_last=(\"fund_aum_qty\", \"last\"),\n",
+ " fund_weight_mean=(\"fund_weight\", \"mean\"),\n",
+ " fund_weight_last=(\"fund_weight\", \"last\"),\n",
+ " held_month_share=(\"held_flag\", \"mean\"),\n",
+ " active_month_share=(\"active_flag\", \"mean\"),\n",
+ " entry_count=(\"entry_event\", \"sum\"),\n",
+ " exit_count=(\"exit_event\", \"sum\"),\n",
+ " turnover_mean=(\"turnover\", \"mean\"),\n",
+ " turnover_6m_mean=(\"turnover_6m_mean\", \"last\"),\n",
+ " flow_to_aum_mean=(\"flow_to_aum\", \"mean\"),\n",
+ " flow_to_aum_6m_mean=(\"flow_to_aum_6m_mean\", \"last\"),\n",
+ " ret_fund_m_mean=(\"ret_fund_m\", \"mean\"),\n",
+ " ret_fund_6m_mean=(\"ret_fund_6m_mean\", \"mean\"),\n",
+ " buy_after_good_perf_share=(\"buy_month\", \"mean\"),\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " # corr flux / perf\n",
+ " corr_block = (\n",
+ " g.groupby(ID_COL)\n",
+ " .apply(lambda z: compute_corr(z[\"net_flow_qty\"], z[\"ret_fund_m\"]))\n",
+ " .rename(\"corr_flow_ret_1m\")\n",
+ " .reset_index()\n",
+ " )\n",
+ "\n",
+ " df_fund = df_fund.merge(corr_block, on=ID_COL, how=\"left\")\n",
+ " df_fund = df_fund[df_fund[\"n_months_obs\"] >= min_months].copy()\n",
+ "\n",
+ " return df_fund"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "f3f7adf4-6f4a-414d-89e5-b8f338f65c86",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ============================================================\n",
+ "# 5) Outils de clustering\n",
+ "# ============================================================\n",
+ "\n",
+ "def prep_matrix(df, feature_cols):\n",
+ " X = df[feature_cols].copy()\n",
+ " X = X.replace([np.inf, -np.inf], np.nan)\n",
+ "\n",
+ " pipe = Pipeline([\n",
+ " (\"imputer\", SimpleImputer(strategy=\"median\")),\n",
+ " (\"scaler\", RobustScaler()),\n",
+ " ])\n",
+ "\n",
+ " X_scaled = pipe.fit_transform(X)\n",
+ " return X_scaled\n",
+ "\n",
+ "def cluster_balance_summary(labels):\n",
+ " vc = pd.Series(labels).value_counts().sort_index()\n",
+ " n = int(vc.sum())\n",
+ " return {\n",
+ " \"n_clusters\": int(len(vc)),\n",
+ " \"min_cluster_size\": int(vc.min()),\n",
+ " \"max_cluster_size\": int(vc.max()),\n",
+ " \"dominant_cluster_share\": float(vc.max() / n) if n > 0 else np.nan,\n",
+ " \"singleton_clusters\": int((vc == 1).sum()),\n",
+ " }\n",
+ "\n",
+ "def evaluate_partition(X_scaled, labels):\n",
+ " out = cluster_balance_summary(labels)\n",
+ "\n",
+ " if len(np.unique(labels)) < 2:\n",
+ " out[\"silhouette\"] = np.nan\n",
+ " out[\"davies_bouldin\"] = np.nan\n",
+ " out[\"calinski_harabasz\"] = np.nan\n",
+ " return out\n",
+ "\n",
+ " out[\"silhouette\"] = silhouette_score(X_scaled, labels)\n",
+ " out[\"davies_bouldin\"] = davies_bouldin_score(X_scaled, labels)\n",
+ " out[\"calinski_harabasz\"] = calinski_harabasz_score(X_scaled, labels)\n",
+ " return out"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "668aed37-0f3c-4d2b-9b8f-828d4ce24889",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# k=2,.,10 pour chaque fond \n",
+ "\n",
+ "# ============================================================\n",
+ "# 6) Tester K = 2 à 10 pour un fond\n",
+ "# ============================================================\n",
+ "\n",
+ "FEATURE_COLS_FUND = [\n",
+ " \"fund_weight_mean\",\n",
+ " \"fund_weight_last\",\n",
+ " \"fund_aum_mean\",\n",
+ " \"fund_aum_last\",\n",
+ " \"held_month_share\",\n",
+ " \"active_month_share\",\n",
+ " \"entry_count\",\n",
+ " \"exit_count\",\n",
+ " \"turnover_mean\",\n",
+ " \"turnover_6m_mean\",\n",
+ " \"flow_to_aum_mean\",\n",
+ " \"flow_to_aum_6m_mean\",\n",
+ " \"corr_flow_ret_1m\",\n",
+ " \"buy_after_good_perf_share\",\n",
+ "]\n",
+ "\n",
+ "def run_kmeans_grid_for_fund(fund_family, k_min=4, k_max=10):\n",
+ " df_fund = build_fund_dataset(fund_family)\n",
+ "\n",
+ " if df_fund is None or df_fund.empty or len(df_fund) < 10:\n",
+ " return None\n",
+ "\n",
+ " feature_cols = [c for c in FEATURE_COLS_FUND if c in df_fund.columns and not df_fund[c].isna().all()]\n",
+ " X_scaled = prep_matrix(df_fund, feature_cols)\n",
+ "\n",
+ " rows = []\n",
+ " models = {}\n",
+ "\n",
+ " for k in range(k_min, k_max + 1):\n",
+ " if k >= len(df_fund):\n",
+ " continue\n",
+ "\n",
+ " km = KMeans(n_clusters=k, random_state=RANDOM_STATE, n_init=50)\n",
+ " labels = km.fit_predict(X_scaled)\n",
+ "\n",
+ " rows.append({\n",
+ " \"k\": k,\n",
+ " **evaluate_partition(X_scaled, labels)\n",
+ " })\n",
+ " models[k] = labels\n",
+ "\n",
+ " diag = pd.DataFrame(rows)\n",
+ " return {\n",
+ " \"data\": df_fund,\n",
+ " \"features\": feature_cols,\n",
+ " \"diag\": diag,\n",
+ " \"models\": models,\n",
+ " \"X_scaled\": X_scaled,\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "7ce93ee1-129d-41be-af41-958361f58bf3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Nombre de fonds étudiés : 15\n",
+ "['FR0010149120', 'FR0010135103', 'FR0010148981', 'FR00140081Y1', 'LU0992624949', 'LU1623762843', 'FR001400KAV4', 'LU0336084032', 'FR0010149302', 'FR0010149161', 'LU0992631217', 'LU1299306321', 'FR001400U4S3', 'FR0010306142', 'FR0010312660']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# 7) Lancer l'étude sur tous les top fonds\n",
+ "# ============================================================\n",
+ "\n",
+ "fund_results = {}\n",
+ "\n",
+ "for fund_family in top_fund_families[\"fund_family\"]:\n",
+ " res = run_kmeans_grid_for_fund(fund_family, k_min=4, k_max=10)\n",
+ " if res is not None:\n",
+ " fund_results[fund_family] = res\n",
+ "\n",
+ "print(\"Nombre de fonds étudiés :\", len(fund_results))\n",
+ "print(list(fund_results.keys()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "bfb36134-0e65-42d5-a76c-a901a2a35816",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " k | \n",
+ " n_clusters | \n",
+ " min_cluster_size | \n",
+ " max_cluster_size | \n",
+ " dominant_cluster_share | \n",
+ " singleton_clusters | \n",
+ " silhouette | \n",
+ " davies_bouldin | \n",
+ " calinski_harabasz | \n",
+ " fund_family | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 317 | \n",
+ " 0.990625 | \n",
+ " 3 | \n",
+ " 0.952702 | \n",
+ " 0.014258 | \n",
+ " 6.236159e+04 | \n",
+ " FR0010149120 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 314 | \n",
+ " 0.981250 | \n",
+ " 3 | \n",
+ " 0.888334 | \n",
+ " 0.260038 | \n",
+ " 6.892536e+04 | \n",
+ " FR0010149120 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 302 | \n",
+ " 0.943750 | \n",
+ " 3 | \n",
+ " 0.779785 | \n",
+ " 0.317455 | \n",
+ " 7.565806e+04 | \n",
+ " FR0010149120 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 287 | \n",
+ " 0.896875 | \n",
+ " 3 | \n",
+ " 0.783468 | \n",
+ " 0.362942 | \n",
+ " 9.135321e+04 | \n",
+ " FR0010149120 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 8 | \n",
+ " 8 | \n",
+ " 1 | \n",
+ " 283 | \n",
+ " 0.884375 | \n",
+ " 3 | \n",
+ " 0.802890 | \n",
+ " 0.375458 | \n",
+ " 1.060793e+05 | \n",
+ " FR0010149120 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 9 | \n",
+ " 9 | \n",
+ " 1 | \n",
+ " 283 | \n",
+ " 0.884375 | \n",
+ " 4 | \n",
+ " 0.802810 | \n",
+ " 0.353071 | \n",
+ " 1.197763e+05 | \n",
+ " FR0010149120 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 10 | \n",
+ " 10 | \n",
+ " 1 | \n",
+ " 280 | \n",
+ " 0.875000 | \n",
+ " 4 | \n",
+ " 0.785437 | \n",
+ " 0.361689 | \n",
+ " 1.324484e+05 | \n",
+ " FR0010149120 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 323 | \n",
+ " 0.964179 | \n",
+ " 2 | \n",
+ " 0.862526 | \n",
+ " 0.396761 | \n",
+ " 1.072268e+07 | \n",
+ " FR0010135103 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 319 | \n",
+ " 0.952239 | \n",
+ " 2 | \n",
+ " 0.878178 | \n",
+ " 0.408251 | \n",
+ " 1.251565e+07 | \n",
+ " FR0010135103 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 317 | \n",
+ " 0.946269 | \n",
+ " 2 | \n",
+ " 0.877437 | \n",
+ " 0.438045 | \n",
+ " 1.375313e+07 | \n",
+ " FR0010135103 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 317 | \n",
+ " 0.946269 | \n",
+ " 2 | \n",
+ " 0.873553 | \n",
+ " 0.348404 | \n",
+ " 1.616668e+07 | \n",
+ " FR0010135103 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 8 | \n",
+ " 8 | \n",
+ " 1 | \n",
+ " 305 | \n",
+ " 0.910448 | \n",
+ " 2 | \n",
+ " 0.814795 | \n",
+ " 0.380975 | \n",
+ " 1.996607e+07 | \n",
+ " FR0010135103 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 9 | \n",
+ " 9 | \n",
+ " 1 | \n",
+ " 289 | \n",
+ " 0.862687 | \n",
+ " 2 | \n",
+ " 0.771577 | \n",
+ " 0.441611 | \n",
+ " 2.164950e+07 | \n",
+ " FR0010135103 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 10 | \n",
+ " 10 | \n",
+ " 1 | \n",
+ " 292 | \n",
+ " 0.871642 | \n",
+ " 4 | \n",
+ " 0.780776 | \n",
+ " 0.344281 | \n",
+ " 2.352434e+07 | \n",
+ " FR0010135103 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 311 | \n",
+ " 0.984177 | \n",
+ " 2 | \n",
+ " 0.951234 | \n",
+ " 0.246002 | \n",
+ " 1.197836e+05 | \n",
+ " FR0010148981 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 308 | \n",
+ " 0.974684 | \n",
+ " 3 | \n",
+ " 0.923568 | \n",
+ " 0.291831 | \n",
+ " 1.194490e+05 | \n",
+ " FR0010148981 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 307 | \n",
+ " 0.971519 | \n",
+ " 4 | \n",
+ " 0.925231 | \n",
+ " 0.295944 | \n",
+ " 1.336923e+05 | \n",
+ " FR0010148981 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 306 | \n",
+ " 0.968354 | \n",
+ " 4 | \n",
+ " 0.901949 | \n",
+ " 0.196162 | \n",
+ " 1.719620e+05 | \n",
+ " FR0010148981 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 8 | \n",
+ " 8 | \n",
+ " 1 | \n",
+ " 298 | \n",
+ " 0.943038 | \n",
+ " 4 | \n",
+ " 0.879011 | \n",
+ " 0.231291 | \n",
+ " 2.574472e+05 | \n",
+ " FR0010148981 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 9 | \n",
+ " 9 | \n",
+ " 1 | \n",
+ " 297 | \n",
+ " 0.939873 | \n",
+ " 5 | \n",
+ " 0.883001 | \n",
+ " 0.222999 | \n",
+ " 2.976192e+05 | \n",
+ " FR0010148981 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 10 | \n",
+ " 10 | \n",
+ " 1 | \n",
+ " 291 | \n",
+ " 0.920886 | \n",
+ " 5 | \n",
+ " 0.853795 | \n",
+ " 0.266340 | \n",
+ " 3.422146e+05 | \n",
+ " FR0010148981 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 130 | \n",
+ " 0.928571 | \n",
+ " 2 | \n",
+ " 0.846660 | \n",
+ " 0.272805 | \n",
+ " 1.058871e+03 | \n",
+ " FR00140081Y1 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 124 | \n",
+ " 0.885714 | \n",
+ " 2 | \n",
+ " 0.801199 | \n",
+ " 0.291206 | \n",
+ " 1.457334e+03 | \n",
+ " FR00140081Y1 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 116 | \n",
+ " 0.828571 | \n",
+ " 2 | \n",
+ " 0.798078 | \n",
+ " 0.354293 | \n",
+ " 2.012263e+03 | \n",
+ " FR00140081Y1 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ " 116 | \n",
+ " 0.828571 | \n",
+ " 3 | \n",
+ " 0.797375 | \n",
+ " 0.302775 | \n",
+ " 2.397924e+03 | \n",
+ " FR00140081Y1 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 8 | \n",
+ " 8 | \n",
+ " 1 | \n",
+ " 115 | \n",
+ " 0.821429 | \n",
+ " 4 | \n",
+ " 0.784789 | \n",
+ " 0.243282 | \n",
+ " 3.247377e+03 | \n",
+ " FR00140081Y1 | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 9 | \n",
+ " 9 | \n",
+ " 1 | \n",
+ " 115 | \n",
+ " 0.821429 | \n",
+ " 4 | \n",
+ " 0.776258 | \n",
+ " 0.354855 | \n",
+ " 3.919863e+03 | \n",
+ " FR00140081Y1 | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 10 | \n",
+ " 10 | \n",
+ " 1 | \n",
+ " 106 | \n",
+ " 0.757143 | \n",
+ " 4 | \n",
+ " 0.678970 | \n",
+ " 0.369964 | \n",
+ " 4.284648e+03 | \n",
+ " FR00140081Y1 | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 185 | \n",
+ " 0.968586 | \n",
+ " 2 | \n",
+ " 0.937635 | \n",
+ " 0.092228 | \n",
+ " 7.051123e+03 | \n",
+ " LU0992624949 | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 183 | \n",
+ " 0.958115 | \n",
+ " 2 | \n",
+ " 0.939791 | \n",
+ " 0.174930 | \n",
+ " 1.430396e+04 | \n",
+ " LU0992624949 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " k n_clusters min_cluster_size max_cluster_size \\\n",
+ "0 4 4 1 317 \n",
+ "1 5 5 1 314 \n",
+ "2 6 6 1 302 \n",
+ "3 7 7 1 287 \n",
+ "4 8 8 1 283 \n",
+ "5 9 9 1 283 \n",
+ "6 10 10 1 280 \n",
+ "7 4 4 1 323 \n",
+ "8 5 5 1 319 \n",
+ "9 6 6 1 317 \n",
+ "10 7 7 1 317 \n",
+ "11 8 8 1 305 \n",
+ "12 9 9 1 289 \n",
+ "13 10 10 1 292 \n",
+ "14 4 4 1 311 \n",
+ "15 5 5 1 308 \n",
+ "16 6 6 1 307 \n",
+ "17 7 7 1 306 \n",
+ "18 8 8 1 298 \n",
+ "19 9 9 1 297 \n",
+ "20 10 10 1 291 \n",
+ "21 4 4 1 130 \n",
+ "22 5 5 1 124 \n",
+ "23 6 6 1 116 \n",
+ "24 7 7 1 116 \n",
+ "25 8 8 1 115 \n",
+ "26 9 9 1 115 \n",
+ "27 10 10 1 106 \n",
+ "28 4 4 1 185 \n",
+ "29 5 5 1 183 \n",
+ "\n",
+ " dominant_cluster_share singleton_clusters silhouette davies_bouldin \\\n",
+ "0 0.990625 3 0.952702 0.014258 \n",
+ "1 0.981250 3 0.888334 0.260038 \n",
+ "2 0.943750 3 0.779785 0.317455 \n",
+ "3 0.896875 3 0.783468 0.362942 \n",
+ "4 0.884375 3 0.802890 0.375458 \n",
+ "5 0.884375 4 0.802810 0.353071 \n",
+ "6 0.875000 4 0.785437 0.361689 \n",
+ "7 0.964179 2 0.862526 0.396761 \n",
+ "8 0.952239 2 0.878178 0.408251 \n",
+ "9 0.946269 2 0.877437 0.438045 \n",
+ "10 0.946269 2 0.873553 0.348404 \n",
+ "11 0.910448 2 0.814795 0.380975 \n",
+ "12 0.862687 2 0.771577 0.441611 \n",
+ "13 0.871642 4 0.780776 0.344281 \n",
+ "14 0.984177 2 0.951234 0.246002 \n",
+ "15 0.974684 3 0.923568 0.291831 \n",
+ "16 0.971519 4 0.925231 0.295944 \n",
+ "17 0.968354 4 0.901949 0.196162 \n",
+ "18 0.943038 4 0.879011 0.231291 \n",
+ "19 0.939873 5 0.883001 0.222999 \n",
+ "20 0.920886 5 0.853795 0.266340 \n",
+ "21 0.928571 2 0.846660 0.272805 \n",
+ "22 0.885714 2 0.801199 0.291206 \n",
+ "23 0.828571 2 0.798078 0.354293 \n",
+ "24 0.828571 3 0.797375 0.302775 \n",
+ "25 0.821429 4 0.784789 0.243282 \n",
+ "26 0.821429 4 0.776258 0.354855 \n",
+ "27 0.757143 4 0.678970 0.369964 \n",
+ "28 0.968586 2 0.937635 0.092228 \n",
+ "29 0.958115 2 0.939791 0.174930 \n",
+ "\n",
+ " calinski_harabasz fund_family \n",
+ "0 6.236159e+04 FR0010149120 \n",
+ "1 6.892536e+04 FR0010149120 \n",
+ "2 7.565806e+04 FR0010149120 \n",
+ "3 9.135321e+04 FR0010149120 \n",
+ "4 1.060793e+05 FR0010149120 \n",
+ "5 1.197763e+05 FR0010149120 \n",
+ "6 1.324484e+05 FR0010149120 \n",
+ "7 1.072268e+07 FR0010135103 \n",
+ "8 1.251565e+07 FR0010135103 \n",
+ "9 1.375313e+07 FR0010135103 \n",
+ "10 1.616668e+07 FR0010135103 \n",
+ "11 1.996607e+07 FR0010135103 \n",
+ "12 2.164950e+07 FR0010135103 \n",
+ "13 2.352434e+07 FR0010135103 \n",
+ "14 1.197836e+05 FR0010148981 \n",
+ "15 1.194490e+05 FR0010148981 \n",
+ "16 1.336923e+05 FR0010148981 \n",
+ "17 1.719620e+05 FR0010148981 \n",
+ "18 2.574472e+05 FR0010148981 \n",
+ "19 2.976192e+05 FR0010148981 \n",
+ "20 3.422146e+05 FR0010148981 \n",
+ "21 1.058871e+03 FR00140081Y1 \n",
+ "22 1.457334e+03 FR00140081Y1 \n",
+ "23 2.012263e+03 FR00140081Y1 \n",
+ "24 2.397924e+03 FR00140081Y1 \n",
+ "25 3.247377e+03 FR00140081Y1 \n",
+ "26 3.919863e+03 FR00140081Y1 \n",
+ "27 4.284648e+03 FR00140081Y1 \n",
+ "28 7.051123e+03 LU0992624949 \n",
+ "29 1.430396e+04 LU0992624949 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# 8) Tableau de synthèse par fond et par K\n",
+ "# ============================================================\n",
+ "\n",
+ "rows = []\n",
+ "\n",
+ "for fund_family, res in fund_results.items():\n",
+ " diag = res[\"diag\"].copy()\n",
+ " diag[\"fund_family\"] = fund_family\n",
+ " rows.append(diag)\n",
+ "\n",
+ "df_fund_diag = pd.concat(rows, axis=0).reset_index(drop=True)\n",
+ "\n",
+ "display(df_fund_diag.head(30))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "66ea6e29-47dd-4fc0-900f-225d3c8a2b1d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "==============================\n",
+ "FOND : FR0010149120\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 317\n",
+ "1 1\n",
+ "2 1\n",
+ "3 1\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : FR0010135103\n",
+ "K retenu : 5\n",
+ "cluster\n",
+ "0 4\n",
+ "1 1\n",
+ "2 319\n",
+ "3 1\n",
+ "4 10\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : FR0010148981\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 311\n",
+ "1 1\n",
+ "2 1\n",
+ "3 3\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : FR00140081Y1\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 130\n",
+ "1 1\n",
+ "2 1\n",
+ "3 8\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : LU0992624949\n",
+ "K retenu : 5\n",
+ "cluster\n",
+ "0 183\n",
+ "1 1\n",
+ "2 1\n",
+ "3 4\n",
+ "4 2\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : LU1623762843\n",
+ "K retenu : 5\n",
+ "cluster\n",
+ "0 206\n",
+ "1 1\n",
+ "2 1\n",
+ "3 3\n",
+ "4 1\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : FR001400KAV4\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 125\n",
+ "1 1\n",
+ "2 1\n",
+ "3 2\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : LU0336084032\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 265\n",
+ "1 1\n",
+ "2 5\n",
+ "3 1\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : FR0010149302\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 309\n",
+ "1 1\n",
+ "2 1\n",
+ "3 2\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : FR0010149161\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 203\n",
+ "1 1\n",
+ "2 1\n",
+ "3 1\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : LU0992631217\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 156\n",
+ "1 1\n",
+ "2 1\n",
+ "3 3\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : LU1299306321\n",
+ "K retenu : 5\n",
+ "cluster\n",
+ "0 9\n",
+ "1 3\n",
+ "2 110\n",
+ "3 4\n",
+ "4 5\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : FR001400U4S3\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 111\n",
+ "1 1\n",
+ "2 3\n",
+ "3 1\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : FR0010306142\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 247\n",
+ "1 1\n",
+ "2 3\n",
+ "3 2\n",
+ "Name: count, dtype: int64\n",
+ "\n",
+ "==============================\n",
+ "FOND : FR0010312660\n",
+ "K retenu : 4\n",
+ "cluster\n",
+ "0 2\n",
+ "1 205\n",
+ "2 9\n",
+ "3 2\n",
+ "Name: count, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# ============================================================\n",
+ "# 9) Choisir un K par fond et afficher les tailles des clusters\n",
+ "# Ici : on prend le meilleur silhouette sans singleton si possible\n",
+ "# ============================================================\n",
+ "\n",
+ "best_partitions = {}\n",
+ "\n",
+ "for fund_family, res in fund_results.items():\n",
+ " diag = res[\"diag\"].copy()\n",
+ "\n",
+ " diag2 = diag[diag[\"singleton_clusters\"] == 0].copy()\n",
+ " if diag2.empty:\n",
+ " diag2 = diag.copy()\n",
+ "\n",
+ " diag2 = diag2.sort_values(\n",
+ " [\"silhouette\", \"dominant_cluster_share\"],\n",
+ " ascending=[False, True]\n",
+ " )\n",
+ "\n",
+ " best_k = int(diag2.iloc[0][\"k\"])\n",
+ " labels = res[\"models\"][best_k]\n",
+ "\n",
+ " df_f = res[\"data\"].copy()\n",
+ " df_f[\"cluster\"] = labels\n",
+ "\n",
+ " best_partitions[fund_family] = {\n",
+ " \"k\": best_k,\n",
+ " \"data\": df_f\n",
+ " }\n",
+ "\n",
+ " print(\"\\n==============================\")\n",
+ " print(\"FOND :\", fund_family)\n",
+ " print(\"K retenu :\", best_k)\n",
+ " print(df_f[\"cluster\"].value_counts().sort_index())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "e5e397ed-3514-4377-8c9b-ed474e70f8b8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAocAAAHWCAYAAAAFLiMtAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAXttJREFUeJzt3XlcVOX+B/DPMDAzDDADyK4gKu4bhkqoLRZJZqXWNbJyT9Owew1zy1JbTNMWzUzNCrvXJfP2S80FJdTKRE0FcwFXDBUBEZlBdpjn98dcTo4zICgwMH7er9e8bM7znXOecxj00znneY5MCCFARERERATAztodICIiIqKGg+GQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRFQnVq1aBZlMJr1UKhXatGmDiRMnIjMz06w+MzMTb7zxBtq1awe1Wg0nJyeEhITg/fffR25ursVt9OzZEzKZDMuWLauz/bh5H25++fj4SDVz5swxaXNwcEBgYCD++c9/Vtr3ffv2oU+fPlCr1fDx8cE///lP3Lhxw6yuuLgY06ZNg5+fHxwdHREaGoq4uDizup07d2LMmDHo1KkT5HI5AgMDK90ng8GABQsWoEWLFlCpVOjSpQvWrVtnVrdy5Uo89NBD8Pb2hlKpRIsWLTBq1ChcuHDBrHbZsmUYMmQIAgICIJPJMHLkSIvbjo+Px+jRo9GmTRuo1Wq0bNkSL7/8Mq5cuWKxvqSkBB988AHatWsHlUoFb29vDBgwAJcuXTKrPXLkCJ5++mm4u7tDrVajU6dO+Oyzz8zqqnvsie5V9tbuABHZtnfffRctWrRAUVER9u7di2XLlmHbtm04fvw41Go1AOCPP/7AE088gRs3buCll15CSEgIAODQoUOYP38+fv31V+zcudNkvWfOnMEff/yBwMBArFmzBhMmTKizfXjssccwfPhwk2WOjo5mdcuWLYOzszPy8/MRHx+PJUuW4MiRI9i7d69JXVJSEh599FG0b98en3zyCS5duoSPPvoIZ86cwfbt201qR44cif/+97+YNGkSWrdujVWrVuGJJ57A7t270adPH6lu7dq1WL9+Pe677z74+flVuT8zZ87E/PnzMXbsWPTo0QObNm3CCy+8AJlMhueff16qS0xMRIsWLfD000/Dzc0NqampWLlyJbZs2YKjR4+abOfDDz9EXl4eevbsWWnQA4Bp06YhJycHQ4YMQevWrXH+/Hl8/vnn2LJlC5KSkkxCd2lpKQYMGIB9+/Zh7Nix6NKlC65fv44DBw5Ap9OhWbNmUu3OnTvx1FNPoVu3bnj77bfh7OyMc+fOmYXImhx7onuWICKqAzExMQKA+OOPP0yWR0dHCwBi7dq1Qgghrl+/Lpo2bSq8vb1FcnKy2XoyMjLEe++9Z7Z81qxZwsvLS/zwww9CJpOJ1NTUOtkPACIqKqrKmtmzZwsA4urVqybLIyMjBQBx4MABk+X9+/cXvr6+QqfTSctWrlwpAIgdO3ZIyw4cOCAAiIULF0rLCgsLRatWrURYWJjJOi9fvixKSkqEEEIMGDBANG/e3GJfL126JBwcHEz2yWAwiAceeEA0a9ZMlJWVVbmvhw4dEgDEvHnzTJZfuHBBGAwGIYQQTk5OYsSIERY//8svv4jy8nKzZQDEzJkzTZZ/+OGHwsHBwez43Uqn0wlvb28xePBgs3XfqrrHnuhexsvKRFSvHnnkEQBAamoqAGDFihW4fPkyPvnkE7Rr186s3tvbG2+99ZbZ8rVr1+If//gHnnzySWi1Wqxdu7ZuO34HHnjgAQDAuXPnpGV6vR5xcXF46aWXoNFopOXDhw+Hs7Mzvv/+e2nZf//7X8jlcowbN05aplKpMGbMGCQkJODixYvScj8/Pzg4ONy2T5s2bUJpaSleffVVaZlMJsOECRNw6dIlJCQkVPn5isvVt14ub968OWQy2W23/+CDD8LOzs5smbu7O5KTk6VlBoMBixcvxuDBg9GzZ0+UlZWhoKDA4jrXrl2LzMxMzJ07F3Z2dsjPz4fBYDCrq8mxJ7qXMRwSUb2qCEpNmjQBAGzevBmOjo74xz/+Ue11HDhwAGfPnsXQoUOhUCjwzDPPYM2aNXXSXwAoKipCdna2yau4uPi2n6u4N8/NzU1aduzYMZSVlaF79+4mtQqFAsHBwUhMTJSWJSYmok2bNiZBBjDeawkYL5HWVGJiIpycnNC+fXuL67x5+xWuXbuGrKwsHDp0CKNGjQIAPProozXedmVu3LiBGzduwMPDQ1p28uRJpKeno0uXLhg3bhycnJzg5OSELl26YPfu3Saf//nnn6HRaHD58mW0bdsWzs7O0Gg0mDBhAoqKiqS6mhx7onsZwyER1SmdTofs7GxcunQJ69evx7vvvgtHR0c8+eSTAIDk5GS0adMGCoWi2utcvXo1/P390bt3bwDA888/j5MnT95RWKqOr7/+Gp6eniYvSwM4cnJykJ2djb/++gsxMTFYunQpPD098eCDD0o1Fffj+fr6mn3e19cX6enpJrWV1QEwqa2uK1euwNvb2+wsX1XrbNq0Kby9vdGjRw/s27cPn332GR577LEab7syixYtQklJCSIjI6VlZ86cAQB8+umn2LNnD1asWIGYmBgUFRXh8ccfx59//mlSW1ZWhoEDByIiIgI//PADRo8ejeXLl0thtmLfb97Xm9167InuZRyQQkR1Kjw83OR98+bNsWbNGjRt2hSA8VKfi4tLtddXVlaG9evXY8SIEVLAeeSRR+Dl5YU1a9YgODi41vpeYeDAgZg4caLJso4dO5rVtW3b1uR9586dERMTIw28AYDCwkIAgFKpNPu8SqWS2itqK6u7eV01cSfr3L59O4qKipCcnIzVq1cjPz+/xtutzK+//op33nkHzz33nHTLAQBp9HBeXh4SExPh7+8PwPizDgoKwoIFC7B69WqptqCgAOPHj5dGJz/zzDMoKSnBihUr8O6776J169Y1OvZE9zKGQyKqU0uXLkWbNm1gb28Pb29vtG3b1uSeM41Gg7y8vGqvb+fOnbh69Sp69uyJs2fPSsv79u2LdevW4cMPPzS7p+1mGRkZJu+1Wq3Fkcc3a9asmVnIteSHH36ARqPB1atX8dlnnyE1NdVs3RXvLV2WLioqMql3dHSstO7mddXEnayzb9++AID+/ftj4MCB6NSpE5ydnc0Cc02lpKRg8ODB6NSpE7766iuzfgJA7969pWAIAAEBAejTpw/27dtnVjt06FCTdbzwwgtYsWIFEhIS0Lp16xode6J7GcMhEdWpnj17mt3jdbN27dohKSkJJSUl1bq0XHFv4XPPPWex/ZdffpHCjCW3XlKMiYmpdE6+mnrwwQel++aeeuopdO7cGS+++CIOHz4sBdaK7Vua7uXKlSsm08P4+vri8uXLFusA3HbKGkt8fX2xe/duCCFMLi1Xd52tWrVCt27dsGbNmrsKhxcvXkS/fv2g1Wqxbds2s7PHFf3w9vY2+6yXl5fJ/YF+fn44ceKEWa2XlxcA4Pr16wBqduyJ7mW855CIrOqpp55CYWEhfvjhh9vW5ufnY9OmTYiMjMSGDRvMXr6+vrcdmBIXF2fyioiIqK1dMeHs7IzZs2cjKSnJZBRsp06dYG9vj0OHDpnUl5SUICkpyeSyeHBwME6fPg29Xm9Se+DAAam9poKDg1FQUGAyMrim6ywsLIROp6vxtitcu3YN/fr1Q3FxMXbs2GHxHsDOnTvDwcHBYjhOT0+Hp6en9L5iXsxbayvuIayorcmxJ7qnWXsuHSKyTZXNc3irnJwc4evrK3x9fcWpU6fM2jMzM6V5Dv/zn/8IAOLXX3+1uK6xY8cKV1dXUVRUdPc78D+4i3kOS0pKRLNmzURwcLDJ8scff1z4+voKvV4vLfvqq68EALF9+3Zp2f79+83mOSwqKhJBQUEiNDS00v5UNc/hxYsXK53nsGnTptI8h6WlpSInJ8fs8wcOHBByuVwMGzas0u1XNc/hjRs3RM+ePYWLi4s4dOhQpesQQoiBAwcKuVxuMv/lyZMnhVwuF6+++qq07MiRIwKAeOGFF0w+P3ToUGFvby8uX74sLavusSe6l/GyMhFZlZubG3788Uc88cQTCA4ONnlCypEjR7Bu3TqEhYUBMF5SbtKkCXr16mVxXU8//TRWrlyJrVu34plnnqm3faiMg4MD/vWvf2HKlCmIjY3F448/DgCYO3cuevXqhYceegjjxo3DpUuX8PHHH6Nfv35SDQCEhoZiyJAhmDFjBrKyshAUFIRvv/0WFy5cwNdff22yrT///BObN28GAJw9exY6nQ7vv/8+AKBr16546qmnABjvn5w0aRIWLlyI0tJS9OjRAxs3bsRvv/2GNWvWQC6XAzAO8vD390dkZCQ6duwIJycnHDt2DDExMdBqtXj77bdNtv/TTz/h6NGjAIxPNvnzzz+l7T/99NPo0qULAODFF1/EwYMHMXr0aCQnJ5ucwXR2dsagQYOk9x988AHi4+PxyCOP4J///CcA4LPPPoO7uzvefPNNqa5bt24YPXo0vvnmG5SVleGhhx7Cnj17sGHDBsyYMcPkcnF1jz3RPc3a6ZSIbFN1zxxWSE9PF6+//rpo06aNUKlUQq1Wi5CQEDF37lyh0+lEZmamsLe3r/KMVUFBgVCr1WLw4MG1tRt3deZQCOPTO7RarXjooYdMlv/222+iV69eQqVSCU9PTxEVFWVyNqtCYWGheOONN4SPj49QKpWiR48eIjY21qyu4nhbet16Fq+8vFx88MEHonnz5kKhUIiOHTuK1atXm9QUFxeLf/3rX6JLly5Co9EIBwcH0bx5czFmzBiLT6MZMWJEpduPiYmR6po3b15pnaWznYcPHxbh4eHCyclJuLi4iIEDB4rTp0+b1ZWUlIg5c+aI5s2bCwcHBxEUFCQ+/fRTs7qaHHuie5VMCCHqNY0SERERUYPFASlEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpJwEmwrMRgMSE9Ph4uLi8nzTYmIiIhqmxACeXl58PPzk571XhmGQytJT0+Hv7+/tbtBRERE95CLFy+iWbNmVdYwHFqJi4sLAOMPSaPRWLk3REREZMv0ej38/f2l/FEVhkMrqbiUrNFoGA6JiIioXlTnVjYOSCEiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSfj4PBtTZihDZn4mDmYcxDndOXT26IwuHl3g6+xr7a4RERFRI8BwaEMMwoCT107i5Z0vo7CsUFrurnJHTEQMWrq2tGLviIiIqDHgZWUbklWQhdd2vWYSDAEgpygHU3+dipyiHCv1jIiIiBoLhkMbklWQVWkAPHX9FK4XXa/nHhEREVFjw3BoQwpKC6psLykvqaeeEBERUWPFcGhD/Jz9IIPMYpuTgxO0Sm0994iIiIgaG4ZDG+Kucsfz7Z632PZat9fg5ehVzz0iIiKixoajlW2Is8IZ47uMR6AmECuPrUR2YTaauTTDpG6TcL/v/bCX88d9r9EX61FcXgxHe0c4K5yt3R0iImoEZEIIYe1O3Iv0ej20Wi10Oh00Gk2trlsIgauFV1FmKIODnQM81Z61un5q+HTFOiRfS8byo8tx6cYltHVriwnBE9BS2xJqB7W1u0dERPWsJrmDp5JskEwmg5eal5DvVYVlhdh0dhMWHlooLcssyMRvl3/D4r6L8ZD/Q7CT8Y4SIiKyjP9CENmYa4XX8OmRT82WCwi8k/AOrhZctUKviIiosWA4JLIx6TfSUWYos9h2regacotz67dDRETUqDAcEtkYuZ28ynZeUiYioqrwnkMiG+Pr5Au1vRoFZeaTojdzaQZXpWv9d6qBKyotR5a+CAnnryE7rwShLd3RvIkTPF2U1u4aEVG9YzgksjGejp74oM8HiP4lGgZhkJYr5UrM7zOfo9dvUVRajl9OX0XUmiMoM/w9eUOwvyuWvXQffLWOVuwdEVH941Q2VlKXU9kQFZYV4nLeZXx/+nuczz2PLp5dMChoEPyc/WBvx/8nvNlf1/LxyMe/oNxg/lfh6N6BmN6/HRT2VV+qbwiEELh6oxgGA+CisoeTkj9nIvobp7Ihusc52jsiyC0IU3tMRUl5CZRy5W3vRbxX7T2TbTEYAsC6gxfx8gMt4efasM8eZumLsP14Br7emwpdYSn6BHlgUnhrBDZxgoM97zElopphOCSyYfZ29jxTeBsZ+qJK2wpLy00uNTdE2XnFmPz9Ufx2NltatvXYFfycnImNUb3R3rf6VyaybxTj8vVCHEi9Bje1Aj1buMNLo4KjQ/X+x6K0vBw5+aUQAnB3cmgUZ1yJyBz/1SCie1rvVh5Ysuusxba23i5QKxp2wEnLKTAJhhWKywz4YFsyPn/hPmgdHW67nkx9EaLXJ+H3c9ekZXI7GT57vhv6tvOEWlH1PxeXrxfg24S/8H9HLgEABgY3xajegWjmxifyEDU2vN5ARPe0lp5OaO/rYrFt9lMd4OHcsEcsx53MrLTttzPZuFFUett1lJUbsO5gmkkwBIByg8Br644gs4qzqwBwObcQz63Yjy9/PY/sGyXIvlGCr/em4h/LEnD5uvmoeSJq2BgOieie5qVR4esRPfBCzwAo/3d/XmsvZ6we0xNd/F2t27lqcFJWfmZTaW8HmUx223Vk3yhGzO8XLLYZBLDzROUB1GAQ2H7sCi7nFpq1ZeiLsPnoFRga8KV5IQSy8oqQoStEfrHlyeOJ7jW8rExE9zw/V0fMeqoDovq2QplBwElhD49GMsdhv44++GjnaYttg7s1hbuT4rbrKBeArrDyM4xV3ZepLyrF5qPplbb/dDQdkT2awd2p4R3PTH0Rth67gm/2pkJfWIreQR54/bE2CGyihsJeDl1hCTL1xdiVnIXScgMeaecFX1dHuDspkJ1XjFKDATIYz7Da2cng6ayEvZznXKjxYzgkIgKgcpCjaSO8P85Ho8Ib/dqYBcRmbo6I6hsEVTUGkzg6yNG1mRZHL+kstj/YuvK5MeV2siq3oXKQQ27X8AJTxUCevTfdr7n9eAZ2pWThx1d7w1erwle/pWLpnr/vR/047jQGB/thZO8WeH19EjL0RejUVIvRvVtgd0omvDQqDLu/Obw0qjvrVHEeUJIP2KsAR9e73EOiO9fwfmOJiKjaNI4OGBYWiC2v9cELPf3Rr4M3FkUG4/tXwuDvXr2w6+6kwNtPdoClK9AtPCq/JxMAXFQOGNUrsNL2Ub0DqzUgpr79lVNgEgwrFJcZsDj+NM5k5ZkEwwo/JqUj6WIuAKCgpBwHU3MwYc1h9GjhjvjkLEzZ8Ceu3SiuWWdK8oH0ROCHl4GVjwDrngfO7QIKcu5k14juGsMhEVEjp3V0QKemWswd3BlfvHgfBnVrWuO5GTv4arBmTChaezkDABzkMgzq5of/jOkJn9s8JaZ7oBseaedltvyh1p4IbeFeo37Ul5+rGMjjonJAzL4Llbb/cOQSBnTxld4LAXwadwYv3d8cv5y5igxd1QN4TAgBXNgLrOwLnI4F9JeBtATgP4OBI/82Bsc7ZTAAuReBsz8b13XpMHAjq2bryM8GdOlA/lWgrOTO+0KNCi8rExHZCJlMBnv57QegWKJW2qNXkAfWjb0fN4rLYG8ng7uTAupqPGnF00WFD5/tgjNZeVh3IA0CwNCeAWjt7Qwvlzu8xFrHnKsYyKNV2eNiTuWjrK8XlMDplql9LucWwlVtPEOaeDEXHZtqq9eRvCvAT/80hsRb7XoP6DgIUDhVb103MxiAjKPGkFl4/e/lft2AyNWAtlnlny3UGc9aluQBkAHCANg7ABcPAgpn42dVroBMDqhda943avAYDomISOLhoryjwTieLkp4uihxf4smAAA7uzsLqfXlsY4+WFjJQJ4mzgo82t4LB1ItX9btGeiOk1f0Zsvl/9vn6gwCkhTkAHkZltsMZcC184BbYPXXVyEv3TwYAsbL1ztnAQOXWA6dN64COeeM9z2e2QG0exL4IwY4/M3fAValBf4RA3h2AK7lABofwKHx3a9LleNlZSIiqjV2drIGHwwBwNNFgTf6tTFb7u/uiKeDm+KJzr5oYiHkOTrIMTC4KWKPmwa6+wLccDJdD6W9HTpX96whANzusZbyO7xfM/useTCskLzRGAJvVVYMZJ0Erp4C9OmAZ3vg/C/Aoa9Nz2wW6YB1kcYzi/rLQO6lO+sjNVgMh3dh6dKlCAwMhEqlQmhoKA4ePGjtLhERUTWczcyHrqAUXw4LwXPd/RHR0Qezn+qAL4d1h7dGhWZuavx3Qi/07+SDiqz7QGsPrBt3Pz7ZeQol5QZpXe5OCvzz0SB890calr8UAm9NDc68OroDTVpZbnNQA27N72wHb1ypvM1QDpSZz0uJG1nGexx9ugBH1wCu/sDBLy2vo7wUSNkKOPsaP2MpbFKjxcvKd2j9+vWIjo7G8uXLERoaikWLFiEiIgKnTp2Cl5f5jdlERNQwZOqLMGl9Ei7nFuI/B/7CA6094eggx+r9afh452nsmPQAmrqp0cLDCQuHdMHMAe0hhHHgj71cho+eC8aPiZdw/mo+7m/ZBF2aaXExpwDfvxIGH42qZs+UdvEGnlkJrBoAlN4U2GQyYNAywNnnznbSq0PlbY5ugNLCCPSyImNolAEQMIZIXVrl67maAtw3AijR/+/+xFumPCrUGQeyFOmM23PyANTVHKBUfAPIzwIuHwFEOdA0BHDyAlTVf1Y43TmGwzv0ySefYOzYsRg1ahQAYPny5di6dSu++eYbTJ8+3cq9IyKiyuTkl0hPdCkqNZg9gjBDXyTNeemsdICz0vTSbpCXM6ZEtIPhf5NfA0C3ALc775BPV2DCPiBpHXDxANCkNdDzZcA1ALCvwf2LN3PxA/zDgIsJ5m19Z/59xq+s2Bjc5A6AgyOg1BgDmVd7QGYHeHcCrhy1vI2A+/8XJu2Mg1Nupk8Htr4BnNr697IWDxoDb1WDYQBjqEz8DxD3tnEwDGAMyw9MAe6f8HfALM43BsiCHEChBtQexvkhS24Y75l0qNmIffobw+EdKCkpweHDhzFjxgxpmZ2dHcLDw5GQYOEXEUBxcTGKi/+e+0qvN7+ZmYiI6p7B0sjgm5RV83F/tXZvpdwecG8JPDzdePZOrjQuuxvOnsCQb4Bd7wPHvjdeBlY3MQbDNv2BSweA3xcbB8MEPgB0HwVo/QFxEsg8AYS+Ypy6Juw14P9eNl+/oxsQEGoMkgonoEnQ321FemD7NNNgCACpvwI/jDWOlnZqUnnfs08DO2eaLhMC+HUB0LwX0Kqv8RL4rx8Z74c0/O+xh14dgKcWA3s/NV6S7/Ua4N4KUFU+TydZxnsO70B2djbKy8vh7e1tstzb2xsZGZZHnc2bNw9arVZ6+fv710dXiYjoFm5qhcXBJoDxedR+t5nXsc7YyY1B626DYQWNHzDgI2DiYSDqIPDKb0Dn54Ck1UBMf+O8ileSgIQlwPI+QPYpoFl3oNOzQOZJ41lL745A/wXGYFnBpwsw7EdAyACvtsZ7JpXOf7fnZwEpP1nuU9o+46XmypQWAvuWVN6+91PjQJvDq4CDK/4OhoBxMM2Gkcbpf47/F/jyIeDkRtPL9VQtDIf1ZMaMGdDpdNLr4sWL1u4SEdE9yUejwrxnOlt8IszMAe3h2Uieq10tFYNaPNsC2qbAjUxgzwfmdaUFwJbXjf/drLvxkrG9I2CvBoL6GcPgyK3A2N3A058BdkrAXmkcUHProJkiveV5GysUVvHkl7JiIO9y5e156UBhbuUBUn/ZeK9kxaXnbZNrPvE3MRzeCQ8PD8jlcmRmmt6nkpmZCR8fyzcPK5VKaDQakxcREdU/OzsZ+gR5YHNUbzzWwQvN3BzRu1UTrB93PwYFN63W86gbrYsHKg9ul/4wnpWzVwIaX+O9hHs/AS4fAhycjINjHFTGS8oZfxoDpZOH+XpUGlhM3hUcqxiUonAGmj9QeXtAb2O/iqu4NSv3L8D5f1f2yoqBa+aPQaSq8Z7DO6BQKBASEoL4+HgMGjQIAGAwGBAfH4+JEydat3NERHRbaqU9OjdzxaeRwSgoLoejQg4XVcN7BrRVFVwDOjwFbJ9qPPvm0cYYCK+mAA9OMZ6NtMTJE2g7AEjZYt7mf7+xvTJyeyBkOPDHl+aPDpQrgF5RxnCo1FQeEF2bG8+QVhAGy3VUKZ45vEPR0dFYuXIlvv32WyQnJ2PChAnIz8+XRi8TEVHD56x0gJdGde8EQ/+elbf53Wd8LF4FUQ5sHA888hbw2DvGJ7U07wUMXWc8w5hXyVyKKi3wxEKgdYTp8ua9gWe/qnowCmAMd6NijY/6q+DdCRi1zdgHFx/g/lctf9bFF7CzN45gBoyjsD1aV709MsMzh3coMjISV69exaxZs5CRkYHg4GDExsaaDVIhIiJqMJy9gKHrgb9+B5J/Aq6nGpc7OAJPLTINbmp3ADLgv6ONo6l9OgO5acZBH8IA9Bxb+XY0fsAzK4D8bKAo13imz8mzevMc2skB3y7Aiz8ARdeNl8EdXU3POPYYYxzYcmSV8R5DwHgmM+IDYEv033WPvWecH5FqRCbEbcb0U53Q6/XQarXQ6XS8/5CIiOqWEMD1v4Aj3wKntxvP7gW/ZByRfDnxf/MqBpqOlDYYgOTNwIYR5uvrOxMIGWkMm9ZSMVF2QY5xXkN7FfDHSuDcLuPZxwcmG+drdHS1Xh+rIy8DyDlvnE/SrTng3RnQNAXsavfibk1yB8OhlTAcEhFRvck+C3wdbv685U7PAo/PrzzkFeqMU9zsmQdkJRsn5u7zunE6G23Tuu93TZUUGifBdlBZfgpMQ5ObBqx+Bsg+8/cypQYYvhnw7VqrAZHhsBFgOCQionpRfAPYFGWc88+SsbuBpvdVvY78HKBYZzw7p/Gt9S7ek4p0wA8vA2d2mrep3Y3zUt7uaTI1UJPcwQEpREREtqwot/JJqQHgxMbbr8PJHXBvwWBYm/KzgbNxltsKcoy3AVgJwyEREZHNq2LewVq+t42qqayo6snC87Prry+34DeCiIjIljm6AR0GV97ecVC9dYVuotKYTh10KytOwcNwSEREZMsUTsAjb1p+mkm34YA2oP77RICzL9D3TcttrfsZ53O0Es5zSEREZOvcWxoHnhz7r/H+Q5UrEDbROCK2OnMPUu2T2wOd/2F8hvXu941PdXFQG6cI6vVPq/5cOFrZSjhamYiI6p2hHCjSG58conS2dm8IMM4nmZcBlBUAcqVx0m4HZa1vpia5g2cOiYiI7hV2ckDtZu1e0M3s7ACtn7V7YYL3HBIRERGRhOGQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGET0ghIiIiy0rygbwrQPIWQHcJCAoHfLsAmob1RA+qXQyHREREZK60ADizE/jvaEAYjMv+WAm4twSGbwJcA6zbP6ozvKxMRERE5vIygR/G/B0MK+ScB+LfM55VJJvEcEhERETm/toHGMott534PyA/u377Q/WG4ZCIiIjMFeZU3mYoM77IJjEcEhERkbnmfSpv82wHKF3qry9UrxgOiYiIyJy2GdCyr/lymQzovwBw9qr/PlG9YDgkIiIic86ewODlQN+ZgKObcVnTEGBULNAsxLp9ozrFqWyIiIjIMhcfoM9koNtLxlHLDmpA7W7tXlEdYzgkIiKiysnlnPT6HsPLykREREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiKJzYTDCxcuYMyYMWjRogUcHR3RqlUrzJ49GyUlJSZ1f/75Jx544AGoVCr4+/tjwYIFZuvasGED2rVrB5VKhc6dO2Pbtm0m7UIIzJo1C76+vnB0dER4eDjOnDlTp/tHREREVB9sJhympKTAYDBgxYoVOHHiBD799FMsX74cb775plSj1+vRr18/NG/eHIcPH8bChQsxZ84cfPnll1LNvn37MHToUIwZMwaJiYkYNGgQBg0ahOPHj0s1CxYswGeffYbly5fjwIEDcHJyQkREBIqKiup1n4mIiIhqm0wIIazdibqycOFCLFu2DOfPnwcALFu2DDNnzkRGRgYUCgUAYPr06di4cSNSUlIAAJGRkcjPz8eWLVuk9dx///0IDg7G8uXLIYSAn58fJk+ejDfeeAMAoNPp4O3tjVWrVuH555+vVt/0ej20Wi10Oh00Gk1t7jYRERGRiZrkDps5c2iJTqeDu7u79D4hIQEPPvigFAwBICIiAqdOncL169elmvDwcJP1REREICEhAQCQmpqKjIwMkxqtVovQ0FCpxpLi4mLo9XqTFxEREVFDY7Ph8OzZs1iyZAleeeUVaVlGRga8vb1N6ireZ2RkVFlzc/vNn7NUY8m8efOg1Wqll7+//x3uGREREVHdafDhcPr06ZDJZFW+Ki4JV7h8+TIef/xxDBkyBGPHjrVSz03NmDEDOp1Oel28eNHaXSIiIiIyY2/tDtzO5MmTMXLkyCprWrZsKf13eno6+vbti169epkMNAEAHx8fZGZmmiyreO/j41Nlzc3tFct8fX1NaoKDgyvto1KphFKprHI/iIiIiKytwYdDT09PeHp6Vqv28uXL6Nu3L0JCQhATEwM7O9MTo2FhYZg5cyZKS0vh4OAAAIiLi0Pbtm3h5uYm1cTHx2PSpEnS5+Li4hAWFgYAaNGiBXx8fBAfHy+FQb1ejwMHDmDChAl3ubdERERE1tXgLytX1+XLl/Hwww8jICAAH330Ea5evYqMjAyT+wBfeOEFKBQKjBkzBidOnMD69euxePFiREdHSzX/+te/EBsbi48//hgpKSmYM2cODh06hIkTJwIAZDIZJk2ahPfffx+bN2/GsWPHMHz4cPj5+WHQoEH1vdtEREREtarBnzmsrri4OJw9exZnz55Fs2bNTNoqZuvRarXYuXMnoqKiEBISAg8PD8yaNQvjxo2Tanv16oW1a9firbfewptvvonWrVtj48aN6NSpk1QzdepU5OfnY9y4ccjNzUWfPn0QGxsLlUpVPztLREREVEdsep7DhozzHBIREVF94TyHRERERHRHGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHBIRERGRhOGQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHBIRERGRxCbDYXFxMYKDgyGTyZCUlGTS9ueff+KBBx6ASqWCv78/FixYYPb5DRs2oF27dlCpVOjcuTO2bdtm0i6EwKxZs+Dr6wtHR0eEh4fjzJkzdblLRERERPXCJsPh1KlT4efnZ7Zcr9ejX79+aN68OQ4fPoyFCxdizpw5+PLLL6Waffv2YejQoRgzZgwSExMxaNAgDBo0CMePH5dqFixYgM8++wzLly/HgQMH4OTkhIiICBQVFdXL/hERERHVFZkQQli7E7Vp+/btiI6Oxg8//ICOHTsiMTERwcHBAIBly5Zh5syZyMjIgEKhAABMnz4dGzduREpKCgAgMjIS+fn52LJli7TO+++/H8HBwVi+fDmEEPDz88PkyZPxxhtvAAB0Oh28vb2xatUqPP/889Xqp16vh1arhU6ng0ajqcUjQERERGSqJrnDps4cZmZmYuzYsfjPf/4DtVpt1p6QkIAHH3xQCoYAEBERgVOnTuH69etSTXh4uMnnIiIikJCQAABITU1FRkaGSY1Wq0VoaKhUY0lxcTH0er3Ji4iIiKihsZlwKITAyJEjMX78eHTv3t1iTUZGBry9vU2WVbzPyMiosubm9ps/Z6nGknnz5kGr1Uovf3//GuwdERERUf1o8OFw+vTpkMlkVb5SUlKwZMkS5OXlYcaMGdbuskUzZsyATqeTXhcvXrR2l4iIiIjM2Fu7A7czefJkjBw5ssqali1bYteuXUhISIBSqTRp6969O1588UV8++238PHxQWZmpkl7xXsfHx/pT0s1N7dXLPP19TWpqbi30RKlUmnWNyIiIqKGpsGHQ09PT3h6et627rPPPsP7778vvU9PT0dERATWr1+P0NBQAEBYWBhmzpyJ0tJSODg4AADi4uLQtm1buLm5STXx8fGYNGmStK64uDiEhYUBAFq0aAEfHx/Ex8dLYVCv1+PAgQOYMGFCbewyERERkdU0+HBYXQEBASbvnZ2dAQCtWrVCs2bNAAAvvPAC3nnnHYwZMwbTpk3D8ePHsXjxYnz66afS5/71r3/hoYcewscff4wBAwbgu+++w6FDh6TpbmQyGSZNmoT3338frVu3RosWLfD222/Dz88PgwYNqp+dJSIiIqojNhMOq0Or1WLnzp2IiopCSEgIPDw8MGvWLIwbN06q6dWrF9auXYu33noLb775Jlq3bo2NGzeiU6dOUs3UqVORn5+PcePGITc3F3369EFsbCxUKpU1douIiIio1tjcPIeNBec5JCIiovpyz85zSERERER3h+GQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkNQ6HV65cwerVq7Ft2zaUlJSYtOXn5+Pdd9+ttc4RERERUf2SCSFEdYv/+OMP9OvXDwaDAaWlpWjatCk2btyIjh07AgAyMzPh5+eH8vLyOuuwrdDr9dBqtdDpdNBoNNbuDhEREdmwmuSOGp05fPPNNzF48GBcv34dmZmZeOyxx/DQQw8hMTHxrjpMRERERA2DfU2KDx8+jKVLl8LOzg4uLi744osvEBAQgEcffRQ7duxAQEBAXfWTiIiIiOpBjcIhABQVFZm8nz59Ouzt7dGvXz988803tdYxIiIiIqp/NQqHnTp1wr59+9ClSxeT5W+88QYMBgOGDh1aq50jIiIiovpVo3sOhw8fjr1791psmzp1Kt555x1eWiYiIiJqxGo0WplqD0crExERUX2ps9HKRUVF2Lx5M/Ly8ixudPPmzSguLq5Zb4mIiIiowahROFyxYgUWL14MFxcXszaNRoPPPvsMK1eurLXOEREREVH9qlE4XLNmDSZNmlRp+6RJk/Dvf//7bvtERERERFZSo3B45swZdO3atdL2Ll264MyZM3fdKSIiIiKyjhqFw7KyMly9erXS9qtXr6KsrOyuO0VERERE1lGjcNixY0f8/PPPlbbv3LlTes4yERERETU+NQqHo0ePxnvvvYctW7aYtf3000+YO3cuRo8eXWudIyIiIqL6VaMnpIwbNw6//vornn76abRr1w5t27YFAKSkpOD06dN47rnnMG7cuDrpKBERERHVvRqdOQSA1atXY/369WjTpg1Onz6NU6dOoW3btli3bh3WrVtXF30kIiIionpSozOH5eXl+Oijj7B582aUlJTgySefxJw5c+Do6FhX/SMiIiKielSjM4cffPAB3nzzTTg7O6Np06b47LPPEBUVVVd9IyIiIqJ6VqNw+O9//xtffPEFduzYgY0bN+Knn37CmjVrYDAY6qp/RERERFSPahQO09LS8MQTT0jvw8PDIZPJkJ6eXusdIyIiIqL6V+NJsFUqlckyBwcHlJaW1mqniIiIiMg6ajQgRQiBkSNHQqlUSsuKioowfvx4ODk5Scv+7//+r/Z6SERERET1pkbhcMSIEWbLXnrppVrrDBERERFZV43CYUxMTF31g4iIiIgagBpPgk1EREREtovhkIiIiIgkDIdEREREJLG5cLh161aEhobC0dERbm5uGDRokEl7WloaBgwYALVaDS8vL0yZMgVlZWUmNXv27MF9990HpVKJoKAgrFq1ymw7S5cuRWBgIFQqFUJDQ3Hw4ME63CsiIiKi+mFT4fCHH37AsGHDMGrUKBw9ehS///47XnjhBam9vLwcAwYMQElJCfbt24dvv/0Wq1atwqxZs6Sa1NRUDBgwAH379kVSUhImTZqEl19+GTt27JBq1q9fj+joaMyePRtHjhxB165dERERgaysrHrdXyIiIqLaJhNCCGt3ojaUlZUhMDAQ77zzDsaMGWOxZvv27XjyySeRnp4Ob29vAMDy5csxbdo0XL16FQqFAtOmTcPWrVtx/Phx6XPPP/88cnNzERsbCwAIDQ1Fjx498PnnnwMADAYD/P398dprr2H69OnV6q9er4dWq4VOp4NGo7mbXSciIiKqUk1yh82cOTxy5AguX74MOzs7dOvWDb6+vujfv79JyEtISEDnzp2lYAgAERER0Ov1OHHihFQTHh5usu6IiAgkJCQAAEpKSnD48GGTGjs7O4SHh0s1lhQXF0Ov15u8iIiIiBoamwmH58+fBwDMmTMHb731FrZs2QI3Nzc8/PDDyMnJAQBkZGSYBEMA0vuMjIwqa/R6PQoLC5GdnY3y8nKLNRXrsGTevHnQarXSy9/f/+52mIiIiKgONPhwOH36dMhksipfKSkpMBgMAICZM2fi2WefRUhICGJiYiCTybBhwwYr7wUwY8YM6HQ66XXx4kVrd4mIiIjITI2ekGINkydPxsiRI6usadmyJa5cuQIA6NChg7RcqVSiZcuWSEtLAwD4+PiYjSrOzMyU2ir+rFh2c41Go4GjoyPkcjnkcrnFmop1WKJUKk2eSU1ERETUEDX4cOjp6QlPT8/b1oWEhECpVOLUqVPo06cPAKC0tBQXLlxA8+bNAQBhYWGYO3cusrKy4OXlBQCIi4uDRqORQmVYWBi2bdtmsu64uDiEhYUBABQKBUJCQhAfHy9Nk2MwGBAfH4+JEyfWyj4TERERWUuDv6xcXRqNBuPHj8fs2bOxc+dOnDp1ChMmTAAADBkyBADQr18/dOjQAcOGDcPRo0exY8cOvPXWW4iKipLO6o0fPx7nz5/H1KlTkZKSgi+++ALff/89Xn/9dWlb0dHRWLlyJb799lskJydjwoQJyM/Px6hRo+p/x4mIiIhqUYM/c1gTCxcuhL29PYYNG4bCwkKEhoZi165dcHNzAwDI5XJs2bIFEyZMQFhYGJycnDBixAi8++670jpatGiBrVu34vXXX8fixYvRrFkzfPXVV4iIiJBqIiMjcfXqVcyaNQsZGRkIDg5GbGys2SAVIiIiosbGZuY5bGw4zyERERHVl3tynkMiIiIiunsMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHBIRERGRhOGQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEhiU+Hw9OnTGDhwIDw8PKDRaNCnTx/s3r3bpCYtLQ0DBgyAWq2Gl5cXpkyZgrKyMpOaPXv24L777oNSqURQUBBWrVpltq2lS5ciMDAQKpUKoaGhOHjwYF3uGhEREVG9sLd2B2rTk08+idatW2PXrl1wdHTEokWL8OSTT+LcuXPw8fFBeXk5BgwYAB8fH+zbtw9XrlzB8OHD4eDggA8++AAAkJqaigEDBmD8+PFYs2YN4uPj8fLLL8PX1xcREREAgPXr1yM6OhrLly9HaGgoFi1ahIiICJw6dQpeXl61uk/l5eUoLS2t1XU2FA4ODpDL5dbuBhEREd1EJoQQ1u5EbcjOzoanpyd+/fVXPPDAAwCAvLw8aDQaxMXFITw8HNu3b8eTTz6J9PR0eHt7AwCWL1+OadOm4erVq1AoFJg2bRq2bt2K48ePS+t+/vnnkZubi9jYWABAaGgoevTogc8//xwAYDAY4O/vj9deew3Tp0+vVn/1ej20Wi10Oh00Go1ZuxACGRkZyM3NvZvD0uC5urrCx8cHMpnM2l0hIiKyWbfLHTezmTOHTZo0Qdu2bfHvf/9buiS8YsUKeHl5ISQkBACQkJCAzp07S8EQACIiIjBhwgScOHEC3bp1Q0JCAsLDw03WHRERgUmTJgEASkpKcPjwYcyYMUNqt7OzQ3h4OBISEirtX3FxMYqLi6X3er2+yv2pCIZeXl5Qq9U2F56EECgoKEBWVhYAwNfX18o9IiIiIsCGwqFMJsPPP/+MQYMGwcXFBXZ2dvDy8kJsbCzc3NwAGAPXzcEQgPQ+IyOjyhq9Xo/CwkJcv34d5eXlFmtSUlIq7d+8efPwzjvvVGtfysvLpWDYpEmTan2mMXJ0dAQAZGVlwcvLi5eYiYiIGoAGPyBl+vTpkMlkVb5SUlIghEBUVBS8vLzw22+/4eDBgxg0aBCeeuopXLlyxdq7gRkzZkCn00mvixcvVlpbcY+hWq2ur+5ZTcU+2up9lURERI1Ngz9zOHnyZIwcObLKmpYtW2LXrl3YsmULrl+/Ll1L/+KLLxAXF4dvv/0W06dPh4+Pj9mo4szMTACAj4+P9GfFsptrNBoNHB0dIZfLIZfLLdZUrMMSpVIJpVJZrX2uYGuXki25F/aRiIioMWnw4dDT0xOenp63rSsoKABgvP/vZnZ2djAYDACAsLAwzJ07V7qMCQBxcXHQaDTo0KGDVLNt2zaTdcTFxSEsLAwAoFAoEBISgvj4eAwaNAiAcUBKfHw8Jk6ceOc7SkRERNQANPjLytUVFhYGNzc3jBgxAkePHsXp06cxZcoUaWoaAOjXrx86dOiAYcOG4ejRo9ixYwfeeustREVFSWf1xo8fj/Pnz2Pq1KlISUnBF198ge+//x6vv/66tK3o6GisXLkS3377LZKTkzFhwgTk5+dj1KhRVtn3ChcuXIBMJkNSUpJV+0FERESNl82EQw8PD8TGxuLGjRt45JFH0L17d+zduxebNm1C165dAQByuRxbtmyBXC5HWFgYXnrpJQwfPhzvvvuutJ4WLVpg69atiIuLQ9euXfHxxx/jq6++kuY4BIDIyEh89NFHmDVrFoKDg5GUlITY2FizQSqN3apVq+Dq6mrtbhAREVE9avCXlWuie/fu2LFjR5U1zZs3N7tsfKuHH34YiYmJVdZMnDiRl5Grqby8HDKZzOySPxERETU8/Ne6ETIYDFiwYAGCgoKgVCoREBCAuXPnmtVZOvO3ceNGk0EgR48eRd++feHi4gKNRoOQkBAcOnQIe/bswahRo6DT6aRR4XPmzAFgnLPxjTfeQNOmTeHk5ITQ0FDs2bPHbLubN29Ghw4doFQqkZaWVheHgoiIiGqZTZ05vFfMmDEDK1euxKeffoo+ffrgypUrVc6xWJUXX3wR3bp1w7JlyyCXy5GUlAQHBwf06tULixYtwqxZs3Dq1CkAgLOzMwDjWdOTJ0/iu+++g5+fH3788Uc8/vjjOHbsGFq3bg3AOEDoww8/xFdffYUmTZrU+mMFiYiIqG4wHDYyeXl5WLx4MT7//HOMGDECANCqVSv06dMHFy5cqPH60tLSMGXKFLRr1w4ApHAHAFqtFjKZzGSKnrS0NMTExCAtLQ1+fn4AgDfeeAOxsbGIiYmRnlFdWlqKL774Qrrfk4iIiBoHhsNGJjk5GcXFxXj00UdrZX3R0dF4+eWX8Z///Afh4eEYMmQIWrVqVWn9sWPHUF5ejjZt2pgsLy4uNnmai0KhQJcuXWqlj0RERFR/GA4bmYpHzlWHnZ0dhBAmy259EsmcOXPwwgsvYOvWrdi+fTtmz56N7777DoMHD7a4zhs3bkAul+Pw4cNmj7uruOxc0U9OcE1ERNT4cEBKI9O6dWs4OjoiPj7+trWenp7Iy8tDfn6+tMzSHIht2rTB66+/jp07d+KZZ55BTEwMAOPZv/LycpPabt26oby8HFlZWQgKCjJ5VfWEGCIiImocGA4bGZVKhWnTpmHq1Kn497//jXPnzmH//v34+uuvzWpDQ0OhVqvx5ptv4ty5c1i7di1WrVoltRcWFmLixInYs2cP/vrrL/z+++/4448/0L59ewBAYGAgbty4gfj4eGRnZ6OgoABt2rTBiy++iOHDh+P//u//kJqaioMHD2LevHnYunVrfR0GIiIiqiMMh43Q22+/jcmTJ2PWrFlo3749IiMjkZWVZVbn7u6O1atXY9u2bejcuTPWrVsnTUcDGCcFv3btGoYPH442bdrgueeeQ//+/fHOO+8AAHr16oXx48cjMjISnp6eWLBgAQAgJiYGw4cPx+TJk9G2bVsMGjQIf/zxBwICAupl/4nINhhKS1Fy+TKKz51DyeXLMJSUWLtLRARAJm69KY3qhV6vh1arhU6ng0ajMWkrKipCamoqWrRoAZVKZaUe1o97aV+J6G9l2dm4vn49cr6JgSE/HzJHR7gNHQr3USPh4Olp7e4R2ZyqcseteOaQiIjqlaGwENlffYXsJZ/D8L97okVhIXK++QZZH32M8rw8K/eQ6N7GcEhERPWqLDsb19estdim37wZZdeu1XOPiOhmDIdERFSvynU64JZptSRCoJzhkMiqGA6JiKhe2d1mvlY7J6d66gkRWcJwSERE9Uru7g5V584W2xSBgZDf9LQlIqp/DIdERFSv7N3c0PTjj+Dg72+63MsLzb5YytHKRFbGx+cREVG9UwQEoPma1Si9eBHF51OhCAiAonkAHPikJSKrYzgkIiKrcPDygoOXF9QhIdbuChHdhJeViYiIiEjCcEhEREREEoZDqjXz5s1Djx494OLiAi8vLwwaNAinTp2ydreIiIioBhgObZiuoATnsm4gMe06zl29AV1B3T7U/pdffkFUVBT279+PuLg4lJaWol+/fsj/3+OxiIiIqOHjgBQblZ5biGk//InfzmRLyx5s7YH5z3aBn2vVE9DeqdjYWJP3q1atgpeXFw4fPowHH3ywTrZJREREtYtnDm2QrqDELBgCwK9nsjH9hz/r/Ayi1A+dDgDg7u5eL9sjIiKiu8dwaIOyb5SYBcMKv57JRvaNug+HBoMBkyZNQu/evdGpU6c63x4RERHVDl5WtkH6okoeaP8/ebdprw1RUVE4fvw49u7dW+fbIiIiotrDcGiDNCqHKttdbtN+tyZOnIgtW7bg119/RbNmzep0W0RERFS7eFnZBnk4K/Bgaw+LbQ+29oCHs6JOtiuEwMSJE/Hjjz9i165daNGiRZ1sh4iIiOoOw6EN0qoVmP9sF7OA+GBrD3z4bBdo1XUTDqOiorB69WqsXbsWLi4uyMjIQEZGBgoLC+tke0RERFT7eFnZRvm5OmLJ0G7IvlGCvKJSuKgc4OGsqLNgCADLli0DADz88MMmy2NiYjBy5Mg62y4RERHVHoZDG6ZV120YvJUQot62RURERHWDl5WJiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJAyHRERERCRhOCQiIiIiCcMhEREREUkYDomIiIhIwnBIRERERBKGQ1tWeB3IPg1cOgRknzG+r2O//vornnrqKfj5+UEmk2Hjxo11vk0iIiKqPXy2sq3SXQY2TQTO7/p7WatHgaeXANqmdbbZ/Px8dO3aFaNHj8YzzzxTZ9shIiKiusFwaIsKr5sHQwA4Fw9sfg34x9eAo1udbLp///7o379/naybiIiI6h4vK9ui/KvmwbDCuXhjOxEREZEFDIe2qEh/d+1ERER0z2I4tEUqzd21ExER0T2r0YTDuXPnolevXlCr1XB1dbVYk5aWhgEDBkCtVsPLywtTpkxBWVmZSc2ePXtw3333QalUIigoCKtWrTJbz9KlSxEYGAiVSoXQ0FAcPHjQpL2oqAhRUVFo0qQJnJ2d8eyzzyIzM7O2dvXuOXkaB59Y0upRYzsRERGRBY0mHJaUlGDIkCGYMGGCxfby8nIMGDAAJSUl2LdvH7799lusWrUKs2bNkmpSU1MxYMAA9O3bF0lJSZg0aRJefvll7NixQ6pZv349oqOjMXv2bBw5cgRdu3ZFREQEsrKypJrXX38dP/30EzZs2IBffvkF6enpDWtkrqObcVTyrQGxYrRyHQ1GISIiIhsgGpmYmBih1WrNlm/btk3Y2dmJjIwMadmyZcuERqMRxcXFQgghpk6dKjp27GjyucjISBERESG979mzp4iKipLel5eXCz8/PzFv3jwhhBC5ubnCwcFBbNiwQapJTk4WAERCQkK190On0wkAQqfTmbUVFhaKkydPisLCwmqvz6KCHCGunhLi4h/GPwty7m591ZCXlycSExNFYmKiACA++eQTkZiYKP766y+L9bW2r0RERFSpqnLHrRrNmcPbSUhIQOfOneHt7S0ti4iIgF6vx4kTJ6Sa8PBwk89FREQgISEBgPHs5OHDh01q7OzsEB4eLtUcPnwYpaWlJjXt2rVDQECAVGNJcXEx9Hq9yavOOboBHm2AZt2Nf9bDGcNDhw6hW7du6NatGwAgOjoa3bp1MzmDS0RERA2XzcxzmJGRYRIMAUjvMzIyqqzR6/UoLCzE9evXUV5ebrEmJSVFWodCoTC779Hb21vajiXz5s3DO++8c0f71pg8/PDDEEJYuxtERER0h6x65nD69OmQyWRVvipCWWM3Y8YM6HQ66XXx4kVrd4mIiIjIjFXPHE6ePBkjR46ssqZly5bVWpePj4/ZqOKKEcQ+Pj7Sn7eOKs7MzIRGo4GjoyPkcjnkcrnFmpvXUVJSgtzcXJOzhzfXWKJUKqFUKqu1L0RERETWYtUzh56enmjXrl2VL4VCUa11hYWF4dixYyajiuPi4qDRaNChQwepJj4+3uRzcXFxCAsLAwAoFAqEhISY1BgMBsTHx0s1ISEhcHBwMKk5deoU0tLSpBoiIiKixqrR3HOYlpaGnJwcpKWloby8HElJSQCAoKAgODs7o1+/fujQoQOGDRuGBQsWICMjA2+99RaioqKkM3bjx4/H559/jqlTp2L06NHYtWsXvv/+e2zdulXaTnR0NEaMGIHu3bujZ8+eWLRoEfLz8zFq1CgAgFarxZgxYxAdHQ13d3doNBq89tprCAsLw/3331/vx4WIiIioVtX94OnaMWLECAHA7LV7926p5sKFC6J///7C0dFReHh4iMmTJ4vS0lKT9ezevVsEBwcLhUIhWrZsKWJiYsy2tWTJEhEQECAUCoXo2bOn2L9/v0l7YWGhePXVV4Wbm5tQq9Vi8ODB4sqVKzXan3qZyqYRuJf2lYiIyFpqMpWNTAgOLbUGvV4PrVYLnU4Hjcb0cXZFRUVITU1FixYtoFKprNTD+nEv7SsREZG1VJU7bmUz8xwSERER0d1jOCQiIiIiCcMhEREREUkYDomIiIhIwnBow3TFOqTqUvHn1T+RqkuFrlhXL9tdunQpAgMDoVKpEBoaajY5ORERETVcjWaeQ6qZjPwMzN43G/vS90nLevv1xpxec+DjVPmTXO7W+vXrER0djeXLlyM0NBSLFi1CREQETp06BS8vrzrbLhEREdUOnjm0QbpinVkwBIDf03/HnH1z6vQM4ieffIKxY8di1KhR6NChA5YvXw61Wo1vvvmmzrZJREREtYfh0AblFOWYBcMKv6f/jpyinDrZbklJCQ4fPozw8HBpmZ2dHcLDw5GQkFAn2yQiIqLaxXBog/JK8u6q/U5lZ2ejvLwc3t7eJsu9vb2RkZFRJ9skIiKi2sVwaINcFC531U5ERET3LoZDG+Suckdvv94W23r79Ya7yr1Otuvh4QG5XI7MzEyT5ZmZmfDxqbtBMERERFR7GA5tkFapxZxec8wCYsVoZa1SWyfbVSgUCAkJQXx8vLTMYDAgPj4eYWFhdbJNIiIiql2cysZG+Tj54MMHP0ROUQ7ySvLgonCBu8q9zoJhhejoaIwYMQLdu3dHz549sWjRIuTn52PUqFF1ul0iIiKqHQyHNkyr1NZ5GLxVZGQkrl69ilmzZiEjIwPBwcGIjY01G6RCREREDRPDIdW6iRMnYuLEidbuBhEREd0B3nNIRERERBKGQyIiIiKSMBwSERERkYThkIiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHNqwMp0OxefPo/DoURSfT0WZTlen21u2bBm6dOkCjUYDjUaDsLAwbN++vU63SURERLWLz1a2UaVXMpD+1lso+P13aZm6Tx/4vfceHHx96mSbzZo1w/z589G6dWsIIfDtt99i4MCBSExMRMeOHetkm0RERFS7eObQBpXpdGbBEAAK9u5F+ttv19kZxKeeegpPPPEEWrdujTZt2mDu3LlwdnbG/v3762R7REREVPt45tAGlV+7ZhYMKxTs3Yvya9dgr9XWbR/Ky7Fhwwbk5+cjLCysTrdFRES1ozQrC8VnzyJ/717Ye3rCuW9f2Ht7Q+7oaO2uUT1iOLRBhry827TfqLNtHzt2DGFhYSgqKoKzszN+/PFHdOjQoc62R0REtaP0SgYuvvIKik+flpZlLVgIvwUL4Bz+KAPiPYSXlW2QnYvLbdqd62zbbdu2RVJSEg4cOIAJEyZgxIgROHnyZJ1tj4iI7p6huBjZX64wCYYAACGQPm0ayrKyrNMxsgqGQxskb9IE6j59LLap+/SBvEmTOtu2QqFAUFAQQkJCMG/ePHTt2hWLFy+us+0REdHdK792Dbr/+9Fyo8GA/L2Wb1Ui28RwaIPstVr4vfeeWUBU9+kDv/ffq/P7DW9mMBhQXFxcb9sjIqKaEwYDRBV/V5fnXKvH3pC18Z5DG+Xg64OmH3+E8mvXYMi7ATsXZ8ibNKnTYDhjxgz0798fAQEByMvLw9q1a7Fnzx7s2LGjzrZJRER3z06thrJ9exQnJ1tsd+rVq557RNbEcGjD7LXaej1LmJWVheHDh+PKlSvQarXo0qULduzYgccee6ze+kBERDVn7+4O75lvIm3YcEAIkzZVly5wCAiwUs/IGhgOqdZ8/fXX1u4CERHdIVXHjmi+ZjUyP5iHouPHYeekhuvzQ+E+fBgcPD2t3T2qRwyHREREBLmjI9T33Qf/lV/CUFAAmVwOeZMmsHNwsHbXqJ4xHBIREZHE3s0NcHOzdjfIijhamYiIiIgkDIdEREREJGE4bMDELSPGbNG9sI9ERESNCcNhA+Twv5t/CwoKrNyTulexjw684ZmIiKhB4ICUBkgul8PV1RVZ/3uWpVqthkwms3KvapcQAgUFBcjKyoKrqyvkcrm1u0RERERgOGywfHx8AEAKiLbK1dVV2lciIiKyPobDBkomk8HX1xdeXl4oLS21dnfqhIODA88YEhERNTAMhw2cXC5ngCIiIqJ602gGpMydOxe9evWCWq2Gq6urWfvRo0cxdOhQ+Pv7w9HREe3bt8fixYvN6vbs2YP77rsPSqUSQUFBWLVqlVnN0qVLERgYCJVKhdDQUBw8eNCkvaioCFFRUWjSpAmcnZ3x7LPPIjMzs7Z2lYiIiMhqGk04LCkpwZAhQzBhwgSL7YcPH4aXlxdWr16NEydOYObMmZgxYwY+//xzqSY1NRUDBgxA3759kZSUhEmTJuHll1/Gjh07pJr169cjOjoas2fPxpEjR9C1a1dERESY3Pv3+uuv46effsKGDRvwyy+/ID09Hc8880zd7TwRERFRPZGJRjbR3KpVqzBp0iTk5ubetjYqKgrJycnYtWsXAGDatGnYunUrjh8/LtU8//zzyM3NRWxsLAAgNDQUPXr0kEKlwWCAv78/XnvtNUyfPh06nQ6enp5Yu3Yt/vGPfwAAUlJS0L59eyQkJOD++++32Jfi4mIUFxdL7/V6Pfz9/aHT6aDRaO7oWBARERFVh16vh1arrVbusOl7DnU6Hdzd3aX3CQkJCA8PN6mJiIjApEmTABjPTh4+fBgzZsyQ2u3s7BAeHo6EhAQAxjOUpaWlJutp164dAgICqgyH8+bNwzvvvGO2XK/X3/H+EREREVVHRd6ozjlBmw2H+/btw/r167F161ZpWUZGBry9vU3qvL29odfrUVhYiOvXr6O8vNxiTUpKirQOhUJhdt+jt7c3MjIyKu3PjBkzEB0dLb2/fPkyOnToAH9//zvdRSIiIqIaycvLg1arrbLGquFw+vTp+PDDD6usSU5ORrt27Wq03uPHj2PgwIGYPXs2+vXrdzddrDVKpRJKpVJ67+zsjIsXL8LFxQUymUy6zHzx4kVeZr4Jj4s5HhPLeFws43GxjMfFHI+JZbZyXIQQyMvLg5+f321rrRoOJ0+ejJEjR1ZZ07Jlyxqt8+TJk3j00Ucxbtw4vPXWWyZtPj4+ZqOKMzMzodFo4OjoKE0bY6mmYqJmHx8flJSUIDc31+Ts4c011WFnZ4dmzZqZLddoNI36y1dXeFzM8ZhYxuNiGY+LZTwu5nhMLLOF43K7M4YVrBoOPT094enpWWvrO3HiBB555BGMGDECc+fONWsPCwvDtm3bTJbFxcUhLCwMAKBQKBASEoL4+HgMGjQIgHFASnx8PCZOnAgACAkJgYODA+Lj4/Hss88CAE6dOoW0tDRpPURERESNVaO55zAtLQ05OTlIS0tDeXk5kpKSAABBQUFwdnbG8ePH8cgjjyAiIgLR0dHS/X9yuVwKoOPHj8fnn3+OqVOnYvTo0di1axe+//57k/sSo6OjMWLECHTv3h09e/bEokWLkJ+fj1GjRgEwpu4xY8YgOjoa7u7u0Gg0eO211xAWFlbpYBQiIiKiRkM0EiNGjBAAzF67d+8WQggxe/Zsi+3Nmzc3Wc/u3btFcHCwUCgUomXLliImJsZsW0uWLBEBAQFCoVCInj17iv3795u0FxYWildffVW4ubkJtVotBg8eLK5cuXJX+1dUVCRmz54tioqK7mo9tobHxRyPiWU8LpbxuFjG42KOx8Sye/G4NLp5DomIiIio7jSaJ6QQERERUd1jOCQiIiIiCcMhEREREUkYDomIiIhIwnBYy55++mkEBARApVLB19cXw4YNQ3p6uknNn3/+iQceeAAqlQr+/v5YsGCB2Xo2bNiAdu3aQaVSoXPnzmbzMwohMGvWLPj6+sLR0RHh4eE4c+aMSU1OTg5efPFFaDQauLq6YsyYMbhx40bt73QVLly4gDFjxqBFixZwdHREq1atMHv2bJSUlJjUyGQys9f+/ftN1mUrxwSo3nEB7q3vSoW5c+eiV69eUKvVZo+prGDp+/Ldd9+Z1OzZswf33XcflEolgoKCsGrVKrP1LF26FIGBgVCpVAgNDcXBgwdN2ouKihAVFYUmTZrA2dkZzz77rNkk+fWlOsclLS0NAwYMgFqthpeXF6ZMmYKysjKTGls7LrcKDAw0+27Mnz/fpKa+fq8am9v93BuzOXPmmH0vbn76WnW+07X1+9UoWHOotC365JNPREJCgrhw4YL4/fffRVhYmAgLC5PadTqd8Pb2Fi+++KI4fvy4WLdunXB0dBQrVqyQan7//Xchl8vFggULxMmTJ8Vbb70lHBwcxLFjx6Sa+fPnC61WKzZu3CiOHj0qnn76adGiRQtRWFgo1Tz++OOia9euYv/+/eK3334TQUFBYujQofVzIP5n+/btYuTIkWLHjh3i3LlzYtOmTcLLy0tMnjxZqklNTRUAxM8//yyuXLkivUpKSqQaWzomQlTvuNxr35UKs2bNEp988omIjo4WWq3WYg0AERMTY/J9uXl/zp8/L9RqtYiOjhYnT54US5YsEXK5XMTGxko13333nVAoFOKbb74RJ06cEGPHjhWurq4iMzNTqhk/frzw9/cX8fHx4tChQ+L+++8XvXr1qrN9r8rtjktZWZno1KmTCA8PF4mJiWLbtm3Cw8NDzJgxQ6qxxeNyq+bNm4t3333X5Ltx48YNqb0+f68ak+r83Buz2bNni44dO5p8L65evSq13+47XVu/X40Fw2Ed27Rpk5DJZFLQ+eKLL4Sbm5soLi6WaqZNmybatm0rvX/uuefEgAEDTNYTGhoqXnnlFSGEEAaDQfj4+IiFCxdK7bm5uUKpVIp169YJIYQ4efKkACD++OMPqWb79u1CJpOJy5cv1/6O1sCCBQtEixYtpPcV4TAxMbHSz9j6MRHC/Ljc69+VmJiYKsPhjz/+WOlnp06dKjp27GiyLDIyUkREREjve/bsKaKioqT35eXlws/PT8ybN08IYTxODg4OYsOGDVJNcnKyACASEhLuYI9qR2XHZdu2bcLOzk5kZGRIy5YtWyY0Go30HbLl41KhefPm4tNPP620vb5+rxqb2/3cG7vZs2eLrl27Wmyrzne6tn6/GgteVq5DOTk5WLNmDXr16gUHBwcAQEJCAh588EEoFAqpLiIiAqdOncL169elmvDwcJN1RUREICEhAQCQmpqKjIwMkxqtVovQ0FCpJiEhAa6urujevbtUEx4eDjs7Oxw4cKBudriadDod3N3dzZY//fTT8PLyQp8+fbB582aTNls/JoD5ceF3pWpRUVHw8PBAz5498c0330DcNGXr7Y5LSUkJDh8+bFJjZ2eH8PBwqebw4cMoLS01qWnXrh0CAgKkmoYkISEBnTt3hre3t7QsIiICer0eJ06ckGruheMyf/58NGnSBN26dcPChQtNLv3V1+9VY1Kdn7stOHPmDPz8/NCyZUu8+OKLSEtLA1C973Rt/H41JgyHdWDatGlwcnJCkyZNkJaWhk2bNkltGRkZJl8uANL7ikf+VVZzc/vNn6usxsvLy6Td3t4e7u7uUo01nD17FkuWLMErr7wiLXN2dsbHH3+MDRs2YOvWrejTpw8GDRpkEhBt+ZgAlo/Lvf5dqcq7776L77//HnFxcXj22Wfx6quvYsmSJVJ7ZcdFr9ejsLAQ2dnZKC8vv+1xUSgUZvf33VzTkNzN98WWjss///lPfPfdd9i9ezdeeeUVfPDBB5g6darUXl+/V41JdX7ujV1oaChWrVqF2NhYLFu2DKmpqXjggQeQl5dXre90bfx+NSYMh9Uwffp0izfA3/xKSUmR6qdMmYLExETs3LkTcrkcw4cPNzmrYQtqekwA4PLly3j88ccxZMgQjB07Vlru4eGB6OhohIaGokePHpg/fz5eeuklLFy4sL53667V5nGxJXdyXKry9ttvo3fv3ujWrRumTZuGqVOn3jPfl3tRTY5TdHQ0Hn74YXTp0gXjx4/Hxx9/jCVLlqC4uNjKe0HW1L9/fwwZMgRdunRBREQEtm3bhtzcXHz//ffW7lqDZG/tDjQGkydPxsiRI6usadmypfTfHh4e8PDwQJs2bdC+fXv4+/tj//79CAsLg4+Pj9kIqIr3Pj4+0p+Wam5ur1jm6+trUhMcHCzVZGVlmayjrKwMOTk50ufvRk2PSXp6Ovr27YtevXrhyy+/vO36Q0NDERcXJ71vDMcEqN3jYivfFaDmx6WmQkND8d5776G4uBhKpbLS46LRaODo6Ai5XA65XH7bY1dSUoLc3FyTMwo319yt2jwuPj4+ZqNLq/t9aWjH5VZ3c5xCQ0NRVlaGCxcuoG3btvX2e9WYeHh43PbnbmtcXV3Rpk0bnD17Fo899thtv9O18fvVqFj7pkdb99dffwkAYvfu3UKIv2+Gvnkk7owZM8xuhn7yySdN1hMWFmZ2M/RHH30ktet0OouDDA4dOiTV7NixwyqDDC5duiRat24tnn/+eVFWVlatz7z88suiW7du0ntbOyZC3P643IvflZtVNSDlVu+//75wc3OT3k+dOlV06tTJpGbo0KFmAy8mTpwovS8vLxdNmzY1G3jx3//+V6pJSUmx+sCL2w1IuXl06YoVK4RGoxFFRUVCCNs+LpVZvXq1sLOzEzk5OUKI+vu9amxu93O3NXl5ecLNzU0sXry4Wt/p2vr9aiwYDmvR/v37xZIlS0RiYqK4cOGCiI+PF7169RKtWrWSvjy5ubnC29tbDBs2TBw/flx89913Qq1Wm02jYG9vLz766CORnJwsZs+ebXEaBVdXV7Fp0ybx559/ioEDB1qcnqRbt27iwIEDYu/evaJ169b1Pj3JpUuXRFBQkHj00UfFpUuXTKYRqLBq1Sqxdu1akZycLJKTk8XcuXOFnZ2d+Oabb6QaWzomQlTvuNxr35UKf/31l0hMTBTvvPOOcHZ2FomJiSIxMVHk5eUJIYTYvHmzWLlypTh27Jg4c+aM+OKLL4RarRazZs2S1lExpcSUKVNEcnKyWLp0qcUpW5RKpVi1apU4efKkGDdunHB1dTUZjTh+/HgREBAgdu3aJQ4dOmQ2NVV9ut1xqZhqo1+/fiIpKUnExsYKT09Pi1Nt2NJxudm+ffvEp59+KpKSksS5c+fE6tWrhaenpxg+fLhUU5+/V41JdX7ujdnkyZPFnj17RGpqqvj9999FeHi48PDwEFlZWUKI23+na+v3q7FgOKxFf/75p+jbt69wd3cXSqVSBAYGivHjx4tLly6Z1B09elT06dNHKJVK0bRpUzF//nyzdX3//feiTZs2QqFQiI4dO4qtW7eatBsMBvH2228Lb29voVQqxaOPPipOnTplUnPt2jUxdOhQ4ezsLDQajRg1apT0D0l9iYmJEQAsviqsWrVKtG/fXqjVaqHRaETPnj1NphSoYCvHRIjqHRch7q3vSoURI0ZYPC4VZ9+3b98ugoODhbOzs3BychJdu3YVy5cvF+Xl5Sbr2b17twgODhYKhUK0bNlSxMTEmG1ryZIlIiAgQCgUCtGzZ0+xf/9+k/bCwkLx6quvCjc3N6FWq8XgwYNNAnx9ut1xEUKICxcuiP79+wtHR0fh4eEhJk+eLEpLS03WY2vH5WaHDx8WoaGhQqvVCpVKJdq3by8++OAD6X/OK9TX71Vjc7ufe2MWGRkpfH19hUKhEE2bNhWRkZHi7NmzUnt1vtO19fvVGMiEsLGREkRERER0xzhamYiIiIgkDIdEREREJGE4JCIiIiIJwyERERERSRgOiYiIiEjCcEhEREREEoZDIiIiIpIwHBIRERGRhOGQiIiIiCQMh0REVjZy5EjIZDLIZDIoFAoEBQXh3XffRVlZGQBACIEvv/wSoaGhcHZ2hqurK7p3745FixahoKAAAHDixAk8++yzCAwMhEwmw6JFi6y4R0TUmDEcEhE1AI8//jiuXLmCM2fOYPLkyZgzZw4WLlwIABg2bBgmTZqEgQMHYvfu3UhKSsLbb7+NTZs2YefOnQCAgoICtGzZEvPnz4ePj481d4WIGjk+W5mIyMpGjhyJ3NxcbNy4UVrWr18/5OXl4fXXX0dkZCQ2btyIgQMHmnxOCAG9Xg+tVmuyPDAwEJMmTcKkSZPqofdEZGt45pCIqAFydHRESUkJ1qxZg7Zt25oFQwCQyWRmwZCI6G4xHBIRNSBCCPz888/YsWMHHnnkEZw5cwZt27a1dreI6B7CcEhE1ABs2bIFzs7OUKlU6N+/PyIjIzFnzhzwzh8iqm/21u4AEREBffv2xbJly6BQKODn5wd7e+Nfz23atEFKSoqVe0dE9xKeOSQiagCcnJwQFBSEgIAAKRgCwAsvvIDTp09j06ZNZp8RQkCn09VnN4noHsBwSETUgD333HOIjIzE0KFD8cEHH+DQoUP466+/sGXLFoSHh2P37t0AgJKSEiQlJSEpKQklJSW4fPkykpKScPbsWSvvARE1NpzKhojIyixNZXMzg8GAL7/8Et988w1OnDgBe3t7tG7dGsOHD8fYsWPh6OiICxcuoEWLFmaffeihh7Bnz5663QEisikMh0REREQk4WVlIiIiIpIwHBIRERGRhOGQiIiIiCQMh0REREQkYTgkIiIiIgnDIRERERFJGA6JiIiISMJwSEREREQShkMiIiIikjAcEhEREZGE4ZCIiIiIJP8P6iUtA/HMYEcAAAAASUVORK5CYII=",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "#voir si cluster separer visuellemt \n",
+ "# ============================================================\n",
+ "# 10) Visualisation PCA d'un fond\n",
+ "# ============================================================\n",
+ "\n",
+ "example_fund = list(best_partitions.keys())[-1] #2 clusters: 7 et 211\n",
+ "\n",
+ "df_plot = best_partitions[example_fund][\"data\"].copy()\n",
+ "feature_cols = fund_results[example_fund][\"features\"]\n",
+ "X_scaled = prep_matrix(df_plot, feature_cols)\n",
+ "\n",
+ "pca = PCA(n_components=2, random_state=RANDOM_STATE)\n",
+ "X_pca = pca.fit_transform(X_scaled)\n",
+ "\n",
+ "plot_df = pd.DataFrame({\n",
+ " \"PC1\": X_pca[:, 0],\n",
+ " \"PC2\": X_pca[:, 1],\n",
+ " \"cluster\": df_plot[\"cluster\"].astype(str)\n",
+ "})\n",
+ "\n",
+ "plt.figure(figsize=(7, 5))\n",
+ "sns.scatterplot(data=plot_df, x=\"PC1\", y=\"PC2\", hue=\"cluster\")\n",
+ "plt.title(f\"PCA - {example_fund}\")\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "c38c38bf-0dad-4baf-9d2d-acd4087b3bc3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | cluster | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | fund_weight_mean | \n",
+ " 0.002533 | \n",
+ " 0.025296 | \n",
+ " 0.107030 | \n",
+ " 0.411546 | \n",
+ "
\n",
+ " \n",
+ " | fund_weight_last | \n",
+ " 0.000000 | \n",
+ " 0.016294 | \n",
+ " 0.000065 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | fund_aum_mean | \n",
+ " 862.764360 | \n",
+ " 4892.213434 | \n",
+ " 17080.303897 | \n",
+ " 198436.767999 | \n",
+ "
\n",
+ " \n",
+ " | fund_aum_last | \n",
+ " 0.000000 | \n",
+ " 1587.012888 | \n",
+ " 1.060556 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | held_month_share | \n",
+ " 0.757053 | \n",
+ " 0.765711 | \n",
+ " 0.957997 | \n",
+ " 0.546661 | \n",
+ "
\n",
+ " \n",
+ " | active_month_share | \n",
+ " 0.140641 | \n",
+ " 0.238654 | \n",
+ " 0.392454 | \n",
+ " 0.535638 | \n",
+ "
\n",
+ " \n",
+ " | entry_count | \n",
+ " 1.500000 | \n",
+ " 1.526829 | \n",
+ " 1.222222 | \n",
+ " 2.500000 | \n",
+ "
\n",
+ " \n",
+ " | exit_count | \n",
+ " 1.500000 | \n",
+ " 0.907317 | \n",
+ " 1.111111 | \n",
+ " 2.500000 | \n",
+ "
\n",
+ " \n",
+ " | turnover_mean | \n",
+ " 2.568131 | \n",
+ " 1.852132 | \n",
+ " 4.854434 | \n",
+ " 1282.868308 | \n",
+ "
\n",
+ " \n",
+ " | turnover_6m_mean | \n",
+ " 40.589203 | \n",
+ " 0.449401 | \n",
+ " 12.469088 | \n",
+ " 44.754050 | \n",
+ "
\n",
+ " \n",
+ " | flow_to_aum_mean | \n",
+ " -2.567005 | \n",
+ " -1.111582 | \n",
+ " -2.290458 | \n",
+ " -586.056551 | \n",
+ "
\n",
+ " \n",
+ " | flow_to_aum_6m_mean | \n",
+ " -40.584220 | \n",
+ " 0.075295 | \n",
+ " -12.401695 | \n",
+ " -5.753449 | \n",
+ "
\n",
+ " \n",
+ " | corr_flow_ret_1m | \n",
+ " -0.021390 | \n",
+ " 0.030890 | \n",
+ " 0.038466 | \n",
+ " 0.137629 | \n",
+ "
\n",
+ " \n",
+ " | buy_after_good_perf_share | \n",
+ " 0.099237 | \n",
+ " 0.089815 | \n",
+ " 0.131206 | \n",
+ " 0.030287 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "cluster 0 1 2 \\\n",
+ "fund_weight_mean 0.002533 0.025296 0.107030 \n",
+ "fund_weight_last 0.000000 0.016294 0.000065 \n",
+ "fund_aum_mean 862.764360 4892.213434 17080.303897 \n",
+ "fund_aum_last 0.000000 1587.012888 1.060556 \n",
+ "held_month_share 0.757053 0.765711 0.957997 \n",
+ "active_month_share 0.140641 0.238654 0.392454 \n",
+ "entry_count 1.500000 1.526829 1.222222 \n",
+ "exit_count 1.500000 0.907317 1.111111 \n",
+ "turnover_mean 2.568131 1.852132 4.854434 \n",
+ "turnover_6m_mean 40.589203 0.449401 12.469088 \n",
+ "flow_to_aum_mean -2.567005 -1.111582 -2.290458 \n",
+ "flow_to_aum_6m_mean -40.584220 0.075295 -12.401695 \n",
+ "corr_flow_ret_1m -0.021390 0.030890 0.038466 \n",
+ "buy_after_good_perf_share 0.099237 0.089815 0.131206 \n",
+ "\n",
+ "cluster 3 \n",
+ "fund_weight_mean 0.411546 \n",
+ "fund_weight_last 0.000000 \n",
+ "fund_aum_mean 198436.767999 \n",
+ "fund_aum_last 0.000000 \n",
+ "held_month_share 0.546661 \n",
+ "active_month_share 0.535638 \n",
+ "entry_count 2.500000 \n",
+ "exit_count 2.500000 \n",
+ "turnover_mean 1282.868308 \n",
+ "turnover_6m_mean 44.754050 \n",
+ "flow_to_aum_mean -586.056551 \n",
+ "flow_to_aum_6m_mean -5.753449 \n",
+ "corr_flow_ret_1m 0.137629 \n",
+ "buy_after_good_perf_share 0.030287 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "#profil moyen des clusters par un fond \n",
+ "\n",
+ "# ============================================================\n",
+ "# 11) Profil moyen des clusters pour un fond\n",
+ "# ============================================================\n",
+ "\n",
+ "display(\n",
+ " best_partitions[example_fund][\"data\"]\n",
+ " .groupby(\"cluster\")[feature_cols]\n",
+ " .mean()\n",
+ " .T\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}